CREATE PROC dbo.DBA_GetFixedDatatypeUsageStats (
@MinimumRowsRequiredToInclude INT = 1000 -- Only work with tables having "n" or greater number of rows
, @MinimumSizeOfCharColumnToConsider INT = 10 -- Setting this to 10 would say, "Only process columns of CHAR(10) or higher"
)
AS
/*
Author: Scott Whigham from
http://www.LearnSqlServer.com/ Description: This proc returns information about any columns that use CHAR/NCHAR as their datatypes. The returned info includes
the amount of actual data used versus the amount of space the data is taking up (returning wasted space) and breaks the rows
into groupings that show how many rows are efficient or inefficient.
Misc Notes: I worked with a client one time who had all of their primary keys CHAR(25). We ran a similar script and it showed they had
over 50GB of wasted space!
Versions: SQL Server 2005
Creation Date: August 29, 2006
For more scripts like this one, visit
http://forums.learnsqlserver.com/codesamples.aspx ==================== Algorithm ===============================
Step 1: Create temporary tables
-- Can't use a table variable here because it goes out of scope when the dynamic SQL is executed
Step 2: Get all the columns using CHAR/NCHAR with a size > @MinimumSizeOfCharColumnToConsider
-- Insert these into #TableInfo
Step 3: Loop through #TableInfo and calculate details
Step 4: Strip out the qualification info to make it easier to view results\
Final Step: Drop temp objects
*/
/**************************************************************************************
Step 1: Create temporary tables
-- Can't use a table variable here because it goes out of scope when the dynamic SQL is executed
**************************************************************************************/
CREATE TABLE #TableInfo (
LineId INT IDENTITY(1,1) NOT NULL PRIMARY KEY
, DatabaseName SYSNAME NOT NULL DEFAULT ( DB_NAME () )
, TableName SYSNAME NOT NULL -- Qualified with [TableName]
, SchemaName SYSNAME NOT NULL -- Qualified with [SchemaName]
, ColumnName SYSNAME NOT NULL -- Qualified with ColumnName
, Datatype SYSNAME NOT NULL
, Length INT NOT NULL
, Storage_Bytes INT NOT NULL
, ColumnOrder INT NOT NULL
, NumberOfRows INT NOT NULL -- not exact since we use sys.sysindexes.rowcnt but it sure beats looping through every table and running COUNT(*)!
, HasBeenProcessed BIT DEFAULT (0) NOT NULL
)
CREATE TABLE #TableDatatypeUsage (
LineId INT NOT NULL -- , CONSTRAINT FK FOREIGN KEY (LineId) REFERENCES #TableInfo (LineId) ON DELETE CASCADE -- not allowed on temp tables)
, RowsWith_GoodSpaceUtilization INT
, RowsWith_MediocreUtilization INT
, RowsWith_InefficientUtilization INT
, RowsWith_VeryInefficientUtilization INT
)
SET NOCOUNT ON
DECLARE @SqlString NVARCHAR(2000)
, @Table SYSNAME
, @Schema SYSNAME
, @Column SYSNAME
, @Datatype SYSNAME
, @Length INT
, @CurrentRow INT
, @CurrentRow_String NVARCHAR(128) -- EXEC cannot use CAST/CONVERT to convert INT to VARCHAR in a dynamic string
/**************************************************************************************
Step 2: Get all the columns using CHAR/NCHAR with a size > @MinimumSizeOfCharColumnToConsider
-- Insert these into #TableInfo
**************************************************************************************/
INSERT #TableInfo (TableName, SchemaName, ColumnName, Datatype, Length, Storage_Bytes, ColumnOrder, NumberOfRows)
SELECT '[' + TABLE_NAME + ']', '[' + TABLE_SCHEMA + ']', '[' + COLUMN_NAME + ']', DATA_TYPE, CHARACTER_MAXIMUM_LENGTH
, 0 AS Storage_Bytes
, ORDINAL_POSITION
, ( SELECT rowcnt FROM sys.sysindexes i JOIN sys.tables t
ON i.id = t.object_id
WHERE i.indid=1 AND t.object_id = OBJECT_ID(TABLE_NAME)
) AS NumberOfRows
FROM INFORMATION_SCHEMA.COLUMNS
WHERE DATA_TYPE IN ('char', 'nchar')
AND CHARACTER_MAXIMUM_LENGTH > @MinimumSizeOfCharColumnToConsider
AND OBJECTPROPERTY(OBJECT_ID(TABLE_NAME), 'IsTable') = 1
AND OBJECT_ID(TABLE_NAME) IN (
SELECT t.object_id
FROM sys.sysindexes i JOIN sys.tables t
ON i.id = t.object_id
WHERE i.indid=1 AND rowcnt > @MinimumRowsRequiredToInclude
)
ORDER BY TABLE_NAME, ORDINAL_POSITION
/**************************************************************************************
Step 3: Loop through #TableInfo and calculate details
**************************************************************************************/
SELECT @CurrentRow = MIN(LineId), @CurrentRow_String = CAST(@CurrentRow AS VARCHAR) FROM #TableInfo WHERE HasBeenProcessed = 0
WHILE @CurrentRow IS NOT NULL
BEGIN
SELECT @Schema = SchemaName, @Table = TableName, @Column = ColumnName
FROM #TableInfo
WHERE LineId = @CurrentRow
SET @SqlString = '
UPDATE #TableInfo
SET Storage_Bytes = ISNULL ( (SELECT SUM(LEN(' + @Column + ')) FROM ' + @Schema + '.' + @Table + ') , 0 )
FROM #TableInfo
WHERE LineId = ' + @CurrentRow_String
EXEC (@SqlString)
INSERT #TableDatatypeUsage
( LineId, RowsWith_GoodSpaceUtilization, RowsWith_MediocreUtilization, RowsWith_InefficientUtilization, RowsWith_VeryInefficientUtilization)
EXEC ( '
SELECT ' + @CurrentRow_String + ' AS LineId
, SUM ( CASE WHEN PercentageFull BETWEEN 75 AND 100 THEN 1 ELSE 0 END) AS RowsWith_GoodSpaceUtilization
, SUM ( CASE WHEN PercentageFull BETWEEN 50 AND 74 THEN 1 ELSE 0 END) AS RowsWith_MediocreUtilization
, SUM ( CASE WHEN PercentageFull BETWEEN 25 AND 49 THEN 1 ELSE 0 END) AS RowsWith_InefficientUtilization
, SUM ( CASE WHEN PercentageFull < 25 THEN 1 ELSE 0 END) AS RowsWith_VeryInefficientUtilization
FROM (
SELECT ''' + @Schema + ''' AS SchemaName, ''' + @Table + ''' AS TableName
, ROUND(CAST(LEN(' + @Column + ') AS NUMERIC) / CAST(DATALENGTH(' + @Column + ') AS NUMERIC), 2) * 100
AS PercentageFull
FROM ' + @Schema + '.' + @Table + '
) AS x
GROUP BY SchemaName, TableName')
UPDATE #TableInfo SET HasBeenProcessed = 1 WHERE LineId = @CurrentRow
SELECT @CurrentRow = MIN(LineId), @CurrentRow_String = CAST(@CurrentRow AS VARCHAR) FROM #TableInfo WHERE HasBeenProcessed = 0
END
/**************************************************************************************
Step 4: Strip out the qualification info to make it easier to view results
**************************************************************************************/
SELECT
REPLACE(REPLACE(ht.SchemaName, '[', ''), ']', '') AS SchemaName
, REPLACE(REPLACE(ht.TableName, '[', ''), ']', '') AS TableName
, REPLACE(REPLACE(ht.ColumnName, '[', ''), ']', '') AS ColumnName
, ht.NumberOfRows AS NumberOfRows
, ht.DataType
, ht.Length
, CAST ( ht.NumberOfRows * ht.Length / 1024.0 AS DECIMAL(10,2) ) AS MaxPossibleSizeOfColumn_KB
, CAST ( ht.Storage_Bytes / 1024.0 AS DECIMAL(10,2) ) AS ActualSizeOfDataInColumn_KB
-- AmountOfWastedSpace = MaxPossibleSizeOfColumn_KB - ActualSizeOfDataInColumn_KB
, CAST ( ht.NumberOfRows * ht.Length / 1024.0 AS DECIMAL(10,2) )
- CAST ( ht.Storage_Bytes / 1024.0 AS DECIMAL(10,2) ) AS AmountOfWastedSpace_KB
, CAST (
CASE
WHEN (ht.Storage_Bytes / 1024.0 ) > 0 THEN
CASE
WHEN 100 - ROUND(ht.Storage_Bytes / (NULLIF(ht.NumberOfRows, 0) * ht.Length) * 100, 2) <= 0 THEN 0
ELSE 100 - ROUND(CAST(ht.Storage_Bytes AS DECIMAL) / (NULLIF(ht.NumberOfRows, 0) * ht.Length) * 100, 2)
END
ELSE 0
END AS DECIMAL(10,2)
) AS PercentWastedSpace
, dt.RowsWith_GoodSpaceUtilization
, dt.RowsWith_MediocreUtilization
, dt.RowsWith_InefficientUtilization
, dt.RowsWith_VeryInefficientUtilization
FROM #TableInfo ht JOIN #TableDatatypeUsage dt
ON ht.LineId = dt.LineId
ORDER BY ht.ColumnName
/**************************************************************************************
Final Step: Drop temp objects
**************************************************************************************/
DROP TABLE #TableDatatypeUsage
DROP TABLE #TableInfo
GO