Member of the LearnItFirst.com Video Training Network | LearnSqlServer.com | SQL SSIS Training | SQL Programming Tutorials |
LearnSqlServer.com Forums LearnSqlServer.com
Welcome Guest Search | New Posts | Members | Log In | Register

Get Wasted Space from CHAR/NCHAR Column Usage Options
Scott Whigham
Posted: Tuesday, August 29, 2006 12:07:43 PM


Rank: Super Mod

Joined: 3/20/2006
Posts: 345
Points: 748
Location: Dallas, TX
Code:
CREATE PROC dbo.DBA_GetFixedDatatypeUsageStats (
    @MinimumRowsRequiredToInclude INT = 1000 -- Only work with tables having "n" or greater number of rows
    , @MinimumSizeOfCharColumnToConsider INT = 10 -- Setting this to 10 would say, "Only process columns of CHAR(10) or higher"
)
AS
/*
    Author: Scott Whigham from http://www.LearnSqlServer.com/

    Description: This proc returns information about any columns that use CHAR/NCHAR as their datatypes. The returned info includes
        the amount of actual data used versus the amount of space the data is taking up (returning wasted space) and breaks the rows
        into groupings that show how many rows are efficient or inefficient.

    Misc Notes: I worked with a client one time who had all of their primary keys CHAR(25). We ran a similar script and it showed they had
        over 50GB of wasted space!
   
    Versions: SQL Server 2005
   
    Creation Date: August 29, 2006

    For more scripts like this one, visit http://forums.learnsqlserver.com/codesamples.aspx

    ====================  Algorithm  ===============================
    Step 1: Create temporary tables
                    -- Can't use a table variable here because it goes out of scope when the dynamic SQL is executed
    Step 2: Get all the columns using CHAR/NCHAR with a size > @MinimumSizeOfCharColumnToConsider
                    -- Insert these into #TableInfo
    Step 3: Loop through #TableInfo and calculate details
    Step 4: Strip out the qualification info to make it easier to view results\
    Final Step: Drop temp objects
*/

/**************************************************************************************
    Step 1: Create temporary tables
                    -- Can't use a table variable here because it goes out of scope when the dynamic SQL is executed
**************************************************************************************/
CREATE TABLE #TableInfo (
    LineId INT IDENTITY(1,1) NOT NULL PRIMARY KEY
    , DatabaseName SYSNAME NOT NULL DEFAULT ( DB_NAME () )
    , TableName SYSNAME NOT NULL -- Qualified with [TableName]
    , SchemaName SYSNAME NOT NULL -- Qualified with [SchemaName]
    , ColumnName SYSNAME NOT NULL -- Qualified with ColumnName
    , Datatype SYSNAME NOT NULL
    , Length INT NOT NULL
    , Storage_Bytes INT  NOT NULL
    , ColumnOrder INT NOT NULL
    , NumberOfRows INT NOT NULL -- not exact since we use sys.sysindexes.rowcnt but it sure beats looping through every table and running COUNT(*)!
    , HasBeenProcessed BIT DEFAULT (0) NOT NULL
)

CREATE TABLE #TableDatatypeUsage (
    LineId INT NOT NULL -- , CONSTRAINT FK FOREIGN KEY (LineId) REFERENCES #TableInfo (LineId) ON DELETE CASCADE -- not allowed on temp tables)
    , RowsWith_GoodSpaceUtilization INT
    , RowsWith_MediocreUtilization INT
    , RowsWith_InefficientUtilization INT
    , RowsWith_VeryInefficientUtilization INT    
)

SET NOCOUNT ON

DECLARE @SqlString NVARCHAR(2000)
    , @Table SYSNAME
    , @Schema SYSNAME
    , @Column SYSNAME
    , @Datatype SYSNAME
    , @Length INT
    , @CurrentRow INT    
    , @CurrentRow_String NVARCHAR(128) -- EXEC cannot use CAST/CONVERT to convert INT to VARCHAR in a dynamic string

/**************************************************************************************
    Step 2: Get all the columns using CHAR/NCHAR with a size > @MinimumSizeOfCharColumnToConsider
                    -- Insert these into #TableInfo
**************************************************************************************/
INSERT #TableInfo (TableName, SchemaName, ColumnName, Datatype, Length, Storage_Bytes, ColumnOrder, NumberOfRows)
    SELECT '[' + TABLE_NAME + ']', '[' + TABLE_SCHEMA + ']', '[' + COLUMN_NAME + ']', DATA_TYPE, CHARACTER_MAXIMUM_LENGTH
        , 0 AS Storage_Bytes
        , ORDINAL_POSITION
        , ( SELECT rowcnt FROM sys.sysindexes i JOIN sys.tables t
                ON i.id = t.object_id
            WHERE i.indid=1 AND t.object_id = OBJECT_ID(TABLE_NAME)
        ) AS NumberOfRows
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE DATA_TYPE IN ('char', 'nchar')
        AND CHARACTER_MAXIMUM_LENGTH > @MinimumSizeOfCharColumnToConsider
        AND OBJECTPROPERTY(OBJECT_ID(TABLE_NAME), 'IsTable') = 1
        AND OBJECT_ID(TABLE_NAME) IN (
            SELECT t.object_id
            FROM sys.sysindexes i JOIN sys.tables t
                ON i.id = t.object_id
            WHERE i.indid=1 AND rowcnt > @MinimumRowsRequiredToInclude
        )
    ORDER BY TABLE_NAME, ORDINAL_POSITION

/**************************************************************************************
    Step 3: Loop through #TableInfo and calculate details
**************************************************************************************/
SELECT @CurrentRow = MIN(LineId), @CurrentRow_String = CAST(@CurrentRow AS VARCHAR) FROM #TableInfo WHERE HasBeenProcessed = 0
WHILE @CurrentRow IS NOT NULL
    BEGIN
        SELECT @Schema = SchemaName, @Table = TableName, @Column = ColumnName
        FROM #TableInfo
        WHERE LineId = @CurrentRow

        SET @SqlString =  '
            UPDATE #TableInfo
                    SET Storage_Bytes = ISNULL ( (SELECT SUM(LEN(' + @Column + ')) FROM ' + @Schema + '.' + @Table + ') , 0 )
            FROM #TableInfo
            WHERE LineId = ' + @CurrentRow_String
        EXEC (@SqlString)
    
         INSERT #TableDatatypeUsage
            ( LineId, RowsWith_GoodSpaceUtilization, RowsWith_MediocreUtilization, RowsWith_InefficientUtilization, RowsWith_VeryInefficientUtilization)
         EXEC ( '
         SELECT ' + @CurrentRow_String + ' AS LineId
            , SUM ( CASE WHEN PercentageFull BETWEEN 75 AND 100 THEN 1 ELSE 0 END) AS RowsWith_GoodSpaceUtilization
             , SUM ( CASE WHEN PercentageFull BETWEEN 50 AND 74 THEN 1 ELSE 0 END) AS RowsWith_MediocreUtilization
             , SUM ( CASE WHEN PercentageFull BETWEEN 25 AND 49 THEN 1 ELSE 0 END) AS RowsWith_InefficientUtilization
             , SUM ( CASE WHEN PercentageFull < 25 THEN 1 ELSE 0 END) AS RowsWith_VeryInefficientUtilization
         FROM (
                 SELECT ''' + @Schema + ''' AS SchemaName, ''' + @Table + ''' AS TableName
                    , ROUND(CAST(LEN(' + @Column + ') AS NUMERIC) / CAST(DATALENGTH(' + @Column + ') AS NUMERIC), 2) * 100
                       AS PercentageFull
                 FROM ' + @Schema + '.' + @Table + '
             ) AS x
        GROUP BY SchemaName, TableName')

        UPDATE #TableInfo SET HasBeenProcessed = 1 WHERE LineId = @CurrentRow
        SELECT @CurrentRow = MIN(LineId), @CurrentRow_String = CAST(@CurrentRow AS VARCHAR) FROM #TableInfo WHERE HasBeenProcessed = 0
    END

/**************************************************************************************
    Step 4: Strip out the qualification info to make it easier to view results
**************************************************************************************/
SELECT
    REPLACE(REPLACE(ht.SchemaName, '[', ''), ']', '') AS SchemaName
    , REPLACE(REPLACE(ht.TableName, '[', ''), ']', '') AS TableName
    , REPLACE(REPLACE(ht.ColumnName, '[', ''), ']', '') AS ColumnName
    , ht.NumberOfRows AS NumberOfRows
    , ht.DataType
    , ht.Length
    , CAST ( ht.NumberOfRows * ht.Length / 1024.0 AS DECIMAL(10,2) ) AS MaxPossibleSizeOfColumn_KB
    , CAST ( ht.Storage_Bytes / 1024.0 AS DECIMAL(10,2) ) AS ActualSizeOfDataInColumn_KB
    -- AmountOfWastedSpace = MaxPossibleSizeOfColumn_KB - ActualSizeOfDataInColumn_KB
    , CAST ( ht.NumberOfRows * ht.Length / 1024.0 AS DECIMAL(10,2) )
        - CAST ( ht.Storage_Bytes / 1024.0 AS DECIMAL(10,2) ) AS AmountOfWastedSpace_KB
    , CAST (
            CASE
                WHEN (ht.Storage_Bytes / 1024.0 ) > 0 THEN
                    CASE
                        WHEN 100 - ROUND(ht.Storage_Bytes / (NULLIF(ht.NumberOfRows, 0) * ht.Length) * 100, 2) <= 0 THEN 0
                        ELSE 100 - ROUND(CAST(ht.Storage_Bytes AS DECIMAL) / (NULLIF(ht.NumberOfRows, 0) * ht.Length) * 100, 2)
                    END
                ELSE 0
            END AS DECIMAL(10,2)
        ) AS PercentWastedSpace
    , dt.RowsWith_GoodSpaceUtilization
    , dt.RowsWith_MediocreUtilization
    , dt.RowsWith_InefficientUtilization
    , dt.RowsWith_VeryInefficientUtilization
FROM #TableInfo ht JOIN #TableDatatypeUsage dt
    ON ht.LineId = dt.LineId
ORDER BY ht.ColumnName

/**************************************************************************************
    Final Step: Drop temp objects
**************************************************************************************/
DROP TABLE #TableDatatypeUsage
DROP TABLE #TableInfo
GO
Users browsing this topic
Guest


Forum Jump
You cannot post new topics in this forum.
You cannot reply to topics in this forum.
You cannot delete your posts in this forum.
You cannot edit your posts in this forum.
You cannot create polls in this forum.
You cannot vote in polls in this forum.
     
Don't Forget!
LearnItFirst.com
Don't Forget!
LearnWindows2003.com
 
Home | About Us | Support | Contact Us | Privacy | Site Map | Blogs Blogs Refer a Friend and Get a Free Subscription!
© Copyright 2004-2007 LearnItFirst.com LLC. All rights reserved. All trademarks remain the property of their respective owners.
This site is not affiliated in any way with the Microsoft Corporation.