Computed and Persisted Computed Columns

6 03 2007

This is a simple script to demonstrate how computed columns work in SQL Server. Also demonstrated are the effects of updates/deletes on them. A persisted computed column is also added to demonstrate that indexes can be built on them. It is suggested that you run each segment of code separately to make it easier to understand the overall process.

-- test on adding a computed column to a table to see if existing procs still work

-- setup environment
set nocount on
go

-- create table
if exists (select name from sysobjects where name = 'tbTest') drop table tbTest
go
create table tbTest (TestID int identity, col1 varchar(10), col2 varchar(10), col3 varchar(10))
go

-- create insert procedure
if exists (select name from sysobjects where name = 'spTest_INS') drop procedure spTest_INS
go
create procedure spTest_INS
@col1 varchar(10),
@col2 varchar(10),
@col3 varchar(10)
as
begin
insert into tbTest (col1, col2, col3) values (@col1, @col2, @col3)
end
go

-- create update procedure
if exists (select name from sysobjects where name = 'spTest_UPD') drop procedure spTest_UPD
go
create procedure spTest_UPD -- yes I'm aware this doesn't allow updating columns to null
@TestID int,
@col1 varchar(10) = null,
@col2 varchar(10) = null,
@col3 varchar(10) = null
as
begin
update tbTest set col1 = coalesce(@col1, col1), col2 = coalesce(@col2, col2), col3 = coalesce(@col3, col3) where TestID = @TestID
end
go

-- create delete procedure
if exists (select name from sysobjects where name = 'spTest_DEL') drop procedure spTest_DEL
go
create procedure spTest_DEL
@TestID int
as
begin
delete from tbTest where TestID = @TestID
end
go

-- create view
if exists (select name from sysobjects where name = 'vwTest') drop view vwTest
go
create view vwTest as select * from tbTest
go

-- call insert proc
declare @x int
select @x = 1
while @x





Compare Data in Two Tables

6 03 2007

This script will compare the data in two different tables and return a recordset indicating the differences.

set nocount on
set concat_null_yields_null off
declare    @Table1            varchar(100),
@Table2            varchar(200),
@x                int,
@v                char(22),        -- Junk value used if data is null so coalesce(Table1.SomeNumericField, @Garbage) <> coalesce(Table2.SomeNumericField, @Garbage) works correctly
@Key            varchar(500),    -- ignored if @CustomFrom is used
@Ignore            varchar(500),    -- columns to ignore - mostly used for UpdateDates, etc.
@CustomFrom        varchar(500),    -- only use if you need to join to additional tables to match Table1 and Table2 together
@OutputDetails    bit,
@OutputSummary    bit

select    @Table1            = 'Table1',
@Table2            = 'Table2',
@v                = '-9876543210.0123456789',    -- must be a valid integer, and one not found within your data!
@Key            = 'Field1 = Field3, Field3 = Field9',
@Ignore            = 'Table1ID,Table2ID', --,SourceRecordID,UpdateDate,BatchID,CIC,CDRStatsUpdateFlag,AddedToEMIInd,StaffID',
@CustomFrom        = 'from Table1 t1
inner join Table3 t3 on t1.Table3ID = t3.Table3ID
inner join Table2 as t2 on t2.Table3ID = t3.Table3ID', -- optional parm, overrides @Key, and used when a 3rd table is required for joining
@OutputDetails    = 0,
@OutputSummary    = 1

if not exists (select name from sys.tables where name = @Table1)
begin
select @Table1 + ' does not exist, are you in the right database?'
return
end

if not exists (select name from sys.tables where name = @Table2)
begin
select @Table2 + ' does not exist, are you in the right database?'
return
end

if len(ltrim(@CustomFrom)) = 0
select @CustomFrom = null

select @Key = 't1.' + ltrim(@Key)
select @Key = replace(@Key, ',', ', t1.')
select @Key = replace(@Key, 't1. ', 't1.')
select @Key = replace(@Key, '=', '= t2.')
select @Key = replace(@Key, 't2. ', 't2.')
select @Key = replace(@Key, ', ', ' and ')

select @Ignore = ',' + replace(@Ignore, ' ', '') + ','

declare    @SQL varchar(max)

select    @SQL = @SQL + case
when c.system_type_id in (48,52,56,59,60,62,104,106,108,122,127)
then ' case when coalesce(t1.' + c.name + ', ' + @v + ') <> coalesce(t2.' + c.name + ', ' + @v + ') then ''' + c.name + ''' + '', '' else '''' end '
when c.system_type_id in (58,61)
then ' case when convert(datetime, coalesce(t1.' + c.name + ', ' + @v + ')) <> convert(datetime, coalesce(t2.' + c.name + ', ' + @v + ')) then ''' + c.name + ''' + '', '' else '''' end '
else ' case when coalesce(t1.' + c.name + ', ''' + @v + ''') <> coalesce(t2.' + c.name + ', ''' + @v + ''') then ''' + c.name + ''' +  '', '' else '''' end '
end + '+'
from    sys.tables t
inner join sys.columns c on t.object_id = c.object_id
where    t.name in (@Table1, @Table2)
and        charindex(',' + c.name + ',', @Ignore) = 0
group by
c.name,
c.system_type_id,
c.column_id
having    count(*) = 2

select    @SQL = left(@SQL, len(@SQL) - 2)

select    @SQL = 'select ' + @SQL
select    @SQL = @SQL + ' as Cols_That_Dont_Match, t1.*, t2.* '
select    @SQL = @SQL + ' ' + coalesce(@CustomFrom, ' from ' + @Table1 + ' t1 inner join ' + @Table2 + ' t2 on ' + @Key) + ' where '

select    @SQL = @SQL +    case
when c.system_type_id in (48,52,56,59,60,62,104,106,108,122,127)
then    'coalesce(t1.' + c.name + ', 0) <> coalesce(t2.' + c.name + ', 0) or '
when c.system_type_id in (58,61)
then    'Convert(datetime,Coalesce(t1.' + c.name + ', 0)) <> convert(datetime, coalesce(t2.' + c.name + ', 0)) or '
else        'coalesce(t1.' + c.name + ', ''0'') <> coalesce(t2.' + c.name + ', ''0'') or '
end
from    sys.tables t
inner join sys.columns c on t.object_id = c.object_id
where    t.name in (@Table1, @Table2)
and        charindex(',' + c.name + ',', @Ignore) = 0
group by
c.name,
c.system_type_id
having    count(*) = 2

select @SQL = left(@SQL, len(@SQL) - 3)
if @OutputDetails = 1
begin
select @SQL as 'SQL Statement being executed'
exec(@SQL)
end

if @OutputSummary = 1
begin
-- produce distinct list of problematic columns
select    @SQL = replace(@SQL, ', t1.*, t2.*', '')
select    @SQL = replace(@SQL, 'select ', 'select distinct ')

if not object_id('tempdb..#tbBadCols') is null drop table #tbBadCols
create table #tbBadCols (BadColID int identity, Cols_That_Dont_Match varchar(1000))
insert into #tbBadCols (Cols_That_Dont_Match)
exec(@SQL)

select    @SQL = ''
select    @SQL = @SQL + Cols_That_Dont_Match
from    #tbBadCols

if not object_id('tempdb..#Numbers') is null drop table #Numbers
create table #Numbers (Number int)
select    @x = 1
while @x <= 500
begin
insert into #Numbers (Number) values (@x)
select @x = @x + 1
end

SELECT distinct substring(',' + @SQL + ',', Number + 1,
charindex(',', ',' + @SQL + ',', Number + 1) - Number - 1)
AS List_Of_Distinct_Cols_That_Dont_Match
FROM   #Numbers
WHERE  Number <= len(',' + @SQL + ',') - 1
AND  substring(',' + @SQL + ',', Number, 1) = ','
order by
List_Of_Distinct_Cols_That_Dont_Match
end




Delete Duplicate Rows

6 03 2007

This script will delete duplicate rows.

set nocount on
create table #tbFruit (Fruit varchar(20))
insert into #tbFruit (Fruit) values ('apple')
insert into #tbFruit (Fruit) values ('apple')
insert into #tbFruit (Fruit) values ('apple')
insert into #tbFruit (Fruit) values ('apple')
insert into #tbFruit (Fruit) values ('orange')
insert into #tbFruit (Fruit) values ('orange')
insert into #tbFruit (Fruit) values ('orange')

create table #tbFruitsWithDupes (DupeID int identity, Fruit varchar(20), DupeCount int)

insert into #tbFruitsWithDupes (Fruit, DupeCount)
select Fruit, count(*) DupeCount from #tbFruit
group by Fruit having count(*) > 1

declare @x int, @max int, @Fruit varchar(20), @DupeCount int

select @x = 1, @max    = max(DupeID) from #tbFruitsWithDupes

while (@x <= @max)
begin
select @Fruit = Fruit, @DupeCount = DupeCount - 1
from #tbFruitsWithDupes Where DupeID = @x

set rowcount @DupeCount

delete from #tbFruit where Fruit = @Fruit

select @x = @x + 1
end

select * from #tbFruit

drop table #tbFruit
drop table #tbFruitsWithDupes




List all Indexes of All Tables in the Current Database

6 03 2007

The indexing schema plays a fundamental role in performance tuning, and tools such as the Index Tuning Wizard that can suggest efficient schemas on the grounds of a database usage. Once the indexing schema has been completed, you should document it in order to know which tables have primary keys, which have clustered keys, which and how many are the non clustered indexes of a certain table. You can get this metadata through SQL Server Information Schema’s views, that are based on the following system tables: sysobjects, syscolumns, sysreferences, spt_values, and sysindexes.

Unfortunately these tables don’t provide all the necessary values, but the Books Online documentation lets to derive the meaning of all the fields in these system tables. The following sp_help_db_indexes stored procedure, placed inside the master database, lets to view all the indexes of each table for the current database, with the respective name, type and fields that make it up:

Author : Giuseppe Dimauro

declare @empty varchar(1)
select @empty = ''

-- 35 is the length of the name field of the master.dbo.spt_values table
declare    @IgnoreDuplicateKeys varchar(35),
@Unique varchar(35),
@IgnoreDuplicateRows varchar(35),
@Clustered varchar(35),
@Hypotethical varchar(35),
@Statistics varchar(35),
@PrimaryKey varchar(35),
@UniqueKey varchar(35),
@AutoCreate varchar(35),
@StatsNoRecompute varchar(35)

select    @IgnoreDuplicateKeys = name
from    master.dbo.spt_values
where    type = 'I' and number = 1 --ignore duplicate keys

select    @Unique = name
from    master.dbo.spt_values
where    type = 'I' and number = 2 --unique

select    @IgnoreDuplicateRows = name
from    master.dbo.spt_values
where    type = 'I' and number = 4 --ignore duplicate rows

select    @Clustered = name
from    master.dbo.spt_values
where    type = 'I' and number = 16 --clustered

select    @Hypotethical = name
from    master.dbo.spt_values
where    type = 'I' and number = 32 --hypotethical

select    @Statistics = name
from    master.dbo.spt_values
where    type = 'I' and number = 64 --statistics

select    @PrimaryKey = name
from    master.dbo.spt_values
where    type = 'I' and number = 2048 --primary key

select    @UniqueKey = name
from    master.dbo.spt_values
where    type = 'I' and number = 4096 --unique key

select    @AutoCreate = name
from    master.dbo.spt_values
where    type = 'I' and number = 8388608 --auto create

select    @StatsNoRecompute = name
from    master.dbo.spt_values
where    type = 'I' and number = 16777216 --stats no recompute

select    o.name,
i.name,
'index description' = convert(varchar(210), --bits 16 off, 1, 2, 16777216 on
case
when (i.status & 16)<>0 then @Clustered
else 'non'+@Clustered
end +
case
when (i.status & 1) <> 0 then ', '+ @IgnoreDuplicateKeys
else @empty
end +
case
when (i.status & 2) <> 0 then ', ' + @Unique
else @empty
end +
case
when (i.status & 4) <> 0 then ', ' + @IgnoreDuplicateRows
else @empty
end +
case
when (i.status & 64) <> 0 then ', ' + @Statistics
else case
when (i.status & 32) <> 0 then ', ' + @Hypotethical
else @empty
end
end +
case
when (i.status & 2048) <> 0 then ', ' + @PrimaryKey
else @empty
end +
case
when (i.status & 4096) <> 0 then ', ' + @UniqueKey
else @empty
end +
case
when (i.status & 8388608) <> 0 then ', ' + @AutoCreate
else @empty
end +
case
when (i.status & 16777216) <> 0 then ', ' + @StatsNoRecompute
else @empty
end),
'index column 1' = index_col(o.name,indid, 1),
'index column 2' = index_col(o.name,indid, 2),
'index column 3' = index_col(o.name,indid, 3),
'index column 4' = index_col(o.name,indid, 4),
'index column 5' = index_col(o.name,indid, 5),
'index column 6' = index_col(o.name,indid, 6),
'index column 7' = index_col(o.name,indid, 7),
'index column 9' = index_col(o.name,indid, 9),
'index column 10' = index_col(o.name,indid, 10),
'index column 11' = index_col(o.name,indid, 11),
'index column 12' = index_col(o.name,indid, 12),
'index column 13' = index_col(o.name,indid, 13),
'index column 14' = index_col(o.name,indid, 14),
'index column 15' = index_col(o.name,indid, 15),
'index column 16' = index_col(o.name,indid, 16),
'index column 17' = index_col(o.name,indid, 17),
'index column 18' = index_col(o.name,indid, 18),
'index column 19' = index_col(o.name,indid, 19),
'index column 20' = index_col(o.name,indid, 20)
from    sysindexes i, sysobjects o
where    i.id = o.id
and        indid > 0
and        indid < 255 --all the clustered (=1), non clusterd (>1 and <251), and text or image (=255)
and        o.type = 'U' --user table
--ignore the indexes for the autostat
and        (i.status & 64) = 0 --index with duplicates
and        (i.status & 8388608) = 0 --auto created index
and        (i.status & 16777216)= 0 --stats no recompute
order by
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20




Quickly Create a Table of Numbers

6 03 2007

Easily create a table with a seres of integers for joining to. This is often useful when needing a mutex table.

set nocount on
create table Numbers (ID int identity(1,1))
insert Numbers default values
while scope_identity() < 1000 insert Numbers default values




Simple SQL CSV to Table

6 03 2007

Sample Call
select @Statecode = ‘IL, IN,KY’
select ColumnData from dbo.fn_CSVToTable(@Statecode)

Create Function dbo.fn_CSVToTable (@CSVList Varchar(3000))
Returns @Table Table (ColumnData Varchar(50))
As
Begin
If right(@CSVList, 1) <> ','
Select @CSVList = @CSVList + ','

Declare @Pos Smallint,
@OldPos Smallint
Select @Pos = 1,
@OldPos = 1

While @Pos < Len(@CSVList)
Begin
Select @Pos = CharIndex(',', @CSVList, @OldPos)
Insert into @Table
Select LTrim(RTrim(SubString(@CSVList, @OldPos, @Pos - @OldPos))) Col001
Select @OldPos = @Pos + 1
End

Return
End




SQL Server: Convert CSV to Columns (table) – Method 1

5 03 2007

Sometimes, you’re blessed with the joy of delimited data within a column. I happened to be one of those lucky people recently, and put together a stored procedure that would produce a table with the data already columnized. Two versions of code were produced to accomplish this task. This is the first method which is more simple, but also slightly slower than Method 2. I’m posting it here to help others, and to hopefully get some feedback on how it can be improved. Method 2 will be posted as well. The procedure accepts the following parameters:

  • Delimiter – how your data is delimited (comma, etc.)
  • SourceTable – table containing your delimited data
  • DataColumn – column containing your delimited data
  • OutputTable – name of the output table to create
  • MaxCols – maximum # of columns any particular row will contain
  • ColPrefix – prefix for columns in OutputTable (‘col_’ for example)
  • ColType – type of each column in OutputTable (‘varchar(500) Null’ for example)
  • IntraColDelimiter – field delimiter for fields that can contain the overall delimiter
  • ReplaceChar – unique character known to never exist in your delimited data
  • FillerColValue – Value to fill columns with when a row is short ‘x’ # of columns

The only tricky parameter is IntraColDelimiter. In our case, the data was comma delimited, but it was also possible for us to get commas within a field. If a comma was going to be found within a field, it would be surrounded by double quotes. So, a 6 “column” delimited record in our situation might look like:

abc,123,def,456,”abc,123,,”,ghi,789

In that example, ‘abc,123,,’ is all one field of data and shouldn’t be broken into separate columns during parsing. Below is the script. Each section is preceeded by a single comment indicating the primary purpose of that section. The goals were to accurately parse the data, maintain performance as much as possible, and ensure the code was maintainable.

create procedure dbo.spParseDelimitedData
@Delimiter			char(1)			= ',',										-- column delimiter found in source data
@SourceTable		varchar(50)		= 'tbSourceRecord',							-- source table containing delimited data
@DataColumn			varchar(50)		= 'SourceRecord',							-- column containing the delimited data
@OutputTable		varchar(50)		= 'tbdump_new',								-- table this script will produce with data in columns
@MaxCols			int				= 10,										-- maximum number of columns found in @DataColumn
@ColPrefix			varchar(10)		= 'col_',									-- column prefix for @OutputTable
@ColType			varchar(20)		= ' varchar(1000) null default null ',		-- column type for each @OutputTable column
@IntraColDelimiter	char(1)			= '"',										-- delimiter for fields that actually contain the delimiter
-- for the case of:  123,432,661,"141,414",abd,afae,afe
@ReplaceChar		char(1)			= '^',										-- unique character known never to be in @DataColumn
@FillerColValue		varchar(20)		= ',Null'									-- value to fill columns with when @DataColumn is short columns
as
/*
* Convert CSV data to columns *

This script reads a delimited (CSV, etc.) column from a table and outputs a new table with the data in columns.
It handles situations where each delimited row may have different numbers of columns.  With the input parameters,
you are able to control the type and value of the columns in the otuput table.
*/
begin
-- setup environment
set nocount on
set concat_null_yields_null off

-- setup vars
declare	@x				int,
@SQL			nvarchar(max),
@Row			int,
@MaxRow			int,
@RowsAffected	int,
@Quit			bit

select	@ReplaceChar	= '^',
@Quit			= 0

-- create a copy of the source table, we need to modify records along the way
-- ensure the table doesn't already exist
select @SQL = 'if exists (select name from sysobjects where name = ' + char(39) + @OutputTable + char(39) + ') drop table ' + @OutputTable
exec (@SQL)

-- do the insert
select	@SQL = 'select *, 0 as startpos into tmpdata from ' + @SourceTable
exec (@SQL)

-- create destination table
select	@x = 2,
@SQL = 'create table ' + @OutputTable + '(id int identity, ' + @ColPrefix + '1' + @ColType + ','
while	@x <= @MaxCols
begin
select	@SQL = @SQL + @ColPrefix + convert(varchar(5), @x) + @ColType + ','
select	@x = @x + 1
end
select	@SQL = left(@SQL, len(@SQL) - 1) + ')'
exec (@SQL)

-- change delimiter chars into something else when they are inside a "field"
select @RowsAffected = 1
while @RowsAffected > 0
begin
select @SQL = '
update	tmpdata
set		' + @DataColumn + ' =
substring(' + @DataColumn + ', 1, charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos) - 1)
+ replace(substring(' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos), charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos) + 1) - charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos) + 1), ''' + @Delimiter + ''', ''' + @ReplaceChar + ''')
+ substring(' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos) + 1) + 1, len(' + @DataColumn + ') - charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos) + 1)),
startpos = charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos + 1) + 1) + 1
from	tmpdata
where	startpos < charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos + 1) + 1) + 1
and		charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ') > 0'
exec sp_executesql @SQL
select @RowsAffected = @@Rowcount
end

-- populate @SQL with insert statements
create table #ExecMe (ExecID int identity, SQL varchar(max))

select @SQL = 'select ''insert into ' + @OutputTable +  ' values ('' + char(39) + replace(replace(' + @DataColumn + ', '','', char(39) + '','' + char(39)), ' + char(39) + @replacechar + char(39) + ', ' + char(39) + @delimiter + char(39) + ') + char(39) + replicate('',Null'', ' + convert(varchar(5), @MaxCols) + ' - (len(' + @DataColumn + ') - len(replace(' + @DataColumn + ', '','', ''''))) - 1) + '')'' from tmpdata d'

insert into #ExecMe (SQL)
exec (@SQL)

declare curInserts2 cursor for
select SQL from #ExecMe

open curInserts2
fetch next from curInserts2 into @SQL
while @@fetch_status = 0
begin
exec sp_executesql @SQL
fetch next from curInserts2 into @SQL
end

-- cleanup
drop table tmpdata
close curInserts2
deallocate curInserts2

end