This script will compare the data in two different tables and return a recordset indicating the differences.
set nocount on
set concat_null_yields_null off
declare @Table1 varchar(100),
@Table2 varchar(200),
@x int,
@v char(22), -- Junk value used if data is null so coalesce(Table1.SomeNumericField, @Garbage) <> coalesce(Table2.SomeNumericField, @Garbage) works correctly
@Key varchar(500), -- ignored if @CustomFrom is used
@Ignore varchar(500), -- columns to ignore - mostly used for UpdateDates, etc.
@CustomFrom varchar(500), -- only use if you need to join to additional tables to match Table1 and Table2 together
@OutputDetails bit,
@OutputSummary bit
select @Table1 = 'Table1',
@Table2 = 'Table2',
@v = '-9876543210.0123456789', -- must be a valid integer, and one not found within your data!
@Key = 'Field1 = Field3, Field3 = Field9',
@Ignore = 'Table1ID,Table2ID', --,SourceRecordID,UpdateDate,BatchID,CIC,CDRStatsUpdateFlag,AddedToEMIInd,StaffID',
@CustomFrom = 'from Table1 t1
inner join Table3 t3 on t1.Table3ID = t3.Table3ID
inner join Table2 as t2 on t2.Table3ID = t3.Table3ID', -- optional parm, overrides @Key, and used when a 3rd table is required for joining
@OutputDetails = 0,
@OutputSummary = 1
if not exists (select name from sys.tables where name = @Table1)
begin
select @Table1 + ' does not exist, are you in the right database?'
return
end
if not exists (select name from sys.tables where name = @Table2)
begin
select @Table2 + ' does not exist, are you in the right database?'
return
end
if len(ltrim(@CustomFrom)) = 0
select @CustomFrom = null
select @Key = 't1.' + ltrim(@Key)
select @Key = replace(@Key, ',', ', t1.')
select @Key = replace(@Key, 't1. ', 't1.')
select @Key = replace(@Key, '=', '= t2.')
select @Key = replace(@Key, 't2. ', 't2.')
select @Key = replace(@Key, ', ', ' and ')
select @Ignore = ',' + replace(@Ignore, ' ', '') + ','
declare @SQL varchar(max)
select @SQL = @SQL + case
when c.system_type_id in (48,52,56,59,60,62,104,106,108,122,127)
then ' case when coalesce(t1.' + c.name + ', ' + @v + ') <> coalesce(t2.' + c.name + ', ' + @v + ') then ''' + c.name + ''' + '', '' else '''' end '
when c.system_type_id in (58,61)
then ' case when convert(datetime, coalesce(t1.' + c.name + ', ' + @v + ')) <> convert(datetime, coalesce(t2.' + c.name + ', ' + @v + ')) then ''' + c.name + ''' + '', '' else '''' end '
else ' case when coalesce(t1.' + c.name + ', ''' + @v + ''') <> coalesce(t2.' + c.name + ', ''' + @v + ''') then ''' + c.name + ''' + '', '' else '''' end '
end + '+'
from sys.tables t
inner join sys.columns c on t.object_id = c.object_id
where t.name in (@Table1, @Table2)
and charindex(',' + c.name + ',', @Ignore) = 0
group by
c.name,
c.system_type_id,
c.column_id
having count(*) = 2
select @SQL = left(@SQL, len(@SQL) - 2)
select @SQL = 'select ' + @SQL
select @SQL = @SQL + ' as Cols_That_Dont_Match, t1.*, t2.* '
select @SQL = @SQL + ' ' + coalesce(@CustomFrom, ' from ' + @Table1 + ' t1 inner join ' + @Table2 + ' t2 on ' + @Key) + ' where '
select @SQL = @SQL + case
when c.system_type_id in (48,52,56,59,60,62,104,106,108,122,127)
then 'coalesce(t1.' + c.name + ', 0) <> coalesce(t2.' + c.name + ', 0) or '
when c.system_type_id in (58,61)
then 'Convert(datetime,Coalesce(t1.' + c.name + ', 0)) <> convert(datetime, coalesce(t2.' + c.name + ', 0)) or '
else 'coalesce(t1.' + c.name + ', ''0'') <> coalesce(t2.' + c.name + ', ''0'') or '
end
from sys.tables t
inner join sys.columns c on t.object_id = c.object_id
where t.name in (@Table1, @Table2)
and charindex(',' + c.name + ',', @Ignore) = 0
group by
c.name,
c.system_type_id
having count(*) = 2
select @SQL = left(@SQL, len(@SQL) - 3)
if @OutputDetails = 1
begin
select @SQL as 'SQL Statement being executed'
exec(@SQL)
end
if @OutputSummary = 1
begin
-- produce distinct list of problematic columns
select @SQL = replace(@SQL, ', t1.*, t2.*', '')
select @SQL = replace(@SQL, 'select ', 'select distinct ')
if not object_id('tempdb..#tbBadCols') is null drop table #tbBadCols
create table #tbBadCols (BadColID int identity, Cols_That_Dont_Match varchar(1000))
insert into #tbBadCols (Cols_That_Dont_Match)
exec(@SQL)
select @SQL = ''
select @SQL = @SQL + Cols_That_Dont_Match
from #tbBadCols
if not object_id('tempdb..#Numbers') is null drop table #Numbers
create table #Numbers (Number int)
select @x = 1
while @x <= 500
begin
insert into #Numbers (Number) values (@x)
select @x = @x + 1
end
SELECT distinct substring(',' + @SQL + ',', Number + 1,
charindex(',', ',' + @SQL + ',', Number + 1) - Number - 1)
AS List_Of_Distinct_Cols_That_Dont_Match
FROM #Numbers
WHERE Number <= len(',' + @SQL + ',') - 1
AND substring(',' + @SQL + ',', Number, 1) = ','
order by
List_Of_Distinct_Cols_That_Dont_Match
end
Brilliant!
Simply Brilliant!
That is the best script I have come across yet on the net.
I have just used it to validate 60,000 records in 6 view files with a backup copy. It would have taken me all day without this script.
The only thing it doesn’t do that woudl have been nice is to list all the fields separately in the details tab by concatenating the t1.fieldname + ‘_1′ and t2.fieldname + ‘_2′ so that the whole thing can be wrapped in a select statement and allow me to choose only lines with differences: This can’t be done at the moment becuase you have duplicated column names in the results set.
Just used this script again, thanks a million.
I have a problem with it though if the tables are not in the default schema (dbo)
The validation for the table checks fails if you put the schema name in to the variables @Table1 = ‘dev.tablename’
Also, if you have the same table names in different schemas then the select from sys.tables fails because you have more than 2 columns with the same name