Simple SQL CSV to Table

6 03 2007

Sample Call
select @Statecode = ‘IL, IN,KY’
select ColumnData from dbo.fn_CSVToTable(@Statecode)

Create Function dbo.fn_CSVToTable (@CSVList Varchar(3000))
Returns @Table Table (ColumnData Varchar(50))
As
Begin
If right(@CSVList, 1) <> ','
Select @CSVList = @CSVList + ','

Declare	@Pos	Smallint,
@OldPos	Smallint
Select	@Pos	= 1,
@OldPos = 1

While	@Pos < Len(@CSVList)
Begin
Select	@Pos = CharIndex(',', @CSVList, @OldPos)
Insert into @Table
Select	LTrim(RTrim(SubString(@CSVList, @OldPos, @Pos - @OldPos))) Col001
Select	@OldPos = @Pos + 1
End

Return
End




Simple SQL CSV to Table

6 03 2007

Sample Call
select @Statecode = ‘IL, IN,KY’
select ColumnData from dbo.fn_CSVToTable(@Statecode)

Create Function dbo.fn_CSVToTable (@CSVList Varchar(3000))
Returns @Table Table (ColumnData Varchar(50))
As
Begin
If right(@CSVList, 1) <> ','
Select @CSVList = @CSVList + ','

Declare @Pos Smallint,
@OldPos Smallint
Select @Pos = 1,
@OldPos = 1

While @Pos < Len(@CSVList)
Begin
Select @Pos = CharIndex(',', @CSVList, @OldPos)
Insert into @Table
Select LTrim(RTrim(SubString(@CSVList, @OldPos, @Pos - @OldPos))) Col001
Select @OldPos = @Pos + 1
End

Return
End




SQL Server: Convert CSV to Columns (table) – Method 1

5 03 2007

Sometimes, you’re blessed with the joy of delimited data within a column. I happened to be one of those lucky people recently, and put together a stored procedure that would produce a table with the data already columnized. Two versions of code were produced to accomplish this task. This is the first method which is more simple, but also slightly slower than Method 2. I’m posting it here to help others, and to hopefully get some feedback on how it can be improved. Method 2 will be posted as well. The procedure accepts the following parameters:

  • Delimiter – how your data is delimited (comma, etc.)
  • SourceTable – table containing your delimited data
  • DataColumn – column containing your delimited data
  • OutputTable – name of the output table to create
  • MaxCols – maximum # of columns any particular row will contain
  • ColPrefix – prefix for columns in OutputTable (‘col_’ for example)
  • ColType – type of each column in OutputTable (‘varchar(500) Null’ for example)
  • IntraColDelimiter – field delimiter for fields that can contain the overall delimiter
  • ReplaceChar – unique character known to never exist in your delimited data
  • FillerColValue – Value to fill columns with when a row is short ‘x’ # of columns

The only tricky parameter is IntraColDelimiter. In our case, the data was comma delimited, but it was also possible for us to get commas within a field. If a comma was going to be found within a field, it would be surrounded by double quotes. So, a 6 “column” delimited record in our situation might look like:

abc,123,def,456,”abc,123,,”,ghi,789

In that example, ‘abc,123,,’ is all one field of data and shouldn’t be broken into separate columns during parsing. Below is the script. Each section is preceeded by a single comment indicating the primary purpose of that section. The goals were to accurately parse the data, maintain performance as much as possible, and ensure the code was maintainable.

create procedure dbo.spParseDelimitedData
@Delimiter			char(1)			= ',',										-- column delimiter found in source data
@SourceTable		varchar(50)		= 'tbSourceRecord',							-- source table containing delimited data
@DataColumn			varchar(50)		= 'SourceRecord',							-- column containing the delimited data
@OutputTable		varchar(50)		= 'tbdump_new',								-- table this script will produce with data in columns
@MaxCols			int				= 10,										-- maximum number of columns found in @DataColumn
@ColPrefix			varchar(10)		= 'col_',									-- column prefix for @OutputTable
@ColType			varchar(20)		= ' varchar(1000) null default null ',		-- column type for each @OutputTable column
@IntraColDelimiter	char(1)			= '"',										-- delimiter for fields that actually contain the delimiter
-- for the case of:  123,432,661,"141,414",abd,afae,afe
@ReplaceChar		char(1)			= '^',										-- unique character known never to be in @DataColumn
@FillerColValue		varchar(20)		= ',Null'									-- value to fill columns with when @DataColumn is short columns
as
/*
* Convert CSV data to columns *

This script reads a delimited (CSV, etc.) column from a table and outputs a new table with the data in columns.
It handles situations where each delimited row may have different numbers of columns.  With the input parameters,
you are able to control the type and value of the columns in the otuput table.
*/
begin
-- setup environment
set nocount on
set concat_null_yields_null off

-- setup vars
declare	@x				int,
@SQL			nvarchar(max),
@Row			int,
@MaxRow			int,
@RowsAffected	int,
@Quit			bit

select	@ReplaceChar	= '^',
@Quit			= 0

-- create a copy of the source table, we need to modify records along the way
-- ensure the table doesn't already exist
select @SQL = 'if exists (select name from sysobjects where name = ' + char(39) + @OutputTable + char(39) + ') drop table ' + @OutputTable
exec (@SQL)

-- do the insert
select	@SQL = 'select *, 0 as startpos into tmpdata from ' + @SourceTable
exec (@SQL)

-- create destination table
select	@x = 2,
@SQL = 'create table ' + @OutputTable + '(id int identity, ' + @ColPrefix + '1' + @ColType + ','
while	@x <= @MaxCols
begin
select	@SQL = @SQL + @ColPrefix + convert(varchar(5), @x) + @ColType + ','
select	@x = @x + 1
end
select	@SQL = left(@SQL, len(@SQL) - 1) + ')'
exec (@SQL)

-- change delimiter chars into something else when they are inside a "field"
select @RowsAffected = 1
while @RowsAffected > 0
begin
select @SQL = '
update	tmpdata
set		' + @DataColumn + ' =
substring(' + @DataColumn + ', 1, charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos) - 1)
+ replace(substring(' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos), charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos) + 1) - charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos) + 1), ''' + @Delimiter + ''', ''' + @ReplaceChar + ''')
+ substring(' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos) + 1) + 1, len(' + @DataColumn + ') - charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos) + 1)),
startpos = charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos + 1) + 1) + 1
from	tmpdata
where	startpos < charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ', startpos + 1) + 1) + 1
and		charindex(''' + @IntraColDelimiter + ''', ' + @DataColumn + ') > 0'
exec sp_executesql @SQL
select @RowsAffected = @@Rowcount
end

-- populate @SQL with insert statements
create table #ExecMe (ExecID int identity, SQL varchar(max))

select @SQL = 'select ''insert into ' + @OutputTable +  ' values ('' + char(39) + replace(replace(' + @DataColumn + ', '','', char(39) + '','' + char(39)), ' + char(39) + @replacechar + char(39) + ', ' + char(39) + @delimiter + char(39) + ') + char(39) + replicate('',Null'', ' + convert(varchar(5), @MaxCols) + ' - (len(' + @DataColumn + ') - len(replace(' + @DataColumn + ', '','', ''''))) - 1) + '')'' from tmpdata d'

insert into #ExecMe (SQL)
exec (@SQL)

declare curInserts2 cursor for
select SQL from #ExecMe

open curInserts2
fetch next from curInserts2 into @SQL
while @@fetch_status = 0
begin
exec sp_executesql @SQL
fetch next from curInserts2 into @SQL
end

-- cleanup
drop table tmpdata
close curInserts2
deallocate curInserts2

end