Friday, April 06, 2007

Here's another C#/.NET code sample. This one takes a CSV file whose fields may be quoted, unquoted, or a mix of both within one line (THANKS, Excel 2007, for throwing that in the gears...). It converts the file into a DataTable, optionally taking the column names from the file. Actually, looking at the code, it HAS to have column names in the first row, so if you want to use it without a header row, adjust the code. I could have used this code yesterday, so I decided to post it for others.
        /// <summary>
        /// Reads a CSV file and converts it into a <see cref="System.Data.DataTable"/>.
        /// </summary>
        /// <param name="file">Path of the CSV file to read.</param>
        /// <param name="firstRowHasColumnNames">
        /// When true, the first line that yields at least one field supplies the column
        /// names. When false, every line is treated as data and columns receive the
        /// default names generated by the table's column collection.
        /// </param>
        /// <returns>
        /// A table with one row per data line. The table has as many columns as the
        /// widest line seen; cells a shorter row does not supply are left unset.
        /// </returns>
        private System.Data.DataTable ConvertCSVToTable(String file, bool firstRowHasColumnNames)
        {
            System.Data.DataTable dt = new System.Data.DataTable();
            foreach (String line in System.IO.File.ReadAllLines(file))
            {
                System.Collections.Specialized.StringCollection parts = ParseCSVString(line);
                if (dt.Columns.Count < 1 && firstRowHasColumnNames)
                {
                    // This row contains column names. ParseCSVString never returns the
                    // quotes that enclose a field, so no quote trimming is needed here.
                    foreach (String part in parts)
                    {
                        dt.Columns.Add(new System.Data.DataColumn(part));
                    }
                }
                else
                {
                    // This row contains data. Grow the table when the row is wider than
                    // the current schema (no header row, or a ragged CSV file); without
                    // this, assigning dr[i] below throws IndexOutOfRangeException.
                    while (dt.Columns.Count < parts.Count)
                    {
                        dt.Columns.Add();
                    }

                    System.Data.DataRow dr = dt.NewRow();
                    for (int i = 0; i < parts.Count; i++)
                    {
                        dr[i] = parts[i];
                    }
                    dt.Rows.Add(dr);
                }
            }
            return dt;
        }

        /// <summary>
        /// Splits a single line of CSV text into its fields.
        /// </summary>
        /// <remarks>
        /// Fields are separated by commas. A field may be wrapped in double quotes, in
        /// which case commas inside it are kept as data and a doubled quote (<c>""</c>)
        /// stands for one literal quote character. The enclosing quotes themselves are
        /// not part of the returned field. Quoted and unquoted fields may be mixed
        /// within one line.
        /// </remarks>
        /// <param name="line">One line of CSV text, without the line terminator.</param>
        /// <returns>
        /// The fields in order of appearance. A null or empty line yields an empty
        /// collection; any other line yields one more field than it has unquoted
        /// commas, so trailing empty fields are preserved.
        /// </returns>
        private System.Collections.Specialized.StringCollection ParseCSVString(String line)
        {
            System.Collections.Specialized.StringCollection sc = new System.Collections.Specialized.StringCollection();
            if (String.IsNullOrEmpty(line))
            {
                return sc; // blank line: no fields at all
            }

            System.Text.StringBuilder buf = new System.Text.StringBuilder();
            bool inQuotes = false;
            for (int i = 0; i < line.Length; i++)
            {
                char c = line[i];
                if (c == '"')
                {
                    if (inQuotes && i + 1 < line.Length && line[i + 1] == '"')
                    {
                        // Doubled quote inside a quoted field: emit one literal quote
                        // and skip the second character of the pair.
                        buf.Append('"');
                        i++;
                    }
                    else
                    {
                        inQuotes = !inQuotes;
                    }
                }
                else if (c == ',' && !inQuotes)
                {
                    // Comma outside quotes ends the current field.
                    sc.Add(buf.ToString());
                    buf.Length = 0;
                }
                else
                {
                    buf.Append(c);
                }
            }

            // Always emit the last field, even when empty, so "a,b," yields three fields.
            sc.Add(buf.ToString());
            return sc;
        }

No comments: