如何使用OpenXML SDK将Excel转换为CSV?

我需要将Excel(2010)文件转换为csv。 目前我使用Excel Interop打开文件并另存为(SaveAs)csv,效果很好。 但是,Interop在我们的运行环境中存在一些问题,所以我正在寻找另一个解决方案。

我发现在不使用interop的情况下使用Excel文件的方式是使用OpenXML SDK。 我得到了一些代码,以通过每张表中的所有单元格,并简单地将它们写入另一个CSV文件。

我遇到的问题是如何处理空白行和空白单元格。 看起来,用这段代码遍历时,空行和空单元格根本不存在,所以我没有办法知道它们的位置。 有没有办法遍历所有的行和单元格,包括空白的?

string filename = @"D:\test.xlsx";
string outputDir = Path.GetDirectoryName(filename);

// Writes each sheet of the workbook to its own "<workbook>_<sheet>.csv" file.
// Only the rows and cells yielded by the enumerations below are written.
using (SpreadsheetDocument spreadsheet = SpreadsheetDocument.Open(filename, false))
{
    foreach (Sheet currentSheet in spreadsheet.WorkbookPart.Workbook.Descendants<Sheet>())
    {
        WorksheetPart sheetPart = (WorksheetPart)spreadsheet.WorkbookPart.GetPartById(currentSheet.Id);
        Worksheet sheetContent = sheetPart.Worksheet;

        // Cells typed as SharedString hold an index into this array instead of their text.
        SharedStringTablePart stringTablePart = spreadsheet.WorkbookPart.GetPartsOfType<SharedStringTablePart>().First();
        SharedStringItem[] sharedStrings = stringTablePart.SharedStringTable.Elements<SharedStringItem>().ToArray();

        // Fall back to the workbook's own directory when no output directory is set.
        if (string.IsNullOrWhiteSpace(outputDir))
        {
            outputDir = Path.GetDirectoryName(filename);
        }

        string csvName = string.Format("{0}_{1}.csv", Path.GetFileNameWithoutExtension(filename), currentSheet.Name);
        string csvPath = Path.Combine(outputDir, csvName);

        using (var writer = File.CreateText(csvPath))
        {
            foreach (var currentRow in sheetContent.Descendants<Row>())
            {
                StringBuilder line = new StringBuilder();

                foreach (Cell currentCell in currentRow)
                {
                    string text = string.Empty;
                    if (currentCell.CellValue != null)
                    {
                        bool isSharedString = currentCell.DataType != null
                            && currentCell.DataType.Value == CellValues.SharedString;

                        text = isSharedString
                            ? sharedStrings[int.Parse(currentCell.CellValue.Text)].InnerText
                            : currentCell.CellValue.Text;
                    }

                    // To be safe, every field is wrapped in double quotes.
                    line.AppendFormat("\"{0}\",", text.Trim());
                }

                // Drop the separator left behind by the last field.
                writer.WriteLine(line.ToString().TrimEnd(','));
            }
        }
    }
}

如果我有以下Excel文件数据:

 one,two,three
 ,,
 last,,row

我会得到以下CSV(这是错误的):

 one,two,three
 last,row

我不认为OpenXml是解决这个问题的合适工具。 我建议使用OleDbConnection将数据从工作表中读取出来,然后使用此方法将其转换为csv文件。

一旦将数据存储在数据表中,您就可以更好地控制这种情况。

您可以使用oledb连接并查询excel文件,将行转换为csv格式并将结果保存到文件中

下面是一个我测试过的简单示例:它为Excel文件中的每个工作表分别创建一个采用Unicode编码、以制表符分隔的csv文件。

 using System;
 using System.Collections.Generic;
 using System.Data;
 using System.Data.OleDb;
 using System.IO;
 using System.Linq;
 using System.Text;

 namespace XlsTests
 {
     /// <summary>
     /// Exports every table reported by the OLE DB provider for an Excel workbook
     /// to its own tab-separated, Unicode-encoded file named "&lt;workbook&gt;_&lt;table&gt;.csv".
     /// </summary>
     class Program
     {
         /// <summary>
         /// Opens the workbook through OLE DB, reads each table with "SELECT *" and
         /// writes every row as one line of double-quoted, tab-separated fields.
         /// </summary>
         /// <param name="args">Command-line arguments; not used.</param>
         static void Main(string[] args)
         {
             string _XlsConnectionStringFormat = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties=\"Excel 12.0 Xml;HDR=NO;IMEX=1\"";
             string xlsFilename = @"C:\test.xlsx";

             // Leaving the using block disposes the connection, which also closes it,
             // so no explicit Close() call in a finally block is required.
             using (OleDbConnection conn = new OleDbConnection(string.Format(_XlsConnectionStringFormat, xlsFilename)))
             {
                 try
                 {
                     conn.Open();

                     string outputFilenameHeade = Path.GetFileNameWithoutExtension(xlsFilename);
                     string dir = Path.GetDirectoryName(xlsFilename);

                     string[] sheetNames = conn.GetSchema("Tables")
                         .AsEnumerable()
                         .Select(a => a["TABLE_NAME"].ToString())
                         .ToArray();

                     foreach (string sheetName in sheetNames)
                     {
                         string outputFilename = Path.Combine(dir, string.Format("{0}_{1}.csv", outputFilenameHeade, sheetName));

                         using (StreamWriter sw = new StreamWriter(File.Create(outputFilename), Encoding.Unicode))
                         using (DataSet ds = new DataSet())
                         using (OleDbDataAdapter adapter = new OleDbDataAdapter(string.Format("SELECT * FROM [{0}]", sheetName), conn))
                         {
                             adapter.Fill(ds);

                             foreach (DataRow dr in ds.Tables[0].Rows)
                             {
                                 // Double any embedded quote so that it cannot terminate
                                 // the quoted field it is written into.
                                 string[] cells = dr.ItemArray
                                     .Select(a => a.ToString().Replace("\"", "\"\""))
                                     .ToArray();

                                 sw.WriteLine("\"{0}\"", string.Join("\"\t\"", cells));
                             }
                         }
                     }
                 }
                 catch (Exception exp)
                 {
                     // Report the failure instead of swallowing it silently.
                     Console.Error.WriteLine("Failed to convert '{0}': {1}", xlsFilename, exp);
                     Environment.ExitCode = 1;
                 }
             }
         }
     }
 }
 //Xlsx to Csv
 ConvertXlsxToCsv(@"D:\test.xlsx", @"C:\");

 /// <summary>
 /// Writes every worksheet of an .xlsx workbook to its own file named
 /// "&lt;workbook&gt;_&lt;sheet&gt;.csv" inside <paramref name="DestinationCsvDirectory"/>.
 /// </summary>
 /// <remarks>
 /// Rows and cells that are absent from the sheet XML are restored from the row
 /// index and cell reference: a missing row becomes an empty line and a missing
 /// cell becomes an empty field, so values stay in their original columns.
 /// Fields containing a comma, a double quote or a line break are quoted.
 /// Sheets that are not worksheets are skipped.
 /// </remarks>
 /// <param name="SourceXlsxName">Path of the workbook to read.</param>
 /// <param name="DestinationCsvDirectory">
 /// Directory that receives the CSV files; when null or empty, the directory of
 /// <paramref name="SourceXlsxName"/> is used.
 /// </param>
 internal static void ConvertXlsxToCsv(string SourceXlsxName, string DestinationCsvDirectory)
 {
     if (string.IsNullOrEmpty(DestinationCsvDirectory))
     {
         DestinationCsvDirectory = Path.GetDirectoryName(SourceXlsxName);
     }

     // No try/catch here: exceptions propagate to the caller with their
     // original stack trace intact.
     using (SpreadsheetDocument document = SpreadsheetDocument.Open(SourceXlsxName, false))
     {
         WorkbookPart workbookPart = document.WorkbookPart;

         // The shared string part may be missing, so tolerate its absence.
         // It is looked up once because it is the same for every sheet.
         SharedStringTablePart sharedStringPart = workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
         SharedStringItem[] sharedStrings =
             sharedStringPart == null || sharedStringPart.SharedStringTable == null
                 ? new SharedStringItem[0]
                 : sharedStringPart.SharedStringTable.Elements<SharedStringItem>().ToArray();

         foreach (Sheet sheet in workbookPart.Workbook.Descendants<Sheet>())
         {
             WorksheetPart worksheetPart = workbookPart.GetPartById(sheet.Id) as WorksheetPart;
             if (worksheetPart == null)
             {
                 // Not a worksheet (for example a chart sheet): nothing to export.
                 continue;
             }

             string newFilename = string.Format("{0}_{1}.csv", Path.GetFileNameWithoutExtension(SourceXlsxName), sheet.Name);
             newFilename = Path.Combine(DestinationCsvDirectory, newFilename);

             using (var outputFile = File.CreateText(newFilename))
             {
                 uint nextRowIndex = 1;

                 foreach (Row row in worksheetPart.Worksheet.Descendants<Row>())
                 {
                     // Rows without an explicit index are assumed to follow the previous row.
                     uint rowIndex = row.RowIndex != null ? row.RowIndex.Value : nextRowIndex;

                     // Emit one empty line per row missing before this one.
                     for (; nextRowIndex < rowIndex; nextRowIndex++)
                     {
                         outputFile.WriteLine();
                     }

                     outputFile.WriteLine(BuildCsvLine(row, sharedStrings));
                     nextRowIndex = rowIndex + 1;
                 }
             }
         }
     }
 }

 // Builds one CSV line for a row, inserting empty fields for cells missing before each present cell.
 private static string BuildCsvLine(Row row, SharedStringItem[] sharedStrings)
 {
     StringBuilder line = new StringBuilder();
     int nextColumn = 0;

     foreach (Cell cell in row.Elements<Cell>())
     {
         int column = GetColumnIndex(cell.CellReference != null ? cell.CellReference.Value : null);
         if (column < nextColumn)
         {
             // Missing or out-of-order reference: append right after the previous field.
             column = nextColumn;
         }

         // Pad with empty fields up to the column this cell belongs to.
         while (nextColumn < column)
         {
             if (nextColumn > 0)
             {
                 line.Append(',');
             }
             nextColumn++;
         }

         if (nextColumn > 0)
         {
             line.Append(',');
         }
         line.Append(EscapeCsvField(GetCellText(cell, sharedStrings).Trim()));
         nextColumn++;
     }

     return line.ToString();
 }

 // Returns the text of a cell, resolving shared-string indexes; empty when the cell has no value.
 private static string GetCellText(Cell cell, SharedStringItem[] sharedStrings)
 {
     if (cell.CellValue == null)
     {
         return string.Empty;
     }

     if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
     {
         return sharedStrings[int.Parse(cell.CellValue.Text)].InnerText;
     }

     return cell.CellValue.Text;
 }

 // Converts the letter prefix of a reference such as "AB12" to a zero-based column index; -1 when there is none.
 private static int GetColumnIndex(string cellReference)
 {
     if (string.IsNullOrEmpty(cellReference))
     {
         return -1;
     }

     int index = 0;
     foreach (char c in cellReference)
     {
         char upper = char.ToUpperInvariant(c);
         if (upper < 'A' || upper > 'Z')
         {
             break;
         }
         index = index * 26 + (upper - 'A' + 1);
     }

     return index - 1;
 }

 // Quotes a field only when it contains a comma, a double quote or a line break; embedded quotes are doubled.
 private static string EscapeCsvField(string value)
 {
     if (value.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
     {
         return value;
     }

     return "\"" + value.Replace("\"", "\"\"") + "\"";
 }