通过C#从Excel文件导入“常规”格式的单元格

我目前使用:

  • Visual Studio 2015更新3
  • Microsoft Office Professional Plus 2013
  • .Net Framework 4.5.1
  • Windows 7 64位

我使用下面的代码将一个Excel工作表读取到DataTable中:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Data.OleDb;
using System.Data.SqlClient;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Windows.Forms;

// Reads the first worksheet of an Excel workbook into a DataTable in which every
// column is typed as string. On failure the exception is shown in a message box
// and dt is left as it was when the error occurred (null if nothing was read).
string filename = "C:\\Users\\myusername\\Documents\\MyFile.xlsx";
DataTable dt = null;
try
{
    // Table name = workbook file name without directory or extension.
    // Path handles both separator styles and file names that contain extra dots.
    string ExcelName = Path.GetFileNameWithoutExtension(filename);
    string ConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + filename + ";Extended Properties=\"Excel 12.0;HDR=YES;IMEX=1;TypeGuessRows=0;ImportMixedTypes=Text\";";

    // CommonFunctions.GetExcelSheetNames is defined elsewhere; the first entry is used as the sheet to read.
    string SheetName = CommonFunctions.GetExcelSheetNames(ConnectionString)[0];

    using (OleDbConnection conn = new OleDbConnection(ConnectionString))
    {
        // Plain concatenation: passing this text through string.Format would throw
        // a FormatException for sheet names containing '{' or '}'.
        string query = "SELECT * FROM [" + SheetName + "]";
        conn.Open();

        using (OleDbCommand cmd = new OleDbCommand(query, conn))
        using (OleDbDataReader rdr = cmd.ExecuteReader())
        {
            // dt stays null when the sheet returns no rows.
            if (rdr.HasRows)
            {
                dt = new DataTable();
                dt.TableName = ExcelName;

                // One string column per field reported by the reader.
                for (int i = 0; i < rdr.FieldCount; i++)
                {
                    dt.Columns.Add(new DataColumn(rdr.GetName(i), typeof(string)));
                }

                // Copy every record, converting each value to its string form.
                while (rdr.Read())
                {
                    DataRow dr = dt.NewRow();
                    for (int i = 0; i < rdr.FieldCount; i++)
                    {
                        dr[i] = rdr[i].ToString();
                    }
                    dt.Rows.Add(dr);
                }
            }
        }
    }
}
catch (Exception ex)
{
    // The stack trace is the dialog text and the exception message is the caption.
    MessageBox.Show(ex.StackTrace, ex.Message, MessageBoxButtons.OK, MessageBoxIcon.Error);
}

示例Excel工作表数据如下,以CSV格式提供,便于在Excel中打开:

 "store_number", "stock_code", "desired_quantity" "64004", "BI_KRA_SEL_350065", "1" "64004", "BI_KRA_SEL_500080", "1" "86208", "BI_KRA_SEL_350065", "1" "86208", "BI_KRA_SEL_500080", "1" "64019", "BI_KRA_SEL_350065", "1" "64019", "BI_KRA_SEL_500080", "1" "85858", "BI_KRA_SEL_350065", "1" "85858", "BI_KRA_SEL_500080", "1" "72122", "BI_KRA_SEL_350065", "1" "72122", "BI_KRA_SEL_500080", "1" "68427", "BI_KRA_SEL_350065", "1" "68427", "BI_KRA_SEL_500080", "1" "79031", "BI_KRA_SEL_350065", "1" "79031", "BI_KRA_SEL_500080", "1" "67662", "BI_KRA_SEL_350065", "1" "67662", "BI_KRA_SEL_500080", "1" "92246", "BI_KRA_SEL_350065", "1" "92246", "BI_KRA_SEL_500080", "1" "85432", "BI_KRA_SEL_350065", "1" "85432", "BI_KRA_SEL_500080", "1" "87188", "BI_KRA_SEL_350065", "1" "87188", "BI_KRA_SEL_500080", "1" "91021", "BI_KRA_SEL_350065", "1" "91021", "BI_KRA_SEL_500080", "1" "79022", "BI_KRA_SEL_350065", "1" "79022", "BI_KRA_SEL_500080", "1" "86369", "BI_KRA_SEL_350065", "1" "86369", "BI_KRA_SEL_500080", "1" "67670", "BI_KRA_SEL_350065", "1" "67670", "BI_KRA_SEL_500080", "1" "92605", "BI_KRA_SEL_350065", "1" "92605", "BI_KRA_SEL_500080", "1" "92609", "BI_KRA_SEL_350065", "1" "92609", "BI_KRA_SEL_500080", "1" "92610", "BI_KRA_SEL_350065", "1" "92610", "BI_KRA_SEL_500080", "1" "92611", "BI_KRA_SEL_350065", "1" "92611", "BI_KRA_SEL_500080", "1" "92612", "BI_KRA_SEL_350065", "1" "92612", "BI_KRA_SEL_500080", "1" "92613", "BI_KRA_SEL_350065", "1" "92613", "BI_KRA_SEL_500080", "1" "92614", "BI_KRA_SEL_350065", "1" "92614", "BI_KRA_SEL_500080", "1" "92615", "BI_KRA_SEL_350065", "1" "92615", "BI_KRA_SEL_500080", "1" "92616", "BI_KRA_SEL_350065", "1" "92616", "BI_KRA_SEL_500080", "1" "w090", "BI_KRA_SEL_350065", "1" "w090", "BI_KRA_SEL_500080", "1" "C908", "BI_KRA_SEL_350065", "1" "C908", "BI_KRA_SEL_500080", "1" "w0901", "BI_KRA_SEL_350065", "1" "w0901", "BI_KRA_SEL_500080", "1" "G202", "BI_KRA_SEL_350065", "1" "G202", "BI_KRA_SEL_500080", "1" 

问题出现在第一列包含字母和/或空格的时候:这些单元格(即从“w090”到“G202”)在生成的DataTable(dt)中显示为空白。

我发现这种情况发生在单元格被格式化为“常规”时。 但是,将这些单元格的格式更改为“文本”似乎可以解决问题。

我现在唯一的问题是,我不能依靠我的客户提供的单元格设置为“文本”格式的文件。

有没有人知道这个问题的解决方法,或者有没有办法以“文本”格式克隆Excel文件?

也许有人知道一个更聪明的方法来导入Excel文件到DataTables / DataSets。

任何帮助将非常感激。

由于您把Excel当作数据库使用,每个字段(列)都必须有确定的数据类型。Excel数据库驱动程序会根据第一个值来猜测这个类型。在您的例子中,第一列的第一个值是数字,所以驱动程序把该列猜测为数字类型,之后出现的字符串就不符合这种类型了。

数据库驱动程序有一个参数IMEX,它可以让所有数据都被当作文本处理。请参阅https://www.connectionstrings.com/ace-oledb-12-0/treating-data-as-text/

所以试试

 // Build the ACE OLEDB connection string for the workbook with IMEX=1 in the extended properties.
 string connectionString = string.Concat(
     "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=",
     filename,
     ";Extended Properties=\"Excel 12.0;IMEX=1\";");

或者

 // Same connection string, additionally setting HDR=NO alongside IMEX=1.
 string connectionString = string.Concat(
     "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=",
     filename,
     ";Extended Properties=\"Excel 12.0;HDR=NO;IMEX=1\";");

这比使用adapter.Fill()稍微多一些工作,但是如果您想对数据类型有更多的控制,可以先声明各列的类型,然后逐行填充DataTable。由于Excel允许在同一列中随意混用不同的数据类型,我认为OleDb几乎不可能确定正确的数据类型。

下面是一个显式声明数据类型、然后手动把数据插入DataTable的例子:

 // Declares the column types explicitly, then copies the query result into dt row by row.
 // Uses query, conn and dt from the surrounding code; dt must not contain rows yet,
 // because the column types are assigned after the columns are added.
 // The using blocks dispose the command and reader even when reading throws.
 using (OleDbCommand cmd = new OleDbCommand(query, conn))
 using (OleDbDataReader reader = cmd.ExecuteReader())
 {
     object[] fields = new object[reader.FieldCount];

     // One column per field, named after the field.
     for (int i = 0; i < fields.Length; i++)
     {
         dt.Columns.Add(new DataColumn(reader.GetName(i)));
     }

     // Explicit types for the three columns of the sample sheet.
     dt.Columns[0].DataType = typeof(string);
     dt.Columns[1].DataType = typeof(string);
     dt.Columns[2].DataType = typeof(int);

     while (reader.Read())
     {
         // The buffer is reused for every record; Rows.Add is given the current values each time.
         reader.GetValues(fields);
         dt.Rows.Add(fields);
     }
 }

– 编辑1/3/2017 –

这是一个使用POCO的解决方案,我相信它会起作用:

如果你的POCO看起来像这样:

 /// <summary>
 /// Plain data object holding the three values of one worksheet record.
 /// </summary>
 public class Stock
 {
     /// <summary>Store number, kept as text.</summary>
     public string StoreNumber
     {
         get;
         set;
     }

     /// <summary>Stock code, kept as text.</summary>
     public string StockCode
     {
         get;
         set;
     }

     /// <summary>Desired quantity.</summary>
     public double DesiredQuantity
     {
         get;
         set;
     }
 }

这段代码应该从Excel中读取数据并将其放入域对象列表中:

 // Reads every record of the query into a list of Stock objects.
 // Uses ConnectionString and query from the surrounding code.
 List<Stock> stockData = new List<Stock>();

 // The using blocks close the connection and dispose the command and reader
 // on every path, including when a read throws.
 using (OleDbConnection conn = new OleDbConnection(ConnectionString))
 {
     conn.Open();

     using (OleDbCommand cmd = new OleDbCommand(query, conn))
     using (OleDbDataReader reader = cmd.ExecuteReader())
     {
         while (reader.Read())
         {
             stockData.Add(new Stock()
             {
                 // GetValue(...).ToString() yields text whatever type the driver reported for the cell.
                 StoreNumber = reader.GetValue(0).ToString(),
                 StockCode = reader.GetValue(1).ToString(),
                 // NOTE(review): GetDouble is documented as performing no conversion - confirm the
                 // third column really arrives as a number with the connection string in use.
                 DesiredQuantity = reader.GetDouble(2)
             });
         }
     }
 }

我认为.GetString(x)可能会因为您指出的问题而抛出错误,但是既然知道这些值都应该是字符串,就可以通过.GetValue(x).ToString()强制得到字符串类型。

从这里开始,我认为与DataTable相比,使用List<Stock>会是一件乐事。最好的一点是您可以完全掌控数据。

这是一个使用Interop服务的可靠解决方案。

添加引用:Microsoft.Office.Interop.Excel版本12.0.0.0,可通过NuGet获取。

注意:确保传入完整路径作为文件名参数。

 /// <summary>
 /// Loads one worksheet of an Excel workbook into a <see cref="DataTable"/> through
 /// Excel COM interop, converting each cell's Value2 to text.
 /// </summary>
 /// <param name="fileName">Workbook to open; pass the full path.</param>
 /// <param name="worksheetName">Worksheet to read; when null or whitespace, the workbook's active sheet is used.</param>
 /// <param name="headerRowNumber">Worksheet row that holds the column headings.</param>
 /// <param name="firstDataRowNumber">Worksheet row of the first data record.</param>
 /// <returns>
 /// A table named after the worksheet, with one column per distinct non-blank heading
 /// and one row per record that has at least one non-blank cell.
 /// </returns>
 public static DataTable LoadExcelFile(string fileName, string worksheetName, int headerRowNumber, int firstDataRowNumber)
 {
     DataTable dt = new DataTable();
     Microsoft.Office.Interop.Excel.Application ExcelApplication = null;
     Microsoft.Office.Interop.Excel.Workbooks ExcelWorkbooks = null;
     Microsoft.Office.Interop.Excel.Workbook ExcelWorkbook = null;
     Microsoft.Office.Interop.Excel.Worksheet ExcelWorksheet = null;

     try
     {
         ExcelApplication = new Microsoft.Office.Interop.Excel.Application();
         // Keep the Workbooks collection in a local so it can be released explicitly.
         ExcelWorkbooks = ExcelApplication.Workbooks;
         ExcelWorkbook = ExcelWorkbooks.Open(fileName, 0, true, 5, "", "", true, Microsoft.Office.Interop.Excel.XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);

         string WorksheetName = worksheetName;
         if (string.IsNullOrWhiteSpace(worksheetName))
         {
             WorksheetName = ExcelWorkbook.ActiveSheet.Name;
         }

         ExcelWorksheet = (Microsoft.Office.Interop.Excel.Worksheet)ExcelWorkbook.Worksheets[WorksheetName];
         dt.TableName = WorksheetName;

         // Query the used range once; every access is a cross-process COM call.
         int UsedColumnCount = ExcelWorksheet.UsedRange.Columns.Count;
         int UsedRowCount = ExcelWorksheet.UsedRange.Rows.Count;

         // Add the columns: heading text -> worksheet column number.
         // Blank and repeated headings are skipped.
         Dictionary<string, int> Columns = new Dictionary<string, int>();
         for (int i = 0; i < UsedColumnCount; i++)
         {
             string ColumnHeading = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)ExcelWorksheet.Cells[headerRowNumber, i + 1]).Value2);
             if (!String.IsNullOrWhiteSpace(ColumnHeading) && !dt.Columns.Contains(ColumnHeading))
             {
                 Columns.Add(ColumnHeading, i + 1);
                 dt.Columns.Add(ColumnHeading);
             }
         }

         // Add the rows
         for (int i = 0; i < UsedRowCount - firstDataRowNumber + 1; i++)
         {
             try
             {
                 DataRow Row = dt.NewRow();
                 bool RowHasContent = false;
                 foreach (KeyValuePair<string, int> kvp in Columns)
                 {
                     string CellContent = Convert.ToString(((Microsoft.Office.Interop.Excel.Range)ExcelWorksheet.Cells[i + firstDataRowNumber, kvp.Value]).Value2);
                     // Address the target column by name so the result does not
                     // depend on the dictionary's enumeration order.
                     Row[kvp.Key] = CellContent;
                     if (!string.IsNullOrWhiteSpace(CellContent))
                     {
                         RowHasContent = true;
                     }
                 }

                 // Rows whose mapped cells are all blank are dropped.
                 if (RowHasContent)
                 {
                     dt.Rows.Add(Row);
                 }
             }
             catch (Exception ex)
             {
                 // Deliberate best effort: a row that cannot be read is skipped,
                 // but the failure is traced instead of vanishing silently.
                 System.Diagnostics.Debug.WriteLine("LoadExcelFile: skipped worksheet row " + (i + firstDataRowNumber) + ": " + ex.Message);
             }
         }
     }
     finally
     {
         // Clean up. Setting references to null does not end the Excel process:
         // the workbook must be closed, the application quit, and each COM
         // reference released, otherwise an EXCEL.EXE instance stays running.
         if (ExcelWorkbook != null)
         {
             try
             {
                 ExcelWorkbook.Close(false);
             }
             catch (Exception ex)
             {
                 System.Diagnostics.Debug.WriteLine("LoadExcelFile: closing the workbook failed: " + ex.Message);
             }
         }

         if (ExcelApplication != null)
         {
             try
             {
                 ExcelApplication.Quit();
             }
             catch (Exception ex)
             {
                 System.Diagnostics.Debug.WriteLine("LoadExcelFile: quitting Excel failed: " + ex.Message);
             }
         }

         ReleaseExcelObject(ExcelWorksheet);
         ReleaseExcelObject(ExcelWorkbook);
         ReleaseExcelObject(ExcelWorkbooks);
         ReleaseExcelObject(ExcelApplication);
     }

     return dt;
 }

 /// <summary>Releases one COM reference; null is ignored and release failures are only traced.</summary>
 private static void ReleaseExcelObject(object comObject)
 {
     if (comObject == null)
     {
         return;
     }

     try
     {
         System.Runtime.InteropServices.Marshal.ReleaseComObject(comObject);
     }
     catch (Exception ex)
     {
         System.Diagnostics.Debug.WriteLine("LoadExcelFile: releasing a COM object failed: " + ex.Message);
     }
 }

为什么要使用Interop服务?

Interop服务可以避免使用基于Jet/ACE数据库引擎的解决方案时遇到的类型猜测和格式错误。

存在此类缺陷的解决方案示例如下:

  • ExcelDataReader
  • LinqToExcel
  • OleDb

如果您目前使用上述解决方案之一,那么值得针对这些错误进行测试,并请将成功的解决方案发布到此页面。

令人愉快的惊喜

使用Interop解决方案时,我原以为如果同名文件已经在Excel中打开,程序再打开它时会出现问题,但是在测试过程中我没有遇到过这样的问题。