从IIS上的Excel文件中读取有限的行

我有一个在IIS上托pipe的asp.net mvc应用程序。 我有一个用户上传包含50k +行的excel文件的表单。 我用下面的C#代码读取了excel文件。

public DataTable GetExcelDataTable(string fileName) { string connectionString = Path.GetExtension(fileName) == "xls" ? string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName) : string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName); var conn = new OleDbConnection(connectionString); using (var adapter = new OleDbDataAdapter("SELECT * FROM [Sheet1$]", conn)) { var ds = new DataSet(); adapter.Fill(ds); DataTable data = ds.Tables[0]; conn.Close(); conn.Dispose(); adapter.Dispose(); return data; } } 

问题是,它只能读取30k行,但从来没有整个Excel文件。

有趣的是,如果我使用visual studio运行mvc应用程序,但是从来没有从IIS(IIS也在我的机器上)托pipe的网站上运行,我可以使用相同的代码读取所有行。

任何想法,为什么会发生?

在这种方法中,不需要在目标机器上安装excel

  NPOI.SS.UserModel.IWorkbook hssfworkbook; bool InitializeWorkbook(string path) { try { if (path.ToLower().EndsWith(".xlsx")) { FileStream file1 = File.OpenRead(path); hssfworkbook = new XSSFWorkbook(file1); } else { //read the template via FileStream, it is suggested to use FileAccess.Read to prevent file lock. //book1.xls is an Excel-2007-generated file, so some new unknown BIFF records are added. using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read)) { hssfworkbook = new HSSFWorkbook(file); } } return true; } catch { return false; } } 

在下面的 :

 public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int rowCount) { NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0); System.Collections.IEnumerator rows = sheet.GetRowEnumerator(); DataTable dt = new DataTable(); bool skipReadingHeaderRow = rows.MoveNext(); if (skipReadingHeaderRow) { dynamic row; if (rows.Current is NPOI.HSSF.UserModel.HSSFRow) row = (NPOI.HSSF.UserModel.HSSFRow)rows.Current; else row = (NPOI.XSSF.UserModel.XSSFRow)rows.Current; for (int i = 0; i < row.LastCellNum; i++) { ICell cell = row.GetCell(i); if (cell != null) { dt.Columns.Add(cell.ToString()); } else { dt.Columns.Add(string.Empty); } } } int cnt = 0; while (rows.MoveNext() && cnt < rowCount) { cnt++; dynamic row; if (rows.Current is NPOI.HSSF.UserModel.HSSFRow) row = (NPOI.HSSF.UserModel.HSSFRow)rows.Current; else row = (XSSFRow)rows.Current; DataRow dr = dt.NewRow(); for (int i = 0; i < row.LastCellNum; i++) { ICell cell = row.GetCell(i); if (cell == null) { dr[i - 1] = null; } else if (i > 0) { dr[i - 1] = cell.ToString(); } } dt.Rows.Add(dr); } return dt; } 

要么:

  public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int rowCount) { NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0); System.Collections.IEnumerator rows = sheet.GetRowEnumerator(); DataTable dt = new DataTable(); bool skipReadingHeaderRow = rows.MoveNext(); if (skipReadingHeaderRow) { dynamic row; if (rows.Current is NPOI.HSSF.UserModel.HSSFRow) row = (NPOI.HSSF.UserModel.HSSFRow)rows.Current; else row = (NPOI.XSSF.UserModel.XSSFRow)rows.Current; for (int i = 0; i < row.LastCellNum; i++) { ICell cell = row.GetCell(i); if (cell != null) { dt.Columns.Add(cell.ToString()); } else { dt.Columns.Add(string.Empty); } } } int cnt = 0; while (rows.MoveNext() && cnt < rowCount) { cnt++; dynamic row; if (rows.Current is NPOI.HSSF.UserModel.HSSFRow) row = (HSSFRow)rows.Current; else row = (XSSFRow)rows.Current; DataRow dr = dt.NewRow(); for (int i = 0; i < row.LastCellNum; i++) { ICell cell = row.GetCell(i); if (cell == null && i > 0) { dr[i - 1] = null; } else if (i > 0) { switch (cell.CellType) { case CellType.Blank: dr[i - 1] = "[null]"; break; case CellType.Boolean: dr[i - 1] = cell.BooleanCellValue; break; case CellType.Numeric: dr[i - 1] = cell.ToString(); break; case CellType.String: dr[i - 1] = cell.StringCellValue; break; case CellType.Error: dr[i - 1] = cell.ErrorCellValue; break; case CellType.Formula: default: dr[i - 1] = "=" + cell.CellFormula; break; } } } dt.Rows.Add(dr); } return dt; } 

要么 :

 public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int segment, int rowCount) { NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0); System.Collections.IEnumerator rows = sheet.GetRowEnumerator(); DataTable dt = new DataTable(); bool skipReadingHeaderRow = rows.MoveNext(); if (skipReadingHeaderRow) { dynamic row; if (rows.Current is NPOI.HSSF.UserModel.HSSFRow) row = (NPOI.HSSF.UserModel.HSSFRow)rows.Current; else row = (NPOI.XSSF.UserModel.XSSFRow)rows.Current; for (int i = 0; i < row.LastCellNum; i++) { ICell cell = row.GetCell(i); if (cell != null) { dt.Columns.Add(cell.ToString()); } else { dt.Columns.Add(string.Empty); } } } for (int i = 0; i < (segment - 1)*rowCount; i++) { if (!rows.MoveNext()) break; } int cnt = 0; while (rows.MoveNext() && cnt < rowCount) { cnt++; dynamic row; if (rows.Current is NPOI.HSSF.UserModel.HSSFRow) row = (NPOI.HSSF.UserModel.HSSFRow) rows.Current; else row = (NPOI.XSSF.UserModel.XSSFRow) rows.Current; DataRow dr = dt.NewRow(); for (int i = 0; i < row.LastCellNum; i++) { ICell cell = row.GetCell(i); if (cell == null) { dr[i - 1] = null; } else if (i > 0) { switch (cell.CellType) { case CellType.Blank: dr[i - 1] = "[null]"; break; case CellType.Boolean: dr[i - 1] = cell.BooleanCellValue; break; case CellType.Numeric: dr[i - 1] = cell.ToString(); break; case CellType.String: dr[i - 1] = cell.StringCellValue; break; case CellType.Error: dr[i - 1] = cell.ErrorCellValue; break; case CellType.Formula: default: dr[i - 1] = "=" + cell.CellFormula; break; } } } dt.Rows.Add(dr); } return dt; } 

你能够发布你的服务器的一些规格? 虚拟机和云是基于什么机会? 在过去我已经成功地使用:

  1. Koogra: https ://sourceforge.net/projects/koogra/
  2. NPOI: http : //npoi.codeplex.com/

    要读取.xls文件,但是如果您可以将文件限制为.xlsx,我将使用ClosedXML。 我已经阅读了很多大文件,在Azure的一个强大的虚拟机上,使用ClosedXML的50K +没有问题。 我有一种感觉,你打在服务器上的用户空间。 如果用户达到这样的百分比,则达到其使用限额并结束该任务。

这个问题可以通过读取25K + 25k = 50K两部分的数据来解决。 您只需要将select的查询更新为:

 SELECT TOP 25000 * FROM [Sheet1$] 

我在我身边创build了一个小样本,使用SELECT TOP和ORDER BY一起使用,就可以得到结果:

检查代码:

  public DataSet GetExcelDataTable(string fileName) { string connectionString = Path.GetExtension(fileName) == "xls" ? string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName) : string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName); var conn = new OleDbConnection(connectionString); DataTable data = new DataTable(); DataTable data2 = new DataTable(); var ds = new DataSet(); using (var adapter = new OleDbDataAdapter("SELECT TOP 25000 Name, Surname FROM [Sheet1$] ORDER BY Name asc", conn)) { adapter.Fill(data); } using (var adapter = new OleDbDataAdapter("SELECT TOP 25000 Name, Surname FROM [Sheet1$] ORDER BY Name desc", conn)) { adapter.Fill(data2); } if (data.Rows.Count > 0)ds.Tables.Add(data); if (data2.Rows.Count > 0) ds.Tables.Add(data2); return ds; } 

您可以尝试通过读取块之后的块来填充数据表,而不是一次读取。

这种方法的美妙之处在于,你不仅限于5万条logging,而且还适应数据表的实际基数。

此代码适用于我的机器(Win10 X64,VS2010 Express):

  public DataTable GetExcelDataTable(string fileName) { string connectionString = Path.GetExtension(fileName) == "xls" ? string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName) : string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName); var conn = new OleDbConnection(connectionString); using (var adapter = new OleDbDataAdapter("SELECT * FROM [Sheet1$]", conn)) { var dt = new DataTable(); int recordRead = 0; int recordCur = 0; //starting point int recordStep = 6789; //records to read //here, we read **recordStep** records instead of reading //all excel data do { recordRead = adapter.Fill( recordCur, recordStep, dt); recordCur += recordRead; //increment starting point accordingly } while (recordRead > 0); conn.Close(); conn.Dispose(); adapter.Dispose(); return dt; } }