如何在Open XML电子表格中“展开”(uncollapse)被折叠的单元格?

我在服务器端用C#处理一个xlsx格式的Excel文件。 电子表格中总共有15列(单元格)。 在数据行中,有些单元格的值是缺失的。 第一行是表头,它正确地包含15个单元格。 但在数据行中,有些单元格的值可能为空,于是Open XML返回的是一组“锯齿状”的单元格。 例如,第1行有完整的15个单元格,而第2行可能只有13个单元格,因为其中两个值为空。 这是怎么回事? 怎样才能正确映射这些数据? 现在所有的值基本上都被挤到了左边,导致单元格的值与列对不上。 我漏掉了什么? 用Open XML的术语来说,这些单元格似乎被“折叠”(collapsed)了。

// Resolve the first sheet listed in the workbook and collect its rows.
WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = workbookPart.Workbook
    .GetFirstChild<Sheets>()
    .Elements<Sheet>();

// The sheet's relationship id is the key used to look up its WorksheetPart.
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(relationshipId);

// Rows live under the worksheet's SheetData element.
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();

澄清:换一种方式来问这个问题。 如果我想把Excel文件中的内容读入DataTable,该怎么做? 我希望所有数据列都能正确对齐。 怎样才能做到这一点?

这个问题问得比我这里的更好: 读取Excel Open XML时忽略了空白单元格

要达到你想要的效果,一种方法是先找出所有行中最大的列索引,然后把所有缺失的单元格都补上空白值。 这样所有列就能保持正确对齐。

以下是找出最大列索引的快速片段:

 // Find the largest zero-based column index used by any row.
 int? biggestColumnIndex = 0;
 foreach (Row row in rows)
 {
     // Take the last cell element of the row; rows without cells are skipped.
     Cell lastCell = row.Elements<Cell>().LastOrDefault();
     if (lastCell == null)
     {
         continue;
     }

     // CellReference is a full address such as "M7"; the helper ignores the row digits.
     int? columnIndex = GetColumnIndexFromName(lastCell.CellReference);
     if (columnIndex.HasValue && columnIndex > biggestColumnIndex)
     {
         biggestColumnIndex = columnIndex;
     }
 }

 /// <summary>
 /// Converts a column name to its zero based column index ("A" is 0, "Z" is 25, "AA" is 26).
 /// A full cell reference such as "AB12" is also accepted: parsing stops at the first
 /// character that is not a letter, so the row number is ignored.
 /// </summary>
 /// <param name="columnName">Column name (ie. A or AB) or cell reference (ie. AB12)</param>
 /// <returns>
 /// Zero based index if the conversion was successful; otherwise null (null or empty input,
 /// input that does not start with a letter, or more than three leading letters).
 /// </returns>
 public static int? GetColumnIndexFromName(string columnName)
 {
     // Excel's last column is XFD, so a valid column name never has more than three letters.
     // Rejecting longer names also keeps the arithmetic below far away from int overflow.
     const int MaxColumnLetters = 3;

     if (string.IsNullOrEmpty(columnName))
     {
         return null;
     }

     int oneBasedIndex = 0;
     int letterCount = 0;
     foreach (char character in columnName)
     {
         char letter = char.ToUpperInvariant(character);
         if (letter < 'A' || letter > 'Z')
         {
             // Reached the row number (or some other non-letter): the column part is over.
             break;
         }

         letterCount++;
         if (letterCount > MaxColumnLetters)
         {
             return null;
         }

         // Column names are bijective base 26: A..Z are the digits 1..26.
         oneBasedIndex = (oneBasedIndex * 26) + (letter - 'A' + 1);
     }

     return letterCount == 0 ? (int?)null : oneBasedIndex - 1;
 }

然后调用InsertCellsForCellRange方法,以最大列索引为范围,为所有缺失的位置补上空单元格。 之后再读取数据,各列就应该全部对齐了。 (所有辅助方法都列在InsertCellsForCellRange方法的下面)

 /// <summary>
 /// Inserts cells if required for a rectangular range of cells, so that every row in the
 /// range has a cell element for every column in the range.
 /// </summary>
 /// <param name="startCellReference">Upper left cell of the rectangle (ie. A1)</param>
 /// <param name="endCellReference">Lower right cell of the rectangle (ie. O20)</param>
 /// <param name="worksheetPart">Worksheet part to insert cells</param>
 /// <exception cref="ArgumentException">A reference does not contain a column name</exception>
 public static void InsertCellsForCellRange(string startCellReference, string endCellReference, WorksheetPart worksheetPart)
 {
     uint startRow = GetRowIndex(startCellReference);
     uint endRow = GetRowIndex(endCellReference);

     // GetColumnName accepts lower-case letters, but the increment logic only knows 'A'..'Z'.
     string startColumn = GetColumnName(startCellReference).ToUpperInvariant();
     string endColumn = GetColumnName(endCellReference).ToUpperInvariant();

     if (startColumn.Length == 0 || endColumn.Length == 0)
     {
         throw new ArgumentException("Both cell references must contain a column name.");
     }

     // Insert the cells row by row if necessary
     for (uint currentRow = startRow; currentRow <= endRow; currentRow++)
     {
         // Comparing columns (instead of waiting for an exact match on the end cell) makes a
         // reversed range a no-op rather than an endless loop.
         string currentColumn = startColumn;
         while (CompareColumnNames(currentColumn, endColumn) <= 0)
         {
             // With no cell supplied, InsertCell returns an existing cell untouched, so no
             // separate "does it exist" scan of the whole worksheet is needed.
             InsertCell(currentColumn, currentRow, worksheetPart);

             // Move to the next column in the range
             currentColumn = IncrementColumnName(currentColumn);
         }
     }
 }

 /// <summary>
 /// Given a cell name, parses the specified cell to get the row index.
 /// </summary>
 /// <param name="cellReference">Address of the cell (ie. B2)</param>
 /// <returns>Row Index (ie. 2)</returns>
 /// <exception cref="FormatException">The reference contains no digits</exception>
 public static uint GetRowIndex(string cellReference)
 {
     // The first run of digits is the row index portion of the cell name.
     Match match = Regex.Match(cellReference, @"\d+");
     return uint.Parse(match.Value);
 }

 /// <summary>
 /// Given a cell name, parses the specified cell to get the column name.
 /// </summary>
 /// <param name="cellReference">Address of the cell (ie. B2)</param>
 /// <returns>Column Name (ie. B); an empty string if the reference contains no letters</returns>
 public static string GetColumnName(string cellReference)
 {
     // The first run of letters is the column name portion of the cell name.
     Match match = Regex.Match(cellReference, "[A-Za-z]+");
     return match.Value;
 }

 /// <summary>
 /// Increments the reference of a given cell. This reference comes from the CellReference property
 /// on a Cell.
 /// </summary>
 /// <param name="reference">reference string (ie. B2); column letters must be upper case</param>
 /// <param name="cellRefPart">indicates what is to be incremented</param>
 /// <returns>
 /// The incremented reference (ie. C2 for Column, B3 for Row, C3 for Both). The input is
 /// returned unchanged when it is null or empty or when cellRefPart is None.
 /// </returns>
 /// <exception cref="ArgumentException">The reference contains no upper-case column letters</exception>
 public static string IncrementCellReference(string reference, CellReferencePartEnum cellRefPart)
 {
     if (cellRefPart == CellReferencePartEnum.None || String.IsNullOrEmpty(reference))
     {
         return reference;
     }

     // Splitting on a capturing group keeps the captured letters in the result:
     // "AB12" becomes { "", "AB", "12" }.
     string[] parts = Regex.Split(reference, "([A-Z]+)");
     if (parts.Length < 3)
     {
         throw new ArgumentException("Cell reference has no upper-case column letters: " + reference, "reference");
     }

     if (cellRefPart == CellReferencePartEnum.Column || cellRefPart == CellReferencePartEnum.Both)
     {
         parts[1] = IncrementColumnName(parts[1]);
     }

     if (cellRefPart == CellReferencePartEnum.Row || cellRefPart == CellReferencePartEnum.Both)
     {
         // Increment the row number. A reference is invalid without this component, so we assume it will always be present.
         parts[2] = (int.Parse(parts[2]) + 1).ToString();
     }

     return parts[1] + parts[2];
 }

 /// <summary>
 /// Returns the column name that follows the given one (A to B, Z to AA, AZ to BA, ZZ to AAA).
 /// </summary>
 /// <param name="columnName">Upper-case column name</param>
 /// <returns>Next column name</returns>
 private static string IncrementColumnName(string columnName)
 {
     char[] letters = columnName.ToCharArray();
     int position = letters.Length - 1;

     // Every trailing 'Z' rolls over to 'A' and carries into the letter on its left.
     while (position >= 0 && letters[position] == 'Z')
     {
         letters[position] = 'A';
         position--;
     }

     if (position < 0)
     {
         // Every letter was 'Z' (ie. "ZZ"), so the name grows by one letter.
         return "A" + new string(letters);
     }

     letters[position]++;
     return new string(letters);
 }

 /// <summary>
 /// Compares two column names in spreadsheet order, where a shorter name always comes first
 /// (B before AA) and names of equal length compare alphabetically, ignoring case.
 /// </summary>
 /// <param name="left">First column name</param>
 /// <param name="right">Second column name</param>
 /// <returns>Negative, zero or positive when left is before, equal to or after right</returns>
 private static int CompareColumnNames(string left, string right)
 {
     if (left.Length != right.Length)
     {
         return left.Length.CompareTo(right.Length);
     }

     return string.Compare(left, right, StringComparison.OrdinalIgnoreCase);
 }

 /// <summary>
 /// Returns a cell Object corresponding to a specific address on the worksheet
 /// </summary>
 /// <param name="workSheetPart">WorkSheet to search for cell address</param>
 /// <param name="cellAddress">Cell Address (ie. B2)</param>
 /// <returns>Cell Object, or null when no cell has that address</returns>
 /// <exception cref="InvalidOperationException">More than one cell has that address</exception>
 public static Cell GetCell(WorksheetPart workSheetPart, string cellAddress)
 {
     // Note: this walks every cell of the worksheet, so avoid calling it inside a loop over cells.
     return workSheetPart.Worksheet.Descendants<Cell>()
         .SingleOrDefault(c => cellAddress.Equals(c.CellReference));
 }

 /// <summary>
 /// Inserts a new cell at the specified colName and rowIndex. If a cell
 /// already exists, then the existing cell is returned.
 /// </summary>
 /// <param name="colName">Column Name</param>
 /// <param name="rowIndex">Row Index</param>
 /// <param name="worksheetPart">Worksheet Part</param>
 /// <returns>Inserted Cell</returns>
 public static Cell InsertCell(string colName, uint rowIndex, WorksheetPart worksheetPart)
 {
     return InsertCell(colName, rowIndex, worksheetPart, null);
 }

 /// <summary>
 /// Inserts a new cell at the specified colName and rowIndex. If a cell
 /// already exists, then the existing cells are shifted to the right.
 /// </summary>
 /// <param name="colName">Column Name</param>
 /// <param name="rowIndex">Row Index</param>
 /// <param name="worksheetPart">Worksheet Part</param>
 /// <param name="insertCell">
 /// Cell to insert, or null. With null, an existing cell at the location is returned as is and
 /// a missing one is created empty. A supplied cell gets its CellReference set to the location.
 /// </param>
 /// <returns>Inserted Cell (or the existing cell when nothing had to be inserted)</returns>
 public static Cell InsertCell(string colName, uint rowIndex, WorksheetPart worksheetPart, Cell insertCell)
 {
     Worksheet worksheet = worksheetPart.Worksheet;
     SheetData sheetData = worksheet.GetFirstChild<SheetData>();
     string insertReference = colName + rowIndex;

     // If the worksheet does not contain a row with the specified row index, insert one.
     Row row = sheetData.Elements<Row>().FirstOrDefault(r => r.RowIndex == rowIndex);
     if (row == null)
     {
         row = new Row() { RowIndex = rowIndex };

         // Keep rows in ascending RowIndex order. When no later row exists the reference
         // child is null and the new row goes to the end.
         Row nextRow = sheetData.Elements<Row>().FirstOrDefault(r => r.RowIndex > rowIndex);
         sheetData.InsertBefore(row, nextRow);
     }

     Cell retCell = row.Elements<Cell>()
         .FirstOrDefault(c => c.CellReference != null && c.CellReference.Value == insertReference);

     if (retCell != null)
     {
         // A cell already occupies the location. Without a cell to insert there is nothing
         // to do; the existing cell is returned below.
         if (insertCell != null)
         {
             // Move every cell at or to the right of the insert column one column further
             // right. Columns are compared in spreadsheet order, not string order, and each
             // cell keeps its distance to its neighbours.
             List<Cell> cellsToShift = row.Elements<Cell>()
                 .Where(c => c.CellReference != null
                     && CompareColumnNames(GetColumnName(c.CellReference.Value), colName) >= 0)
                 .ToList();

             foreach (Cell cell in cellsToShift)
             {
                 cell.CellReference = new StringValue(
                     IncrementCellReference(cell.CellReference.Value, CellReferencePartEnum.Column));
             }

             // Actually insert the new cell in front of the cell that used to own the location.
             insertCell.CellReference = new StringValue(insertReference);
             retCell = row.InsertBefore(insertCell, retCell);
         }
     }
     else
     {
         // No cell exists at the specified location so we need to put a new cell in that space.
         // Cells must be in sequential order according to CellReference, and that order has to
         // be spreadsheet order ("A", "B", ... "Z", "AA", "AB", ...), not string comparison order.
         Cell refCell = row.Elements<Cell>()
             .FirstOrDefault(c => c.CellReference != null
                 && CompareColumnNames(GetColumnName(c.CellReference.Value), colName) > 0);

         // Insert cell parameter is supplied, otherwise, create a new cell
         retCell = insertCell ?? new Cell();
         retCell.CellReference = new StringValue(insertReference);

         // When no later cell exists refCell is null and the cell goes to the end of the row.
         row.InsertBefore(retCell, refCell);
     }

     return retCell;
 }

 //Other missing pieces

 /// <summary>
 /// Selects which part of a cell reference IncrementCellReference should increment.
 /// </summary>
 public enum CellReferencePartEnum
 {
     None,
     Column,
     Row,
     Both
 }

 // Column letters in order, followed by a ' ' entry one past 'Z'.
 // NOTE(review): kept for helpers elsewhere that look letters up by position - confirm before removing.
 private static readonly List<char> Letters = new List<char>()
 {
     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
     'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' '
 };

据我所知,你是在一行中迭代单元格,并假设你读的第一个单元格是在A列,第二个在B列等等?

我建议你用正则表达式解析出列的位置/引用:

 // Illustrative fragment, not compilable as written: the cell type, and the property that holds the cell's "A1"-style address.
 DocumentFormat.OpenXml.Spreadsheet.Cell currentcell currentcell.CellReference 

CellReference以“A1”表示法给出单元格引用。 从中提取列部分(“A”、“B”、“CC”等)。

你需要对一行中的每个单元格都这样做。 如果某一列的单元格缺失,填入一个占位值即可。 也许可以用Null或DBNull?

我不知道是否还有别的方法可以确定一个单元格属于哪一列。