如何将Excel XLS文件中的多个表格转换为XML(C#)

我从这里下载了这个项目 – >> http://code.msdn.microsoft.com/office/How-to-convert-excel-file-7a9bb404

这个项目工作正常,代码注释也很完善,即使是像我这样的初学者也能理解,但是它只适用于每个工作表只有一个表格的情况。 一旦我在工作表中添加第二个表格,它就会针对电子表格中的某一列报错:“发生错误!错误消息是:找不到第4列。”

基本上,我的电子表格中有这种类型的表格:

电子表格中的两个表格

所以我希望我的程序能将这两个表格导出到一个.XML文件中(只读取这两个表格)。 这两个表格应该分别放在两个XML子元素中:上面的表格对应Order-Header ,下面的表格对应Line-Items ,如下所示:

<ROOT> <Order-Header> ..... </Order-Header> <Line-Items> ..... </Line-Items> </ROOT> 

这是这个程序的代码:

 using System;
using System.Collections.Generic;
using System.Data;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

namespace CSOpenXmlExcelToXml
{
    /// <summary>
    /// Reads the first worksheet of an Excel workbook into a DataTable and
    /// serializes it as XML.
    /// </summary>
    public class ConvertExcelToXml
    {
        // Matches the column letters of a cell reference such as "AB12".
        // Built once instead of on every GetColumnName call.
        private static readonly Regex ColumnNameRegex = new Regex("[A-Za-z]+");

        /// <summary>
        /// Read data from the first sheet of the selected Excel file into a DataTable.
        /// The first row supplies the column names; every following row becomes a data row.
        /// </summary>
        /// <param name="filename">Excel File Path</param>
        /// <returns>
        /// A table named "Line-Item". It has no columns and no rows when the sheet is empty.
        /// </returns>
        /// <exception cref="IOException">
        /// Propagated unchanged when the file cannot be read. The previous
        /// catch-and-wrap discarded the stack trace and the concrete exception type.
        /// </exception>
        private DataTable ReadExcelFile(string filename)
        {
            DataTable dt = new DataTable("Line-Item");

            // Use SpreadsheetDocument class of Open XML SDK to open the excel file read-only
            using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(filename, false))
            {
                WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;

                // Resolve the worksheet part of the first sheet through its relationship id
                IEnumerable<Sheet> sheetcollection = workbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
                string relationshipId = sheetcollection.First().Id.Value;
                WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(relationshipId);

                // Materialize the rows once; the sequence is needed for the
                // emptiness check, the header row and the data rows.
                SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
                List<Row> rows = sheetData.Descendants<Row>().ToList();

                // Nothing in the sheet at all: return the empty table as is
                if (rows.Count == 0)
                {
                    return dt;
                }

                // Header row: one column per position. A null or empty name makes
                // DataColumnCollection.Add assign a default column name.
                foreach (string header in ReadRowValues(spreadsheetDocument, rows[0]))
                {
                    dt.Columns.Add(header);
                }

                // Data rows: everything after the header row
                foreach (Row row in rows.Skip(1))
                {
                    List<string> values = ReadRowValues(spreadsheetDocument, row);

                    // A row may be wider than the header row (for example when a second,
                    // wider table sits below the first). Previously this failed with
                    // "Cannot find column N"; now the table grows with auto-named columns.
                    while (dt.Columns.Count < values.Count)
                    {
                        dt.Columns.Add();
                    }

                    DataRow temprow = dt.NewRow();
                    for (int i = 0; i < values.Count; i++)
                    {
                        temprow[i] = values[i];
                    }

                    dt.Rows.Add(temprow);
                }
            }

            return dt;
        }

        /// <summary>
        /// Collect the cell values of one row, positioned by column index.
        /// Cells missing from the row's XML are filled with empty strings so that
        /// later cells stay in their own column.
        /// </summary>
        /// <param name="spreadsheetdocument">SpreadSheet Document Object</param>
        /// <param name="row">Row Object</param>
        /// <returns>Values from column A up to the last cell present in the row</returns>
        private List<string> ReadRowValues(SpreadsheetDocument spreadsheetdocument, Row row)
        {
            List<string> values = new List<string>();

            foreach (Cell cell in row.Descendants<Cell>())
            {
                // Without a cell reference there is nothing to position by,
                // so the cell is taken as the next column.
                string reference = cell.CellReference;
                int cellColumnIndex = string.IsNullOrEmpty(reference)
                    ? values.Count
                    : GetColumnIndex(GetColumnName(reference));

                // Pad the gap left by cells that are absent from the XML
                while (values.Count < cellColumnIndex)
                {
                    values.Add(string.Empty);
                }

                values.Add(GetValueOfCell(spreadsheetdocument, cell));
            }

            return values;
        }

        /// <summary>
        /// Get Value of Cell
        /// </summary>
        /// <param name="spreadsheetdocument">SpreadSheet Document Object</param>
        /// <param name="cell">Cell Object</param>
        /// <returns>The Value in Cell, or an empty string when the cell has no value</returns>
        private static string GetValueOfCell(SpreadsheetDocument spreadsheetdocument, Cell cell)
        {
            // If there is no data in the cell, just return an empty string
            if (cell.CellValue == null)
            {
                return string.Empty;
            }

            string cellValue = cell.CellValue.InnerText;

            // For shared strings the cell stores an index into the shared string table
            if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
            {
                SharedStringTablePart sharedString = spreadsheetdocument.WorkbookPart.SharedStringTablePart;

                // The index is file data, so parse it independently of the current culture
                int sharedStringIndex = int.Parse(cellValue, CultureInfo.InvariantCulture);
                return sharedString.SharedStringTable.ChildElements[sharedStringIndex].InnerText;
            }

            return cellValue;
        }

        /// <summary>
        /// Get Column Name From given cell name
        /// </summary>
        /// <param name="cellReference">Cell Name(For example,A1)</param>
        /// <returns>Column Name(For example, A)</returns>
        private string GetColumnName(string cellReference)
        {
            return ColumnNameRegex.Match(cellReference).Value;
        }

        /// <summary>
        /// Get the zero-based index of a column from its column name
        /// </summary>
        /// <param name="columnName">Column Name(For Example,A or AA)</param>
        /// <returns>Column Index (A = 0, Z = 25, AA = 26, ...)</returns>
        private int GetColumnIndex(string columnName)
        {
            // Column names are bijective base-26 numbers. Accumulate the one-based
            // value and convert to zero-based once at the end. The previous version
            // subtracted 1 for every letter, which mapped "AA" to 25 (same as "Z").
            int oneBasedIndex = 0;

            foreach (char letter in columnName)
            {
                if (char.IsLetter(letter))
                {
                    oneBasedIndex = (oneBasedIndex * 26) + (char.ToUpperInvariant(letter) - 'A' + 1);
                }
            }

            // A name without letters keeps returning 0, as before
            return oneBasedIndex == 0 ? 0 : oneBasedIndex - 1;
        }

        /// <summary>
        /// Convert the first sheet of an Excel file to an Xml format string
        /// </summary>
        /// <param name="filename">Excel File Path</param>
        /// <returns>Xml format string with root element "Document-Order"</returns>
        public string GetXML(string filename)
        {
            using (DataSet ds = new DataSet())
            {
                ds.DataSetName = "Document-Order";
                ds.Tables.Add(this.ReadExcelFile(filename));
                return ds.GetXml();
            }
        }
    }
}

PS:我贴出了全部代码,因为我认为这样更容易找出需要修改的地方,从而读取一个电子表格中的这两个表格,并将它们导出到一个XML文件。

任何帮助,或者哪怕只是一些关于如何实现这种功能的想法,我都将不胜感激。 谢谢。

更新:这是MainForm.cs的代码:

 using System;
using System.IO;
using System.Windows.Forms;

namespace CSOpenXmlExcelToXml
{
    /// <summary>
    /// Main window: lets the user pick an Excel file, shows it converted to XML,
    /// and saves the XML text to a file.
    /// </summary>
    public partial class MainForm : Form
    {
        public MainForm()
        {
            InitializeComponent();

            // Nothing has been converted yet, so there is nothing to save
            this.btnSaveAs.Enabled = false;
        }

        /// <summary>
        /// Open a dialog to let users select an Excel file
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnBrowser_Click(object sender, EventArgs e)
        {
            // Initializes an OpenFileDialog instance
            using (OpenFileDialog openfileDialog = new OpenFileDialog())
            {
                openfileDialog.RestoreDirectory = true;
                openfileDialog.Filter = "Excel files(*.xlsx;*.xls)|*.xlsx;*.xls";

                if (openfileDialog.ShowDialog() == DialogResult.OK)
                {
                    tbExcelName.Text = openfileDialog.FileName;
                }
            }
        }

        /// <summary>
        /// Convert the selected Excel file to Xml format and show it in the Xml view
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnConvert_Click(object sender, EventArgs e)
        {
            tbXmlView.Clear();

            // The view is empty again, so saving stays disabled until a conversion
            // succeeds. Previously the button remained enabled after a failed run.
            btnSaveAs.Enabled = false;

            string excelfileName = tbExcelName.Text;

            if (string.IsNullOrEmpty(excelfileName) || !File.Exists(excelfileName))
            {
                MessageBox.Show("The Excel file is invalid! Please select a valid file.");
                return;
            }

            try
            {
                string xmlFormatstring = new ConvertExcelToXml().GetXML(excelfileName);

                // An empty result means the Excel document had no content
                if (string.IsNullOrEmpty(xmlFormatstring))
                {
                    MessageBox.Show("The content of Excel file is Empty!");
                    return;
                }

                // Show the XML in the text view
                tbXmlView.Text = xmlFormatstring;

                // The view now has text, so enable the Save As button
                btnSaveAs.Enabled = true;
            }
            catch (Exception ex)
            {
                // Top-level UI handler: report any conversion failure to the user
                MessageBox.Show("Error occurs! The error message is: " + ex.Message);
            }
        }

        /// <summary>
        /// Save the Xml format string as an Xml file
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnSaveAs_Click(object sender, EventArgs e)
        {
            // Initializes a SaveFileDialog instance
            using (SaveFileDialog savefiledialog = new SaveFileDialog())
            {
                savefiledialog.RestoreDirectory = true;
                savefiledialog.DefaultExt = "xml";
                savefiledialog.Filter = "All Files(*.xml)|*.xml";

                if (savefiledialog.ShowDialog() == DialogResult.OK)
                {
                    // using blocks release the file handle even when Write throws
                    using (Stream filestream = savefiledialog.OpenFile())
                    using (StreamWriter streamwriter = new StreamWriter(filestream))
                    {
                        streamwriter.Write("<?xml version='1.0'?>" + Environment.NewLine + tbXmlView.Text);
                    }
                }
            }
        }

        // Disable maximize button of the form
        private void MainForm_Load(object sender, EventArgs e)
        {
            this.MaximizeBox = false;
        }
    }
}

你遇到的问题是,该代码假定你的数据只在一个表格中,我猜它是在处理到顶部表格的最后一列或最后一行之后崩溃的。

Open XML API不易处理。 大多数与之合作的人都同意这一点。 你需要做很多事情才能到达你可以做基本的东西的阶段。

我建议你尝试使用一个库来规避原始API的一些问题。

你为什么不看看ClosedXML? 在处理文件时它算不上出色,并且有一些烦人的错误,但是用来读取数据应该没问题。

希望文档中的这个页面能够把你推向正确的方向。