如何使用 Apache POI 按行数将 Excel 文件拆分成多个文件?

我是 Apache POI 的新手,我想按行数把一个 Excel 文件拆分成多个文件。

例如 data.xlsx 有 15k 行,拆分后的新文件应为:data_1.xlsx 包含前 5k 行,data_2.xlsx 包含第 5k–10k 行,data_3.xlsx 包含第 10k–15k 行。

我来帮你解决。

package com.industries.seanimus;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/**
 * Splits the first sheet of an .xlsx workbook into several smaller workbooks
 * of at most {@code maxRows} rows each and writes them next to the input file
 * as {@code <name>_1.xlsx}, {@code <name>_2.xlsx}, ...
 *
 * <p>All the work happens in the constructor. Rows that do not physically
 * exist in the source sheet are skipped, so output rows are numbered
 * consecutively from 0 in every output file.
 */
public class ReportSplitter {

    private final String fileName;
    private final int maxRows;

    /**
     * Reads {@code fileName} and, if its first sheet has at least
     * {@code maxRows} physical rows, writes the split files to disk.
     *
     * <p>I/O and format errors are printed to standard error and otherwise
     * ignored, so construction itself does not fail because of them.
     *
     * @param fileName path of the .xlsx file to split
     * @param maxRows  maximum number of rows per output file; a non-positive
     *                 value disables splitting, so all rows end up in a
     *                 single output file
     */
    public ReportSplitter(String fileName, final int maxRows) {
        // NOTE(review): a ratio of 0 switches off POI's zip-bomb protection
        // for the whole JVM. Only acceptable when the input file is trusted.
        ZipSecureFile.setMinInflateRatio(0);
        this.fileName = fileName;
        this.maxRows = maxRows;

        // Open read-only: the input is never modified, and closing a
        // read-write package would save it back to the original file.
        // try-with-resources closes the package on every path.
        try (OPCPackage pkg = OPCPackage.open(new File(fileName), PackageAccess.READ)) {
            XSSFWorkbook workbook = new XSSFWorkbook(pkg);
            XSSFSheet sheet = workbook.getSheetAt(0);

            /* Only split if the sheet has at least the desired amount of rows. */
            if (sheet.getPhysicalNumberOfRows() >= maxRows) {
                List<SXSSFWorkbook> wbs = splitWorkbook(workbook);
                writeWorkBooks(wbs);
            }
        } catch (EncryptedDocumentException | IOException | InvalidFormatException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the rows of the first sheet of {@code workbook} into a list of
     * streaming workbooks holding at most {@code maxRows} rows each.
     *
     * @param workbook the source workbook
     * @return the chunk workbooks in source order; empty if the sheet has no rows
     */
    private List<SXSSFWorkbook> splitWorkbook(XSSFWorkbook workbook) {
        List<SXSSFWorkbook> workbooks = new ArrayList<SXSSFWorkbook>();
        SXSSFWorkbook wb = null;
        SXSSFSheet sh = null;
        Map<Integer, CellStyle> styleCache = null;
        int rowCount = 0;

        XSSFSheet sheet = workbook.getSheetAt(0);
        for (Row row : sheet) {
            /*
             * Time to create a new workbook? This is decided BEFORE the row
             * is created, so a row and the styles of its cells always belong
             * to the same workbook, and no empty trailing workbook is produced
             * when the row count is an exact multiple of maxRows.
             */
            if (wb == null || (maxRows > 0 && rowCount == maxRows)) {
                wb = new SXSSFWorkbook();
                workbooks.add(wb);
                sh = wb.createSheet();
                styleCache = new HashMap<Integer, CellStyle>();
                rowCount = 0;
            }

            SXSSFRow newRow = sh.createRow(rowCount++);
            for (Cell cell : row) {
                // Keep the source column so sparse rows are not shifted left.
                SXSSFCell newCell = newRow.createCell(cell.getColumnIndex());
                setValue(newCell, cell);
                newCell.setCellStyle(copyStyle(wb, styleCache, cell.getCellStyle()));
            }
        }
        return workbooks;
    }

    /**
     * Returns the style of {@code target} that mirrors {@code source},
     * cloning it the first time it is requested. Reusing one clone per
     * source style keeps the number of styles in the output workbook
     * bounded by the number of distinct styles in the input, instead of
     * growing with the number of cells.
     */
    private CellStyle copyStyle(SXSSFWorkbook target, Map<Integer, CellStyle> cache, CellStyle source) {
        Integer key = Integer.valueOf(source.getIndex());
        CellStyle copy = cache.get(key);
        if (copy == null) {
            copy = target.createCellStyle();
            copy.cloneStyleFrom(source);
            cache.put(key, copy);
        }
        return copy;
    }

    /**
     * Copies the content of {@code cell} into {@code newCell}. The way the
     * value is read and written depends on the type of the source cell.
     *
     * @param newCell the destination cell
     * @param cell    the source cell
     * @return {@code newCell}
     */
    private SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING:
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC:
            if (DateUtil.isCellDateFormatted(cell)) {
                newCell.setCellValue(cell.getDateCellValue());
            } else {
                newCell.setCellValue(cell.getNumericCellValue());
            }
            break;
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            newCell.setCellFormula(cell.getCellFormula());
            break;
        case Cell.CELL_TYPE_BLANK:
            // Nothing to copy; a freshly created cell is already blank.
            break;
        case Cell.CELL_TYPE_ERROR:
            newCell.setCellErrorValue(cell.getErrorCellValue());
            break;
        default:
            System.out.println("Could not determine cell type");
        }
        return newCell;
    }

    /**
     * Writes all the workbooks to disk as {@code <name>_<n>.xlsx}, where
     * {@code <name>} is the input file name without its extension and
     * {@code <n>} starts at 1. The temporary files backing the streaming
     * workbooks are deleted afterwards, whether or not writing succeeded.
     *
     * @param wbs the workbooks to write, in output order
     */
    private void writeWorkBooks(List<SXSSFWorkbook> wbs) {
        String baseName = stripExtension(fileName);
        try {
            for (int i = 0; i < wbs.size(); i++) {
                File target = new File(baseName + "_" + (i + 1) + ".xlsx");
                try (FileOutputStream out = new FileOutputStream(target)) {
                    wbs.get(i).write(out);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            for (SXSSFWorkbook wb : wbs) {
                wb.dispose();
            }
        }
    }

    /**
     * Returns {@code name} without its trailing file extension. A dot that
     * belongs to a directory component is not treated as an extension.
     */
    private static String stripExtension(String name) {
        int dot = name.lastIndexOf('.');
        int separator = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
        return dot > separator ? name.substring(0, dot) : name;
    }

    public static void main(String[] args) {
        /* This will create a new workbook every 1000 rows. */
        new ReportSplitter("Data.xlsx", 1000);
    }
}

一些注意事项:

  • 为了写出工作簿,我使用了 SXSSFWorkbook。它比 HSSF 或 XSSF 快得多,因为它在写入之前不会把所有内容都保存在内存中(否则会造成严重的 GC 压力)。

  • 官方的《Busy Developers' Guide》(开发者快速指南)是学习 Apache POI 的好帮手 ;)

请享用!


编辑:我已经更新了代码以复制单元格样式。 有两点需要注意:

  • 复制样式会使处理速度大大减慢。
  • POI 会创建一个模板文件,该文件可能会变得过大而无法解压缩,从而抛出"Zip 炸弹"检测错误。您可以通过 ZipSecureFile.setMinInflateRatio(0) 调整最小解压比率(inflate ratio)来解决此问题。