使用Apache POI读取XLSB文件时出现异常:java.io.CharConversionException

我正在开发一个Java应用程序,它使用Apache POI读取一个Excel的xlsb文件,但是在读取时遇到了异常。我的代码如下:

import java.io.IOException; import java.io.InputStream; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.Package; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; import java.util.Iterator; public class Prueba { public static void main (String [] args){ String direccion = "C:/Documents and Settings/RSalasL/My Documents/New Folder/masstigeoct12.xlsb"; Package pkg; try { pkg = Package.open(direccion); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> sheets = r.getSheetsData(); while(sheets.hasNext()) { System.out.println("Processing new sheet:\n"); InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); System.out.println(""); } } catch (InvalidFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (OpenXML4JException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SAXException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public void processAllSheets(String filename) throws Exception { Package pkg = Package.open(filename); XSSFReader r = new XSSFReader( pkg ); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> sheets = r.getSheetsData(); while(sheets.hasNext()) { System.out.println("Processing new sheet:\n"); 
InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); System.out.println(""); } } public static XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" ); ContentHandler handler = new SheetHandler(sst); parser.setContentHandler(handler); return parser; } private static class SheetHandler extends DefaultHandler { private SharedStringsTable sst; private String lastContents; private boolean nextIsString; private SheetHandler(SharedStringsTable sst) { this.sst = sst; } public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => cell if(name.equals("c")) { // Print the cell reference System.out.print(attributes.getValue("r") + " - "); // Figure out if the value is an index in the SST String cellType = attributes.getValue("t"); if(cellType != null && cellType.equals("s")) { nextIsString = true; } else { nextIsString = false; } } // Clear contents cache lastContents = ""; } public void endElement(String uri, String localName, String name) throws SAXException { // Process the last contents as required. // Do now, as characters() may be called more than once if(nextIsString) { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); nextIsString = false; } // v => contents of a cell // Output after we've seen the string contents if(name.equals("v")) { System.out.println(lastContents); } } public void characters(char[] ch, int start, int length) throws SAXException { lastContents += new String(ch, start, length); } } } 

抛出的异常是:

 java.io.CharConversionException: Characters larger than 4 bytes are not supported: byte 0x83 implies a length of more than 4 bytes at org.apache.xmlbeans.impl.piccolo.xml.UTF8XMLDecoder.decode(UTF8XMLDecoder.java:162) at org.apache.xmlbeans.impl.piccolo.xml.XMLStreamReader$FastStreamDecoder.read(XMLStreamReader.java:762) at org.apache.xmlbeans.impl.piccolo.xml.XMLStreamReader.read(XMLStreamReader.java:162) at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yy_refill(PiccoloLexer.java:3474) at org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yylex(PiccoloLexer.java:3958) at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yylex(Piccolo.java:1290) at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yyparse(Piccolo.java:1400) at org.apache.xmlbeans.impl.piccolo.xml.Piccolo.parse(Piccolo.java:714) at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3439) at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1270) at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1257) at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345) at org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument$Factory.parse(Unknown Source) at org.apache.poi.xssf.eventusermodel.XSSFReader$SheetIterator.<init>(XSSFReader.java:207) at org.apache.poi.xssf.eventusermodel.XSSFReader$SheetIterator.<init>(XSSFReader.java:166) at org.apache.poi.xssf.eventusermodel.XSSFReader.getSheetsData(XSSFReader.java:160) at EDManager.Prueba.main(Prueba.java:36) 

该文件有2个工作表,其中一个有329行3列,另一个有566行3列。我只是想读取该文件,以查找某个值是否存在于第二个工作表中。

除了文本提取以外,Apache POI不支持.xlsb文件格式。Apache POI可以为.xls文件(通过HSSF)和.xlsx文件(通过XSSF)提供完整的读写支持,也可以通过通用的SS UserModel接口同时支持这两种格式。

然而,.xlsb格式并不支持常规的读写操作——它是.xls与.xlsx这两种格式之间一种非常奇特的混合体,所需的工作量巨大,因此至今没有人愿意自愿承担或赞助这项工作。

从Apache POI 3.15 beta3 / 3.16开始,Apache POI为.xlsb文件提供的是一个文本提取器——XSSFBEventBasedExcelExtractor。你可以使用它来提取文件中的文本,或者稍作调整后将其转换为类似CSV的格式。

如果需要完整的读/写支持,您需要将文件转换为.xls(如果行/列数量不大)或.xlsx(如果行/列数量很大)。如果您真的非常乐意提供帮助,可以查看XSSFBEventBasedExcelExtractor的源代码,然后贡献补丁,为POI添加对.xlsb的完整支持!

(另外,从异常信息来看,我认为这个.xlsb文件可能已部分损坏;但即使没有损坏,除了文本提取以外,Apache POI仍然不支持该格式。)

我有一个使用SmartXLS的实现:我的代码先将xlsb转换为xlsx,之后就可以使用Apache POI进行读取。下面的方法接收一个java.io.File,验证其扩展名是否为xlsb,然后将其转换为xlsx,并替换为新文件。这对我有用。

 /**
  * Converts an .xlsb workbook into an .xlsx file.
  *
  * <p>Files whose extension is not {@code Static.XLSB_EXT} (compared
  * case-insensitively) are ignored. Failures are logged and reported via
  * {@code MensajesJSFUtil}; they are not rethrown.
  *
  * @param file the candidate workbook
  */
 private void processXLSBFile(File file) {
     String filePath = file.getAbsolutePath();
     if (!FilenameUtils.getExtension(filePath).equalsIgnoreCase((Static.XLSB_EXT))) {
         return;
     }
     WorkBook workBook = new WorkBook();
     try {
         // Close both streams explicitly; previously they were left open.
         java.io.FileInputStream in = new java.io.FileInputStream(filePath);
         try {
             workBook.readXLSB(in);
         } finally {
             in.close();
         }
         // Derive the output path by replacing Static.XLSB (matched
         // case-insensitively as a regex) with the lower-cased xlsx extension.
         filePath = filePath.replaceAll("(?i)".concat(Static.XLSB), Static.XLSX_EXT.toLowerCase());
         java.io.FileOutputStream out = new java.io.FileOutputStream(filePath);
         try {
             workBook.writeXLSX(out);
         } finally {
             out.close();
         }
         // NOTE(review): this only rebinds the local parameter; the caller's
         // File reference still points at the original .xlsb path. Return the
         // new File (or keep it in a field) if the caller needs it.
         file = new File(filePath);
     } catch (Exception e) {
         logger.error(e.getMessage(), e);
         MensajesJSFUtil
                 .mostrarMensajeNegocio(new GTMException(e, ClaveMensaje.COMANDAS_ADJUNTAR_XLSBFILE_READERROR));
     }
 }