apachepoi:outofmemory

332nm8kg  于 2021-06-30  发布在  Java
关注(0)|答案(1)|浏览(400)

我正在为kindle fire开发一个应用程序,它从dropbox中提取一个.xlsx文件,并使用ApachePOI将数据解析到一个sqlite数据库中(一个表有10个属性—一旦解析成功,我将把它分解成更多的表)。这个文件只有2mb(约28000行,每行10列),所以当我开始在物理设备上测试时(emulator工作正常,但速度非常慢),我遇到了outofmemoryerrors。我做了大量的挖掘,发现我可以实现sax来减少我使用的内存量。但是,我不太确定如何将所有数据放入表中—根据我看到的示例代码,每个单元格(至少从我所知道的情况来看)都是单独计算的,所以我不能对每行迭代进行一次查询。我遇到的另一个问题是,我有一列数字(价格)打印到控制台(通过debug.print())两次,我不知道为什么。我对这件事简直不知所措——我花了好几天时间来解决dropbox和poi的不同问题,但这件事让我很为难。到目前为止,我使用这三个作为模板/指南(主要是后者):
http://www.saxproject.org/quickstart.html
https://github.com/apache/poi/blob/230453d76e6e912dfa22088544488a0a6baf83a2/src/ooxml/java/org/apache/poi/xssf/eventusermodel/xssfsheetxmlhandler.java
https://dzone.com/articles/introduction-to-apache-poi-library
我已经讨论过这些(还有其他几个):
如何读取大小大于40mb的xlsx文件
http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/eventusermodel/examples/fromhowto.java
http://poi.apache.org/components/spreadsheet/how-to.html#xssf_sax_api
使用poi的xssf和sax(事件api)读取excel表
https://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/eventusermodel/xlsx2csv.java
这是一个相当简单的应用程序,所以我不需要任何太花哨的东西-只需要在这一点上运行。所以我想我的问题是:sax是避免内存问题的最好方法吗?如果是这样,我如何实现它来将每一行准确地解析到我的数据库中?如果没有,我应该往哪个方向走?下面是我的解析类的内容(runningtime和isparsing用于测试):

  1. import android.content.Context;
  2. import java.io.File;
  3. import java.io.IOException;
  4. import java.io.InputStream;
  5. import java.util.Iterator;
  6. import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
  7. import org.apache.poi.openxml4j.opc.OPCPackage;
  8. import org.apache.poi.xssf.eventusermodel.XSSFReader;
  9. import org.apache.poi.xssf.model.SharedStringsTable;
  10. import org.apache.poi.xssf.usermodel.XSSFRichTextString;
  11. import org.xml.sax.Attributes;
  12. import org.xml.sax.ContentHandler;
  13. import org.xml.sax.InputSource;
  14. import org.xml.sax.SAXException;
  15. import org.xml.sax.XMLReader;
  16. import org.xml.sax.helpers.DefaultHandler;
  17. import org.xml.sax.helpers.XMLReaderFactory;
  18. public class ExcelParser {
  19. public static boolean isParsing = false;
  20. private DBHelper dbHelper;
  21. private File dropboxFile;
  22. // purpose: parameterized constructor
  23. // parameters: FileInputStream inputStream
  24. // returns: nothing
  25. public ExcelParser(Context context, File dropboxFile) {
  26. this.dropboxFile = dropboxFile;
  27. dbHelper = new DBHelper(context);
  28. }// end ExcelParser(FileInputStream inputStream)
  29. // purpose: parses the inputStream (.xlsx) into a list of Product objects
  30. // parameters: none
  31. // returns: void
  32. public void parseToDB() {
  33. Long runningTime = System.currentTimeMillis();
  34. isParsing = true;
  35. Debug.print(this.getClass().getSimpleName(), "----- STARTING -----");
  36. try {
  37. OPCPackage pkg = OPCPackage.open(dropboxFile);
  38. XSSFReader xssfReader = new XSSFReader(pkg);
  39. SharedStringsTable sharedStringsTable = xssfReader.getSharedStringsTable();
  40. XMLReader parser = getSheetParser(sharedStringsTable);
  41. Iterator<InputStream> sheets = xssfReader.getSheetsData();
  42. Debug.print(this.getClass().getSimpleName(), "sheet processing");
  43. while(sheets.hasNext()) {
  44. Debug.print(this.getClass().getSimpleName(), "Processing sheet");
  45. InputStream sheet = sheets.next();
  46. InputSource sheetSource = new InputSource(sheet);
  47. parser.parse(sheetSource);
  48. sheet.close();
  49. Debug.print(this.getClass().getSimpleName(), "Done processing sheet");
  50. }// end while-loop
  51. } catch (SAXException | OpenXML4JException | IOException e) {
  52. e.printStackTrace();
  53. } finally {
  54. isParsing = false;
  55. Debug.print(this.getClass().getSimpleName(), "----- FINISHED : " + ((System.currentTimeMillis() - runningTime) / 1000) + " seconds -----");
  56. }// end try-catch
  57. }// end parseToDB()
  58. //
  59. public XMLReader getSheetParser(SharedStringsTable sharedStringsTable) throws SAXException {
  60. XMLReader parser = XMLReaderFactory.createXMLReader();
  61. ContentHandler handler = new SheetHandler(sharedStringsTable);
  62. parser.setContentHandler(handler);
  63. return parser;
  64. }// end getSheetParser(SharedStringsTable sharedStringsTable)
  65. // SHEET HANDLER CLASS
  66. private static class SheetHandler extends DefaultHandler {
  67. private SharedStringsTable sharedStringsTable;
  68. private boolean fromSST, isCellValue, isNumber;
  69. private String contents;
  70. //
  71. private SheetHandler(SharedStringsTable sharedStringsTable) {
  72. this.sharedStringsTable = sharedStringsTable;
  73. }// end SheetHandler(SharedStringsTable sharedStringsTable)
  74. @Override
  75. public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
  76. // clear contents cache
  77. contents = "";
  78. // element row represents Row
  79. if(name.equals("row")) {
  80. String rowNumStr = attributes.getValue("r");
  81. Debug.print(this.getClass().getSimpleName(), "Row# " + rowNumStr);
  82. }
  83. // element c represents Cell
  84. else if(name.equals("c")) {
  85. // attribute r represents the cell reference
  86. Debug.print(this.getClass().getSimpleName(), attributes.getValue("r") + " - ");
  87. // attribute t represents the cell type
  88. String cellType = attributes.getValue("t");
  89. if (cellType != null && cellType.equals("s")) {
  90. // cell type s means value will be extracted from SharedStringsTable
  91. fromSST = true;
  92. } else if(cellType == null) {
  93. // *likely a number
  94. isNumber = true;
  95. }
  96. }
  97. // element v represents value of Cell
  98. else if(name.equals("v")) {
  99. isCellValue = true;
  100. }
  101. }// end startElement(String uri, String localName, String name, Attributes attributes)
  102. @Override
  103. public void characters(char[] ch, int start, int length) {
  104. if(isCellValue)
  105. contents += new String(ch, start, length);
  106. }// end characters(char[] ch, int start, int length)
  107. @Override
  108. public void endElement(String uri, String localName, String name) throws SAXException {
  109. if(isCellValue) {
  110. if(fromSST) {
  111. int index = Integer.parseInt(contents);
  112. contents = new XSSFRichTextString(sharedStringsTable.getEntryAt(index)).toString();
  113. Debug.print(this.getClass().getSimpleName(), "Contents: " + contents + " >>");
  114. isCellValue = false;
  115. fromSST = false;
  116. } else if(isNumber) {
  117. Debug.print(this.getClass().getSimpleName(), "Contents (num?): " + contents + " >>");
  118. }
  119. }
  120. }// end endElement(String uri, String localName, String name)
  121. }// end class SheetHandler
  122. }// end class ExcelParser
x6yk4ghg

x6yk4ghg1#

多亏了@gagravarr的建议,我才想出了一个解决办法。我找到了xlsx2csv.java文件的更新实现(同时寻找一种有效的方法来解决我的问题),它将.xlsx文件的每一行打印到csvwriter中。我调整了endrow()方法中的代码,将新行插入数据库,而不是写入csvwriter。还是有点慢,但我已经没有记忆问题了!

相关问题