700字范文,内容丰富有趣,生活中的好帮手!
700字范文 > Java代码实现百万级数据XLS文件和XLSX文件的读取(已经过测试 安心使用)

Java代码实现百万级数据XLS文件和XLSX文件的读取(已经过测试 安心使用)

时间:2021-12-27 07:43:29

相关推荐

Java代码实现百万级数据XLS文件和XLSX文件的读取(已经过测试  安心使用)

前言

工作中通常会有XLS和XLSX文件转换成CSV文件的需求,虽然Excel和WPS可以将这两种格式文件很方便的转换,但是不可能将生产上的数据下载下来转换后再上传上去(这真是个疯狂的想法...)。目前网上的解决方案是通过在python脚本中导入pandas模块然后实现转换,虽然很便捷,但是在java程序中又显得格格不入。

/02/27 更新最新版官方方案:

提示:在测试官方的csv转换功能时,生成的CSV文件总会出现乱码,待跟踪......推荐有需要的程序员读取后自行写入csv文件。

0、pom.xml文件需引入依赖

<dependency><groupId>org.apache.poi</groupId><artifactId>poi-examples</artifactId><version>5.2.3</version></dependency>

1、百万级数据文件读取

// 测试方法import org.apache.poi.examples.hssf.eventusermodel.XLS2CSVmra;import org.apache.poi.examples.xssf.eventusermodel.XLSX2CSV;public class Test001 {public static void main(String[] args) throws Exception {// xlsx文件读取Xlsx2Csv.main(new String[]{"D:\\通知消息记录浏览.xlsx"});// xls文件读取XLS2CSVmra.main(new String[]{"D:\\通知消息记录浏览.xls"});}}

------------------------------------------------时间分割线------------------------------------------------

旧版官方推荐方案正文代码​​​​​​​:

0、pom.xml文件需引入依赖

<dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>5.2.2</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>5.2.2</version></dependency>

1、百万级数据Xlsx文件读取

import org.apache.poi.ooxml.util.SAXHelper;import org.apache.poi.openxml4j.exceptions.OpenXML4JException;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.openxml4j.opc.PackageAccess;import org.apache.poi.ss.usermodel.DataFormatter;import org.apache.poi.ss.util.CellAddress;import org.apache.poi.ss.util.CellReference;import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;import org.apache.poi.xssf.eventusermodel.XSSFReader;import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;import org.apache.poi.xssf.model.StylesTable;import org.apache.poi.xssf.usermodel.XSSFComment;import org.xml.sax.ContentHandler;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;import javax.xml.parsers.ParserConfigurationException;import java.io.File;import java.io.IOException;import java.io.InputStream;import java.io.PrintStream;public class Xlsx2Csv {private final OPCPackage xlsxPackage;private final int minColumns;private final PrintStream output;private class SheetToCSV implements XSSFSheetXMLHandler.SheetContentsHandler {private boolean firstCellOfRow = false;private int currentRow = -1;private int currentCol = -1;private void outputMissingRows(int number) {for (int i = 0; i < number; i++) {for (int j = 0; j < minColumns; j++) {output.append(',');}output.append('\n');}}@Overridepublic void startRow(int rowNum) {outputMissingRows(rowNum - currentRow - 1);firstCellOfRow = true;currentRow = rowNum;currentCol = -1;}public void endRow(int rowNum) {for (int i = currentCol; i < minColumns; i++) {output.append(',');}output.append(',');}public void cell(String cellReference, String formattedValue, XSSFComment xssfComment) {if (firstCellOfRow) {firstCellOfRow = false;} else {output.append(',');}if (cellReference == null) {cellReference = new CellAddress(currentRow, currentCol).formatAsString();}int thisCol = (new CellReference(cellReference)).getCol();int missedCols = thisCol - currentCol - 1;for (int i = 0; i < missedCols; i++) {output.append(',');}currentCol = thisCol;try {Double.parseDouble(formattedValue);output.append(formattedValue);} catch (NumberFormatException e) {output.append('"');output.append(formattedValue);output.append('"');}}@Overridepublic void headerFooter(String s, boolean b, String s1) {//CSV文件没有页眉页脚}}public Xlsx2Csv(OPCPackage pkg, PrintStream output, int minColumns) {this.xlsxPackage = pkg;this.output = output;this.minColumns = minColumns;}public void processSheet(StylesTable styles, ReadOnlySharedStringsTable readOnlyTableString, XSSFSheetXMLHandler.SheetContentsHandler sheetHandler, InputStream sheetInputStream) {DataFormatter formatter = new DataFormatter();InputSource sheetSource = new InputSource(sheetInputStream);try {XMLReader reader = SAXHelper.newXMLReader();ContentHandler handler = new XSSFSheetXMLHandler(styles, readOnlyTableString, sheetHandler, formatter, false);reader.setContentHandler(handler);reader.parse(sheetSource);} catch (SAXException | ParserConfigurationException | IOException e) {e.printStackTrace();}}public void process() throws IOException, SAXException, OpenXML4JException {ReadOnlySharedStringsTable readOnlyTableString = new ReadOnlySharedStringsTable(this.xlsxPackage);XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);StylesTable styles = xssfReader.getStylesTable();XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();while (iter.hasNext()) {InputStream inputStream = iter.next();processSheet(styles, readOnlyTableString, new SheetToCSV(), inputStream);inputStream.close();}}public static void main(String[] args) throws OpenXML4JException, SAXException, IOException {File xlsxFile = new File(args[0]);if (!xlsxFile.exists()) {return;}int minColumns = -1;OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);Xlsx2Csv xlsx2Csv = new Xlsx2Csv(p, System.out, minColumns);xlsx2Csv.process();p.close();}}

// 测试方法public class Test001 {public static void main(String[] args) throws OpenXML4JException, IOException, SAXException {Xlsx2Csv.main(new String[]{"D:\\通知消息记录浏览.xls"});}}

2、百万级数据Xls文件读取

import org.apache.poi.examples.hssf.eventusermodel.XLS2CSVmra;import org.apache.poi.hssf.eventusermodel.*;import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;import org.apache.poi.hssf.model.HSSFFormulaParser;import org.apache.poi.hssf.record.*;import org.apache.poi.hssf.usermodel.HSSFWorkbook;import org.apache.poi.poifs.filesystem.POIFSFileSystem;import java.io.FileInputStream;import java.io.IOException;import java.io.PrintStream;import java.util.ArrayList;import java.util.List;public class Xls2CsvMra implements HSSFListener {private int minColumns;private POIFSFileSystem fs;private PrintStream output;private int lastRowNumber;private int lastColumnNumber;// 是否输出表达式或者值private boolean outputFormulaValues = false;// 解析公式private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;private HSSFWorkbook stubWorkbook;// 处理过程中收集的记录private SSTRecord sstRecord;private FormatTrackingHSSFListener formatListener;// 正在处理的sheetprivate int sheetIndex = -1;private BoundSheetRecord[] orderedBSRs;private List<BoundSheetRecord> boundSheetRecords = new ArrayList<>();// 处理带有字符串结果的公式private int nextRow;private int nextColumn;private boolean outputNextStringRecord;// 创建新的XLS到CSV的转化器public Xls2CsvMra(POIFSFileSystem fs, PrintStream output, int minColumns) {this.fs = fs;this.output = output;this.minColumns = minColumns;}// 创建新的XLS到CSV的转换器public Xls2CsvMra(String filename, int minColumns) throws IOException {this(new POIFSFileSystem(new FileInputStream(filename)), System.out, minColumns);}public Xls2CsvMra(String inputFilePath, String outputFilePath) throws IOException {fs = new POIFSFileSystem(new FileInputStream(inputFilePath));output = new PrintStream(outputFilePath);minColumns = -1;}// 开始将XLS文件处理为CSVpublic void process() throws IOException {MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);formatListener = new FormatTrackingHSSFListener(listener);HSSFEventFactory factory = new HSSFEventFactory();HSSFRequest request = new HSSFRequest();if (outputFormulaValues) {request.addListenerForAllRecords(formatListener);} else {workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);request.addListenerForAllRecords(workbookBuildingListener);}factory.processWorkbookEvents(request, fs);}@Overridepublic void processRecord(Record record) {int thisRow = -1;int thisColumn = -1;String thisStr = null;switch (record.getSid()) {case BoundSheetRecord.sid:boundSheetRecords.add((BoundSheetRecord) record);break;case BOFRecord.sid:BOFRecord br = (BOFRecord) record;if (br.getType() == BOFRecord.TYPE_WORKSHEET) {// 创建子工作簿if (workbookBuildingListener != null && stubWorkbook == null) {stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();}sheetIndex++;if (orderedBSRs == null) {orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);}output.println();output.println(orderedBSRs[sheetIndex].getSheetname() + " [" + (sheetIndex + 1) + "]:");}break;case SSTRecord.sid:sstRecord = (SSTRecord) record;break;case BlankRecord.sid:BlankRecord berec = (BlankRecord) record;thisRow = berec.getRow();thisColumn = berec.getColumn();thisStr = "";break;case FormulaRecord.sid:FormulaRecord frec = (FormulaRecord) record;thisRow = frec.getRow();thisColumn = frec.getColumn();if (outputFormulaValues) {if (Double.isNaN(frec.getValue())) {outputNextStringRecord = true;nextRow = frec.getRow();nextColumn = frec.getColumn();} else {thisStr = formatListener.formatNumberDateCell(frec);}} else {thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';}break;case StringRecord.sid:if (outputNextStringRecord) {StringRecord srec = (StringRecord) record;thisStr = srec.getString();thisRow = nextRow;thisColumn = nextColumn;outputNextStringRecord = false;}break;case LabelRecord.sid:LabelRecord lrec = (LabelRecord) record;thisRow = lrec.getRow();thisColumn = lrec.getColumn();thisStr = '"' + lrec.getValue() + '"';break;case LabelSSTRecord.sid:LabelSSTRecord lsrec = (LabelSSTRecord) record;thisRow = lsrec.getRow();thisColumn = lsrec.getColumn();if (sstRecord == null) {thisStr = '"' + "(No SST Record, can't identify string)" + '"';} else {thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex()).toString() + '"';}break;case NoteRecord.sid:NoteRecord nrec = (NoteRecord) record;thisRow = nrec.getRow();thisColumn = nrec.getColumn();thisStr = '"' + "(TODO)" + '"';break;case NumberRecord.sid:NumberRecord numrec = (NumberRecord) record;thisRow = numrec.getRow();thisColumn = numrec.getColumn();thisStr = formatListener.formatNumberDateCell(numrec);break;case RKRecord.sid:RKRecord rkrec = (RKRecord) record;thisRow = rkrec.getRow();thisColumn = rkrec.getColumn();thisStr = '"' + "(TODO)" + '"';break;default:break;}if (thisRow != -1 && thisRow != lastRowNumber) {lastColumnNumber = -1;}if (record instanceof MissingCellDummyRecord) {MissingCellDummyRecord mc = (MissingCellDummyRecord) record;thisRow = mc.getRow();thisColumn = mc.getColumn();thisStr = "";}if (thisStr != null) {if (thisColumn > 0) {output.print(',');}output.print(thisStr);}if (thisRow > -1) {lastRowNumber = thisRow;}if (thisColumn > -1) {lastColumnNumber = thisColumn;}if (record instanceof LastCellOfRowDummyRecord) {if (minColumns > 0) {if (lastColumnNumber == -1) {lastColumnNumber = 0;}for (int i = lastColumnNumber; i < (minColumns); i++) {output.print(',');}}lastColumnNumber = -1;output.println();}}public static void main(String[] args) throws IOException {if (args.length < 1) {System.err.println("Use:");System.err.println(" XLS2CSVmra <xls file> [min columns]");System.exit(1);}int minColumns = -1;if (args.length >= 2) {minColumns = Integer.parseInt(args[1]);}XLS2CSVmra xls2csv = new XLS2CSVmra(args[0], minColumns);xls2csv.process();}}

// 测试方法public class Test001 {public static void main(String[] args) throws Exception {XLS2CSVmra.main(new String[]{"D:\\通知消息记录浏览.xls"});}}

如有问题请在下方留言,我会认真的查看每一条并尽量的给予回复。谢谢观看^^

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。