/* * Copyright (C) 2003-2007 eXo Platform SAS. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Affero General Public License * as published by the Free Software Foundation; either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see. */ package org.exoplatform.services.document.impl; import java.io.IOException; import java.io.InputStream; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Properties; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFCellStyle; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.exoplatform.services.document.DocumentReadException; /** * Created by The eXo Platform SAS A parser of Microsoft Excel files. * * @author Phung Hai Nam * @author Gennady Azarenkov * @version Oct 21, 2005 */ public class MSExcelDocumentReader extends BaseDocumentReader { private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSZ"); /** * Get the application/excel mime type. * * @return The string with application/excel mime type. */ public String[] getMimeTypes() { return new String[] { "application/excel", "application/xls" }; } /** * Returns only a text from .xls file content. * * @param is an input stream with .xls file content. * @return The string only with text from file content. */ public String getContentAsText(InputStream is) throws IOException, DocumentReadException { if (is == null) { throw new NullPointerException("InputStream is null."); } String text = ""; try { HSSFWorkbook wb; try{ wb = new HSSFWorkbook(is); }catch(IOException e){ return text; } for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++) { HSSFSheet sheet = wb.getSheetAt(sheetNum); if (sheet != null) { for (int rowNum = sheet.getFirstRowNum(); rowNum <= sheet.getLastRowNum(); rowNum++) { HSSFRow row = sheet.getRow(rowNum); if (row != null) { int lastcell = row.getLastCellNum(); for (int k = 0; k < lastcell; k++) { HSSFCell cell = row.getCell((short) k); if (cell != null) { switch (cell.getCellType()) { case HSSFCell.CELL_TYPE_NUMERIC: { double d = cell.getNumericCellValue(); if (isCellDateFormatted(cell)) { Date date = HSSFDateUtil.getJavaDate(d); String cellText = this.DATE_FORMAT.format(date); text = text + cellText + " "; } else { text = text + d + " "; } break; } case HSSFCell.CELL_TYPE_FORMULA: text = text + cell.getCellFormula().toString() + " "; break; case HSSFCell.CELL_TYPE_BOOLEAN: text = text + cell.getBooleanCellValue() + " "; break; case HSSFCell.CELL_TYPE_ERROR: text = text + cell.getErrorCellValue() + " "; break; case HSSFCell.CELL_TYPE_STRING: text = text + cell.getStringCellValue().toString() + " "; break; default: break; } } } } } } } } finally { if (is != null) { try { is.close(); } catch (IOException e) { } } } return text; } public String getContentAsText(InputStream is, String encoding) throws IOException, DocumentReadException { // Ignore encoding return getContentAsText(is); } /* * (non-Javadoc) * * @see org.exoplatform.services.document.DocumentReader#getProperties(java.io. * InputStream) */ public Properties getProperties(InputStream is) throws IOException, DocumentReadException { POIPropertiesReader reader = new POIPropertiesReader(); reader.readDCProperties(is); return reader.getProperties(); } public static boolean isCellDateFormatted(HSSFCell cell) { boolean bDate = false; double d = cell.getNumericCellValue(); if (HSSFDateUtil.isValidExcelDate(d)) { HSSFCellStyle style = cell.getCellStyle(); int i = style.getDataFormat(); switch (i) { case 0xe: // m/d/yy case 0xf: // d-mmm-yy case 0x10: // d-mmm case 0x11: // mmm-yy case 0x12: // h:mm AM/PM case 0x13: // h:mm:ss AM/PM case 0x14: // h:mm case 0x15: // h:mm:ss case 0x16: // m/d/yy h:mm case 0x2d: // mm:ss case 0x2e: // [h]:mm:ss case 0x2f: // mm:ss.0 case 0xa5: // ?? case 0xa7: // ?? case 0xa9: // ?? case 0xac: // mm:dd:yy not specified in javadoc case 0xad: // yyyy-mm-dd not specified in javadoc case 0xae: // mm:dd:yyyy not specified in javadoc case 0xaf: // m:d:yy not specified in javadoc bDate = true; break; default: bDate = false; break; } } return bDate; } }