Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

CsvTool.java

Go to the documentation of this file.
00001 package net.threebit.utils.sosc;
00002 
00003 /*
00004         Copyright 2003 Shawn Deleurme
00005         Copyright 2003 Kevin O'Donnell
00006 
00007         This program is free software; you can redistribute it and/or modify
00008         it under the terms of the GNU General Public License as published by
00009         the Free Software Foundation; either version 2 of the License, or
00010         (at your option) any later version.
00011 
00012         This program is distributed in the hope that it will be useful,
00013         but WITHOUT ANY WARRANTY; without even the implied warranty of
00014         MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015         GNU General Public License for more details.
00016 
00017         You should have received a copy of the GNU General Public License
00018         along with this program; if not, write to the Free Software
00019         Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00020 */
00021 
00022 import com.oreilly.servlet.*;
00023 import java.io.*;
00024 import java.sql.*;
00025 import java.util.*;
00026 import java.util.logging.*;
00027 
00032 public class CsvTool {
00033 
00037         private static Logger logger = Logger.getLogger("net.threebit.utils.sosc");
00038 
00042         static public List csvToList (File csvFile, String[] columnOrder) throws Exception {
00043                 return csvToList( new FileReader(csvFile), columnOrder);
00044         }
00045 
00046         static public List csvToList (StringBuffer sb, String[] columnOrder) throws Exception {
00047                 return csvToList( new StringReader(sb.toString()), columnOrder);
00048         }
00049 
00053         static public List csvToList (Reader reader, String[] columnOrder) throws Exception {
00054                 if (columnOrder == null) { throw new Exception("Must specify a column order."); }
00055                 BufferedReader br = new BufferedReader( reader );
00056                 String line;
00057 
00058                 // This array provides a mapping from the column offset in the incoming CSV file to the
00059                 // requested offset in "columnOrder"
00060                 int[] columnMapping = new int[columnOrder.length];
00061 
00062                 // This array keeps track of what columns are encapsulated in quotation marks.
00063                 boolean[] columnQuotes = new boolean[columnOrder.length];
00064 
00065                 // Read in the first line; it contains the names of the incoming CSV file.
00066                 line = br.readLine();
00067                 if (Sosc.debug) { System.out.println("header: '"+line+"'"); }
00068                 if (line == null) { throw new Exception("No data in CSV File"); }
00069                 String[] columnNames = line.split(",");
00070 
00071                 // Make sure that all of the columns specified in columnOrder are avaible.
00072                 // TODO: this is O(n^2)
00073                 for (int x = 0; x < columnOrder.length; x++) {
00074                         boolean ok = false;
00075                         for (int y = 0; y < columnNames.length; y++) {
00076                                 boolean quoted = false;
00077                                 String n = columnNames[y];
00078                                 if (n.matches("^\\\".*\\\"$")) {
00079                                         n = n.substring(1,n.length()-1);
00080                                         quoted = true;
00081                                 }
00082                                 if (columnOrder[x].equalsIgnoreCase(n)) {
00083                                         ok = true;
00084                                         columnMapping[x] = y;
00085                                         columnQuotes[x] = quoted; // remember if this column is quoted in the original CSV file.
00086                                         break;
00087                                 }
00088                         }
00089                         if (Sosc.debug) {
00090                                 System.out.println(
00091                                         "column["+x+"] " +
00092                                         " name: " + columnOrder[x] + 
00093                                         " quoted: " + columnQuotes[x] +
00094                                         " column: " + columnMapping[x]
00095                                 );
00096                         }
00097                         if (! ok) {
00098                                 throw new Exception("CSVFORMATERROR: Could not find column '" + columnOrder[x] + "' in the CSV file");
00099                         }
00100                 }
00101 
00102                 // Read in the whole file and construct a List of String[]s
00103                 int row = 0;
00104                 List lines = new ArrayList(10000);
00105                 String data[] = null;
00106                 String dout[] = null;
00107                 while ((line = br.readLine()) != null) {
00108                         row ++;
00109                         if (Sosc.debug) { System.out.println("line["+row+"]: '"+line+"'"); }
00110                         if (line.equals("")) { continue; }
00111 
00112                         data = line.split(",");
00113                         if (Sosc.debug) { for (int x = 0; x < data.length; x++) { System.out.println("["+x+"]: '"+data[x]+"'"); } }
00114 
00115                         if (data.length < columnOrder.length) {
00116                                 throw new Exception(
00117                                         "Row "+row+" has fewer columns ("+data.length+") than the header row ("+columnOrder.length+"); " +
00118                                         "'"+line+"'"
00119                                 );
00120                         }
00121 
00122                         //
00123                         // Handle the case where there are embedded commas within quoted cells.
00124                         // ie: "124 Madison Avenue, Apartment 4".
00125                         // See CsvToolTest.test5()
00126                         //
00127                         int maxLoops = data.length+2; // avoid infinite loops (+2 is just an adhoc buffer)
00128                         int loop = 1;
00129                         if (Sosc.debug && data.length > columnNames.length) {
00130                                 System.out.println("Line has one or more commas within quoted fields");
00131                         }
00132                         while (data.length > columnNames.length && loop++ < maxLoops) {
00133                                 for (int x = 0; x < data.length; x++) {
00134                                         if (data[x].startsWith("\"")) {
00135                                                 if (data[x+1].endsWith("\"")) {
00136 
00137                                                         String newCell = data[x] + "," + data[x+1];
00138                                                         newCell = newCell.substring(1,newCell.length()-1);
00139                                                         // System.out.println("newCell: '" + newCell + "'");
00140         
00141                                                         String[] newData = new String[data.length-1];
00142                                                         for (int a = 0; a < newData.length; a++) {
00143                                                                 if (a < x) { newData[a] = data[a]; }
00144                                                                 else if (a == x) { newData[a] = newCell; }
00145                                                                 else { newData[a] = data[a+1]; }
00146                                                         }
00147                                                         // for (int a = 0; a < newData.length; a++) { System.out.println("newData: " + a + " " + newData[a]); }
00148                                                         data = newData;
00149                                                         x = data.length;
00150                                                 }
00151                                         }
00152                                 }
00153                         }
00154 
00155                         dout = new String[ columnOrder.length ];
00156                         for (int x = 0; x < columnOrder.length; x++) {
00157                                 dout[x] = data[ columnMapping[x] ];
00158                                 if (Sosc.debug) { System.out.println("  out["+x+"]: dout: '"+dout[x]+"'"); }
00159                                 if (columnQuotes[x]) {
00160                                         if (dout[x].matches("^\".*\"$")) {
00161                                                 dout[x] = dout[x].substring(1,dout[x].length()-1);
00162                                         }
00163                                         else {
00164                                                 throw new Exception("Unhandled embedded comma condition; dout["+x+"]: '"+dout[x]+"' line: '"+line+"'");
00165                                         }
00166                                 }
00167                         }
00168                         lines.add(dout);
00169                 }
00170 
00171                 return lines;
00172         }
00173 
00177         static public StringBuffer listOfListsToCsv (List list) throws Exception {
00178                 return listOfListsToCsv(list,new StringBuffer());
00179         }
00180 
00184         static public StringBuffer listOfListsToCsv (List list, StringBuffer sb) throws Exception {
00185                 if (sb == null) { throw new Exception("No stringbuffer provided; use listOfMapsToCsv(List list) instead."); }
00186 
00187                 // Silently allow a null list; just means the CVS has no rows
00188                 if (list == null) { return sb; }
00189 
00190                 for (Iterator i = list.iterator(); i.hasNext(); ) {
00191                         List row = (List) i.next();
00192                         for (Iterator j = row.iterator(); j.hasNext(); ) {
00193                                 sb.append(j.next());
00194                                 sb.append(",");
00195                         }
00196                         sb.deleteCharAt( sb.length()-1 ); // remove trailing ","
00197                         sb.append("\n");
00198                 }
00199 
00200                 return sb;
00201         }
00202 
00206         static public StringBuffer resultSetToCsv (ResultSet resultSet) throws Exception {
00207                 return resultSetToCsv(resultSet,new StringBuffer(),null);
00208         }
00209 
00236         static public StringBuffer resultSetToCsv (ResultSet resultSet, StringBuffer b, String[] columns) throws Exception {
00237                 if (resultSet == null) { throw new Exception("ResultSet was null"); }
00238                 if (b == null) { throw new Exception("The specified string buffer was null."); }
00239 
00240                 // If no columns where given, then just assume the full set
00241                 System.out.println("Debug A");
00242                 if (columns == null) {
00243                         Set names = DbTool.toMap(resultSet,false).keySet();
00244                         columns = new String[names.size()];
00245                         int x = 0;
00246                         for (Iterator i = names.iterator(); i.hasNext(); ) {
00247                                 columns[x++] = (String) i.next();
00248                         }
00249                 }
00250                 // Make sure that all of the specified columsn are available.
00251                 else {
00252                         Set names = DbTool.toMap(resultSet,false).keySet();
00253                         for (int x = 0; x < columns.length; x++) {
00254                                 if (! names.contains( columns[x] )) {
00255                                         throw new Exception("Specified columns is not available in the result set: '" + columns[x] + "'");
00256                                 }
00257                         }
00258                 }
00259 
00260                 // Add the headers
00261                 System.out.println("Debug B");
00262                 for (int x = 0; x < columns.length; x++) {
00263                         b.append(columns[x]);
00264                         b.append(",");
00265                 }
00266                 b.deleteCharAt(b.length()-1);
00267                 b.append("\n");
00268 
00269                 Map rm = null;
00270                 System.out.println("Debug C");
00271                 do {
00272                         System.out.println("Debug D");
00273                         rm = DbTool.toMap(resultSet,false);
00274                         String col = null;
00275                         for (int x = 0; x < columns.length; x++) {
00276                                 b.append(rm.get(columns[x]));
00277                                 b.append(",");
00278                         }
00279                         b.deleteCharAt(b.length()-1);
00280                         b.append("\n");
00281                 } while (resultSet.next());
00282 
00283                 return b;
00284         }
00285 
00294         static public String[] csvLine (String line) throws Exception {
00295                 try {
00296                         // Outlier
00297                         if (line == null || line.equals("")) { return new String[0]; }
00298         
00299                         ArrayList items = new ArrayList();
00300                         char[] c = line.toCharArray();
00301         
00302                         boolean quoted = false;
00303                         boolean tokenDone = false;
00304                         int TOKEN_START = 1;
00305                         int TOKEN_MIDDLE = 2;
00306                         int mode = TOKEN_START;
00307                         StringBuffer token = null;
00308                         for (int x = 0; x < c.length; x++) {
00309                                 if (mode == TOKEN_START) {
00310 
00311                                         // If the previous token was quoted, then this char must
00312                                         // be a comma and we should skip it.
00313                                         if (quoted && c[x] == ',') { continue; }
00314 
00315                                         // If true, then this token is empty.  Insert an empty string
00316                                         // into 'items' and continue processing.
00317                                         if (c[x] == ',') {
00318                                                 items.add("");
00319                                                 continue;
00320                                         }
00321         
00322                                         // Otherwise, allocate space for a new token.
00323                                         token = new StringBuffer();
00324                                         quoted = false;
00325                                         tokenDone = false;
00326         
00327                                         // If true, then this token starts with a full quote, so ignore
00328                                         // the quote and mark state so we stop processing this token on
00329                                         // sight of a '"' instead of a ','
00330                                         if (c[x] == '"') {
00331                                                 quoted = true;
00332                                         }
00333                                         // Otherwise, this is the first character for a "normal" token.
00334                                         else {
00335                                                 token.append(c[x]);
00336                                         }
00337                                         mode = TOKEN_MIDDLE;
00338                                         continue;
00339                                 }
00340                                 if (mode == TOKEN_MIDDLE) {
00341         
00342                                         // There are only two conditions that can end a token (except for
00343                                         // end of line, which is handed outside this loop.
00344                                         if (c[x] == ',' && (!quoted)) { tokenDone = true; }
00345                                         if (c[x] == '"' && (quoted)) { tokenDone = true; }
00346         
00347                                         // If the token is finished, add the current token contents to "items"
00348                                         // and reset for another token.
00349                                         if (tokenDone) {
00350                                                 items.add(token.toString());
00351                                                 mode = TOKEN_START;
00352                                                 token = null;
00353                                         }
00354                                         // Otherwise, this is just another character in the token; add it and
00355                                         // continue.
00356                                         else {
00357                                                 token.append(c[x]);
00358                                         }
00359                                 }
00360                         }
00361                         // There should always be one more token after running out of charaters (because
00362                         // the terminating "," is not contained on the line.  For example, a line containing
00363                         // '1,2,3' is {1,2,3}, whereas '1,2,3,' is '{1,2,3,}' (empty string as last element).
00364                         if (token != null) {
00365                                 if (quoted) {
00366                                         // This is an error, since the line was not terminated by the end-quote.
00367                                         throw new RuntimeException("Line does not end in '\"'");
00368                                 }
00369                                 else {
00370                                         // OK, just add the data as the last item.
00371                                         items.add( token.toString() );
00372                                 }
00373                         }
00374                         else {
00375                                 if (! quoted) {
00376                                         items.add("");
00377                                 }
00378                                 else {
00379                                         // Do nothing because the last token was quoted, meaning we are done because the
00380                                         // last character was '"'
00381                                 }
00382                         }
00383 
00384                         // Convert to String[] and return.
00385                         String[] result = new String[items.size()];
00386                         for (int x = 0; x < items.size(); x++) { result[x] = (String) items.get(x); }
00387                         return result;
00388 
00389                 }
00390                 catch (Exception e) {
00391                         throw new Exception("Error processing line: '"+line+"'",e);
00392                 }
00393         }
00394 
00395 }

Generated on Mon Jul 14 17:19:19 2003 for SOSC by doxygen1.2.15