View Javadoc
1   /*
2   Copyright (c) 2007 Health Market Science, Inc.
3   
4   Licensed under the Apache License, Version 2.0 (the "License");
5   you may not use this file except in compliance with the License.
6   You may obtain a copy of the License at
7   
8       http://www.apache.org/licenses/LICENSE-2.0
9   
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15   */
16  
17  package com.healthmarketscience.jackcess.util;
18  
19  import com.healthmarketscience.jackcess.ColumnBuilder;
20  import com.healthmarketscience.jackcess.DataType;
21  import com.healthmarketscience.jackcess.Database;
22  import com.healthmarketscience.jackcess.Table;
23  import com.healthmarketscience.jackcess.TableBuilder;
24  import com.healthmarketscience.jackcess.impl.ByteUtil;
25  import com.healthmarketscience.jackcess.impl.DatabaseImpl;
26  import java.io.BufferedReader;
27  import java.io.EOFException;
28  import java.io.File;
29  import java.io.FileReader;
30  import java.io.IOException;
31  import java.sql.ResultSet;
32  import java.sql.ResultSetMetaData;
33  import java.sql.SQLException;
34  import java.util.ArrayList;
35  import java.util.List;
36  import java.util.regex.Matcher;
37  import java.util.regex.Pattern;
38  
39  /**
40   * Utility class for importing tables to an Access database from other
41   * sources.  See the {@link Builder} for convenient configuration of the
42   * import functionality.  Note that most scenarios for customizing input data
43   * can be handled by implementing a custom {@link ImportFilter}.
44   *
45   * @author James Ahlborn
46   * @usage _general_class_
47   */
48  public class ImportUtil
49  {
50    /** Batch commit size for copying other result sets into this database */
51    private static final int COPY_TABLE_BATCH_SIZE = 200;
52  
53    /** the platform line separator */
54    static final String LINE_SEPARATOR = System.getProperty("line.separator");
55  
56    private ImportUtil() {}
57  
58    /**
59     * Returns a List of Column instances converted from the given
60     * ResultSetMetaData (this is the same method used by the various {@code
61     * importResultSet()} methods).
62     *
63     * @return a List of Columns
64     */
65    public static List<ColumnBuilder> toColumns(ResultSetMetaData md)
66        throws SQLException, IOException
67    {
68        List<ColumnBuilder> columns = new ArrayList<ColumnBuilder>();
69        for (int i = 1; i <= md.getColumnCount(); i++) {
70          ColumnBuilderlumnBuilder.html#ColumnBuilder">ColumnBuilder column = new ColumnBuilder(md.getColumnLabel(i))
71            .escapeName();
72          int lengthInUnits = md.getColumnDisplaySize(i);
73          column.setSQLType(md.getColumnType(i), lengthInUnits);
74          DataType type = column.getType();
75          // we check for isTrueVariableLength here to avoid setting the length
76          // for a NUMERIC column, which pretends to be var-len, even though it
77          // isn't
78          if(type.isTrueVariableLength() && !type.isLongValue()) {
79            column.setLengthInUnits((short)lengthInUnits);
80          }
81          if(type.getHasScalePrecision()) {
82            int scale = md.getScale(i);
83            int precision = md.getPrecision(i);
84            if(type.isValidScale(scale)) {
85              column.setScale((byte)scale);
86            }
87            if(type.isValidPrecision(precision)) {
88              column.setPrecision((byte)precision);
89            }
90          }
91          columns.add(column);
92        }
93        return columns;
94    }
95  
96    /**
97     * Copy an existing JDBC ResultSet into a new table in this database.
98     * <p>
99     * Equivalent to:
100    * {@code  importResultSet(source, db, name, SimpleImportFilter.INSTANCE);}
101    *
102    * @param name Name of the new table to create
103    * @param source ResultSet to copy from
104    *
105    * @return the name of the copied table
106    *
107    * @see #importResultSet(ResultSet,Database,String,ImportFilter)
108    * @see Builder
109    */
110   public static String importResultSet(ResultSet source, Database db,
111                                        String name)
112     throws SQLException, IOException
113   {
114     return importResultSet(source, db, name, SimpleImportFilter.INSTANCE);
115   }
116 
117   /**
118    * Copy an existing JDBC ResultSet into a new table in this database.
119    * <p>
120    * Equivalent to:
121    * {@code  importResultSet(source, db, name, filter, false);}
122    *
123    * @param name Name of the new table to create
124    * @param source ResultSet to copy from
125    * @param filter valid import filter
126    *
127    * @return the name of the imported table
128    *
129    * @see #importResultSet(ResultSet,Database,String,ImportFilter,boolean)
130    * @see Builder
131    */
132   public static String importResultSet(ResultSet source, Database db,
133                                        String name, ImportFilter filter)
134     throws SQLException, IOException
135   {
136     return importResultSet(source, db, name, filter, false);
137   }
138 
139   /**
140    * Copy an existing JDBC ResultSet into a new (or optionally existing) table
141    * in this database.
142    *
143    * @param name Name of the new table to create
144    * @param source ResultSet to copy from
145    * @param filter valid import filter
146    * @param useExistingTable if {@code true} use current table if it already
147    *                         exists, otherwise, create new table with unique
148    *                         name
149    *
150    * @return the name of the imported table
151    *
152    * @see Builder
153    */
154   public static String importResultSet(ResultSet source, Database db,
155                                        String name, ImportFilter filter,
156                                        boolean useExistingTable)
157     throws SQLException, IOException
158   {
159     ResultSetMetaData md = source.getMetaData();
160 
161     name = TableBuilder.escapeIdentifier(name);
162     Table table = null;
163     if(!useExistingTable || ((table = db.getTable(name)) == null)) {
164       List<ColumnBuilder> columns = toColumns(md);
165       table = createUniqueTable(db, name, columns, md, filter);
166     }
167 
168     List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
169     int numColumns = md.getColumnCount();
170 
171     while (source.next()) {
172       Object[] row = new Object[numColumns];
173       for (int i = 0; i < row.length; i++) {
174         row[i] = source.getObject(i + 1);
175       }
176       row = filter.filterRow(row);
177       if(row == null) {
178         continue;
179       }
180       rows.add(row);
181       if (rows.size() == COPY_TABLE_BATCH_SIZE) {
182         table.addRows(rows);
183         rows.clear();
184       }
185     }
186     if (rows.size() > 0) {
187       table.addRows(rows);
188     }
189 
190     return table.getName();
191   }
192 
193   /**
194    * Copy a delimited text file into a new table in this database.
195    * <p>
196    * Equivalent to:
197    * {@code  importFile(f, name, db, delim, SimpleImportFilter.INSTANCE);}
198    *
199    * @param name Name of the new table to create
200    * @param f Source file to import
201    * @param delim Regular expression representing the delimiter string.
202    *
203    * @return the name of the imported table
204    *
205    * @see #importFile(File,Database,String,String,ImportFilter)
206    * @see Builder
207    */
208   public static String importFile(File f, Database db, String name,
209                                   String delim)
210     throws IOException
211   {
212     return importFile(f, db, name, delim, SimpleImportFilter.INSTANCE);
213   }
214 
215   /**
216    * Copy a delimited text file into a new table in this database.
217    * <p>
218    * Equivalent to:
219    * {@code  importFile(f, name, db, delim, "'", filter, false);}
220    *
221    * @param name Name of the new table to create
222    * @param f Source file to import
223    * @param delim Regular expression representing the delimiter string.
224    * @param filter valid import filter
225    *
226    * @return the name of the imported table
227    *
228    * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
229    * @see Builder
230    */
231   public static String importFile(File f, Database db, String name,
232                                   String delim, ImportFilter filter)
233     throws IOException
234   {
235     return importFile(f, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
236                       filter, false);
237   }
238 
239   /**
240    * Copy a delimited text file into a new table in this database.
241    * <p>
242    * Equivalent to:
243    * {@code  importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, true);}
244    *
245    * @param name Name of the new table to create
246    * @param f Source file to import
247    * @param delim Regular expression representing the delimiter string.
248    * @param quote the quote character
249    * @param filter valid import filter
250    * @param useExistingTable if {@code true} use current table if it already
251    *                         exists, otherwise, create new table with unique
252    *                         name
253    *
254    * @return the name of the imported table
255    *
256    * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
257    * @see Builder
258    */
259   public static String importFile(File f, Database db, String name,
260                                   String delim, char quote,
261                                   ImportFilter filter,
262                                   boolean useExistingTable)
263     throws IOException
264   {
265     return importFile(f, db, name, delim, quote, filter, useExistingTable, true);
266   }
267 
268   /**
269    * Copy a delimited text file into a new table in this database.
270    * <p>
271    * Equivalent to:
272    * {@code  importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, header);}
273    *
274    * @param name Name of the new table to create
275    * @param f Source file to import
276    * @param delim Regular expression representing the delimiter string.
277    * @param quote the quote character
278    * @param filter valid import filter
279    * @param useExistingTable if {@code true} use current table if it already
280    *                         exists, otherwise, create new table with unique
281    *                         name
282    * @param header if {@code false} the first line is not a header row, only
283    *               valid if useExistingTable is {@code true}
284    * @return the name of the imported table
285    *
286    * @see #importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
287    * @see Builder
288    */
289   public static String importFile(File f, Database db, String name,
290                                   String delim, char quote,
291                                   ImportFilter filter,
292                                   boolean useExistingTable,
293                                   boolean header)
294     throws IOException
295   {
296     BufferedReader in = null;
297     try {
298       in = new BufferedReader(new FileReader(f));
299       return importReader(in, db, name, delim, quote, filter,
300                           useExistingTable, header);
301     } finally {
302       ByteUtil.closeQuietly(in);
303     }
304   }
305 
306   /**
307    * Copy a delimited text file into a new table in this database.
308    * <p>
309    * Equivalent to:
310    * {@code  importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);}
311    *
312    * @param name Name of the new table to create
313    * @param in Source reader to import
314    * @param delim Regular expression representing the delimiter string.
315    *
316    * @return the name of the imported table
317    *
318    * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
319    * @see Builder
320    */
321   public static String importReader(BufferedReader in, Database db,
322                                     String name, String delim)
323     throws IOException
324   {
325     return importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);
326   }
327 
328   /**
329    * Copy a delimited text file into a new table in this database.
330    * <p>
331    * Equivalent to:
332    * {@code  importReader(in, db, name, delim, filter, false);}
333    *
334    * @param name Name of the new table to create
335    * @param in Source reader to import
336    * @param delim Regular expression representing the delimiter string.
337    * @param filter valid import filter
338    *
339    * @return the name of the imported table
340    *
341    * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
342    * @see Builder
343    */
344   public static String importReader(BufferedReader in, Database db,
345                                     String name, String delim,
346                                     ImportFilter filter)
347     throws IOException
348   {
349     return importReader(in, db, name, delim, filter, false);
350   }
351 
352   /**
353    * Copy a delimited text file into a new (or optionally exixsting) table in
354    * this database.
355    * <p>
356    * Equivalent to:
357    * {@code  importReader(in, db, name, delim, '"', filter, false);}
358    *
359    * @param name Name of the new table to create
360    * @param in Source reader to import
361    * @param delim Regular expression representing the delimiter string.
362    * @param filter valid import filter
363    * @param useExistingTable if {@code true} use current table if it already
364    *                         exists, otherwise, create new table with unique
365    *                         name
366    *
367    * @return the name of the imported table
368    *
369    * @see Builder
370    */
371   public static String importReader(BufferedReader in, Database db,
372                                     String name, String delim,
373                                     ImportFilter filter,
374                                     boolean useExistingTable)
375     throws IOException
376   {
377     return importReader(in, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
378                         filter, useExistingTable);
379   }
380 
381   /**
382    * Copy a delimited text file into a new (or optionally exixsting) table in
383    * this database.
384    * <p>
385    * Equivalent to:
386    * {@code  importReader(in, db, name, delim, '"', filter, useExistingTable, true);}
387    *
388    * @param name Name of the new table to create
389    * @param in Source reader to import
390    * @param delim Regular expression representing the delimiter string.
391    * @param quote the quote character
392    * @param filter valid import filter
393    * @param useExistingTable if {@code true} use current table if it already
394    *                         exists, otherwise, create new table with unique
395    *                         name
396    *
397    * @return the name of the imported table
398    *
399    * @see Builder
400    */
401   public static String importReader(BufferedReader in, Database db,
402                                     String name, String delim, char quote,
403                                     ImportFilter filter,
404                                     boolean useExistingTable)
405     throws IOException
406   {
407     return importReader(in, db, name, delim, quote, filter, useExistingTable,
408                         true);
409   }
410 
411   /**
412    * Copy a delimited text file into a new (or optionally exixsting) table in
413    * this database.
414    *
415    * @param name Name of the new table to create
416    * @param in Source reader to import
417    * @param delim Regular expression representing the delimiter string.
418    * @param quote the quote character
419    * @param filter valid import filter
420    * @param useExistingTable if {@code true} use current table if it already
421    *                         exists, otherwise, create new table with unique
422    *                         name
423    * @param header if {@code false} the first line is not a header row, only
424    *               valid if useExistingTable is {@code true}
425    *
426    * @return the name of the imported table
427    *
428    * @see Builder
429    */
430   public static String importReader(BufferedReader in, Database db,
431                                     String name, String delim, char quote,
432                                     ImportFilter filter,
433                                     boolean useExistingTable, boolean header)
434     throws IOException
435   {
436     String line = in.readLine();
437     if(DatabaseImpl.isBlank(line)) {
438       return null;
439     }
440 
441     Pattern delimPat = Pattern.compile(delim);
442 
443     try {
444       name = TableBuilder.escapeIdentifier(name);
445       Table table = null;
446       if(!useExistingTable || ((table = db.getTable(name)) == null)) {
447 
448         List<ColumnBuilder> columns = new ArrayList<ColumnBuilder>();
449         Object[] columnNames = splitLine(line, delimPat, quote, in, 0);
450 
451         for (int i = 0; i < columnNames.length; i++) {
452           columns.add(new ColumnBuilder((String)columnNames[i], DataType.TEXT)
453                       .escapeName()
454                       .setLength((short)DataType.TEXT.getMaxSize())
455                       .toColumn());
456         }
457 
458         table = createUniqueTable(db, name, columns, null, filter);
459 
460         // the first row was a header row
461         header = true;
462       }
463 
464       List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
465       int numColumns = table.getColumnCount();
466 
467       if(!header) {
468         // first line is _not_ a header line
469         Object[] data = splitLine(line, delimPat, quote, in, numColumns);
470         data = filter.filterRow(data);
471         if(data != null) {
472           rows.add(data);
473         }
474       }
475 
476       while ((line = in.readLine()) != null)
477       {
478         Object[] data = splitLine(line, delimPat, quote, in, numColumns);
479         data = filter.filterRow(data);
480         if(data == null) {
481           continue;
482         }
483         rows.add(data);
484         if (rows.size() == COPY_TABLE_BATCH_SIZE) {
485           table.addRows(rows);
486           rows.clear();
487         }
488       }
489       if (rows.size() > 0) {
490         table.addRows(rows);
491       }
492 
493       return table.getName();
494 
495     } catch(SQLException e) {
496       throw new IOException(e.getMessage(), e);
497     }
498   }
499 
500   /**
501    * Splits the given line using the given delimiter pattern and quote
502    * character.  May read additional lines for quotes spanning newlines.
503    */
504   private static Object[] splitLine(String line, Pattern delim, char quote,
505                                     BufferedReader in, int numColumns)
506     throws IOException
507   {
508     List<String> tokens = new ArrayList<String>();
509     StringBuilder sb = new StringBuilder();
510     Matcher m = delim.matcher(line);
511     int idx = 0;
512 
513     while(idx < line.length()) {
514 
515       if(line.charAt(idx) == quote) {
516 
517         // find quoted value
518         sb.setLength(0);
519         ++idx;
520         while(true) {
521 
522           int endIdx = line.indexOf(quote, idx);
523 
524           if(endIdx >= 0) {
525 
526             sb.append(line, idx, endIdx);
527             ++endIdx;
528             if((endIdx < line.length()) && (line.charAt(endIdx) == quote)) {
529 
530               // embedded quote
531               sb.append(quote);
532               // keep searching
533               idx = endIdx + 1;
534 
535             } else {
536 
537               // done
538               idx = endIdx;
539               break;
540             }
541 
542           } else {
543 
544             // line wrap
545             sb.append(line, idx, line.length());
546             sb.append(LINE_SEPARATOR);
547 
548             idx = 0;
549             line = in.readLine();
550             if(line == null) {
551               throw new EOFException("Missing end of quoted value " + sb);
552             }
553           }
554         }
555 
556         tokens.add(sb.toString());
557 
558         // skip next delim
559         idx = (m.find(idx) ? m.end() : line.length());
560 
561       } else if(m.find(idx)) {
562 
563         // next unquoted value
564         tokens.add(line.substring(idx, m.start()));
565         idx = m.end();
566 
567       } else {
568 
569         // trailing token
570         tokens.add(line.substring(idx));
571         idx = line.length();
572       }
573     }
574 
575     return tokens.toArray(new Object[Math.max(tokens.size(), numColumns)]);
576   }
577 
578   /**
579    * Returns a new table with a unique name and the given table definition.
580    */
581   private static Table createUniqueTable(Database db, String name,
582                                          List<ColumnBuilder> columns,
583                                          ResultSetMetaData md,
584                                          ImportFilter filter)
585     throws IOException, SQLException
586   {
587     // otherwise, find unique name and create new table
588     String baseName = name;
589     int counter = 2;
590     while(db.getTable(name) != null) {
591       name = baseName + (counter++);
592     }
593 
594     return new TableBuilder(name)
595       .addColumns(filter.filterColumns(columns, md))
596       .toTable(db);
597   }
598 
599   /**
600    * Builder which simplifies configuration of an import operation.
601    */
602   public static class Builder
603   {
604     private Database _db;
605     private String _tableName;
606     private String _delim = ExportUtil.DEFAULT_DELIMITER;
607     private char _quote = ExportUtil.DEFAULT_QUOTE_CHAR;
608     private ImportFilter _filter = SimpleImportFilter.INSTANCE;
609     private boolean _useExistingTable;
610     private boolean _header = true;
611 
612     public Builder(Database db) {
613       this(db, null);
614     }
615 
616     public Builder(Database db, String tableName) {
617       _db = db;
618       _tableName = tableName;
619     }
620 
621     public Builder setDatabase(Database db) {
622       _db = db;
623       return this;
624     }
625 
626     public Builder setTableName(String tableName) {
627       _tableName = tableName;
628       return this;
629     }
630 
631     public Builder setDelimiter(String delim) {
632       _delim = delim;
633       return this;
634     }
635 
636     public Builder setQuote(char quote) {
637       _quote = quote;
638       return this;
639     }
640 
641     public Builder setFilter(ImportFilter filter) {
642       _filter = filter;
643       return this;
644     }
645 
646     public Builder setUseExistingTable(boolean useExistingTable) {
647       _useExistingTable = useExistingTable;
648       return this;
649     }
650 
651     public Builder setHeader(boolean header) {
652       _header = header;
653       return this;
654     }
655 
656     /**
657      * @see ImportUtil#importResultSet(ResultSet,Database,String,ImportFilter,boolean)
658      */
659     public String importResultSet(ResultSet source)
660       throws SQLException, IOException
661     {
662       return ImportUtil.importResultSet(source, _db, _tableName, _filter,
663                                         _useExistingTable);
664     }
665 
666     /**
667      * @see ImportUtil#importFile(File,Database,String,String,char,ImportFilter,boolean,boolean)
668      */
669     public String importFile(File f) throws IOException {
670       return ImportUtil.importFile(f, _db, _tableName, _delim, _quote, _filter,
671                                    _useExistingTable, _header);
672     }
673 
674     /**
675      * @see ImportUtil#importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
676      */
677     public String importReader(BufferedReader reader) throws IOException {
678       return ImportUtil.importReader(reader, _db, _tableName, _delim, _quote,
679                                      _filter, _useExistingTable, _header);
680     }
681   }
682 
683 }