/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * InstanceQuery.java * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand * */ package weka.experiment; import weka.core.Attribute; import weka.core.FastVector; import weka.core.Instance; import weka.core.DenseInstance; import weka.core.Instances; import weka.core.Option; import weka.core.OptionHandler; import weka.core.RevisionUtils; import weka.core.SparseInstance; import weka.core.Utils; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.Time; import java.util.Date; import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; /** * Convert the results of a database query into instances. The jdbc * driver and database to be used default to "jdbc.idbDriver" and * "jdbc:idb=experiments.prp". These may be changed by creating * a java properties file called DatabaseUtils.props in user.home or * the current directory. eg:
*
*
* jdbcDriver=jdbc.idbDriver
* jdbcURL=jdbc:idb=experiments.prp
*
* * Command line use just outputs the instances to System.out.
* * Valid options are: * *-Q <query> * SQL query to execute.* *
-S * Return sparse rather than normal instances.* *
-U <username> * The username to use for connecting.* *
-P <password> * The password to use for connecting.* *
-D * Enables debug output.* * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @version $Revision: 5987 $ */ public class InstanceQuery extends DatabaseUtils implements OptionHandler { /** for serialization */ static final long serialVersionUID = 718158370917782584L; /** Determines whether sparse data is created */ boolean m_CreateSparseData = false; /** Query to execute */ String m_Query = "SELECT * from ?"; /** * Sets up the database drivers * * @throws Exception if an error occurs */ public InstanceQuery() throws Exception { super(); } /** * Returns an enumeration describing the available options
*
* @return an enumeration of all options
*/
public Enumeration listOptions () {
Vector result = new Vector();
result.addElement(
new Option("\tSQL query to execute.",
"Q",1,"-Q -Q <query>
* SQL query to execute.
*
* -S
* Return sparse rather than normal instances.
*
* -U <username>
* The username to use for connecting.
*
* -P <password>
* The password to use for connecting.
*
* -D
* Enables debug output.
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions (String[] options)
throws Exception {
String tmpStr;
setSparseData(Utils.getFlag('S',options));
tmpStr = Utils.getOption('Q',options);
if (tmpStr.length() != 0)
setQuery(tmpStr);
tmpStr = Utils.getOption('U',options);
if (tmpStr.length() != 0)
setUsername(tmpStr);
tmpStr = Utils.getOption('P',options);
if (tmpStr.length() != 0)
setPassword(tmpStr);
setDebug(Utils.getFlag('D',options));
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String queryTipText() {
return "The SQL query to execute against the database.";
}
/**
* Set the query to execute against the database
* @param q the query to execute
*/
public void setQuery(String q) {
m_Query = q;
}
/**
* Get the query to execute against the database
* @return the query
*/
public String getQuery() {
return m_Query;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String sparseDataTipText() {
return "Encode data as sparse instances.";
}
/**
* Sets whether data should be encoded as sparse instances
* @param s true if data should be encoded as a set of sparse instances
*/
public void setSparseData(boolean s) {
m_CreateSparseData = s;
}
/**
* Gets whether data is to be returned as a set of sparse instances
* @return true if data is to be encoded as sparse instances
*/
public boolean getSparseData() {
return m_CreateSparseData;
}
/**
* Gets the current settings of InstanceQuery
*
* @return an array of strings suitable for passing to setOptions()
*/
public String[] getOptions () {
Vector options = new Vector();
options.add("-Q");
options.add(getQuery());
if (getSparseData())
options.add("-S");
if (!getUsername().equals("")) {
options.add("-U");
options.add(getUsername());
}
if (!getPassword().equals("")) {
options.add("-P");
options.add(getPassword());
}
if (getDebug())
options.add("-D");
return (String[]) options.toArray(new String[options.size()]);
}
/**
* Makes a database query using the query set through the -Q option
* to convert a table into a set of instances
*
* @return the instances contained in the result of the query
* @throws Exception if an error occurs
*/
public Instances retrieveInstances() throws Exception {
return retrieveInstances(m_Query);
}
/**
* Makes a database query to convert a table into a set of instances
*
* @param query the query to convert to instances
* @return the instances contained in the result of the query, NULL if the
* SQL query doesn't return a ResultSet, e.g., DELETE/INSERT/UPDATE
* @throws Exception if an error occurs
*/
public Instances retrieveInstances(String query) throws Exception {
if (m_Debug)
System.err.println("Executing query: " + query);
connectToDatabase();
if (execute(query) == false) {
if (m_PreparedStatement.getUpdateCount() == -1) {
throw new Exception("Query didn't produce results");
}
else {
if (m_Debug)
System.err.println(m_PreparedStatement.getUpdateCount()
+ " rows affected.");
close();
return null;
}
}
ResultSet rs = getResultSet();
if (m_Debug)
System.err.println("Getting metadata...");
ResultSetMetaData md = rs.getMetaData();
if (m_Debug)
System.err.println("Completed getting metadata...");
// Determine structure of the instances
int numAttributes = md.getColumnCount();
int [] attributeTypes = new int [numAttributes];
Hashtable [] nominalIndexes = new Hashtable [numAttributes];
FastVector [] nominalStrings = new FastVector [numAttributes];
for (int i = 1; i <= numAttributes; i++) {
/* switch (md.getColumnType(i)) {
case Types.CHAR:
case Types.VARCHAR:
case Types.LONGVARCHAR:
case Types.BINARY:
case Types.VARBINARY:
case Types.LONGVARBINARY:*/
switch (translateDBColumnType(md.getColumnTypeName(i))) {
case STRING :
//System.err.println("String --> nominal");
attributeTypes[i - 1] = Attribute.NOMINAL;
nominalIndexes[i - 1] = new Hashtable();
nominalStrings[i - 1] = new FastVector();
break;
case TEXT:
//System.err.println("Text --> string");
attributeTypes[i - 1] = Attribute.STRING;
nominalIndexes[i - 1] = new Hashtable();
nominalStrings[i - 1] = new FastVector();
break;
case BOOL:
//System.err.println("boolean --> nominal");
attributeTypes[i - 1] = Attribute.NOMINAL;
nominalIndexes[i - 1] = new Hashtable();
nominalIndexes[i - 1].put("false", new Double(0));
nominalIndexes[i - 1].put("true", new Double(1));
nominalStrings[i - 1] = new FastVector();
nominalStrings[i - 1].addElement("false");
nominalStrings[i - 1].addElement("true");
break;
case DOUBLE:
//System.err.println("BigDecimal --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case BYTE:
//System.err.println("byte --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case SHORT:
//System.err.println("short --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case INTEGER:
//System.err.println("int --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case LONG:
//System.err.println("long --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case FLOAT:
//System.err.println("float --> numeric");
attributeTypes[i - 1] = Attribute.NUMERIC;
break;
case DATE:
attributeTypes[i - 1] = Attribute.DATE;
break;
case TIME:
attributeTypes[i - 1] = Attribute.DATE;
break;
default:
//System.err.println("Unknown column type");
attributeTypes[i - 1] = Attribute.STRING;
}
}
// For sqlite
// cache column names because the last while(rs.next()) { iteration for
// the tuples below will close the md object:
Vector