Context Navigation

source: src/main/java/weka/experiment/DatabaseResultProducer.java @ 27

Last change on this file since 27 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 22.1 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* DatabaseResultProducer.java
	19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23
	24	package weka.experiment;
	25
	26	import weka.core.AdditionalMeasureProducer;
	27	import weka.core.Instances;
	28	import weka.core.Option;
	29	import weka.core.OptionHandler;
	30	import weka.core.RevisionUtils;
	31	import weka.core.Utils;
	32
	33	import java.util.Enumeration;
	34	import java.util.Vector;
	35
	36	/**
	37	<!-- globalinfo-start -->
	38	* Examines a database and extracts out the results produced by the specified ResultProducer and submits them to the specified ResultListener. If a result needs to be generated, the ResultProducer is used to obtain the result.
	39	* <p/>
	40	<!-- globalinfo-end -->
	41	*
	42	<!-- options-start -->
	43	* Valid options are: <p/>
	44	*
	45	* <pre> -F <field name>
	46	* The name of the database field to cache over.
	47	* eg: "Fold" (default none)</pre>
	48	*
	49	* <pre> -W <class name>
	50	* The full class name of a ResultProducer.
	51	* eg: weka.experiment.CrossValidationResultProducer</pre>
	52	*
	53	* <pre>
	54	* Options specific to result producer weka.experiment.CrossValidationResultProducer:
	55	* </pre>
	56	*
	57	* <pre> -X <number of folds>
	58	* The number of folds to use for the cross-validation.
	59	* (default 10)</pre>
	60	*
	61	* <pre> -D
	62	* Save raw split evaluator output.</pre>
	63	*
	64	* <pre> -O <file/directory name/path>
	65	* The filename where raw output will be stored.
	66	* If a directory name is specified then individual
	67	* outputs will be gzipped, otherwise all output will be
	68	* zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)</pre>
	69	*
	70	* <pre> -W <class name>
	71	* The full class name of a SplitEvaluator.
	72	* eg: weka.experiment.ClassifierSplitEvaluator</pre>
	73	*
	74	* <pre>
	75	* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
	76	* </pre>
	77	*
	78	* <pre> -W <class name>
	79	* The full class name of the classifier.
	80	* eg: weka.classifiers.bayes.NaiveBayes</pre>
	81	*
	82	* <pre> -C <index>
	83	* The index of the class for which IR statistics
	84	* are to be output. (default 1)</pre>
	85	*
	86	* <pre> -I <index>
	87	* The index of an attribute to output in the
	88	* results. This attribute should identify an
	89	* instance in order to know which instances are
	90	* in the test set of a cross validation. if 0
	91	* no output (default 0).</pre>
	92	*
	93	* <pre> -P
	94	* Add target and prediction columns to the result
	95	* for each fold.</pre>
	96	*
	97	* <pre>
	98	* Options specific to classifier weka.classifiers.rules.ZeroR:
	99	* </pre>
	100	*
	101	* <pre> -D
	102	* If set, classifier is run in debug mode and
	103	* may output additional info to the console</pre>
	104	*
	105	<!-- options-end -->
	106	*
	107	* @author Len Trigg (trigg@cs.waikato.ac.nz)
	108	* @version $Revision: 1.18 $
	109	*/
	110	public class DatabaseResultProducer
	111	extends DatabaseResultListener
	112	implements ResultProducer, OptionHandler, AdditionalMeasureProducer {
	113
	114	/** for serialization */
	115	static final long serialVersionUID = -5620660780203158666L;
	116
	117	/** The dataset of interest */
	118	protected Instances m_Instances;
	119
	120	/** The ResultListener to send results to */
	121	protected ResultListener m_ResultListener = new CSVResultListener();
	122
	123	/** The ResultProducer used to generate results */
	124	protected ResultProducer m_ResultProducer
	125	= new CrossValidationResultProducer();
	126
	127	/** The names of any additional measures to look for in SplitEvaluators */
	128	protected String [] m_AdditionalMeasures = null;
	129
	130	/**
	131	* Returns a string describing this result producer
	132	* @return a description of the result producer suitable for
	133	* displaying in the explorer/experimenter gui
	134	*/
	135	public String globalInfo() {
	136	return "Examines a database and extracts out "
	137	+"the results produced by the specified ResultProducer "
	138	+"and submits them to the specified ResultListener. If a result needs "
	139	+"to be generated, the ResultProducer is used to obtain the result.";
	140	}
	141
	142	/**
	143	* Creates the DatabaseResultProducer, letting the parent constructor do
	144	* its thing.
	145	*
	146	* @throws Exception if an error occurs
	147	*/
	148	public DatabaseResultProducer() throws Exception {
	149
	150	super();
	151	}
	152
	153	/**
	154	* Gets the keys for a specified run number. Different run
	155	* numbers correspond to different randomizations of the data. Keys
	156	* produced should be sent to the current ResultListener
	157	*
	158	* @param run the run number to get keys for.
	159	* @throws Exception if a problem occurs while getting the keys
	160	*/
	161	public void doRunKeys(int run) throws Exception {
	162	// Refuse to run until a producer, a listener and a dataset have all been set
	163	if (m_ResultProducer == null) {
	164	throw new Exception("No ResultProducer set");
	165	}
	166	if (m_ResultListener == null) {
	167	throw new Exception("No ResultListener set");
	168	}
	169	if (m_Instances == null) {
	170	throw new Exception("No Instances set");
	171	}
	172
	173	// Register this object as the wrapped producer's listener, then delegate the key generation to it
	174	m_ResultProducer.setResultListener(this);
	175	m_ResultProducer.setInstances(m_Instances);
	176	m_ResultProducer.doRunKeys(run);
	177	}
	178
	179	/**
	180	* Gets the results for a specified run number. Different run
	181	* numbers correspond to different randomizations of the data. Results
	182	* produced should be sent to the current ResultListener
	183	*
	184	* @param run the run number to get results for.
	185	* @throws Exception if a problem occurs while getting the results
	186	*/
	187	public void doRun(int run) throws Exception {
	188	// Refuse to run until a producer, a listener and a dataset have all been set
	189	if (m_ResultProducer == null) {
	190	throw new Exception("No ResultProducer set");
	191	}
	192	if (m_ResultListener == null) {
	193	throw new Exception("No ResultListener set");
	194	}
	195	if (m_Instances == null) {
	196	throw new Exception("No Instances set");
	197	}
	198
	199	// Register this object as the wrapped producer's listener, then delegate the run to it
	200	m_ResultProducer.setResultListener(this);
	201	m_ResultProducer.setInstances(m_Instances);
	202	m_ResultProducer.doRun(run);
	203
	204	}
	205
	206	/**
	207	* Prepare for the results to be received.
	208	*
	209	* @param rp the ResultProducer that will generate the results
	210	* @throws Exception if an error occurs during preprocessing.
	211	*/
	212	public void preProcess(ResultProducer rp) throws Exception {
	213
	214	super.preProcess(rp);
	215	if (m_ResultListener == null) {
	216	throw new Exception("No ResultListener set");
	217	}
	218	m_ResultListener.preProcess(this);
	219	}
	220
	221	/**
	222	* When this method is called, it indicates that no more results
	223	* will be sent that need to be grouped together in any way.
	224	*
	225	* @param rp the ResultProducer that generated the results
	226	* @throws Exception if an error occurs
	227	*/
	228	public void postProcess(ResultProducer rp) throws Exception {
	229
	230	super.postProcess(rp);
	231	m_ResultListener.postProcess(this);
	232	}
	233
	234	/**
	235	* Prepare to generate results. The ResultProducer should call
	236	* preProcess(this) on the ResultListener it is to send results to.
	237	*
	238	* @throws Exception if an error occurs during preprocessing.
	239	*/
	240	public void preProcess() throws Exception {
	241
	242	if (m_ResultProducer == null) {
	243	throw new Exception("No ResultProducer set");
	244	}
	245	m_ResultProducer.setResultListener(this);
	246	m_ResultProducer.preProcess();
	247	}
	248
	249	/**
	250	* When this method is called, it indicates that no more requests to
	251	* generate results for the current experiment will be sent. The
	252	* ResultProducer should call postProcess(this) on the
	253	* ResultListener it is to send results to.
	254	*
	255	* @throws Exception if an error occurs
	256	*/
	257	public void postProcess() throws Exception {
	258
	259	m_ResultProducer.postProcess();
	260	}
	261
	262	/**
	263	* Accepts results from a ResultProducer.
	264	*
	265	* @param rp the ResultProducer that generated the results
	266	* @param key an array of Objects (Strings or Doubles) that uniquely
	267	* identify a result for a given ResultProducer with given compatibilityState
	268	* @param result the results stored in an array. The objects stored in
	269	* the array may be Strings, Doubles, or null (for the missing value).
	270	* @throws Exception if the result could not be accepted.
	271	*/
	272	public void acceptResult(ResultProducer rp, Object [] key, Object [] result)
	273	throws Exception {
	274	// Results are only accepted from the producer this object wraps
	275	if (m_ResultProducer != rp) {
	276	throw new Error("Unrecognized ResultProducer sending results!!");
	277	}
	278	// System.err.println("DBRP::acceptResult");
	279
	280	// Is the result needed by the listener?
	281	boolean isRequiredByListener = m_ResultListener.isResultRequired(this,
	282	key);
	283	// Is the result already in the database?
	284	boolean isRequiredByDatabase = super.isResultRequired(rp, key);
	285
	286	// Insert it into the database here (via the superclass listener)
	287	if (isRequiredByDatabase) {
	288	// We could alternatively throw an exception if we only want values
	289	// that are already in the database
	290	if (result != null) {
	291
	292	// null result could occur from a chain of doRunKeys calls
	293	super.acceptResult(rp, key, result);
	294	}
	295	}
	296
	297	// Pass it on to the listener; a null result is forwarded unchanged
	298	if (isRequiredByListener) {
	299	m_ResultListener.acceptResult(this, key, result);
	300	}
	301	}
	302
	303	/**
	304	* Determines whether the results for a specified key must be
	305	* generated. If the listener needs a result that the database does not
	306	* require, it is read from the results table and passed to the listener.
	307	* @param rp the ResultProducer wanting to generate the results
	308	* @param key an array of Objects (Strings or Doubles) that uniquely
	309	* identify a result for a given ResultProducer with given compatibilityState
	310	* @return true if the result should be generated
	311	* @throws Exception if it could not be determined if the result
	312	* is needed.
	313	*/
	314	public boolean isResultRequired(ResultProducer rp, Object [] key)
	315	throws Exception {
	316
	317	if (m_ResultProducer != rp) {
	318	throw new Error("Unrecognized ResultProducer sending results!!");
	319	}
	320	// System.err.println("DBRP::isResultRequired");
	321
	322	// Is the result needed by the listener?
	323	boolean isRequiredByListener = m_ResultListener.isResultRequired(this,
	324	key);
	325	// Is the result already in the database?
	326	boolean isRequiredByDatabase = super.isResultRequired(rp, key);
	327
	328	if (!isRequiredByDatabase && isRequiredByListener) {
	329	// Pass the result through to the listener
	330	Object [] result = getResultFromTable(m_ResultsTableName,
	331	rp, key);
	332	System.err.println("Got result from database: "
	333	+ DatabaseUtils.arrayToString(result));
	334	m_ResultListener.acceptResult(this, key, result);
	335	return false;
	336	}
	337	// Otherwise generate the result if either the listener or the database still needs it
	338	return (isRequiredByListener || isRequiredByDatabase);
	339	}
	340
	341	/**
	342	* Gets the names of each of the columns produced for a single run.
	343	*
	344	* @return an array containing the name of each column
	345	* @throws Exception if something goes wrong.
	346	*/
	347	public String [] getKeyNames() throws Exception {
	348
	349	return m_ResultProducer.getKeyNames();
	350	}
	351
	352	/**
	353	* Gets the data types of each of the columns produced for a single run.
	354	* This method should really be static.
	355	*
	356	* @return an array containing objects of the type of each column. The
	357	* objects should be Strings, or Doubles.
	358	* @throws Exception if something goes wrong.
	359	*/
	360	public Object [] getKeyTypes() throws Exception {
	361
	362	return m_ResultProducer.getKeyTypes();
	363	}
	364
	365	/**
	366	* Gets the names of each of the columns produced for a single run.
	367	* This method delegates to the wrapped ResultProducer and returns
	368	* its result names unchanged.
	369	* No field is added for the number of results, and no "Dev_" or
	370	* "Avg_" prefixes are prepended here; any such naming comes from
	371	* the wrapped ResultProducer itself.
	372	*
	373	* @return an array containing the name of each column
	374	* @throws Exception if something goes wrong.
	375	*/
	376	public String [] getResultNames() throws Exception {
	377
	378	return m_ResultProducer.getResultNames();
	379	}
	380
	381	/**
	382	* Gets the data types of each of the columns produced for a single run.
	383	*
	384	* @return an array containing objects of the type of each column. The
	385	* objects should be Strings, or Doubles.
	386	* @throws Exception if something goes wrong.
	387	*/
	388	public Object [] getResultTypes() throws Exception {
	389
	390	return m_ResultProducer.getResultTypes();
	391	}
	392
	393	/**
	394	* Gets a description of the internal settings of the result
	395	* producer, sufficient for distinguishing a ResultProducer
	396	* instance from another with different settings (ignoring
	397	* those settings set through this interface). For example,
	398	* a cross-validation ResultProducer may have a setting for the
	399	* number of folds. For a given state, the results produced should
	400	* be compatible. Typically if a ResultProducer is an OptionHandler,
	401	* this string will represent the command line arguments required
	402	* to set the ResultProducer to that state.
	403	*
	404	* @return the description of the ResultProducer state, or
	405	* "&lt;null ResultProducer&gt;" if no ResultProducer is set
	406	*/
	407	public String getCompatibilityState() {
	408	// Only ask the wrapped producer for its state when one is actually set
	409	String result = "";
	410	if (m_ResultProducer == null) {
	411	result += "<null ResultProducer>";
	412	} else {
	413	result += "-W " + m_ResultProducer.getClass().getName();
	414	result += " -- " + m_ResultProducer.getCompatibilityState();
	415	}
	416	return result.trim();
	417	}
	418
	419
	420	/**
	421	* Returns an enumeration describing the available options.
	422	*
	423	* @return an enumeration of all the available options.
	424	*/
	425	public Enumeration listOptions() {
	426
	427	Vector newVector = new Vector(2);
	428
	429	newVector.addElement(new Option(
	430	"\tThe name of the database field to cache over.\n"
	431	+"\teg: \"Fold\" (default none)",
	432	"F", 1,
	433	"-F <field name>"));
	434	newVector.addElement(new Option(
	435	"\tThe full class name of a ResultProducer.\n"
	436	+"\teg: weka.experiment.CrossValidationResultProducer",
	437	"W", 1,
	438	"-W <class name>"));
	439
	440	if ((m_ResultProducer != null) &&
	441	(m_ResultProducer instanceof OptionHandler)) {
	442	newVector.addElement(new Option(
	443	"",
	444	"", 0, "\nOptions specific to result producer "
	445	+ m_ResultProducer.getClass().getName() + ":"));
	446	Enumeration enu = ((OptionHandler)m_ResultProducer).listOptions();
	447	while (enu.hasMoreElements()) {
	448	newVector.addElement(enu.nextElement());
	449	}
	450	}
	451	return newVector.elements();
	452	}
	453
	454	/**
	455	* Parses a given list of options. <p/>
	456	*
	457	<!-- options-start -->
	458	* Valid options are: <p/>
	459	*
	460	* <pre> -F <field name>
	461	* The name of the database field to cache over.
	462	* eg: "Fold" (default none)</pre>
	463	*
	464	* <pre> -W <class name>
	465	* The full class name of a ResultProducer.
	466	* eg: weka.experiment.CrossValidationResultProducer</pre>
	467	*
	468	* <pre>
	469	* Options specific to result producer weka.experiment.CrossValidationResultProducer:
	470	* </pre>
	471	*
	472	* <pre> -X <number of folds>
	473	* The number of folds to use for the cross-validation.
	474	* (default 10)</pre>
	475	*
	476	* <pre> -D
	477	* Save raw split evaluator output.</pre>
	478	*
	479	* <pre> -O <file/directory name/path>
	480	* The filename where raw output will be stored.
	481	* If a directory name is specified then individual
	482	* outputs will be gzipped, otherwise all output will be
	483	* zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)</pre>
	484	*
	485	* <pre> -W <class name>
	486	* The full class name of a SplitEvaluator.
	487	* eg: weka.experiment.ClassifierSplitEvaluator</pre>
	488	*
	489	* <pre>
	490	* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
	491	* </pre>
	492	*
	493	* <pre> -W <class name>
	494	* The full class name of the classifier.
	495	* eg: weka.classifiers.bayes.NaiveBayes</pre>
	496	*
	497	* <pre> -C <index>
	498	* The index of the class for which IR statistics
	499	* are to be output. (default 1)</pre>
	500	*
	501	* <pre> -I <index>
	502	* The index of an attribute to output in the
	503	* results. This attribute should identify an
	504	* instance in order to know which instances are
	505	* in the test set of a cross validation. if 0
	506	* no output (default 0).</pre>
	507	*
	508	* <pre> -P
	509	* Add target and prediction columns to the result
	510	* for each fold.</pre>
	511	*
	512	* <pre>
	513	* Options specific to classifier weka.classifiers.rules.ZeroR:
	514	* </pre>
	515	*
	516	* <pre> -D
	517	* If set, classifier is run in debug mode and
	518	* may output additional info to the console</pre>
	519	*
	520	<!-- options-end -->
	521	*
	522	* All options after -- will be passed to the result producer.
	523	*
	524	* @param options the list of options as an array of strings
	525	* @throws Exception if an option is not supported
	526	*/
	527	public void setOptions(String[] options) throws Exception {
	528
	529	setCacheKeyName(Utils.getOption('F', options));
	530
	531	String rpName = Utils.getOption('W', options);
	532	if (rpName.length() == 0) {
	533	throw new Exception("A ResultProducer must be specified with"
	534	+ " the -W option.");
	535	}
	536	// Do it first without options, so if an exception is thrown during
	537	// the option setting, listOptions will contain options for the actual
	538	// RP.
	539	setResultProducer((ResultProducer)Utils.forName(
	540	ResultProducer.class,
	541	rpName,
	542	null));
	543	if (getResultProducer() instanceof OptionHandler) {
	544	((OptionHandler) getResultProducer())
	545	.setOptions(Utils.partitionOptions(options));
	546	}
	547	}
	548
	549	/**
	550	* Gets the current settings of the result producer.
	551	*
	552	* @return an array of strings suitable for passing to setOptions
	553	*/
	554	public String [] getOptions() {
	555	// Start with the wrapped producer's own options, if it handles options at all
	556	String [] seOptions = new String [0];
	557	if ((m_ResultProducer != null) &&
	558	(m_ResultProducer instanceof OptionHandler)) {
	559	seOptions = ((OptionHandler)m_ResultProducer).getOptions();
	560	}
	561	// 8 extra slots is more than -F, -W and "--" can use; leftover slots are set to "" below
	562	String [] options = new String [seOptions.length + 8];
	563	int current = 0;
	564
	565	if (!getCacheKeyName().equals("")) {
	566	options[current++] = "-F";
	567	options[current++] = getCacheKeyName();
	568	}
	569	if (getResultProducer() != null) {
	570	options[current++] = "-W";
	571	options[current++] = getResultProducer().getClass().getName();
	572	}
	573	options[current++] = "--";
	574	// The wrapped producer's options follow the "--" separator
	575	System.arraycopy(seOptions, 0, options, current,
	576	seOptions.length);
	577	current += seOptions.length;
	578	while (current < options.length) {
	579	options[current++] = "";
	580	}
	581	return options;
	582	}
	583
	584	/**
	585	* Set a list of method names for additional measures to look for
	586	* in SplitEvaluators. This could contain many measures (of which only a
	587	* subset may be produceable by the current resultProducer) if an experiment
	588	* is the type that iterates over a set of properties.
	589	* @param additionalMeasures an array of measure names, null if none
	590	*/
	591	public void setAdditionalMeasures(String [] additionalMeasures) {
	592	m_AdditionalMeasures = additionalMeasures;
	593
	594	if (m_ResultProducer != null) {
	595	System.err.println("DatabaseResultProducer: setting additional "
	596	+"measures for "
	597	+"ResultProducer");
	598	m_ResultProducer.setAdditionalMeasures(m_AdditionalMeasures);
	599	}
	600	}
	601
	602	/**
	603	* Returns an enumeration of any additional measure names that might be
	604	* in the result producer
	605	* @return an enumeration of the measure names
	606	*/
	607	public Enumeration enumerateMeasures() {
	608	Vector newVector = new Vector();
	609	if (m_ResultProducer instanceof AdditionalMeasureProducer) {
	610	Enumeration en = ((AdditionalMeasureProducer)m_ResultProducer).
	611	enumerateMeasures();
	612	while (en.hasMoreElements()) {
	613	String mname = (String)en.nextElement();
	614	newVector.addElement(mname);
	615	}
	616	}
	617	return newVector.elements();
	618	}
	619
	620	/**
	621	* Returns the value of the named measure, as reported by the result producer
	622	* @param additionalMeasureName the name of the measure to query for its value
	623	* @return the value of the named measure
	624	* @throws IllegalArgumentException if the named measure is not supported, or if the result producer is unset or is not an AdditionalMeasureProducer
	625	*/
	626	public double getMeasure(String additionalMeasureName) {
	627	if (m_ResultProducer instanceof AdditionalMeasureProducer) {
	628	return ((AdditionalMeasureProducer)m_ResultProducer).
	629	getMeasure(additionalMeasureName);
	630	} else { // an unset producer is reported as "null" rather than raising a NullPointerException
	631	throw new IllegalArgumentException("DatabaseResultProducer: "
	632	+"Can't return value for : "+additionalMeasureName
	633	+". "+(m_ResultProducer == null ? "null" : m_ResultProducer.getClass().getName())+" "
	634	+"is not an AdditionalMeasureProducer");
	635	}
	636	}
	637
	638
	639	/**
	640	* Sets the dataset that results will be obtained for.
	641	*
	642	* @param instances a value of type 'Instances'.
	643	*/
	644	public void setInstances(Instances instances) {
	645
	646	m_Instances = instances;
	647	}
	648
	649	/**
	650	* Sets the object to send results of each run to.
	651	*
	652	* @param listener a value of type 'ResultListener'
	653	*/
	654	public void setResultListener(ResultListener listener) {
	655
	656	m_ResultListener = listener;
	657	}
	658
	659	/**
	660	* Returns the tip text for this property
	661	* @return tip text for this property suitable for
	662	* displaying in the explorer/experimenter gui
	663	*/
	664	public String resultProducerTipText() {
	665	return "Set the result producer to use. If some results are not found "
	666	+"in the source database then this result producer is used to generate "
	667	+"them.";
	668	}
	669
	670	/**
	671	* Get the ResultProducer.
	672	*
	673	* @return the ResultProducer.
	674	*/
	675	public ResultProducer getResultProducer() {
	676
	677	return m_ResultProducer;
	678	}
	679
	680	/**
	681	* Set the ResultProducer.
	682	*
	683	* @param newResultProducer new ResultProducer to use.
	684	*/
	685	public void setResultProducer(ResultProducer newResultProducer) {
	686
	687	m_ResultProducer = newResultProducer;
	688	}
	689
	690	/**
	691	* Gets a text description of the result producer.
	692	*
	693	* @return a text description of the result producer.
	694	*/
	695	public String toString() {
	696
	697	String result = "DatabaseResultProducer: ";
	698	result += getCompatibilityState();
	699	if (m_Instances == null) {
	700	result += ": <null Instances>";
	701	} else {
	702	result += ": " + Utils.backQuoteChars(m_Instances.relationName());
	703	}
	704	return result;
	705	}
	706
	707	/**
	708	* Returns the revision string.
	709	*
	710	* @return the revision
	711	*/
	712	public String getRevision() {
	713	return RevisionUtils.extract("$Revision: 1.18 $");
	714	}
	715	} // DatabaseResultProducer

Note: See TracBrowser for help on using the repository browser.

Download in other formats: