Context Navigation

source: src/main/java/weka/experiment/DatabaseResultProducer.java @ 12

Last change on this file since 12 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 22.1 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* DatabaseResultProducer.java
19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22
23
24	package weka.experiment;
25
26	import weka.core.AdditionalMeasureProducer;
27	import weka.core.Instances;
28	import weka.core.Option;
29	import weka.core.OptionHandler;
30	import weka.core.RevisionUtils;
31	import weka.core.Utils;
32
33	import java.util.Enumeration;
34	import java.util.Vector;
35
36	/**
37	<!-- globalinfo-start -->
38	* Examines a database and extracts out the results produced by the specified ResultProducer and submits them to the specified ResultListener. If a result needs to be generated, the ResultProducer is used to obtain the result.
39	* <p/>
40	<!-- globalinfo-end -->
41	*
42	<!-- options-start -->
43	* Valid options are: <p/>
44	*
45	* <pre> -F <field name>
46	* The name of the database field to cache over.
47	* eg: "Fold" (default none)</pre>
48	*
49	* <pre> -W <class name>
50	* The full class name of a ResultProducer.
51	* eg: weka.experiment.CrossValidationResultProducer</pre>
52	*
53	* <pre>
54	* Options specific to result producer weka.experiment.CrossValidationResultProducer:
55	* </pre>
56	*
57	* <pre> -X <number of folds>
58	* The number of folds to use for the cross-validation.
59	* (default 10)</pre>
60	*
61	* <pre> -D
62	* Save raw split evaluator output.</pre>
63	*
64	* <pre> -O <file/directory name/path>
65	* The filename where raw output will be stored.
66	* If a directory name is specified then individual
67	* outputs will be gzipped, otherwise all output will be
68	* zipped to the named file. Use in conjunction with -D. (default splitEvalutorOut.zip)</pre>
69	*
70	* <pre> -W <class name>
71	* The full class name of a SplitEvaluator.
72	* eg: weka.experiment.ClassifierSplitEvaluator</pre>
73	*
74	* <pre>
75	* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
76	* </pre>
77	*
78	* <pre> -W <class name>
79	* The full class name of the classifier.
80	* eg: weka.classifiers.bayes.NaiveBayes</pre>
81	*
82	* <pre> -C <index>
83	* The index of the class for which IR statistics
84	* are to be output. (default 1)</pre>
85	*
86	* <pre> -I <index>
87	* The index of an attribute to output in the
88	* results. This attribute should identify an
89	* instance in order to know which instances are
90	* in the test set of a cross validation. if 0
91	* no output (default 0).</pre>
92	*
93	* <pre> -P
94	* Add target and prediction columns to the result
95	* for each fold.</pre>
96	*
97	* <pre>
98	* Options specific to classifier weka.classifiers.rules.ZeroR:
99	* </pre>
100	*
101	* <pre> -D
102	* If set, classifier is run in debug mode and
103	* may output additional info to the console</pre>
104	*
105	<!-- options-end -->
106	*
107	* @author Len Trigg (trigg@cs.waikato.ac.nz)
108	* @version $Revision: 1.18 $
109	*/
110	public class DatabaseResultProducer
111	extends DatabaseResultListener
112	implements ResultProducer, OptionHandler, AdditionalMeasureProducer {
113
114	/** for serialization */
115	static final long serialVersionUID = -5620660780203158666L;
116
117	/** The dataset of interest */
118	protected Instances m_Instances;
119
120	/** The ResultListener to send results to */
121	protected ResultListener m_ResultListener = new CSVResultListener();
122
123	/** The ResultProducer used to generate results */
124	protected ResultProducer m_ResultProducer
125	= new CrossValidationResultProducer();
126
127	/** The names of any additional measures to look for in SplitEvaluators */
128	protected String [] m_AdditionalMeasures = null;
129
130	/**
131	* Returns a string describing this result producer
132	* @return a description of the result producer suitable for
133	* displaying in the explorer/experimenter gui
134	*/
135	public String globalInfo() {
136	return "Examines a database and extracts out "
137	+"the results produced by the specified ResultProducer "
138	+"and submits them to the specified ResultListener. If a result needs "
139	+"to be generated, the ResultProducer is used to obtain the result.";
140	}
141
/**
 * Creates the DatabaseResultProducer. All set-up work is left to the
 * parent constructor.
 *
 * @throws Exception if the parent constructor fails
 */
public DatabaseResultProducer() throws Exception {
  // Nothing to initialise here beyond what the superclass does.
  super();
}
152
153	/**
154	* Gets the keys for a specified run number. Different run
155	* numbers correspond to different randomizations of the data. Keys
156	* produced should be sent to the current ResultListener
157	*
158	* @param run the run number to get keys for.
159	* @throws Exception if a problem occurs while getting the keys
160	*/
161	public void doRunKeys(int run) throws Exception {
162
163	if (m_ResultProducer == null) {
164	throw new Exception("No ResultProducer set");
165	}
166	if (m_ResultListener == null) {
167	throw new Exception("No ResultListener set");
168	}
169	if (m_Instances == null) {
170	throw new Exception("No Instances set");
171	}
172
173	// Tell the resultproducer to send results to us
174	m_ResultProducer.setResultListener(this);
175	m_ResultProducer.setInstances(m_Instances);
176	m_ResultProducer.doRunKeys(run);
177	}
178
179	/**
180	* Gets the results for a specified run number. Different run
181	* numbers correspond to different randomizations of the data. Results
182	* produced should be sent to the current ResultListener
183	*
184	* @param run the run number to get results for.
185	* @throws Exception if a problem occurs while getting the results
186	*/
187	public void doRun(int run) throws Exception {
188
189	if (m_ResultProducer == null) {
190	throw new Exception("No ResultProducer set");
191	}
192	if (m_ResultListener == null) {
193	throw new Exception("No ResultListener set");
194	}
195	if (m_Instances == null) {
196	throw new Exception("No Instances set");
197	}
198
199	// Tell the resultproducer to send results to us
200	m_ResultProducer.setResultListener(this);
201	m_ResultProducer.setInstances(m_Instances);
202	m_ResultProducer.doRun(run);
203
204	}
205
206	/**
207	* Prepare for the results to be received.
208	*
209	* @param rp the ResultProducer that will generate the results
210	* @throws Exception if an error occurs during preprocessing.
211	*/
212	public void preProcess(ResultProducer rp) throws Exception {
213
214	super.preProcess(rp);
215	if (m_ResultListener == null) {
216	throw new Exception("No ResultListener set");
217	}
218	m_ResultListener.preProcess(this);
219	}
220
221	/**
222	* When this method is called, it indicates that no more results
223	* will be sent that need to be grouped together in any way.
224	*
225	* @param rp the ResultProducer that generated the results
226	* @throws Exception if an error occurs
227	*/
228	public void postProcess(ResultProducer rp) throws Exception {
229
230	super.postProcess(rp);
231	m_ResultListener.postProcess(this);
232	}
233
234	/**
235	* Prepare to generate results. The ResultProducer should call
236	* preProcess(this) on the ResultListener it is to send results to.
237	*
238	* @throws Exception if an error occurs during preprocessing.
239	*/
240	public void preProcess() throws Exception {
241
242	if (m_ResultProducer == null) {
243	throw new Exception("No ResultProducer set");
244	}
245	m_ResultProducer.setResultListener(this);
246	m_ResultProducer.preProcess();
247	}
248
249	/**
250	* When this method is called, it indicates that no more requests to
251	* generate results for the current experiment will be sent. The
252	* ResultProducer should call preProcess(this) on the
253	* ResultListener it is to send results to.
254	*
255	* @throws Exception if an error occurs
256	*/
257	public void postProcess() throws Exception {
258
259	m_ResultProducer.postProcess();
260	}
261
262	/**
263	* Accepts results from a ResultProducer.
264	*
265	* @param rp the ResultProducer that generated the results
266	* @param key an array of Objects (Strings or Doubles) that uniquely
267	* identify a result for a given ResultProducer with given compatibilityState
268	* @param result the results stored in an array. The objects stored in
269	* the array may be Strings, Doubles, or null (for the missing value).
270	* @throws Exception if the result could not be accepted.
271	*/
272	public void acceptResult(ResultProducer rp, Object [] key, Object [] result)
273	throws Exception {
274
275	if (m_ResultProducer != rp) {
276	throw new Error("Unrecognized ResultProducer sending results!!");
277	}
278	// System.err.println("DBRP::acceptResult");
279
280	// Is the result needed by the listener?
281	boolean isRequiredByListener = m_ResultListener.isResultRequired(this,
282	key);
283	// Is the result already in the database?
284	boolean isRequiredByDatabase = super.isResultRequired(rp, key);
285
286	// Insert it into the database here
287	if (isRequiredByDatabase) {
288	// We could alternatively throw an exception if we only want values
289	// that are already in the database
290	if (result != null) {
291
292	// null result could occur from a chain of doRunKeys calls
293	super.acceptResult(rp, key, result);
294	}
295	}
296
297	// Pass it on
298	if (isRequiredByListener) {
299	m_ResultListener.acceptResult(this, key, result);
300	}
301	}
302
303	/**
304	* Determines whether the results for a specified key must be
305	* generated.
306	*
307	* @param rp the ResultProducer wanting to generate the results
308	* @param key an array of Objects (Strings or Doubles) that uniquely
309	* identify a result for a given ResultProducer with given compatibilityState
310	* @return true if the result should be generated
311	* @throws Exception if it could not be determined if the result
312	* is needed.
313	*/
314	public boolean isResultRequired(ResultProducer rp, Object [] key)
315	throws Exception {
316
317	if (m_ResultProducer != rp) {
318	throw new Error("Unrecognized ResultProducer sending results!!");
319	}
320	// System.err.println("DBRP::isResultRequired");
321
322	// Is the result needed by the listener?
323	boolean isRequiredByListener = m_ResultListener.isResultRequired(this,
324	key);
325	// Is the result already in the database?
326	boolean isRequiredByDatabase = super.isResultRequired(rp, key);
327
328	if (!isRequiredByDatabase && isRequiredByListener) {
329	// Pass the result through to the listener
330	Object [] result = getResultFromTable(m_ResultsTableName,
331	rp, key);
332	System.err.println("Got result from database: "
333	+ DatabaseUtils.arrayToString(result));
334	m_ResultListener.acceptResult(this, key, result);
335	return false;
336	}
337
338	return (isRequiredByListener \|\| isRequiredByDatabase);
339	}
340
341	/**
342	* Gets the names of each of the columns produced for a single run.
343	*
344	* @return an array containing the name of each column
345	* @throws Exception if something goes wrong.
346	*/
347	public String [] getKeyNames() throws Exception {
348
349	return m_ResultProducer.getKeyNames();
350	}
351
352	/**
353	* Gets the data types of each of the columns produced for a single run.
354	* This method should really be static.
355	*
356	* @return an array containing objects of the type of each column. The
357	* objects should be Strings, or Doubles.
358	* @throws Exception if something goes wrong.
359	*/
360	public Object [] getKeyTypes() throws Exception {
361
362	return m_ResultProducer.getKeyTypes();
363	}
364
365	/**
366	* Gets the names of each of the columns produced for a single run.
367	* A new result field is added for the number of results used to
368	* produce each average.
369	* If only averages are being produced the names are not altered, if
370	* standard deviations are produced then "Dev_" and "Avg_" are prepended
371	* to each result deviation and average field respectively.
372	*
373	* @return an array containing the name of each column
374	* @throws Exception if something goes wrong.
375	*/
376	public String [] getResultNames() throws Exception {
377
378	return m_ResultProducer.getResultNames();
379	}
380
381	/**
382	* Gets the data types of each of the columns produced for a single run.
383	*
384	* @return an array containing objects of the type of each column. The
385	* objects should be Strings, or Doubles.
386	* @throws Exception if something goes wrong.
387	*/
388	public Object [] getResultTypes() throws Exception {
389
390	return m_ResultProducer.getResultTypes();
391	}
392
393	/**
394	* Gets a description of the internal settings of the result
395	* producer, sufficient for distinguishing a ResultProducer
396	* instance from another with different settings (ignoring
397	* those settings set through this interface). For example,
398	* a cross-validation ResultProducer may have a setting for the
399	* number of folds. For a given state, the results produced should
400	* be compatible. Typically if a ResultProducer is an OptionHandler,
401	* this string will represent the command line arguments required
402	* to set the ResultProducer to that state.
403	*
404	* @return the description of the ResultProducer state, or null
405	* if no state is defined
406	*/
407	public String getCompatibilityState() {
408
409	String result = "";
410	if (m_ResultProducer == null) {
411	result += "<null ResultProducer>";
412	} else {
413	result += "-W " + m_ResultProducer.getClass().getName();
414	}
415	result += " -- " + m_ResultProducer.getCompatibilityState();
416	return result.trim();
417	}
418
419
420	/**
421	* Returns an enumeration describing the available options..
422	*
423	* @return an enumeration of all the available options.
424	*/
425	public Enumeration listOptions() {
426
427	Vector newVector = new Vector(2);
428
429	newVector.addElement(new Option(
430	"\tThe name of the database field to cache over.\n"
431	+"\teg: \"Fold\" (default none)",
432	"F", 1,
433	"-F <field name>"));
434	newVector.addElement(new Option(
435	"\tThe full class name of a ResultProducer.\n"
436	+"\teg: weka.experiment.CrossValidationResultProducer",
437	"W", 1,
438	"-W <class name>"));
439
440	if ((m_ResultProducer != null) &&
441	(m_ResultProducer instanceof OptionHandler)) {
442	newVector.addElement(new Option(
443	"",
444	"", 0, "\nOptions specific to result producer "
445	+ m_ResultProducer.getClass().getName() + ":"));
446	Enumeration enu = ((OptionHandler)m_ResultProducer).listOptions();
447	while (enu.hasMoreElements()) {
448	newVector.addElement(enu.nextElement());
449	}
450	}
451	return newVector.elements();
452	}
453
454	/**
455	* Parses a given list of options. <p/>
456	*
457	<!-- options-start -->
458	* Valid options are: <p/>
459	*
460	* <pre> -F <field name>
461	* The name of the database field to cache over.
462	* eg: "Fold" (default none)</pre>
463	*
464	* <pre> -W <class name>
465	* The full class name of a ResultProducer.
466	* eg: weka.experiment.CrossValidationResultProducer</pre>
467	*
468	* <pre>
469	* Options specific to result producer weka.experiment.CrossValidationResultProducer:
470	* </pre>
471	*
472	* <pre> -X <number of folds>
473	* The number of folds to use for the cross-validation.
474	* (default 10)</pre>
475	*
476	* <pre> -D
477	* Save raw split evaluator output.</pre>
478	*
479	* <pre> -O <file/directory name/path>
480	* The filename where raw output will be stored.
481	* If a directory name is specified then then individual
482	* outputs will be gzipped, otherwise all output will be
483	* zipped to the named file. Use in conjuction with -D. (default splitEvalutorOut.zip)</pre>
484	*
485	* <pre> -W <class name>
486	* The full class name of a SplitEvaluator.
487	* eg: weka.experiment.ClassifierSplitEvaluator</pre>
488	*
489	* <pre>
490	* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
491	* </pre>
492	*
493	* <pre> -W <class name>
494	* The full class name of the classifier.
495	* eg: weka.classifiers.bayes.NaiveBayes</pre>
496	*
497	* <pre> -C <index>
498	* The index of the class for which IR statistics
499	* are to be output. (default 1)</pre>
500	*
501	* <pre> -I <index>
502	* The index of an attribute to output in the
503	* results. This attribute should identify an
504	* instance in order to know which instances are
505	* in the test set of a cross validation. if 0
506	* no output (default 0).</pre>
507	*
508	* <pre> -P
509	* Add target and prediction columns to the result
510	* for each fold.</pre>
511	*
512	* <pre>
513	* Options specific to classifier weka.classifiers.rules.ZeroR:
514	* </pre>
515	*
516	* <pre> -D
517	* If set, classifier is run in debug mode and
518	* may output additional info to the console</pre>
519	*
520	<!-- options-end -->
521	*
522	* All option after -- will be passed to the result producer.
523	*
524	* @param options the list of options as an array of strings
525	* @throws Exception if an option is not supported
526	*/
527	public void setOptions(String[] options) throws Exception {
528
529	setCacheKeyName(Utils.getOption('F', options));
530
531	String rpName = Utils.getOption('W', options);
532	if (rpName.length() == 0) {
533	throw new Exception("A ResultProducer must be specified with"
534	+ " the -W option.");
535	}
536	// Do it first without options, so if an exception is thrown during
537	// the option setting, listOptions will contain options for the actual
538	// RP.
539	setResultProducer((ResultProducer)Utils.forName(
540	ResultProducer.class,
541	rpName,
542	null));
543	if (getResultProducer() instanceof OptionHandler) {
544	((OptionHandler) getResultProducer())
545	.setOptions(Utils.partitionOptions(options));
546	}
547	}
548
549	/**
550	* Gets the current settings of the result producer.
551	*
552	* @return an array of strings suitable for passing to setOptions
553	*/
554	public String [] getOptions() {
555
556	String [] seOptions = new String [0];
557	if ((m_ResultProducer != null) &&
558	(m_ResultProducer instanceof OptionHandler)) {
559	seOptions = ((OptionHandler)m_ResultProducer).getOptions();
560	}
561
562	String [] options = new String [seOptions.length + 8];
563	int current = 0;
564
565	if (!getCacheKeyName().equals("")) {
566	options[current++] = "-F";
567	options[current++] = getCacheKeyName();
568	}
569	if (getResultProducer() != null) {
570	options[current++] = "-W";
571	options[current++] = getResultProducer().getClass().getName();
572	}
573	options[current++] = "--";
574
575	System.arraycopy(seOptions, 0, options, current,
576	seOptions.length);
577	current += seOptions.length;
578	while (current < options.length) {
579	options[current++] = "";
580	}
581	return options;
582	}
583
584	/**
585	* Set a list of method names for additional measures to look for
586	* in SplitEvaluators. This could contain many measures (of which only a
587	* subset may be produceable by the current resultProducer) if an experiment
588	* is the type that iterates over a set of properties.
589	* @param additionalMeasures an array of measure names, null if none
590	*/
591	public void setAdditionalMeasures(String [] additionalMeasures) {
592	m_AdditionalMeasures = additionalMeasures;
593
594	if (m_ResultProducer != null) {
595	System.err.println("DatabaseResultProducer: setting additional "
596	+"measures for "
597	+"ResultProducer");
598	m_ResultProducer.setAdditionalMeasures(m_AdditionalMeasures);
599	}
600	}
601
602	/**
603	* Returns an enumeration of any additional measure names that might be
604	* in the result producer
605	* @return an enumeration of the measure names
606	*/
607	public Enumeration enumerateMeasures() {
608	Vector newVector = new Vector();
609	if (m_ResultProducer instanceof AdditionalMeasureProducer) {
610	Enumeration en = ((AdditionalMeasureProducer)m_ResultProducer).
611	enumerateMeasures();
612	while (en.hasMoreElements()) {
613	String mname = (String)en.nextElement();
614	newVector.addElement(mname);
615	}
616	}
617	return newVector.elements();
618	}
619
620	/**
621	* Returns the value of the named measure
622	* @param additionalMeasureName the name of the measure to query for its value
623	* @return the value of the named measure
624	* @throws IllegalArgumentException if the named measure is not supported
625	*/
626	public double getMeasure(String additionalMeasureName) {
627	if (m_ResultProducer instanceof AdditionalMeasureProducer) {
628	return ((AdditionalMeasureProducer)m_ResultProducer).
629	getMeasure(additionalMeasureName);
630	} else {
631	throw new IllegalArgumentException("DatabaseResultProducer: "
632	+"Can't return value for : "+additionalMeasureName
633	+". "+m_ResultProducer.getClass().getName()+" "
634	+"is not an AdditionalMeasureProducer");
635	}
636	}
637
638
639	/**
640	* Sets the dataset that results will be obtained for.
641	*
642	* @param instances a value of type 'Instances'.
643	*/
644	public void setInstances(Instances instances) {
645
646	m_Instances = instances;
647	}
648
649	/**
650	* Sets the object to send results of each run to.
651	*
652	* @param listener a value of type 'ResultListener'
653	*/
654	public void setResultListener(ResultListener listener) {
655
656	m_ResultListener = listener;
657	}
658
659	/**
660	* Returns the tip text for this property
661	* @return tip text for this property suitable for
662	* displaying in the explorer/experimenter gui
663	*/
664	public String resultProducerTipText() {
665	return "Set the result producer to use. If some results are not found "
666	+"in the source database then this result producer is used to generate "
667	+"them.";
668	}
669
670	/**
671	* Get the ResultProducer.
672	*
673	* @return the ResultProducer.
674	*/
675	public ResultProducer getResultProducer() {
676
677	return m_ResultProducer;
678	}
679
680	/**
681	* Set the ResultProducer.
682	*
683	* @param newResultProducer new ResultProducer to use.
684	*/
685	public void setResultProducer(ResultProducer newResultProducer) {
686
687	m_ResultProducer = newResultProducer;
688	}
689
690	/**
691	* Gets a text descrption of the result producer.
692	*
693	* @return a text description of the result producer.
694	*/
695	public String toString() {
696
697	String result = "DatabaseResultProducer: ";
698	result += getCompatibilityState();
699	if (m_Instances == null) {
700	result += ": <null Instances>";
701	} else {
702	result += ": " + Utils.backQuoteChars(m_Instances.relationName());
703	}
704	return result;
705	}
706
707	/**
708	* Returns the revision string.
709	*
710	* @return the revision
711	*/
712	public String getRevision() {
713	return RevisionUtils.extract("$Revision: 1.18 $");
714	}
715	} // DatabaseResultProducer

Note: See TracBrowser for help on using the repository browser.

Download in other formats: