Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

Dagging.java @ 38

Last change on this file since 38 was 29, checked in by gnappo, 14 years ago
Taggata versione per la demo e aggiunto branch.
File size: 16.4 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* Dagging.java
19	* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22
23	package weka.classifiers.meta;
24
25	import weka.classifiers.Classifier;
26	import weka.classifiers.AbstractClassifier;
27	import weka.classifiers.RandomizableSingleClassifierEnhancer;
28	import weka.core.Instance;
29	import weka.core.Instances;
30	import weka.core.Option;
31	import weka.core.RevisionUtils;
32	import weka.core.TechnicalInformation;
33	import weka.core.TechnicalInformationHandler;
34	import weka.core.Utils;
35	import weka.core.TechnicalInformation.Field;
36	import weka.core.TechnicalInformation.Type;
37
38	import java.util.Enumeration;
39	import java.util.Vector;
40
41	/**
42	<!-- globalinfo-start -->
43	* This meta classifier creates a number of disjoint, stratified folds out of the data and feeds each chunk of data to a copy of the supplied base classifier. Predictions are made via averaging, since all the generated base classifiers are put into the Vote meta classifier. <br/>
44	* Useful for base classifiers that are quadratic or worse in time behavior, regarding number of instances in the training data. <br/>
45	* <br/>
46	* For more information, see: <br/>
47	* Ting, K. M., Witten, I. H.: Stacking Bagged and Dagged Models. In: Fourteenth international Conference on Machine Learning, San Francisco, CA, 367-375, 1997.
48	* <p/>
49	<!-- globalinfo-end -->
50	*
51	<!-- technical-bibtex-start -->
52	* BibTeX:
53	* <pre>
54	* @inproceedings{Ting1997,
55	* address = {San Francisco, CA},
56	* author = {Ting, K. M. and Witten, I. H.},
57	* booktitle = {Fourteenth international Conference on Machine Learning},
58	* editor = {D. H. Fisher},
59	* pages = {367-375},
60	* publisher = {Morgan Kaufmann Publishers},
61	* title = {Stacking Bagged and Dagged Models},
62	* year = {1997}
63	* }
64	* </pre>
65	* <p/>
66	<!-- technical-bibtex-end -->
67	*
68	<!-- options-start -->
69	* Valid options are: <p/>
70	*
71	* <pre> -F <folds>
72	* The number of folds for splitting the training set into
73	* smaller chunks for the base classifier.
74	* (default 10)</pre>
75	*
76	* <pre> -verbose
77	* Whether to print some more information during building the
78	* classifier.
79	* (default is off)</pre>
80	*
81	* <pre> -S <num>
82	* Random number seed.
83	* (default 1)</pre>
84	*
85	* <pre> -D
86	* If set, classifier is run in debug mode and
87	* may output additional info to the console</pre>
88	*
89	* <pre> -W
90	* Full name of base classifier.
91	* (default: weka.classifiers.functions.SMO)</pre>
92	*
93	* <pre>
94	* Options specific to classifier weka.classifiers.functions.SMO:
95	* </pre>
96	*
97	* <pre> -D
98	* If set, classifier is run in debug mode and
99	* may output additional info to the console</pre>
100	*
101	* <pre> -no-checks
102	* Turns off all checks - use with caution!
103	* Turning them off assumes that data is purely numeric, doesn't
104	* contain any missing values, and has a nominal class. Turning them
105	* off also means that no header information will be stored if the
106	* machine is linear. Finally, it also assumes that no instance has
107	* a weight equal to 0.
108	* (default: checks on)</pre>
109	*
110	* <pre> -C <double>
111	* The complexity constant C. (default 1)</pre>
112	*
113	* <pre> -N
114	* Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)</pre>
115	*
116	* <pre> -L <double>
117	* The tolerance parameter. (default 1.0e-3)</pre>
118	*
119	* <pre> -P <double>
120	* The epsilon for round-off error. (default 1.0e-12)</pre>
121	*
122	* <pre> -M
123	* Fit logistic models to SVM outputs. </pre>
124	*
125	* <pre> -V <double>
126	* The number of folds for the internal
127	* cross-validation. (default -1, use training data)</pre>
128	*
129	* <pre> -W <double>
130	* The random number seed. (default 1)</pre>
131	*
132	* <pre> -K <classname and parameters>
133	* The Kernel to use.
134	* (default: weka.classifiers.functions.supportVector.PolyKernel)</pre>
135	*
136	* <pre>
137	* Options specific to kernel weka.classifiers.functions.supportVector.PolyKernel:
138	* </pre>
139	*
140	* <pre> -D
141	* Enables debugging output (if available) to be printed.
142	* (default: off)</pre>
143	*
144	* <pre> -no-checks
145	* Turns off all checks - use with caution!
146	* (default: checks on)</pre>
147	*
148	* <pre> -C <num>
149	* The size of the cache (a prime number), 0 for full cache and
150	* -1 to turn it off.
151	* (default: 250007)</pre>
152	*
153	* <pre> -E <num>
154	* The Exponent to use.
155	* (default: 1.0)</pre>
156	*
157	* <pre> -L
158	* Use lower-order terms.
159	* (default: no)</pre>
160	*
161	<!-- options-end -->
162	*
163	* Options after -- are passed to the designated classifier.<p/>
164	*
165	* @author Bernhard Pfahringer (bernhard at cs dot waikato dot ac dot nz)
166	* @author FracPete (fracpete at waikato dot ac dot nz)
167	* @version $Revision: 5928 $
168	* @see Vote
169	*/
170	public class Dagging
171	extends RandomizableSingleClassifierEnhancer
172	implements TechnicalInformationHandler {
173
174	/** for serialization */
175	static final long serialVersionUID = 4560165876570074309L;
176
177	/** the number of folds to use to split the training data */
178	protected int m_NumFolds = 10;
179
180	/** the classifier used for voting */
181	protected Vote m_Vote = null;
182
183	/** whether to output some progress information during building */
184	protected boolean m_Verbose = false;
185
186	/**
187	* Returns a string describing classifier
188	* @return a description suitable for
189	* displaying in the explorer/experimenter gui
190	*/
191	public String globalInfo() {
192	return
193	"This meta classifier creates a number of disjoint, stratified folds out "
194	+ "of the data and feeds each chunk of data to a copy of the supplied "
195	+ "base classifier. Predictions are made via averaging, since all the "
196	+ "generated base classifiers are put into the Vote meta classifier. \n"
197	+ "Useful for base classifiers that are quadratic or worse in time "
198	+ "behavior, regarding number of instances in the training data. \n"
199	+ "\n"
200	+ "For more information, see: \n"
201	+ getTechnicalInformation().toString();
202	}
203
204	/**
205	* Returns an instance of a TechnicalInformation object, containing
206	* detailed information about the technical background of this class,
207	* e.g., paper reference or book this class is based on.
208	*
209	* @return the technical information about this class
210	*/
211	public TechnicalInformation getTechnicalInformation() {
212	TechnicalInformation result;
213
214	result = new TechnicalInformation(Type.INPROCEEDINGS);
215	result.setValue(Field.AUTHOR, "Ting, K. M. and Witten, I. H.");
216	result.setValue(Field.TITLE, "Stacking Bagged and Dagged Models");
217	result.setValue(Field.BOOKTITLE, "Fourteenth international Conference on Machine Learning");
218	result.setValue(Field.EDITOR, "D. H. Fisher");
219	result.setValue(Field.YEAR, "1997");
220	result.setValue(Field.PAGES, "367-375");
221	result.setValue(Field.PUBLISHER, "Morgan Kaufmann Publishers");
222	result.setValue(Field.ADDRESS, "San Francisco, CA");
223
224	return result;
225	}
226
227	/**
228	* Constructor.
229	*/
230	public Dagging() {
231	m_Classifier = new weka.classifiers.functions.SMO();
232	}
233
234	/**
235	* String describing default classifier.
236	*
237	* @return the default classifier classname
238	*/
239	protected String defaultClassifierString() {
240	return weka.classifiers.functions.SMO.class.getName();
241	}
242
243	/**
244	* Returns an enumeration describing the available options.
245	*
246	* @return an enumeration of all the available options.
247	*/
248	public Enumeration listOptions() {
249	Vector result = new Vector();
250
251	result.addElement(new Option(
252	"\tThe number of folds for splitting the training set into\n"
253	+ "\tsmaller chunks for the base classifier.\n"
254	+ "\t(default 10)",
255	"F", 1, "-F <folds>"));
256
257	result.addElement(new Option(
258	"\tWhether to print some more information during building the\n"
259	+ "\tclassifier.\n"
260	+ "\t(default is off)",
261	"verbose", 0, "-verbose"));
262
263	Enumeration en = super.listOptions();
264	while (en.hasMoreElements())
265	result.addElement(en.nextElement());
266
267	return result.elements();
268	}
269
270
271	/**
272	* Parses a given list of options. <p/>
273	*
274	<!-- options-start -->
275	* Valid options are: <p/>
276	*
277	* <pre> -F <folds>
278	* The number of folds for splitting the training set into
279	* smaller chunks for the base classifier.
280	* (default 10)</pre>
281	*
282	* <pre> -verbose
283	* Whether to print some more information during building the
284	* classifier.
285	* (default is off)</pre>
286	*
287	* <pre> -S <num>
288	* Random number seed.
289	* (default 1)</pre>
290	*
291	* <pre> -D
292	* If set, classifier is run in debug mode and
293	* may output additional info to the console</pre>
294	*
295	* <pre> -W
296	* Full name of base classifier.
297	* (default: weka.classifiers.functions.SMO)</pre>
298	*
299	* <pre>
300	* Options specific to classifier weka.classifiers.functions.SMO:
301	* </pre>
302	*
303	* <pre> -D
304	* If set, classifier is run in debug mode and
305	* may output additional info to the console</pre>
306	*
307	* <pre> -no-checks
308	* Turns off all checks - use with caution!
309	* Turning them off assumes that data is purely numeric, doesn't
310	* contain any missing values, and has a nominal class. Turning them
311	* off also means that no header information will be stored if the
312	* machine is linear. Finally, it also assumes that no instance has
313	* a weight equal to 0.
314	* (default: checks on)</pre>
315	*
316	* <pre> -C <double>
317	* The complexity constant C. (default 1)</pre>
318	*
319	* <pre> -N
320	* Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)</pre>
321	*
322	* <pre> -L <double>
323	* The tolerance parameter. (default 1.0e-3)</pre>
324	*
325	* <pre> -P <double>
326	* The epsilon for round-off error. (default 1.0e-12)</pre>
327	*
328	* <pre> -M
329	* Fit logistic models to SVM outputs. </pre>
330	*
331	* <pre> -V <double>
332	* The number of folds for the internal
333	* cross-validation. (default -1, use training data)</pre>
334	*
335	* <pre> -W <double>
336	* The random number seed. (default 1)</pre>
337	*
338	* <pre> -K <classname and parameters>
339	* The Kernel to use.
340	* (default: weka.classifiers.functions.supportVector.PolyKernel)</pre>
341	*
342	* <pre>
343	* Options specific to kernel weka.classifiers.functions.supportVector.PolyKernel:
344	* </pre>
345	*
346	* <pre> -D
347	* Enables debugging output (if available) to be printed.
348	* (default: off)</pre>
349	*
350	* <pre> -no-checks
351	* Turns off all checks - use with caution!
352	* (default: checks on)</pre>
353	*
354	* <pre> -C <num>
355	* The size of the cache (a prime number), 0 for full cache and
356	* -1 to turn it off.
357	* (default: 250007)</pre>
358	*
359	* <pre> -E <num>
360	* The Exponent to use.
361	* (default: 1.0)</pre>
362	*
363	* <pre> -L
364	* Use lower-order terms.
365	* (default: no)</pre>
366	*
367	<!-- options-end -->
368	*
369	* Options after -- are passed to the designated classifier.<p>
370	*
371	* @param options the list of options as an array of strings
372	* @throws Exception if an option is not supported
373	*/
374	public void setOptions(String[] options) throws Exception {
375	String tmpStr;
376
377	tmpStr = Utils.getOption('F', options);
378	if (tmpStr.length() != 0)
379	setNumFolds(Integer.parseInt(tmpStr));
380	else
381	setNumFolds(10);
382
383	setVerbose(Utils.getFlag("verbose", options));
384
385	super.setOptions(options);
386	}
387
388	/**
389	* Gets the current settings of the Classifier.
390	*
391	* @return an array of strings suitable for passing to setOptions
392	*/
393	public String[] getOptions() {
394	Vector result;
395	String[] options;
396	int i;
397
398	result = new Vector();
399
400	result.add("-F");
401	result.add("" + getNumFolds());
402
403	if (getVerbose())
404	result.add("-verbose");
405
406	options = super.getOptions();
407	for (i = 0; i < options.length; i++)
408	result.add(options[i]);
409
410	return (String[]) result.toArray(new String[result.size()]);
411	}
412
413	/**
414	* Gets the number of folds to use for splitting the training set.
415	*
416	* @return the number of folds
417	*/
418	public int getNumFolds() {
419	return m_NumFolds;
420	}
421
422	/**
423	* Sets the number of folds to use for splitting the training set.
424	*
425	* @param value the new number of folds
426	*/
427	public void setNumFolds(int value) {
428	if (value > 0)
429	m_NumFolds = value;
430	else
431	System.out.println(
432	"At least 1 fold is necessary (provided: " + value + ")!");
433	}
434
435	/**
436	* Returns the tip text for this property
437	*
438	* @return tip text for this property suitable for
439	* displaying in the explorer/experimenter gui
440	*/
441	public String numFoldsTipText() {
442	return "The number of folds to use for splitting the training set into smaller chunks for the base classifier.";
443	}
444
445	/**
446	* Set the verbose state.
447	*
448	* @param value the verbose state
449	*/
450	public void setVerbose(boolean value) {
451	m_Verbose = value;
452	}
453
454	/**
455	* Gets the verbose state
456	*
457	* @return the verbose state
458	*/
459	public boolean getVerbose() {
460	return m_Verbose;
461	}
462
463	/**
464	* Returns the tip text for this property
465	* @return tip text for this property suitable for
466	* displaying in the explorer/experimenter gui
467	*/
468	public String verboseTipText() {
469	return "Whether to ouput some additional information during building.";
470	}
471
472	/**
473	* Bagging method.
474	*
475	* @param data the training data to be used for generating the
476	* bagged classifier.
477	* @throws Exception if the classifier could not be built successfully
478	*/
479	public void buildClassifier(Instances data) throws Exception {
480	Classifier[] base;
481	int i;
482	int n;
483	int fromIndex;
484	int toIndex;
485	Instances train;
486	double chunkSize;
487
488	// can classifier handle the data?
489	getCapabilities().testWithFail(data);
490
491	// remove instances with missing class
492	data = new Instances(data);
493	data.deleteWithMissingClass();
494
495	m_Vote = new Vote();
496	base = new Classifier[getNumFolds()];
497	chunkSize = (double) data.numInstances() / (double) getNumFolds();
498
499	// stratify data
500	if (getNumFolds() > 1) {
501	data.randomize(data.getRandomNumberGenerator(getSeed()));
502	data.stratify(getNumFolds());
503	}
504
505	// generate <folds> classifiers
506	for (i = 0; i < getNumFolds(); i++) {
507	base[i] = makeCopy(getClassifier());
508
509	// generate training data
510	if (getNumFolds() > 1) {
511	// some progress information
512	if (getVerbose())
513	System.out.print(".");
514
515	train = data.testCV(getNumFolds(), i);
516	}
517	else {
518	train = data;
519	}
520
521	// train classifier
522	base[i].buildClassifier(train);
523	}
524
525	// init vote
526	m_Vote.setClassifiers(base);
527
528	if (getVerbose())
529	System.out.println();
530	}
531
532	/**
533	* Calculates the class membership probabilities for the given test
534	* instance.
535	*
536	* @param instance the instance to be classified
537	* @return preedicted class probability distribution
538	* @throws Exception if distribution can't be computed successfully
539	*/
540	public double[] distributionForInstance(Instance instance) throws Exception {
541	return m_Vote.distributionForInstance(instance);
542	}
543
544	/**
545	* Returns description of the classifier.
546	*
547	* @return description of the classifier as a string
548	*/
549	public String toString() {
550	if (m_Vote == null)
551	return this.getClass().getName().replaceAll(".*\\.", "")
552	+ ": No model built yet.";
553	else
554	return m_Vote.toString();
555	}
556
557	/**
558	* Returns the revision string.
559	*
560	* @return the revision
561	*/
562	public String getRevision() {
563	return RevisionUtils.extract("$Revision: 5928 $");
564	}
565
566	/**
567	* Main method for testing this class.
568	*
569	* @param args the options
570	*/
571	public static void main(String[] args) {
572	runClassifier(new Dagging(), args);
573	}
574	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/MetisMQI/src/main/java/weka/classifiers/meta/Dagging.java @ 38

Download in other formats: