Context Navigation

source: src/main/java/weka/classifiers/rules/PART.java @ 11

Last change on this file since 11 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 20.3 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* PART.java
19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22
23	package weka.classifiers.rules;
24
25	import weka.classifiers.Classifier;
26	import weka.classifiers.AbstractClassifier;
27	import weka.classifiers.rules.part.MakeDecList;
28	import weka.classifiers.trees.j48.BinC45ModelSelection;
29	import weka.classifiers.trees.j48.C45ModelSelection;
30	import weka.classifiers.trees.j48.ModelSelection;
31	import weka.core.AdditionalMeasureProducer;
32	import weka.core.Capabilities;
33	import weka.core.Instance;
34	import weka.core.Instances;
35	import weka.core.Option;
36	import weka.core.OptionHandler;
37	import weka.core.RevisionUtils;
38	import weka.core.Summarizable;
39	import weka.core.TechnicalInformation;
40	import weka.core.TechnicalInformationHandler;
41	import weka.core.Utils;
42	import weka.core.WeightedInstancesHandler;
43	import weka.core.TechnicalInformation.Field;
44	import weka.core.TechnicalInformation.Type;
45
46	import java.util.Enumeration;
47	import java.util.Vector;
48
49	/**
50	<!-- globalinfo-start -->
51	* Class for generating a PART decision list. Uses separate-and-conquer. Builds a partial C4.5 decision tree in each iteration and makes the "best" leaf into a rule.<br/>
52	* <br/>
53	* For more information, see:<br/>
54	* <br/>
55	* Eibe Frank, Ian H. Witten: Generating Accurate Rule Sets Without Global Optimization. In: Fifteenth International Conference on Machine Learning, 144-151, 1998.
56	* <p/>
57	<!-- globalinfo-end -->
58	*
59	<!-- technical-bibtex-start -->
60	* BibTeX:
61	* <pre>
62	* @inproceedings{Frank1998,
63	* author = {Eibe Frank and Ian H. Witten},
64	* booktitle = {Fifteenth International Conference on Machine Learning},
65	* editor = {J. Shavlik},
66	* pages = {144-151},
67	* publisher = {Morgan Kaufmann},
68	* title = {Generating Accurate Rule Sets Without Global Optimization},
69	* year = {1998},
70	* PS = {http://www.cs.waikato.ac.nz/\~eibe/pubs/ML98-57.ps.gz}
71	* }
72	* </pre>
73	* <p/>
74	<!-- technical-bibtex-end -->
75	*
76	<!-- options-start -->
77	* Valid options are: <p/>
78	*
79	* <pre> -C <pruning confidence>
80	* Set confidence threshold for pruning.
81	* (default 0.25)</pre>
82	*
83	* <pre> -M <minimum number of objects>
84	* Set minimum number of objects per leaf.
85	* (default 2)</pre>
86	*
87	* <pre> -R
88	* Use reduced error pruning.</pre>
89	*
90	* <pre> -N <number of folds>
91	* Set number of folds for reduced error
92	* pruning. One fold is used as pruning set.
93	* (default 3)</pre>
94	*
95	* <pre> -B
96	* Use binary splits only.</pre>
97	*
98	* <pre> -U
99	* Generate unpruned decision list.</pre>
100	*
101	* <pre> -J
102	* Do not use MDL correction for info gain on numeric attributes.</pre>
103	*
104	* <pre> -Q <seed>
105	* Seed for random data shuffling (default 1).</pre>
106	*
107	<!-- options-end -->
108	*
109	* @author Eibe Frank (eibe@cs.waikato.ac.nz)
110	* @version $Revision: 6089 $
111	*/
112	public class PART
113	extends AbstractClassifier
114	implements OptionHandler, WeightedInstancesHandler, Summarizable,
115	AdditionalMeasureProducer, TechnicalInformationHandler {
116
117	/** for serialization */
118	static final long serialVersionUID = 8121455039782598361L;
119
120	/** The decision list */
121	private MakeDecList m_root;
122
123	/** Confidence level */
124	private float m_CF = 0.25f;
125
126	/** Minimum number of objects */
127	private int m_minNumObj = 2;
128
129	/** Use MDL correction? */
130	private boolean m_useMDLcorrection = true;
131
132	/** Use reduced error pruning? */
133	private boolean m_reducedErrorPruning = false;
134
135	/** Number of folds for reduced error pruning. */
136	private int m_numFolds = 3;
137
138	/** Binary splits on nominal attributes? */
139	private boolean m_binarySplits = false;
140
141	/** Generate unpruned list? */
142	private boolean m_unpruned = false;
143
144	/** The seed for random number generation. */
145	private int m_Seed = 1;
146
147	/**
148	* Returns a string describing classifier
149	* @return a description suitable for
150	* displaying in the explorer/experimenter gui
151	*/
152	public String globalInfo() {
153
154	return "Class for generating a PART decision list. Uses "
155	+ "separate-and-conquer. Builds a partial C4.5 decision tree "
156	+ "in each iteration and makes the \"best\" leaf into a rule.\n\n"
157	+ "For more information, see:\n\n"
158	+ getTechnicalInformation().toString();
159	}
160
161	/**
162	* Returns an instance of a TechnicalInformation object, containing
163	* detailed information about the technical background of this class,
164	* e.g., paper reference or book this class is based on.
165	*
166	* @return the technical information about this class
167	*/
168	public TechnicalInformation getTechnicalInformation() {
169	TechnicalInformation result;
170
171	result = new TechnicalInformation(Type.INPROCEEDINGS);
172	result.setValue(Field.AUTHOR, "Eibe Frank and Ian H. Witten");
173	result.setValue(Field.TITLE, "Generating Accurate Rule Sets Without Global Optimization");
174	result.setValue(Field.BOOKTITLE, "Fifteenth International Conference on Machine Learning");
175	result.setValue(Field.EDITOR, "J. Shavlik");
176	result.setValue(Field.YEAR, "1998");
177	result.setValue(Field.PAGES, "144-151");
178	result.setValue(Field.PUBLISHER, "Morgan Kaufmann");
179	result.setValue(Field.PS, "http://www.cs.waikato.ac.nz/~eibe/pubs/ML98-57.ps.gz");
180
181	return result;
182	}
183
184	/**
185	* Returns default capabilities of the classifier.
186	*
187	* @return the capabilities of this classifier
188	*/
189	public Capabilities getCapabilities() {
190	Capabilities result;
191
192	if (m_unpruned)
193	result = new MakeDecList(null, m_minNumObj).getCapabilities();
194	else if (m_reducedErrorPruning)
195	result = new MakeDecList(null, m_numFolds, m_minNumObj, m_Seed).getCapabilities();
196	else
197	result = new MakeDecList(null, m_CF, m_minNumObj).getCapabilities();
198
199	return result;
200	}
201
202	/**
203	* Generates the classifier.
204	*
205	* @param instances the data to train with
206	* @throws Exception if classifier can't be built successfully
207	*/
208	public void buildClassifier(Instances instances)
209	throws Exception {
210
211	// can classifier handle the data?
212	getCapabilities().testWithFail(instances);
213
214	// remove instances with missing class
215	instances = new Instances(instances);
216	instances.deleteWithMissingClass();
217
218	ModelSelection modSelection;
219
220	if (m_binarySplits)
221	modSelection = new BinC45ModelSelection(m_minNumObj, instances, m_useMDLcorrection);
222	else
223	modSelection = new C45ModelSelection(m_minNumObj, instances, m_useMDLcorrection);
224	if (m_unpruned)
225	m_root = new MakeDecList(modSelection, m_minNumObj);
226	else if (m_reducedErrorPruning)
227	m_root = new MakeDecList(modSelection, m_numFolds, m_minNumObj, m_Seed);
228	else
229	m_root = new MakeDecList(modSelection, m_CF, m_minNumObj);
230	m_root.buildClassifier(instances);
231	if (m_binarySplits) {
232	((BinC45ModelSelection)modSelection).cleanup();
233	} else {
234	((C45ModelSelection)modSelection).cleanup();
235	}
236	}
237
238	/**
239	* Classifies an instance.
240	*
241	* @param instance the instance to classify
242	* @return the classification
243	* @throws Exception if instance can't be classified successfully
244	*/
245	public double classifyInstance(Instance instance)
246	throws Exception {
247
248	return m_root.classifyInstance(instance);
249	}
250
251	/**
252	* Returns class probabilities for an instance.
253	*
254	* @param instance the instance to get the distribution for
255	* @return the class probabilities
256	* @throws Exception if the distribution can't be computed successfully
257	*/
258	public final double [] distributionForInstance(Instance instance)
259	throws Exception {
260
261	return m_root.distributionForInstance(instance);
262	}
263
264	/**
265	* Returns an enumeration describing the available options.
266	*
267	* Valid options are: <p>
268	*
269	* -C confidence <br>
270	* Set confidence threshold for pruning. (Default: 0.25) <p>
271	*
272	* -M number <br>
273	* Set minimum number of instances per leaf. (Default: 2) <p>
274	*
275	* -R <br>
276	* Use reduced error pruning. <p>
277	*
278	* -N number <br>
279	* Set number of folds for reduced error pruning. One fold is
280	* used as the pruning set. (Default: 3) <p>
281	*
282	* -B <br>
283	* Use binary splits for nominal attributes. <p>
284	*
285	* -U <br>
286	* Generate unpruned decision list. <p>
287	*
288	* -Q <br>
289	* The seed for reduced-error pruning. <p>
290	*
291	* @return an enumeration of all the available options.
292	*/
293	public Enumeration listOptions() {
294
295	Vector newVector = new Vector(8);
296
297	newVector.
298	addElement(new Option("\tSet confidence threshold for pruning.\n" +
299	"\t(default 0.25)",
300	"C", 1, "-C <pruning confidence>"));
301	newVector.
302	addElement(new Option("\tSet minimum number of objects per leaf.\n" +
303	"\t(default 2)",
304	"M", 1, "-M <minimum number of objects>"));
305	newVector.
306	addElement(new Option("\tUse reduced error pruning.",
307	"R", 0, "-R"));
308	newVector.
309	addElement(new Option("\tSet number of folds for reduced error\n" +
310	"\tpruning. One fold is used as pruning set.\n" +
311	"\t(default 3)",
312	"N", 1, "-N <number of folds>"));
313	newVector.
314	addElement(new Option("\tUse binary splits only.",
315	"B", 0, "-B"));
316	newVector.
317	addElement(new Option("\tGenerate unpruned decision list.",
318	"U", 0, "-U"));
319	newVector.
320	addElement(new Option("\tDo not use MDL correction for info gain on numeric attributes.",
321	"J", 0, "-J"));
322	newVector.
323	addElement(new Option("\tSeed for random data shuffling (default 1).",
324	"Q", 1, "-Q <seed>"));
325
326	return newVector.elements();
327	}
328
329	/**
330	* Parses a given list of options. <p/>
331	*
332	<!-- options-start -->
333	* Valid options are: <p/>
334	*
335	* <pre> -C <pruning confidence>
336	* Set confidence threshold for pruning.
337	* (default 0.25)</pre>
338	*
339	* <pre> -M <minimum number of objects>
340	* Set minimum number of objects per leaf.
341	* (default 2)</pre>
342	*
343	* <pre> -R
344	* Use reduced error pruning.</pre>
345	*
346	* <pre> -N <number of folds>
347	* Set number of folds for reduced error
348	* pruning. One fold is used as pruning set.
349	* (default 3)</pre>
350	*
351	* <pre> -B
352	* Use binary splits only.</pre>
353	*
354	* <pre> -U
355	* Generate unpruned decision list.</pre>
356	*
357	* <pre> -J
358	* Do not use MDL correction for info gain on numeric attributes.</pre>
359	*
360	* <pre> -Q <seed>
361	* Seed for random data shuffling (default 1).</pre>
362	*
363	<!-- options-end -->
364	*
365	* @param options the list of options as an array of strings
366	* @throws Exception if an option is not supported
367	*/
368	public void setOptions(String[] options) throws Exception {
369
370	// Pruning options
371	m_unpruned = Utils.getFlag('U', options);
372	m_reducedErrorPruning = Utils.getFlag('R', options);
373	m_binarySplits = Utils.getFlag('B', options);
374	m_useMDLcorrection = !Utils.getFlag('J', options);
375	String confidenceString = Utils.getOption('C', options);
376	if (confidenceString.length() != 0) {
377	if (m_reducedErrorPruning) {
378	throw new Exception("Setting CF doesn't make sense " +
379	"for reduced error pruning.");
380	} else {
381	m_CF = (new Float(confidenceString)).floatValue();
382	if ((m_CF <= 0) \|\| (m_CF >= 1)) {
383	throw new Exception("CF has to be greater than zero and smaller than one!");
384	}
385	}
386	} else {
387	m_CF = 0.25f;
388	}
389	String numFoldsString = Utils.getOption('N', options);
390	if (numFoldsString.length() != 0) {
391	if (!m_reducedErrorPruning) {
392	throw new Exception("Setting the number of folds" +
393	" does only make sense for" +
394	" reduced error pruning.");
395	} else {
396	m_numFolds = Integer.parseInt(numFoldsString);
397	}
398	} else {
399	m_numFolds = 3;
400	}
401
402	// Other options
403	String minNumString = Utils.getOption('M', options);
404	if (minNumString.length() != 0) {
405	m_minNumObj = Integer.parseInt(minNumString);
406	} else {
407	m_minNumObj = 2;
408	}
409	String seedString = Utils.getOption('Q', options);
410	if (seedString.length() != 0) {
411	m_Seed = Integer.parseInt(seedString);
412	} else {
413	m_Seed = 1;
414	}
415	}
416
417	/**
418	* Gets the current settings of the Classifier.
419	*
420	* @return an array of strings suitable for passing to setOptions
421	*/
422	public String [] getOptions() {
423
424	String [] options = new String [12];
425	int current = 0;
426
427	if (m_unpruned) {
428	options[current++] = "-U";
429	}
430	if (m_reducedErrorPruning) {
431	options[current++] = "-R";
432	}
433	if (m_binarySplits) {
434	options[current++] = "-B";
435	}
436	options[current++] = "-M"; options[current++] = "" + m_minNumObj;
437	if (!m_reducedErrorPruning) {
438	options[current++] = "-C"; options[current++] = "" + m_CF;
439	}
440	if (m_reducedErrorPruning) {
441	options[current++] = "-N"; options[current++] = "" + m_numFolds;
442	}
443	options[current++] = "-Q"; options[current++] = "" + m_Seed;
444	if (!m_useMDLcorrection) {
445	options[current++] = "-J";
446	}
447
448	while (current < options.length) {
449	options[current++] = "";
450	}
451	return options;
452	}
453
454	/**
455	* Returns a description of the classifier
456	*
457	* @return a string representation of the classifier
458	*/
459	public String toString() {
460
461	if (m_root == null) {
462	return "No classifier built";
463	}
464	return "PART decision list\n------------------\n\n" + m_root.toString();
465	}
466
467	/**
468	* Returns a superconcise version of the model
469	*
470	* @return a concise version of the model
471	*/
472	public String toSummaryString() {
473
474	return "Number of rules: " + m_root.numRules() + "\n";
475	}
476
477	/**
478	* Return the number of rules.
479	* @return the number of rules
480	*/
481	public double measureNumRules() {
482	return m_root.numRules();
483	}
484
485	/**
486	* Returns an enumeration of the additional measure names
487	* @return an enumeration of the measure names
488	*/
489	public Enumeration enumerateMeasures() {
490	Vector newVector = new Vector(1);
491	newVector.addElement("measureNumRules");
492	return newVector.elements();
493	}
494
495	/**
496	* Returns the value of the named measure
497	* @param additionalMeasureName the name of the measure to query for its value
498	* @return the value of the named measure
499	* @throws IllegalArgumentException if the named measure is not supported
500	*/
501	public double getMeasure(String additionalMeasureName) {
502	if (additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0) {
503	return measureNumRules();
504	} else {
505	throw new IllegalArgumentException(additionalMeasureName
506	+ " not supported (PART)");
507	}
508	}
509
510	/**
511	* Returns the tip text for this property
512	* @return tip text for this property suitable for
513	* displaying in the explorer/experimenter gui
514	*/
515	public String confidenceFactorTipText() {
516	return "The confidence factor used for pruning (smaller values incur "
517	+ "more pruning).";
518	}
519
520	/**
521	* Get the value of CF.
522	*
523	* @return Value of CF.
524	*/
525	public float getConfidenceFactor() {
526
527	return m_CF;
528	}
529
530	/**
531	* Set the value of CF.
532	*
533	* @param v Value to assign to CF.
534	*/
535	public void setConfidenceFactor(float v) {
536
537	m_CF = v;
538	}
539
540	/**
541	* Returns the tip text for this property
542	* @return tip text for this property suitable for
543	* displaying in the explorer/experimenter gui
544	*/
545	public String minNumObjTipText() {
546	return "The minimum number of instances per rule.";
547	}
548
549	/**
550	* Get the value of minNumObj.
551	*
552	* @return Value of minNumObj.
553	*/
554	public int getMinNumObj() {
555
556	return m_minNumObj;
557	}
558
559	/**
560	* Set the value of minNumObj.
561	*
562	* @param v Value to assign to minNumObj.
563	*/
564	public void setMinNumObj(int v) {
565
566	m_minNumObj = v;
567	}
568
569	/**
570	* Returns the tip text for this property
571	* @return tip text for this property suitable for
572	* displaying in the explorer/experimenter gui
573	*/
574	public String reducedErrorPruningTipText() {
575	return "Whether reduced-error pruning is used instead of C.4.5 pruning.";
576	}
577
578	/**
579	* Get the value of reducedErrorPruning.
580	*
581	* @return Value of reducedErrorPruning.
582	*/
583	public boolean getReducedErrorPruning() {
584
585	return m_reducedErrorPruning;
586	}
587
588	/**
589	* Set the value of reducedErrorPruning.
590	*
591	* @param v Value to assign to reducedErrorPruning.
592	*/
593	public void setReducedErrorPruning(boolean v) {
594
595	m_reducedErrorPruning = v;
596	}
597
598	/**
599	* Returns the tip text for this property
600	* @return tip text for this property suitable for
601	* displaying in the explorer/experimenter gui
602	*/
603	public String unprunedTipText() {
604	return "Whether pruning is performed.";
605	}
606
607	/**
608	* Get the value of unpruned.
609	*
610	* @return Value of unpruned.
611	*/
612	public boolean getUnpruned() {
613
614	return m_unpruned;
615	}
616
617	/**
618	* Set the value of unpruned.
619	*
620	* @param newunpruned Value to assign to unpruned.
621	*/
622	public void setUnpruned(boolean newunpruned) {
623
624	m_unpruned = newunpruned;
625	}
626
627	/**
628	* Returns the tip text for this property
629	* @return tip text for this property suitable for
630	* displaying in the explorer/experimenter gui
631	*/
632	public String useMDLcorrectionTipText() {
633	return "Whether MDL correction is used when finding splits on numeric attributes.";
634	}
635
636	/**
637	* Get the value of useMDLcorrection.
638	*
639	* @return Value of useMDLcorrection.
640	*/
641	public boolean getUseMDLcorrection() {
642
643	return m_useMDLcorrection;
644	}
645
646	/**
647	* Set the value of useMDLcorrection.
648	*
649	* @param newuseMDLcorrection Value to assign to useMDLcorrection.
650	*/
651	public void setUseMDLcorrection(boolean newuseMDLcorrection) {
652
653	m_useMDLcorrection = newuseMDLcorrection;
654	}
655
656	/**
657	* Returns the tip text for this property
658	* @return tip text for this property suitable for
659	* displaying in the explorer/experimenter gui
660	*/
661	public String numFoldsTipText() {
662	return "Determines the amount of data used for reduced-error pruning. "
663	+ " One fold is used for pruning, the rest for growing the rules.";
664	}
665
666	/**
667	* Get the value of numFolds.
668	*
669	* @return Value of numFolds.
670	*/
671	public int getNumFolds() {
672
673	return m_numFolds;
674	}
675
676	/**
677	* Set the value of numFolds.
678	*
679	* @param v Value to assign to numFolds.
680	*/
681	public void setNumFolds(int v) {
682
683	m_numFolds = v;
684	}
685
686	/**
687	* Returns the tip text for this property
688	* @return tip text for this property suitable for
689	* displaying in the explorer/experimenter gui
690	*/
691	public String seedTipText() {
692	return "The seed used for randomizing the data " +
693	"when reduced-error pruning is used.";
694	}
695
696	/**
697	* Get the value of Seed.
698	*
699	* @return Value of Seed.
700	*/
701	public int getSeed() {
702
703	return m_Seed;
704	}
705
706	/**
707	* Set the value of Seed.
708	*
709	* @param newSeed Value to assign to Seed.
710	*/
711	public void setSeed(int newSeed) {
712
713	m_Seed = newSeed;
714	}
715
716	/**
717	* Returns the tip text for this property
718	* @return tip text for this property suitable for
719	* displaying in the explorer/experimenter gui
720	*/
721	public String binarySplitsTipText() {
722	return "Whether to use binary splits on nominal attributes when "
723	+ "building the partial trees.";
724	}
725
726	/**
727	* Get the value of binarySplits.
728	*
729	* @return Value of binarySplits.
730	*/
731	public boolean getBinarySplits() {
732
733	return m_binarySplits;
734	}
735
736	/**
737	* Set the value of binarySplits.
738	*
739	* @param v Value to assign to binarySplits.
740	*/
741	public void setBinarySplits(boolean v) {
742
743	m_binarySplits = v;
744	}
745
746	/**
747	* Returns the revision string.
748	*
749	* @return the revision
750	*/
751	public String getRevision() {
752	return RevisionUtils.extract("$Revision: 6089 $");
753	}
754
755	/**
756	* Main method for testing this class.
757	*
758	* @param argv command line options
759	*/
760	public static void main(String [] argv){
761	runClassifier(new PART(), argv);
762	}
763	}
764

Note: See TracBrowser for help on using the repository browser.

Download in other formats: