Context Navigation

source: src/main/java/weka/classifiers/rules/PART.java @ 18

Last change on this file since 18 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 20.3 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* PART.java
	19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23	package weka.classifiers.rules;
	24
	25	import weka.classifiers.Classifier;
	26	import weka.classifiers.AbstractClassifier;
	27	import weka.classifiers.rules.part.MakeDecList;
	28	import weka.classifiers.trees.j48.BinC45ModelSelection;
	29	import weka.classifiers.trees.j48.C45ModelSelection;
	30	import weka.classifiers.trees.j48.ModelSelection;
	31	import weka.core.AdditionalMeasureProducer;
	32	import weka.core.Capabilities;
	33	import weka.core.Instance;
	34	import weka.core.Instances;
	35	import weka.core.Option;
	36	import weka.core.OptionHandler;
	37	import weka.core.RevisionUtils;
	38	import weka.core.Summarizable;
	39	import weka.core.TechnicalInformation;
	40	import weka.core.TechnicalInformationHandler;
	41	import weka.core.Utils;
	42	import weka.core.WeightedInstancesHandler;
	43	import weka.core.TechnicalInformation.Field;
	44	import weka.core.TechnicalInformation.Type;
	45
	46	import java.util.Enumeration;
	47	import java.util.Vector;
	48
	49	/**
	50	<!-- globalinfo-start -->
	51	* Class for generating a PART decision list. Uses separate-and-conquer. Builds a partial C4.5 decision tree in each iteration and makes the "best" leaf into a rule.<br/>
	52	* <br/>
	53	* For more information, see:<br/>
	54	* <br/>
	55	* Eibe Frank, Ian H. Witten: Generating Accurate Rule Sets Without Global Optimization. In: Fifteenth International Conference on Machine Learning, 144-151, 1998.
	56	* <p/>
	57	<!-- globalinfo-end -->
	58	*
	59	<!-- technical-bibtex-start -->
	60	* BibTeX:
	61	* <pre>
	62	* @inproceedings{Frank1998,
	63	* author = {Eibe Frank and Ian H. Witten},
	64	* booktitle = {Fifteenth International Conference on Machine Learning},
	65	* editor = {J. Shavlik},
	66	* pages = {144-151},
	67	* publisher = {Morgan Kaufmann},
	68	* title = {Generating Accurate Rule Sets Without Global Optimization},
	69	* year = {1998},
	70	* PS = {http://www.cs.waikato.ac.nz/\~eibe/pubs/ML98-57.ps.gz}
	71	* }
	72	* </pre>
	73	* <p/>
	74	<!-- technical-bibtex-end -->
	75	*
	76	<!-- options-start -->
	77	* Valid options are: <p/>
	78	*
	79	* <pre> -C <pruning confidence>
	80	* Set confidence threshold for pruning.
	81	* (default 0.25)</pre>
	82	*
	83	* <pre> -M <minimum number of objects>
	84	* Set minimum number of objects per leaf.
	85	* (default 2)</pre>
	86	*
	87	* <pre> -R
	88	* Use reduced error pruning.</pre>
	89	*
	90	* <pre> -N <number of folds>
	91	* Set number of folds for reduced error
	92	* pruning. One fold is used as pruning set.
	93	* (default 3)</pre>
	94	*
	95	* <pre> -B
	96	* Use binary splits only.</pre>
	97	*
	98	* <pre> -U
	99	* Generate unpruned decision list.</pre>
	100	*
	101	* <pre> -J
	102	* Do not use MDL correction for info gain on numeric attributes.</pre>
	103	*
	104	* <pre> -Q <seed>
	105	* Seed for random data shuffling (default 1).</pre>
	106	*
	107	<!-- options-end -->
	108	*
	109	* @author Eibe Frank (eibe@cs.waikato.ac.nz)
	110	* @version $Revision: 6089 $
	111	*/
	112	public class PART
	113	extends AbstractClassifier
	114	implements OptionHandler, WeightedInstancesHandler, Summarizable,
	115	AdditionalMeasureProducer, TechnicalInformationHandler {
	116
	117	/** for serialization */
	118	static final long serialVersionUID = 8121455039782598361L;
	119
	120	/** The decision list */
	121	private MakeDecList m_root;
	122
	123	/** Confidence level */
	124	private float m_CF = 0.25f;
	125
	126	/** Minimum number of objects */
	127	private int m_minNumObj = 2;
	128
	129	/** Use MDL correction? */
	130	private boolean m_useMDLcorrection = true;
	131
	132	/** Use reduced error pruning? */
	133	private boolean m_reducedErrorPruning = false;
	134
	135	/** Number of folds for reduced error pruning. */
	136	private int m_numFolds = 3;
	137
	138	/** Binary splits on nominal attributes? */
	139	private boolean m_binarySplits = false;
	140
	141	/** Generate unpruned list? */
	142	private boolean m_unpruned = false;
	143
	144	/** The seed for random number generation. */
	145	private int m_Seed = 1;
	146
	147	/**
	148	* Returns a string describing classifier
	149	* @return a description suitable for
	150	* displaying in the explorer/experimenter gui
	151	*/
	152	public String globalInfo() {
	153
	154	return "Class for generating a PART decision list. Uses "
	155	+ "separate-and-conquer. Builds a partial C4.5 decision tree "
	156	+ "in each iteration and makes the \"best\" leaf into a rule.\n\n"
	157	+ "For more information, see:\n\n"
	158	+ getTechnicalInformation().toString();
	159	}
	160
	161	/**
	162	* Returns an instance of a TechnicalInformation object, containing
	163	* detailed information about the technical background of this class,
	164	* e.g., paper reference or book this class is based on.
	165	*
	166	* @return the technical information about this class
	167	*/
	168	public TechnicalInformation getTechnicalInformation() {
	169	TechnicalInformation result;
	170
	171	result = new TechnicalInformation(Type.INPROCEEDINGS);
	172	result.setValue(Field.AUTHOR, "Eibe Frank and Ian H. Witten");
	173	result.setValue(Field.TITLE, "Generating Accurate Rule Sets Without Global Optimization");
	174	result.setValue(Field.BOOKTITLE, "Fifteenth International Conference on Machine Learning");
	175	result.setValue(Field.EDITOR, "J. Shavlik");
	176	result.setValue(Field.YEAR, "1998");
	177	result.setValue(Field.PAGES, "144-151");
	178	result.setValue(Field.PUBLISHER, "Morgan Kaufmann");
	179	result.setValue(Field.PS, "http://www.cs.waikato.ac.nz/~eibe/pubs/ML98-57.ps.gz");
	180
	181	return result;
	182	}
	183
	184	/**
	185	* Returns default capabilities of the classifier.
	186	*
	187	* @return the capabilities of this classifier
	188	*/
	189	public Capabilities getCapabilities() {
	190	Capabilities result;
	191
	192	if (m_unpruned)
	193	result = new MakeDecList(null, m_minNumObj).getCapabilities();
	194	else if (m_reducedErrorPruning)
	195	result = new MakeDecList(null, m_numFolds, m_minNumObj, m_Seed).getCapabilities();
	196	else
	197	result = new MakeDecList(null, m_CF, m_minNumObj).getCapabilities();
	198
	199	return result;
	200	}
	201
	202	/**
	203	* Generates the classifier.
	204	*
	205	* @param instances the data to train with
	206	* @throws Exception if classifier can't be built successfully
	207	*/
	208	public void buildClassifier(Instances instances)
	209	throws Exception {
	210
	211	// can classifier handle the data?
	212	getCapabilities().testWithFail(instances);
	213
	214	// remove instances with missing class
	215	instances = new Instances(instances);
	216	instances.deleteWithMissingClass();
	217
	218	ModelSelection modSelection;
	219
	220	if (m_binarySplits)
	221	modSelection = new BinC45ModelSelection(m_minNumObj, instances, m_useMDLcorrection);
	222	else
	223	modSelection = new C45ModelSelection(m_minNumObj, instances, m_useMDLcorrection);
	224	if (m_unpruned)
	225	m_root = new MakeDecList(modSelection, m_minNumObj);
	226	else if (m_reducedErrorPruning)
	227	m_root = new MakeDecList(modSelection, m_numFolds, m_minNumObj, m_Seed);
	228	else
	229	m_root = new MakeDecList(modSelection, m_CF, m_minNumObj);
	230	m_root.buildClassifier(instances);
	231	if (m_binarySplits) {
	232	((BinC45ModelSelection)modSelection).cleanup();
	233	} else {
	234	((C45ModelSelection)modSelection).cleanup();
	235	}
	236	}
	237
	238	/**
	239	* Classifies an instance.
	240	*
	241	* @param instance the instance to classify
	242	* @return the classification
	243	* @throws Exception if instance can't be classified successfully
	244	*/
	245	public double classifyInstance(Instance instance)
	246	throws Exception {
	247
	248	return m_root.classifyInstance(instance);
	249	}
	250
	251	/**
	252	* Returns class probabilities for an instance.
	253	*
	254	* @param instance the instance to get the distribution for
	255	* @return the class probabilities
	256	* @throws Exception if the distribution can't be computed successfully
	257	*/
	258	public final double [] distributionForInstance(Instance instance)
	259	throws Exception {
	260
	261	return m_root.distributionForInstance(instance);
	262	}
	263
	264	/**
	265	* Returns an enumeration describing the available options.
	266	*
	267	* Valid options are: <p>
	268	*
	269	* -C confidence <br>
	270	* Set confidence threshold for pruning. (Default: 0.25) <p>
	271	*
	272	* -M number <br>
	273	* Set minimum number of instances per leaf. (Default: 2) <p>
	274	*
	275	* -R <br>
	276	* Use reduced error pruning. <p>
	277	*
	278	* -N number <br>
	279	* Set number of folds for reduced error pruning. One fold is
	280	* used as the pruning set. (Default: 3) <p>
	281	*
	282	* -B <br>
	283	* Use binary splits for nominal attributes. <p>
	284	*
	285	* -U <br>
	286	* Generate unpruned decision list. <p>
	287	*
	288	* -Q <br>
	289	* The seed for reduced-error pruning. <p>
	290	*
	291	* @return an enumeration of all the available options.
	292	*/
	293	public Enumeration listOptions() {
	294
	295	Vector newVector = new Vector(8);
	296
	297	newVector.
	298	addElement(new Option("\tSet confidence threshold for pruning.\n" +
	299	"\t(default 0.25)",
	300	"C", 1, "-C <pruning confidence>"));
	301	newVector.
	302	addElement(new Option("\tSet minimum number of objects per leaf.\n" +
	303	"\t(default 2)",
	304	"M", 1, "-M <minimum number of objects>"));
	305	newVector.
	306	addElement(new Option("\tUse reduced error pruning.",
	307	"R", 0, "-R"));
	308	newVector.
	309	addElement(new Option("\tSet number of folds for reduced error\n" +
	310	"\tpruning. One fold is used as pruning set.\n" +
	311	"\t(default 3)",
	312	"N", 1, "-N <number of folds>"));
	313	newVector.
	314	addElement(new Option("\tUse binary splits only.",
	315	"B", 0, "-B"));
	316	newVector.
	317	addElement(new Option("\tGenerate unpruned decision list.",
	318	"U", 0, "-U"));
	319	newVector.
	320	addElement(new Option("\tDo not use MDL correction for info gain on numeric attributes.",
	321	"J", 0, "-J"));
	322	newVector.
	323	addElement(new Option("\tSeed for random data shuffling (default 1).",
	324	"Q", 1, "-Q <seed>"));
	325
	326	return newVector.elements();
	327	}
	328
	329	/**
	330	* Parses a given list of options. <p/>
	331	*
	332	<!-- options-start -->
	333	* Valid options are: <p/>
	334	*
	335	* <pre> -C <pruning confidence>
	336	* Set confidence threshold for pruning.
	337	* (default 0.25)</pre>
	338	*
	339	* <pre> -M <minimum number of objects>
	340	* Set minimum number of objects per leaf.
	341	* (default 2)</pre>
	342	*
	343	* <pre> -R
	344	* Use reduced error pruning.</pre>
	345	*
	346	* <pre> -N <number of folds>
	347	* Set number of folds for reduced error
	348	* pruning. One fold is used as pruning set.
	349	* (default 3)</pre>
	350	*
	351	* <pre> -B
	352	* Use binary splits only.</pre>
	353	*
	354	* <pre> -U
	355	* Generate unpruned decision list.</pre>
	356	*
	357	* <pre> -J
	358	* Do not use MDL correction for info gain on numeric attributes.</pre>
	359	*
	360	* <pre> -Q <seed>
	361	* Seed for random data shuffling (default 1).</pre>
	362	*
	363	<!-- options-end -->
	364	*
	365	* @param options the list of options as an array of strings
	366	* @throws Exception if an option is not supported
	367	*/
	368	public void setOptions(String[] options) throws Exception {
	369
	370	// Pruning options
	371	m_unpruned = Utils.getFlag('U', options);
	372	m_reducedErrorPruning = Utils.getFlag('R', options);
	373	m_binarySplits = Utils.getFlag('B', options);
	374	m_useMDLcorrection = !Utils.getFlag('J', options);
	375	String confidenceString = Utils.getOption('C', options);
	376	if (confidenceString.length() != 0) {
	377	if (m_reducedErrorPruning) {
	378	throw new Exception("Setting CF doesn't make sense " +
	379	"for reduced error pruning.");
	380	} else {
	381	m_CF = (new Float(confidenceString)).floatValue();
	382	if ((m_CF <= 0) \|\| (m_CF >= 1)) {
	383	throw new Exception("CF has to be greater than zero and smaller than one!");
	384	}
	385	}
	386	} else {
	387	m_CF = 0.25f;
	388	}
	389	String numFoldsString = Utils.getOption('N', options);
	390	if (numFoldsString.length() != 0) {
	391	if (!m_reducedErrorPruning) {
	392	throw new Exception("Setting the number of folds" +
	393	" does only make sense for" +
	394	" reduced error pruning.");
	395	} else {
	396	m_numFolds = Integer.parseInt(numFoldsString);
	397	}
	398	} else {
	399	m_numFolds = 3;
	400	}
	401
	402	// Other options
	403	String minNumString = Utils.getOption('M', options);
	404	if (minNumString.length() != 0) {
	405	m_minNumObj = Integer.parseInt(minNumString);
	406	} else {
	407	m_minNumObj = 2;
	408	}
	409	String seedString = Utils.getOption('Q', options);
	410	if (seedString.length() != 0) {
	411	m_Seed = Integer.parseInt(seedString);
	412	} else {
	413	m_Seed = 1;
	414	}
	415	}
	416
	417	/**
	418	* Gets the current settings of the Classifier.
	419	*
	420	* @return an array of strings suitable for passing to setOptions
	421	*/
	422	public String [] getOptions() {
	423
	424	String [] options = new String [12];
	425	int current = 0;
	426
	427	if (m_unpruned) {
	428	options[current++] = "-U";
	429	}
	430	if (m_reducedErrorPruning) {
	431	options[current++] = "-R";
	432	}
	433	if (m_binarySplits) {
	434	options[current++] = "-B";
	435	}
	436	options[current++] = "-M"; options[current++] = "" + m_minNumObj;
	437	if (!m_reducedErrorPruning) {
	438	options[current++] = "-C"; options[current++] = "" + m_CF;
	439	}
	440	if (m_reducedErrorPruning) {
	441	options[current++] = "-N"; options[current++] = "" + m_numFolds;
	442	}
	443	options[current++] = "-Q"; options[current++] = "" + m_Seed;
	444	if (!m_useMDLcorrection) {
	445	options[current++] = "-J";
	446	}
	447
	448	while (current < options.length) {
	449	options[current++] = "";
	450	}
	451	return options;
	452	}
	453
	454	/**
	455	* Returns a description of the classifier
	456	*
	457	* @return a string representation of the classifier
	458	*/
	459	public String toString() {
	460
	461	if (m_root == null) {
	462	return "No classifier built";
	463	}
	464	return "PART decision list\n------------------\n\n" + m_root.toString();
	465	}
	466
	467	/**
	468	* Returns a superconcise version of the model
	469	*
	470	* @return a concise version of the model
	471	*/
	472	public String toSummaryString() {
	473
	474	return "Number of rules: " + m_root.numRules() + "\n";
	475	}
	476
	477	/**
	478	* Return the number of rules.
	479	* @return the number of rules
	480	*/
	481	public double measureNumRules() {
	482	return m_root.numRules();
	483	}
	484
	485	/**
	486	* Returns an enumeration of the additional measure names
	487	* @return an enumeration of the measure names
	488	*/
	489	public Enumeration enumerateMeasures() {
	490	Vector newVector = new Vector(1);
	491	newVector.addElement("measureNumRules");
	492	return newVector.elements();
	493	}
	494
	495	/**
	496	* Returns the value of the named measure
	497	* @param additionalMeasureName the name of the measure to query for its value
	498	* @return the value of the named measure
	499	* @throws IllegalArgumentException if the named measure is not supported
	500	*/
	501	public double getMeasure(String additionalMeasureName) {
	502	if (additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0) {
	503	return measureNumRules();
	504	} else {
	505	throw new IllegalArgumentException(additionalMeasureName
	506	+ " not supported (PART)");
	507	}
	508	}
	509
	510	/**
	511	* Returns the tip text for this property
	512	* @return tip text for this property suitable for
	513	* displaying in the explorer/experimenter gui
	514	*/
	515	public String confidenceFactorTipText() {
	516	return "The confidence factor used for pruning (smaller values incur "
	517	+ "more pruning).";
	518	}
	519
	520	/**
	521	* Get the value of CF.
	522	*
	523	* @return Value of CF.
	524	*/
	525	public float getConfidenceFactor() {
	526
	527	return m_CF;
	528	}
	529
	530	/**
	531	* Set the value of CF.
	532	*
	533	* @param v Value to assign to CF.
	534	*/
	535	public void setConfidenceFactor(float v) {
	536
	537	m_CF = v;
	538	}
	539
	540	/**
	541	* Returns the tip text for this property
	542	* @return tip text for this property suitable for
	543	* displaying in the explorer/experimenter gui
	544	*/
	545	public String minNumObjTipText() {
	546	return "The minimum number of instances per rule.";
	547	}
	548
	549	/**
	550	* Get the value of minNumObj.
	551	*
	552	* @return Value of minNumObj.
	553	*/
	554	public int getMinNumObj() {
	555
	556	return m_minNumObj;
	557	}
	558
	559	/**
	560	* Set the value of minNumObj.
	561	*
	562	* @param v Value to assign to minNumObj.
	563	*/
	564	public void setMinNumObj(int v) {
	565
	566	m_minNumObj = v;
	567	}
	568
	569	/**
	570	* Returns the tip text for this property
	571	* @return tip text for this property suitable for
	572	* displaying in the explorer/experimenter gui
	573	*/
	574	public String reducedErrorPruningTipText() {
	575	return "Whether reduced-error pruning is used instead of C.4.5 pruning.";
	576	}
	577
	578	/**
	579	* Get the value of reducedErrorPruning.
	580	*
	581	* @return Value of reducedErrorPruning.
	582	*/
	583	public boolean getReducedErrorPruning() {
	584
	585	return m_reducedErrorPruning;
	586	}
	587
	588	/**
	589	* Set the value of reducedErrorPruning.
	590	*
	591	* @param v Value to assign to reducedErrorPruning.
	592	*/
	593	public void setReducedErrorPruning(boolean v) {
	594
	595	m_reducedErrorPruning = v;
	596	}
	597
	598	/**
	599	* Returns the tip text for this property
	600	* @return tip text for this property suitable for
	601	* displaying in the explorer/experimenter gui
	602	*/
	603	public String unprunedTipText() {
	604	return "Whether pruning is performed.";
	605	}
	606
	607	/**
	608	* Get the value of unpruned.
	609	*
	610	* @return Value of unpruned.
	611	*/
	612	public boolean getUnpruned() {
	613
	614	return m_unpruned;
	615	}
	616
	617	/**
	618	* Set the value of unpruned.
	619	*
	620	* @param newunpruned Value to assign to unpruned.
	621	*/
	622	public void setUnpruned(boolean newunpruned) {
	623
	624	m_unpruned = newunpruned;
	625	}
	626
	627	/**
	628	* Returns the tip text for this property
	629	* @return tip text for this property suitable for
	630	* displaying in the explorer/experimenter gui
	631	*/
	632	public String useMDLcorrectionTipText() {
	633	return "Whether MDL correction is used when finding splits on numeric attributes.";
	634	}
	635
	636	/**
	637	* Get the value of useMDLcorrection.
	638	*
	639	* @return Value of useMDLcorrection.
	640	*/
	641	public boolean getUseMDLcorrection() {
	642
	643	return m_useMDLcorrection;
	644	}
	645
	646	/**
	647	* Set the value of useMDLcorrection.
	648	*
	649	* @param newuseMDLcorrection Value to assign to useMDLcorrection.
	650	*/
	651	public void setUseMDLcorrection(boolean newuseMDLcorrection) {
	652
	653	m_useMDLcorrection = newuseMDLcorrection;
	654	}
	655
	656	/**
	657	* Returns the tip text for this property
	658	* @return tip text for this property suitable for
	659	* displaying in the explorer/experimenter gui
	660	*/
	661	public String numFoldsTipText() {
	662	return "Determines the amount of data used for reduced-error pruning. "
	663	+ " One fold is used for pruning, the rest for growing the rules.";
	664	}
	665
	666	/**
	667	* Get the value of numFolds.
	668	*
	669	* @return Value of numFolds.
	670	*/
	671	public int getNumFolds() {
	672
	673	return m_numFolds;
	674	}
	675
	676	/**
	677	* Set the value of numFolds.
	678	*
	679	* @param v Value to assign to numFolds.
	680	*/
	681	public void setNumFolds(int v) {
	682
	683	m_numFolds = v;
	684	}
	685
	686	/**
	687	* Returns the tip text for this property
	688	* @return tip text for this property suitable for
	689	* displaying in the explorer/experimenter gui
	690	*/
	691	public String seedTipText() {
	692	return "The seed used for randomizing the data " +
	693	"when reduced-error pruning is used.";
	694	}
	695
	696	/**
	697	* Get the value of Seed.
	698	*
	699	* @return Value of Seed.
	700	*/
	701	public int getSeed() {
	702
	703	return m_Seed;
	704	}
	705
	706	/**
	707	* Set the value of Seed.
	708	*
	709	* @param newSeed Value to assign to Seed.
	710	*/
	711	public void setSeed(int newSeed) {
	712
	713	m_Seed = newSeed;
	714	}
	715
	716	/**
	717	* Returns the tip text for this property
	718	* @return tip text for this property suitable for
	719	* displaying in the explorer/experimenter gui
	720	*/
	721	public String binarySplitsTipText() {
	722	return "Whether to use binary splits on nominal attributes when "
	723	+ "building the partial trees.";
	724	}
	725
	726	/**
	727	* Get the value of binarySplits.
	728	*
	729	* @return Value of binarySplits.
	730	*/
	731	public boolean getBinarySplits() {
	732
	733	return m_binarySplits;
	734	}
	735
	736	/**
	737	* Set the value of binarySplits.
	738	*
	739	* @param v Value to assign to binarySplits.
	740	*/
	741	public void setBinarySplits(boolean v) {
	742
	743	m_binarySplits = v;
	744	}
	745
	746	/**
	747	* Returns the revision string.
	748	*
	749	* @return the revision
	750	*/
	751	public String getRevision() {
	752	return RevisionUtils.extract("$Revision: 6089 $");
	753	}
	754
	755	/**
	756	* Main method for testing this class.
	757	*
	758	* @param argv command line options
	759	*/
	760	public static void main(String [] argv){
	761	runClassifier(new PART(), argv);
	762	}
	763	}
	764

Note: See TracBrowser for help on using the repository browser.

Download in other formats: