source: src/main/java/weka/classifiers/rules/JRip.java @ 8

Last change on this file since 8 was 4, checked in by gnappo, 14 years ago

Import of weka.

File size: 63.6 KB
[4]1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    JRip.java
19 *    Copyright (C) 2001 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.classifiers.rules;
23
24import weka.classifiers.Classifier;
25import weka.classifiers.AbstractClassifier;
26import weka.core.AdditionalMeasureProducer;
27import weka.core.Attribute;
28import weka.core.Capabilities;
29import weka.core.Copyable;
30import weka.core.FastVector;
31import weka.core.Instance;
32import weka.core.Instances;
33import weka.core.Option;
34import weka.core.RevisionHandler;
35import weka.core.RevisionUtils;
36import weka.core.TechnicalInformation;
37import weka.core.TechnicalInformationHandler;
38import weka.core.Utils;
39import weka.core.WeightedInstancesHandler;
40import weka.core.Capabilities.Capability;
41import weka.core.TechnicalInformation.Field;
42import weka.core.TechnicalInformation.Type;
43import weka.filters.Filter;
44import weka.filters.supervised.attribute.ClassOrder;
45
46import java.io.Serializable;
47import java.util.Enumeration;
48import java.util.Random;
49import java.util.Vector;
50
51/**
52 <!-- globalinfo-start -->
53 * This class implements a propositional rule learner, Repeated Incremental Pruning to Produce Error Reduction (RIPPER), which was proposed by William W. Cohen as an optimized version of IREP. <br/>
54 * <br/>
55 * The algorithm is briefly described as follows: <br/>
56 * <br/>
57 * Initialize RS = {}, and for each class from the least prevalent one to the most frequent one, DO: <br/>
58 * <br/>
59 * 1. Building stage:<br/>
60 * Repeat 1.1 and 1.2 until the description length (DL) of the ruleset and examples is 64 bits greater than the smallest DL met so far, or there are no positive examples, or the error rate &gt;= 50%. <br/>
61 * <br/>
62 * 1.1. Grow phase:<br/>
63 * Grow one rule by greedily adding antecedents (or conditions) to the rule until the rule is perfect (i.e. 100% accurate).  The procedure tries every possible value of each attribute and selects the condition with highest information gain: p(log(p/t)-log(P/T)).<br/>
64 * <br/>
65 * 1.2. Prune phase:<br/>
66 * Incrementally prune each rule and allow the pruning of any final sequences of the antecedents; the pruning metric is (p-n)/(p+n) -- but it's actually 2p/(p+n) - 1, so in this implementation we simply use p/(p+n) (actually (p+1)/(p+n+2), thus if p+n is 0, it's 0.5).<br/>
67 * <br/>
68 * 2. Optimization stage:<br/>
69 * After generating the initial ruleset {Ri}, generate and prune two variants of each rule Ri from randomized data using procedure 1.1 and 1.2. But one variant is generated from an empty rule while the other is generated by greedily adding antecedents to the original rule. Moreover, the pruning metric used here is (TP+TN)/(P+N). Then the smallest possible DL for each variant and the original rule is computed.  The variant with the minimal DL is selected as the final representative of Ri in the ruleset. After all the rules in {Ri} have been examined and if there are still residual positives, more rules are generated based on the residual positives using Building Stage again. <br/>
70 * 3. Delete the rules from the ruleset that would increase the DL of the whole ruleset if they were in it, and add the resultant ruleset to RS. <br/>
71 * ENDDO<br/>
72 * <br/>
73 * Note that there seem to be 2 bugs in the original ripper program that would affect the ruleset size and accuracy slightly.  This implementation avoids these bugs and thus is a little bit different from Cohen's original implementation. Even after fixing the bugs, since the order of classes with the same frequency is not defined in ripper, there still seems to be some trivial difference between this implementation and the original ripper, especially for audiology data in UCI repository, where there are lots of classes of few instances.<br/>
74 * <br/>
75 * For details please see:<br/>
76 * <br/>
77 * William W. Cohen: Fast Effective Rule Induction. In: Twelfth International Conference on Machine Learning, 115-123, 1995.<br/>
78 * <br/>
79 * PS.  We have compared this implementation with the original ripper implementation in aspects of accuracy, ruleset size and running time on both artificial data "ab+bcd+defg" and UCI datasets.  In all these aspects it seems to be quite comparable to the original ripper implementation.  However, we didn't consider memory consumption optimization in this implementation.<br/>
80 * <br/>
81 * <p/>
82 <!-- globalinfo-end -->
83 *
84 <!-- technical-bibtex-start -->
85 * BibTeX:
86 * <pre>
87 * &#64;inproceedings{Cohen1995,
88 *    author = {William W. Cohen},
89 *    booktitle = {Twelfth International Conference on Machine Learning},
90 *    pages = {115-123},
91 *    publisher = {Morgan Kaufmann},
92 *    title = {Fast Effective Rule Induction},
93 *    year = {1995}
94 * }
95 * </pre>
96 * <p/>
97 <!-- technical-bibtex-end -->
98 *
99 <!-- options-start -->
100 * Valid options are: <p/>
101 *
102 * <pre> -F &lt;number of folds&gt;
103 *  Set number of folds for REP
104 *  One fold is used as pruning set.
105 *  (default 3)</pre>
106 *
107 * <pre> -N &lt;min. weights&gt;
108 *  Set the minimal weights of instances
109 *  within a split.
110 *  (default 2.0)</pre>
111 *
112 * <pre> -O &lt;number of runs&gt;
113 *  Set the number of runs of
114 *  optimizations. (Default: 2)</pre>
115 *
116 * <pre> -D
117 *  Set whether to turn on the
118 *  debug mode (Default: false)</pre>
119 *
120 * <pre> -S &lt;seed&gt;
121 *  The seed of randomization
122 *  (Default: 1)</pre>
123 *
124 * <pre> -E
125 *  Whether NOT to check the error rate&gt;=0.5
126 *  in stopping criteria  (default: check)</pre>
127 *
128 * <pre> -P
129 *  Whether NOT to use pruning
130 *  (default: use pruning)</pre>
131 *
132 <!-- options-end -->
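 *
 * A minimal usage sketch (the file name is hypothetical; assumes an ARFF
 * dataset whose last attribute is the nominal class):
 * <pre>
 * Instances data = new Instances(new java.io.FileReader("weather.arff"));
 * data.setClassIndex(data.numAttributes() - 1);
 * JRip rip = new JRip();
 * rip.setOptions(new String[]{"-F", "3", "-N", "2.0", "-O", "2", "-S", "1"});
 * rip.buildClassifier(data);
 * System.out.println(rip); // prints the learned ruleset
 * </pre>
 * <p/>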
133 *
134 * @author Xin Xu (xx5@cs.waikato.ac.nz)
135 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
136 * @version $Revision: 6041 $
137 */
138public class JRip 
139  extends AbstractClassifier
140  implements AdditionalMeasureProducer, 
141             WeightedInstancesHandler,
142             TechnicalInformationHandler {   
143
144  /** for serialization */
145  static final long serialVersionUID = -6589312996832147161L;
146 
147  /** The limit of description length surplus in ruleset generation */
148  private static final double MAX_DL_SURPLUS = 64.0;
149   
150  /** The class attribute of the data*/
151  private Attribute m_Class; 
152   
153  /** The ruleset */
154  private FastVector m_Ruleset;
155 
156  /** The predicted class distribution */
157  private FastVector m_Distributions;
158 
159  /** Runs of optimizations */
160  private int m_Optimizations = 2;
161   
162  /** Random object used in this class */
163  private Random m_Random = null;
164   
165  /** # of all the possible conditions in a rule */
166  private double m_Total = 0;
167
168  /** The seed to perform randomization */
169  private long m_Seed = 1;
170
171  /** The number of folds to split data into Grow and Prune for IREP */
172  private int m_Folds = 3;
173   
174  /** The minimal number of instance weights within a split*/
175  private double m_MinNo = 2.0;
176
177  /** Whether in a debug mode */
178  private boolean m_Debug = false;
179
180  /** Whether check the error rate >= 0.5 in stopping criteria */
181  private boolean m_CheckErr = true;
182
183  /** Whether use pruning, i.e. the data is clean or not */
184  private boolean m_UsePruning = true;
185
186  /** The filter used to randomize the class order */
187  private Filter m_Filter = null;
188
189  /** The RuleStats for the ruleset of each class value */
190  private FastVector m_RulesetStats;
191   
192  /**
193   * Returns a string describing classifier
194   * @return a description suitable for
195   * displaying in the explorer/experimenter gui
196   */
197  public String globalInfo() {
198
199    return "This class implements a propositional rule learner, Repeated Incremental "
200      + "Pruning to Produce Error Reduction (RIPPER), which was proposed by William "
201      + "W. Cohen as an optimized version of IREP. \n\n"
202      + "The algorithm is briefly described as follows: \n\n"
203      + "Initialize RS = {}, and for each class from the less prevalent one to "
204      + "the more frequent one, DO: \n\n"
205      + "1. Building stage:\nRepeat 1.1 and 1.2 until the descrition length (DL) "
206      + "of the ruleset and examples is 64 bits greater than the smallest DL "
207      + "met so far, or there are no positive examples, or the error rate >= 50%. "
208      + "\n\n"
209      + "1.1. Grow phase:\n"
210      + "Grow one rule by greedily adding antecedents (or conditions) to "
211      + "the rule until the rule is perfect (i.e. 100% accurate).  The "
212      + "procedure tries every possible value of each attribute and selects "
213      + "the condition with highest information gain: p(log(p/t)-log(P/T))."
214      + "\n\n"
215      + "1.2. Prune phase:\n"
216      + "Incrementally prune each rule and allow the pruning of any "
217      + "final sequences of the antecedents;"
218      + "The pruning metric is (p-n)/(p+n) -- but it's actually "
219      + "2p/(p+n) -1, so in this implementation we simply use p/(p+n) "
220      + "(actually (p+1)/(p+n+2), thus if p+n is 0, it's 0.5).\n\n"
221      + "2. Optimization stage:\n after generating the initial ruleset {Ri}, "
222      + "generate and prune two variants of each rule Ri from randomized data "
223      + "using procedure 1.1 and 1.2. But one variant is generated from an "
224      + "empty rule while the other is generated by greedily adding antecedents "
225      + "to the original rule. Moreover, the pruning metric used here is "
226      + "(TP+TN)/(P+N)."
227      + "Then the smallest possible DL for each variant and the original rule "
228      + "is computed.  The variant with the minimal DL is selected as the final "
229      + "representative of Ri in the ruleset."
230      + "After all the rules in {Ri} have been examined and if there are still "
231      + "residual positives, more rules are generated based on the residual "
232      + "positives using Building Stage again. \n"
233      + "3. Delete the rules from the ruleset that would increase the DL of the "
234      + "whole ruleset if it were in it. and add resultant ruleset to RS. \n"
235      + "ENDDO\n\n"
236      + "Note that there seem to be 2 bugs in the original ripper program that would "
237      + "affect the ruleset size and accuracy slightly.  This implementation avoids "
238      + "these bugs and thus is a little bit different from Cohen's original "
239      + "implementation. Even after fixing the bugs, since the order of classes with "
240      + "the same frequency is not defined in ripper, there still seems to be "
241      + "some trivial difference between this implementation and the original ripper, "
242      + "especially for audiology data in UCI repository, where there are lots of "
243      + "classes of few instances.\n\n"
244      + "Details please see:\n\n"
245      + getTechnicalInformation().toString() + "\n\n"
246      + "PS.  We have compared this implementation with the original ripper "
247      + "implementation in aspects of accuracy, ruleset size and running time "
248      + "on both artificial data \"ab+bcd+defg\" and UCI datasets.  In all these "
249      + "aspects it seems to be quite comparable to the original ripper "
250      + "implementation.  However, we didn't consider memory consumption "
251      + "optimization in this implementation.\n\n";   
252  }
253
254  /**
255   * Returns an instance of a TechnicalInformation object, containing
256   * detailed information about the technical background of this class,
257   * e.g., paper reference or book this class is based on.
258   *
259   * @return the technical information about this class
260   */
261  public TechnicalInformation getTechnicalInformation() {
262    TechnicalInformation        result;
263   
264    result = new TechnicalInformation(Type.INPROCEEDINGS);
265    result.setValue(Field.AUTHOR, "William W. Cohen");
266    result.setValue(Field.TITLE, "Fast Effective Rule Induction");
267    result.setValue(Field.BOOKTITLE, "Twelfth International Conference on Machine Learning");
268    result.setValue(Field.YEAR, "1995");
269    result.setValue(Field.PAGES, "115-123");
270    result.setValue(Field.PUBLISHER, "Morgan Kaufmann");
271   
272    return result;
273  }
274   
275  /**
276   * Returns an enumeration describing the available options
277   * Valid options are: <p>
278   * 
279   * -F number <br>
280   * The number of folds for reduced error pruning. One fold is
281   * used as the pruning set. (Default: 3) <p>
282   *
283   * -N number <br>
284   * The minimal weights of instances within a split.
285   * (Default: 2) <p>
286   *   
287   * -O number <br>
288   * Set the number of runs of optimizations. (Default: 2)<p>
289   *
290   * -D <br>
291   * Whether to turn on the debug mode <p>
292   *
293   * -S number <br>
294   * The seed of randomization used in Ripper. (Default: 1)<p>
295   *
296   * -E <br>
297   * Whether NOT to check the error rate >= 0.5 in stopping criteria.
298   * (default: check)<p>
299   *
300   * -P <br>
301   * Whether NOT to use pruning. (default: use pruning)<p>
302   *
303   * @return an enumeration of all the available options
304   */
305  public Enumeration listOptions() {
306    Vector newVector = new Vector(7);
307    newVector.addElement(new Option("\tSet number of folds for REP\n" +
308                                    "\tOne fold is used as pruning set.\n" +
309                                    "\t(default 3)","F", 1, "-F <number of folds>"));
310    newVector.addElement(new Option("\tSet the minimal weights of instances\n" +
311                                    "\twithin a split.\n" +
312                                    "\t(default 2.0)","N", 1, "-N <min. weights>"));             
313    newVector.addElement(new Option("\tSet the number of runs of\n"+
314                                    "\toptimizations. (Default: 2)", "O",
315                                    1,"-O <number of runs>"));
316       
317    newVector.addElement(new Option("\tSet whether turn on the\n"+
318                                    "\tdebug mode (Default: false)", "D",
319                                    0,"-D"));
320       
321    newVector.addElement(new Option("\tThe seed of randomization\n"+
322                                    "\t(Default: 1)", "S",
323                                    1,"-S <seed>"));
324       
325    newVector.addElement(new Option("\tWhether NOT check the error rate>=0.5\n"
326                                    +"\tin stopping criteria "
327                                    +"\t(default: check)", "E", 
328                                    0, "-E")); 
329       
330    newVector.addElement(new Option("\tWhether NOT use pruning\n"
331                                    +"\t(default: use pruning)", "P", 
332                                    0, "-P")); 
333    return newVector.elements();
334  }
335   
336  /**
337   * Parses a given list of options. <p/>
338   *
339   <!-- options-start -->
340   * Valid options are: <p/>
341   *
342   * <pre> -F &lt;number of folds&gt;
343   *  Set number of folds for REP
344   *  One fold is used as pruning set.
345   *  (default 3)</pre>
346   *
347   * <pre> -N &lt;min. weights&gt;
348   *  Set the minimal weights of instances
349   *  within a split.
350   *  (default 2.0)</pre>
351   *
352   * <pre> -O &lt;number of runs&gt;
353   *  Set the number of runs of
354   *  optimizations. (Default: 2)</pre>
355   *
356   * <pre> -D
357   *  Set whether to turn on the
358   *  debug mode (Default: false)</pre>
359   *
360   * <pre> -S &lt;seed&gt;
361   *  The seed of randomization
362   *  (Default: 1)</pre>
363   *
364   * <pre> -E
365   *  Whether NOT to check the error rate&gt;=0.5
366   *  in stopping criteria  (default: check)</pre>
367   *
368   * <pre> -P
369   *  Whether NOT to use pruning
370   *  (default: use pruning)</pre>
371   *
372   <!-- options-end -->
373   *
374   * @param options the list of options as an array of strings
375   * @throws Exception if an option is not supported
376   */
377  public void setOptions(String[] options) throws Exception {
378    String numFoldsString = Utils.getOption('F', options);
379    if (numFoldsString.length() != 0) 
380      m_Folds = Integer.parseInt(numFoldsString);
381    else 
382      m_Folds = 3;   
383       
384    String minNoString = Utils.getOption('N', options);
385    if (minNoString.length() != 0) 
386      m_MinNo = Double.parseDouble(minNoString);
387    else 
388      m_MinNo = 2.0;
389       
390    String seedString = Utils.getOption('S', options);
391    if (seedString.length() != 0)
392      m_Seed = Long.parseLong(seedString);
393    else 
394      m_Seed = 1;
395
396    String runString = Utils.getOption('O', options);
397    if (runString.length() != 0)
398      m_Optimizations = Integer.parseInt(runString);
399    else 
400      m_Optimizations = 2;
401
402    m_Debug = Utils.getFlag('D', options);
403    m_CheckErr = !Utils.getFlag('E', options);
404    m_UsePruning = !Utils.getFlag('P', options);
405  }
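
  /*
   * A sketch of what the option flags map to programmatically (values shown
   * are the documented defaults):
   *
   *   JRip rip = new JRip();
   *   rip.setFolds(3);          // -F
   *   rip.setMinNo(2.0);        // -N
   *   rip.setOptimizations(2);  // -O
   *   rip.setSeed(1);           // -S
   */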
406   
407  /**
408   * Gets the current settings of the Classifier.
409   *
410   * @return an array of strings suitable for passing to setOptions
411   */
412  public String [] getOptions() {
413
414    String [] options = new String [11];
415    int current = 0;
416    options[current++] = "-F"; options[current++] = "" + m_Folds;
417    options[current++] = "-N"; options[current++] = "" + m_MinNo;
418    options[current++] = "-O"; options[current++] = "" + m_Optimizations;
419    options[current++] = "-S"; options[current++] = "" + m_Seed;
420       
421    if(m_Debug)
422      options[current++] = "-D";
423
424    if(!m_CheckErr)
425      options[current++] = "-E";
426       
427    if(!m_UsePruning)
428      options[current++] = "-P";
429       
430    while(current < options.length)
431      options[current++] = "";
432       
433    return options;
434  }
435
436  /**
437   * Returns an enumeration of the additional measure names
438   * @return an enumeration of the measure names
439   */
440  public Enumeration enumerateMeasures() {
441    Vector newVector = new Vector(1);
442    newVector.addElement("measureNumRules");
443    return newVector.elements();
444  }
445   
446  /**
447   * Returns the value of the named measure
448   * @param additionalMeasureName the name of the measure to query for its value
449   * @return the value of the named measure
450   * @throws IllegalArgumentException if the named measure is not supported
451   */
452  public double getMeasure(String additionalMeasureName) {
453    if (additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0) 
454      return m_Ruleset.size();
455    else 
456      throw new IllegalArgumentException(additionalMeasureName+" not supported (RIPPER)");
457  } 
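
  /*
   * Example (sketch): after training, the size of the learned ruleset can be
   * queried through the AdditionalMeasureProducer interface:
   *
   *   double numRules = rip.getMeasure("measureNumRules");
   */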
458
459  /**
460   * Returns the tip text for this property
461   * @return tip text for this property suitable for
462   * displaying in the explorer/experimenter gui
463   */
464  public String foldsTipText() {
465    return "Determines the amount of data used for pruning. One fold is used for "
466      + "pruning, the rest for growing the rules.";
467  }
468
469  /**
470   * Sets the number of folds to use
471   *
472   * @param fold the number of folds
473   */
474  public void setFolds(int fold) { 
475    m_Folds = fold; 
476  }
477 
478  /**
479   * Gets the number of folds
480   *
481   * @return the number of folds
482   */
483  public int getFolds(){ 
484    return m_Folds; 
485  }
486
487  /**
488   * Returns the tip text for this property
489   * @return tip text for this property suitable for
490   * displaying in the explorer/experimenter gui
491   */
492  public String minNoTipText() {
493    return "The minimum total weight of the instances in a rule.";
494  }
495
496  /**
497   * Sets the minimum total weight of the instances in a rule
498   *
499   * @param m the minimum total weight of the instances in a rule
500   */
501  public void setMinNo(double m) {
502    m_MinNo = m;
503  }
504 
505  /**
506   * Gets the minimum total weight of the instances in a rule
507   *
508   * @return the minimum total weight of the instances in a rule
509   */
510  public double getMinNo(){ 
511    return m_MinNo;
512  }
513
514  /**
515   * Returns the tip text for this property
516   * @return tip text for this property suitable for
517   * displaying in the explorer/experimenter gui
518   */
519  public String seedTipText() {
520    return "The seed used for randomizing the data.";
521  }
522
523  /**
524   * Sets the seed value to use in randomizing the data
525   *
526   * @param s the new seed value
527   */
528  public void setSeed(long s) {
529    m_Seed = s;
530  }
531 
532  /**
533   * Gets the current seed value to use in randomizing the data
534   *
535   * @return the seed value
536   */
537  public long getSeed(){
538    return m_Seed;
539  }
540
541  /**
542   * Returns the tip text for this property
543   * @return tip text for this property suitable for
544   * displaying in the explorer/experimenter gui
545   */
546  public String optimizationsTipText() {
547    return "The number of optimization runs.";
548  }
549
550  /**
551   * Sets the number of optimization runs
552   *
553   * @param run the number of optimization runs
554   */
555  public void setOptimizations(int run) {
556    m_Optimizations = run;
557  }
558 
559  /**
560   * Gets the number of optimization runs
561   *
562   * @return the number of optimization runs
563   */
564  public int getOptimizations() {
565    return m_Optimizations;
566  }
567
568  /**
569   * Returns the tip text for this property
570   * @return tip text for this property suitable for
571   * displaying in the explorer/experimenter gui
572   */
573  public String debugTipText() {
574    return "Whether debug information is output to the console.";
575  }
576
577  /**
578   * Sets whether debug information is output to the console
579   *
580   * @param d whether debug information is output to the console
581   */
582  public void setDebug(boolean d) {
583    m_Debug = d;
584  }
585 
586  /**
587   * Gets whether debug information is output to the console
588   *
589   * @return whether debug information is output to the console
590   */
591  public boolean getDebug(){
592    return m_Debug;
593  }
594
595  /**
596   * Returns the tip text for this property
597   * @return tip text for this property suitable for
598   * displaying in the explorer/experimenter gui
599   */
600  public String checkErrorRateTipText() {
601    return "Whether check for error rate >= 1/2 is included" +
602      " in stopping criterion.";
603  }
604
605  /**
606   * Sets whether the check for error rate is included in the stopping criterion
607   *
608   * @param d whether the check for error rate is included in the stopping criterion
609   */
610  public void setCheckErrorRate(boolean d) { 
611    m_CheckErr = d;
612  }
613 
614  /**
615   * Gets whether the check for error rate is included in the stopping criterion
616   *
617   * @return true if checking for error rate is in stopping criterion
618   */
619  public boolean getCheckErrorRate(){ 
620    return m_CheckErr; 
621  }
622
623  /**
624   * Returns the tip text for this property
625   * @return tip text for this property suitable for
626   * displaying in the explorer/experimenter gui
627   */
628  public String usePruningTipText() {
629    return "Whether pruning is performed.";
630  }
631
632  /**
633   * Sets whether pruning is performed
634   *
635   * @param d Whether pruning is performed
636   */
637  public void setUsePruning(boolean d) { 
638    m_UsePruning = d;
639  }
640 
641  /**
642   * Gets whether pruning is performed
643   *
644   * @return true if pruning is performed
645   */
646  public boolean getUsePruning(){ 
647    return m_UsePruning; 
648  }
649   
650  /**
651   * Get the ruleset generated by Ripper
652   *
653   * @return the ruleset
654   */
655  public FastVector getRuleset(){ return m_Ruleset; }
656
657  /**
658   * Get the statistics of the ruleset in the given position
659   *
660   * @param pos the position of the stats, assumed to be valid
661   * @return the statistics of the ruleset in the given position
662   */
663  public RuleStats getRuleStats(int pos) {
664    return (RuleStats)m_RulesetStats.elementAt(pos);
665  }
666   
667  /**
668   * The single antecedent in the rule, which is composed of an attribute and
669   * the corresponding value.  There are two subclasses, namely NumericAntd
670   * and NominalAntd, for numeric and nominal attributes respectively.
671   */   
672  private abstract class Antd 
673    implements WeightedInstancesHandler, Copyable, Serializable, RevisionHandler {
674
675    /** for serialization */
676    private static final long serialVersionUID = -8929754772994154334L;
677       
678    /** The attribute of the antecedent */
679    protected Attribute att;
680       
681    /** The attribute value of the antecedent. 
682       For a numeric attribute, the value is either 0 (1st bag) or 1 (2nd bag) */
683    protected double value; 
684       
685    /** The maximum infoGain achieved by this antecedent test
686     * in the growing data */
687    protected double maxInfoGain;
688       
690    /** The accuracy rate of this antecedent test on the growing data */
690    protected double accuRate;
691       
692    /** The coverage of this antecedent in the growing data */
693    protected double cover;
694       
696    /** The accurately classified weight for this antecedent in the growing data */
696    protected double accu;
697       
698    /**
699     * Constructor
700     */
701    public Antd(Attribute a){
702      att=a;
703      value=Double.NaN; 
704      maxInfoGain = 0;
705      accuRate = Double.NaN;
706      cover = Double.NaN;
707      accu = Double.NaN;
708    }
709       
710    /* The abstract members for inheritance */
711    public abstract Instances[] splitData(Instances data, double defAcRt, 
712                                          double cla);
713    public abstract boolean covers(Instance inst);
714    public abstract String toString();
715       
716    /**
717     * Implements Copyable
718     *
719     * @return a copy of this object
720     */
721    public abstract Object copy(); 
722           
723    /* Get functions of this antecedent */
724    public Attribute getAttr(){ return att; }
725    public double getAttrValue(){ return value; }
726    public double getMaxInfoGain(){ return maxInfoGain; }
727    public double getAccuRate(){ return accuRate; } 
728    public double getAccu(){ return accu; } 
729    public double getCover(){ return cover; } 
730   
731    /**
732     * Returns the revision string.
733     *
734     * @return          the revision
735     */
736    public String getRevision() {
737      return RevisionUtils.extract("$Revision: 6041 $");
738    }
739  }
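
  /*
   * A worked example of the information gain used to score antecedents
   * (illustrative numbers): with p = 6 positives out of t = 8 covered
   * instances and default accuracy P/T = 20/100, the gain is
   * p*(log2(p/t) - log2(P/T)) = 6*(log2(0.75) - log2(0.2)), roughly 11.4 bits.
   * Note the subclasses smooth the covered rate, e.g. (p+1)/(t+1) in splitData.
   */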
740   
741  /**
742   * The antecedent with numeric attribute
743   */
744  private class 
745    NumericAntd extends Antd {
746   
747    /** for serialization */
748    static final long serialVersionUID = 5699457269983735442L;
749       
750    /** The split point for this numeric antecedent */
751    private double splitPoint;
752   
753    /**
754     * Constructor
755     */
756    public NumericAntd(Attribute a){ 
757      super(a);
758      splitPoint = Double.NaN;
759    }   
760       
761    /**
762     * Get split point of this numeric antecedent
763     *
764     * @return the split point of this numeric antecedent
765     */
766    public double getSplitPoint(){ 
767      return splitPoint;
768    }
769       
770    /**
771     * Implements Copyable
772     *
773     * @return a copy of this object
774     */
775    public Object copy(){ 
776      NumericAntd na = new NumericAntd(getAttr());
777      na.value = this.value;
778      na.splitPoint = this.splitPoint;
779      return na;
780    }
781       
782    /**
783     * Implements the splitData function. 
784     * This procedure is to split the data into two bags according
785     * to the information gain of the numeric attribute value.
786     * The maximum infoGain is also calculated. 
787     *
788     * @param insts the data to be split
789     * @param defAcRt the default accuracy rate for data
790     * @param cl the class label to be predicted
791     * @return the array of data after split
792     */
793    public Instances[] splitData(Instances insts, double defAcRt, 
794                                 double cl){
795      Instances data = insts;
796      int total=data.numInstances(); // Total number of instances without
797                                     // missing values for att
798           
799      int split=1;                  // Current split position
800      int prev=0;                   // Previous split position
801      int finalSplit=split;         // Final split position
802      maxInfoGain = 0;
803      value = 0;       
804           
805      double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0;
806           
807      data.sort(att);
808      // Find the last instance without a missing value
809      for(int x=0; x<data.numInstances(); x++){
810        Instance inst = data.instance(x);
811        if(inst.isMissing(att)){
812          total = x;
813          break;
814        }
815               
816        sndCover += inst.weight();
817        if(Utils.eq(inst.classValue(), cl))
818          sndAccu += inst.weight();             
819      }     
820
821      if(total == 0) return null; // Data all missing for the attribute
822      splitPoint = data.instance(total-1).value(att);   
823           
824      for(; split <= total; split++){
825        if((split == total) ||
826           (data.instance(split).value(att) > // Can't split within
827            data.instance(prev).value(att))){ // same value         
828                   
829          for(int y=prev; y<split; y++){
830            Instance inst = data.instance(y);
831            fstCover += inst.weight(); 
832            if(Utils.eq(data.instance(y).classValue(), cl)){
833              fstAccu += inst.weight();  // First bag positive# ++
834            }                     
835          }
836                   
837          double fstAccuRate = (fstAccu+1.0)/(fstCover+1.0),
838            sndAccuRate = (sndAccu+1.0)/(sndCover+1.0);
839                   
840          /* Which bag has higher information gain? */
841          boolean isFirst; 
842          double fstInfoGain, sndInfoGain;
843          double accRate, infoGain, coverage, accurate;
844                   
845          fstInfoGain = 
846            //Utils.eq(defAcRt, 1.0) ?
847            //fstAccu/(double)numConds :
848            fstAccu*(Utils.log2(fstAccuRate)-Utils.log2(defAcRt));
849                   
850          sndInfoGain = 
851            //Utils.eq(defAcRt, 1.0) ?
852            //sndAccu/(double)numConds :
853            sndAccu*(Utils.log2(sndAccuRate)-Utils.log2(defAcRt));
854                   
855          if(fstInfoGain > sndInfoGain){
856            isFirst = true;
857            infoGain = fstInfoGain;
858            accRate = fstAccuRate;
859            accurate = fstAccu;
860            coverage = fstCover;
861          }
862          else{
863            isFirst = false;
864            infoGain = sndInfoGain;
865            accRate = sndAccuRate;
866            accurate = sndAccu;
867            coverage = sndCover;
868          }
869                   
870          /* Check whether so far the max infoGain */
871          if(infoGain > maxInfoGain){
872            splitPoint = data.instance(prev).value(att);
873            value = (isFirst) ? 0 : 1;
874            accuRate = accRate;
875            accu = accurate;
876            cover = coverage;
877            maxInfoGain = infoGain;
878            finalSplit = (isFirst) ? split : prev;
879          }
880                   
881          for(int y=prev; y<split; y++){
882            Instance inst = data.instance(y);
883            sndCover -= inst.weight(); 
884            if(Utils.eq(data.instance(y).classValue(), cl)){
885              sndAccu -= inst.weight();  // Second bag positive# --
886            }                     
887          }                 
888          prev=split;
889        }
890      }
891           
892      /* Split the data */
893      Instances[] splitData = new Instances[2];
894      splitData[0] = new Instances(data, 0, finalSplit);
895      splitData[1] = new Instances(data, finalSplit, total-finalSplit);
896           
897      return splitData;
898    }
899       
900    /**
901     * Whether the instance is covered by this antecedent
902     *
903     * @param inst the instance in question
904     * @return the boolean value indicating whether the instance is covered
905     *         by this antecedent
906     */
907    public boolean covers(Instance inst){
908      boolean isCover=true;
909      if(!inst.isMissing(att)){
910        if((int)value == 0){ // First bag
911          if(inst.value(att) > splitPoint)
912            isCover=false;
913        }
914        else if(inst.value(att) < splitPoint) // Second bag
915          isCover=false;
916      }
917      else
918        isCover = false;
919           
920      return isCover;
921    }
922       
923    /**
924     * Prints this antecedent
925     *
926     * @return a textual description of this antecedent
927     */
928    public String toString() {
929      String symbol = ((int)value == 0) ? " <= " : " >= ";
930      return (att.name() + symbol + Utils.doubleToString(splitPoint, 6));
931    }   
932   
933    /**
934     * Returns the revision string.
935     *
936     * @return          the revision
937     */
938    public String getRevision() {
939      return RevisionUtils.extract("$Revision: 6041 $");
940    }
941  }
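
  /*
   * Semantics of NumericAntd (illustrative): with splitPoint = 5.5, value = 0
   * selects the first bag, so covers() accepts instances with att <= 5.5
   * (rendered by toString() as "att <= 5.5"); value = 1 selects the second
   * bag and accepts att >= 5.5.
   */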
942   
943   
944  /**
945   * The antecedent with nominal attribute
946   */
947  private class NominalAntd 
948    extends Antd {
949       
950    /** for serialization */
951    static final long serialVersionUID = -9102297038837585135L;
952   
953    /* The parameters of infoGain calculated for each attribute value
954     * in the growing data */
955    private double[] accurate;
956    private double[] coverage;
957       
958    /**
959     * Constructor
960     */
961    public NominalAntd(Attribute a){ 
962      super(a);   
963      int bag = att.numValues();
964      accurate = new double[bag];
965      coverage = new double[bag];
966    }   
967
968    /**
969     * Implements Copyable
970     *
971     * @return a copy of this object
972     */
973    public Object copy(){
974      Antd antec = new NominalAntd(getAttr());
975      antec.value = this.value;
976      return antec;         
977    }
978       
979    /**
980     * Implements the splitData function. 
981     * This procedure is to split the data into bags according
982     * to the nominal attribute value.
983     * The infoGain for each bag is also calculated. 
984     *
985     * @param data the data to be split
986     * @param defAcRt the default accuracy rate for data
987     * @param cl the class label to be predicted
988     * @return the array of data after split
989     */
990    public Instances[] splitData(Instances data, double defAcRt, 
991                                 double cl){
992      int bag = att.numValues();
993      Instances[] splitData = new Instances[bag];
994           
995      for(int x=0; x<bag; x++){
996        splitData[x] = new Instances(data, data.numInstances());
997        accurate[x] = 0;
998        coverage[x] = 0;
999      }
1000           
1001      for(int x=0; x<data.numInstances(); x++){
1002        Instance inst=data.instance(x);
1003        if(!inst.isMissing(att)){
1004          int v = (int)inst.value(att);
1005          splitData[v].add(inst);
1006          coverage[v] += inst.weight();
1007          if((int)inst.classValue() == (int)cl)
1008            accurate[v] += inst.weight();
1009        }
1010      }
1011           
1012      for(int x=0; x<bag; x++){
1013        double t = coverage[x]+1.0;
1014        double p = accurate[x] + 1.0;           
1015        double infoGain = 
1016          //Utils.eq(defAcRt, 1.0) ?
1017          //accurate[x]/(double)numConds :
1018          accurate[x]*(Utils.log2(p/t)-Utils.log2(defAcRt));
1019               
1020        if(infoGain > maxInfoGain){
1021          maxInfoGain = infoGain;
1022          cover = coverage[x];
1023          accu = accurate[x];
1024          accuRate = p/t;
1025          value = (double)x;
1026        }
1027      }
1028           
1029      return splitData;
1030    }
1031       
1032    /**
1033     * Whether the instance is covered by this antecedent
1034     *
1035     * @param inst the instance in question
1036     * @return the boolean value indicating whether the instance is
1037     *         covered by this antecedent
1038     */
1039    public boolean covers(Instance inst){
1040      boolean isCover=false;
1041      if(!inst.isMissing(att)){
1042        if((int)inst.value(att) == (int)value)
1043          isCover=true;     
1044      }
1045      return isCover;
1046    }
1047       
1048    /**
1049     * Prints this antecedent
1050     *
1051     * @return a textual description of this antecedent
1052     */
1053    public String toString() {
1054      return (att.name() + " = " +att.value((int)value));
1055    } 
1056   
1057    /**
1058     * Returns the revision string.
1059     *
1060     * @return          the revision
1061     */
1062    public String getRevision() {
1063      return RevisionUtils.extract("$Revision: 6041 $");
1064    }
1065  }
1066
1067   
1068  /**
1069   * This class implements a single rule that predicts a specified class. 
1070   *
1071   * A rule consists of antecedents "AND"ed together and the consequent
1072   * (class value) for the classification. 
1073   * In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to
1074   * select an antecedent and Reduced Error Pruning (REP) with the metric
1075   * of accuracy rate p/(p+n) or (TP+TN)/(P+N) is used to prune the rule.
1076   */   
1077  protected class RipperRule 
1078    extends Rule {
1079   
1080    /** for serialization */
1081    static final long serialVersionUID = -2410020717305262952L;
1082       
1083    /** The internal representation of the class label to be predicted */
1084    private double m_Consequent = -1;   
1085               
1086    /** The vector of antecedents of this rule*/
1087    protected FastVector m_Antds = null;       
1088       
1089    /** Constructor */
1090    public RipperRule(){   
1091      m_Antds = new FastVector();       
1092    }
1093       
1094    /**
1095     * Sets the internal representation of the class label to be predicted
1096     *
1097     * @param cl the internal representation of the class label to be predicted
1098     */
1099    public void setConsequent(double cl) {
1100      m_Consequent = cl; 
1101    }
1102   
1103    /**
1104     * Gets the internal representation of the class label to be predicted
1105     *
1106     * @return the internal representation of the class label to be predicted
1107     */
1108    public double getConsequent() { 
1109      return m_Consequent; 
1110    }
1111       
1112    /**
1113     * Get a shallow copy of this rule
1114     *
1115     * @return the copy
1116     */
1117    public Object copy(){
1118      RipperRule copy = new RipperRule();
1119      copy.setConsequent(getConsequent());
1120      copy.m_Antds = (FastVector)this.m_Antds.copyElements();
1121      return copy;
1122    }
1123       
1124    /**
1125     * Whether the instance is covered by this rule
1126     *
1127     * @param datum the instance in question
1128     * @return the boolean value indicating whether the instance
1129     *         is covered by this rule
1130     */
1131    public boolean covers(Instance datum){
1132      boolean isCover=true;
1133           
1134      for(int i=0; i<m_Antds.size(); i++){
1135        Antd antd = (Antd)m_Antds.elementAt(i);
1136        if(!antd.covers(datum)){
1137          isCover = false;
1138          break;
1139        }
1140      }
1141           
1142      return isCover;
1143    }       
1144       
1145    /**
1146     * Whether this rule has antecedents, i.e. whether it is not a default rule
1147     *
1148     * @return the boolean value indicating whether the rule has antecedents
1149     */
1150    public boolean hasAntds(){
1151      if (m_Antds == null)
1152        return false;
1153      else
1154        return (m_Antds.size() > 0);
1155    }     
1156       
1157    /**
1158     * the number of antecedents of the rule
1159     *
1160     * @return the size of this rule
1161     */
1162    public double size(){ return (double)m_Antds.size(); }             
1163
1164       
1165    /**
1166     * Private function to compute default number of accurate instances
1167     * in the specified data for the consequent of the rule
1168     *
1169     * @param data the data in question
1170     * @return the default accuracy number
1171     */
1172    private double computeDefAccu(Instances data){ 
1173      double defAccu=0;
1174      for(int i=0; i<data.numInstances(); i++){
1175        Instance inst = data.instance(i);
1176        if((int)inst.classValue() == (int)m_Consequent)
1177          defAccu += inst.weight();
1178      }
1179      return defAccu;
1180    }
1181       
1182       
1183    /**
1184     * Build one rule using the growing data
1185     *
1186     * @param data the growing data used to build the rule
1187     * @throws Exception if the consequent is not set yet
1188     */   
1189    public void grow(Instances data) throws Exception {
1190      if(m_Consequent == -1)
1191        throw new Exception(" Consequent not set yet.");
1192
1193      Instances growData = data;                 
1194      double sumOfWeights = growData.sumOfWeights();
1195      if(!Utils.gr(sumOfWeights, 0.0))
1196        return;
1197
1198      /* Compute the default accurate rate of the growing data */
1199      double defAccu = computeDefAccu(growData);
1200      double defAcRt = (defAccu+1.0)/(sumOfWeights+1.0); 
1201
1202      /* Keep the record of which attributes have already been used*/   
1203      boolean[] used=new boolean [growData.numAttributes()];
1204      for (int k=0; k<used.length; k++)
1205        used[k]=false;
1206      int numUnused=used.length;
1207
1208      // If there are already antecedents existing
1209      for(int j=0; j < m_Antds.size(); j++){
1210        Antd antdj = (Antd)m_Antds.elementAt(j);
1211        if(!antdj.getAttr().isNumeric()){ 
1212          used[antdj.getAttr().index()]=true;
1213          numUnused--;
1214        } 
1215      }     
1216
1217      double maxInfoGain;           
1218      while (Utils.gr(growData.numInstances(), 0.0) && 
1219          (numUnused > 0) 
1220          && Utils.sm(defAcRt, 1.0)
1221          ){   
1222
1223        // We require that infoGain be positive
1224        /*if(numAntds == originalSize)
1225          maxInfoGain = 0.0; // At least one condition allowed
1226          else
1227          maxInfoGain = Utils.eq(defAcRt, 1.0) ?
1228          defAccu/(double)numAntds : 0.0; */
1229        maxInfoGain = 0.0; 
1230
1231        /* Build a list of antecedents */
1232        Antd oneAntd=null;
1233        Instances coverData = null;
1234        Enumeration enumAttr=growData.enumerateAttributes();         
1235
1236        /* Build one condition based on all attributes not used yet*/
1237        while (enumAttr.hasMoreElements()){
1238          Attribute att= (Attribute)(enumAttr.nextElement());
1239
1240          if(m_Debug)
1241            System.err.println("\nOne condition: size = " 
1242                + growData.sumOfWeights());
1243
1244          Antd antd =null;     
1245          if(att.isNumeric())
1246            antd = new NumericAntd(att);
1247          else
1248            antd = new NominalAntd(att);
1249
1250          if(!used[att.index()]){
1251            /* Compute the best information gain for each attribute,
1252               it's stored in the antecedent formed by this attribute.
1253               This procedure returns the data covered by the antecedent*/
1254            Instances coveredData = computeInfoGain(growData, defAcRt,
1255                antd);
1256            if(coveredData != null){
1257              double infoGain = antd.getMaxInfoGain();     
1258              if(m_Debug)
1259                System.err.println("Test of \'"+antd.toString()+
1260                    "\': infoGain = "+
1261                    infoGain + " | Accuracy = " +
1262                    antd.getAccuRate()+
1263                    "="+antd.getAccu()
1264                    +"/"+antd.getCover()+
1265                    " def. accuracy: "+defAcRt);
1266
1267              if(infoGain > maxInfoGain){         
1268                oneAntd=antd;
1269                coverData = coveredData; 
1270                maxInfoGain = infoGain;
1271              }             
1272            }
1273          }
1274        }
1275
1276        if(oneAntd == null) break; // Cannot find antds         
1277        if(Utils.sm(oneAntd.getAccu(), m_MinNo)) break;// Too low coverage
1278
1279        //Numeric attributes can be used more than once
1280        if(!oneAntd.getAttr().isNumeric()){ 
1281          used[oneAntd.getAttr().index()]=true;
1282          numUnused--;
1283        }
1284
1285        m_Antds.addElement(oneAntd);
1286        growData = coverData;// Grow data size is shrinking     
1287        defAcRt = oneAntd.getAccuRate();
1288      }
1289    }
1290       
1291       
1292    /**
1293     * Compute the best information gain for the specified antecedent
1294     * 
1295     * @param instances the data based on which the infoGain is computed
1296     * @param defAcRt the default accuracy rate of data
1297     * @param antd the specific antecedent
1299     * @return the data covered by the antecedent
1300     */
1301    private Instances computeInfoGain(Instances instances, double defAcRt, 
1302                                      Antd antd){
1303      Instances data = instances;
1304       
1305      /* Split the data into bags.
1306         The information gain of each bag is also calculated in this procedure */
1307      Instances[] splitData = antd.splitData(data, defAcRt, 
1308                                             m_Consequent); 
1309           
1310      /* Get the bag of data to be used for next antecedents */
1311      if(splitData != null)
1312        return splitData[(int)antd.getAttrValue()];
1313      else return null;
1314    }
1315       
1316    /**
1317     * Prune all the possible final sequences of the rule using the
1318     * pruning data.  The measure used to prune the rule is based on
1319     * the flag given.
1320     *
1321     * @param pruneData the pruning data used to prune the rule
1322     * @param useWhole flag to indicate whether to use the error rate of
1323     *                 the whole pruning data instead of the data covered
1324     */   
1325    public void prune(Instances pruneData, boolean useWhole){
1326      Instances data = pruneData;
1327       
1328      double total = data.sumOfWeights();
1329      if(!Utils.gr(total, 0.0))
1330        return;
1331       
1332      /* The default accurate # and rate on pruning data */
1333      double defAccu=computeDefAccu(data);
1334           
1335      if(m_Debug)       
1336        System.err.println("Pruning with " + defAccu + 
1337                           " positive data out of " + total +
1338                           " instances");       
1339           
1340      int size=m_Antds.size();
1341      if(size == 0) return; // Default rule before pruning
1342           
1343      double[] worthRt = new double[size];
1344      double[] coverage = new double[size];
1345      double[] worthValue = new double[size];
1346      for(int w=0; w<size; w++){
1347        worthRt[w]=coverage[w]=worthValue[w]=0.0;
1348      }
1349           
1350      /* Calculate accuracy parameters for all the antecedents in this rule */
1351      double tn = 0.0; // True negative if useWhole
1352      for(int x=0; x<size; x++){
1353        Antd antd=(Antd)m_Antds.elementAt(x);
1354        Instances newData = data;
1355        data = new Instances(newData, 0); // Make data empty
1356               
1357        for(int y=0; y<newData.numInstances(); y++){
1358          Instance ins=newData.instance(y);
1359                   
1360          if(antd.covers(ins)){   // Covered by this antecedent
1361            coverage[x] += ins.weight();
1362            data.add(ins);                 // Add to data for further pruning
1363            if((int)ins.classValue() == (int)m_Consequent) // Accurate prediction
1364              worthValue[x] += ins.weight();
1365          }
1366          else if(useWhole){ // Not covered
1367            if((int)ins.classValue() != (int)m_Consequent)
1368              tn += ins.weight();
1369          }                     
1370        }
1371               
1372        if(useWhole){
1373          worthValue[x] += tn;
1374          worthRt[x] = worthValue[x] / total;
1375        }
1376        else // Note if coverage is 0, accuracy is 0.5
1377          worthRt[x] = (worthValue[x]+1.0)/(coverage[x]+2.0);
1378      }
1379           
1380      double maxValue = (defAccu+1.0)/(total+2.0);
1381      int maxIndex = -1;
1382      for(int i=0; i<worthValue.length; i++){
1383        if(m_Debug){
1384          double denom = useWhole ? total : coverage[i];
1385          System.err.println(i+"(useAccuray? "+!useWhole+"): "
1386                             + worthRt[i] + 
1387                             "="+worthValue[i]+
1388                             "/"+denom);
1389        }
1390        if(worthRt[i] > maxValue){ // Prefer the shorter rule
1391          maxValue = worthRt[i];
1392          maxIndex = i;
1393        }
1394      }
1395           
1396      /* Prune the antecedents according to the accuracy parameters */
1397      for(int z=size-1;z>maxIndex;z--)
1398        m_Antds.removeElementAt(z);       
1399    }
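
    /*
     * A worked example of the pruning metric above (illustrative numbers):
     * pruning back to an antecedent prefix covering p = 8 positives and
     * n = 2 negatives scores (p+1)/(p+n+2) = 9/12 = 0.75; with useWhole the
     * score is (TP+TN)/(P+N) computed over the whole pruning set instead.
     */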
1400       
1401    /**
1402     * Prints this rule
1403     *
1404     * @param classAttr the class attribute in the data
1405     * @return a textual description of this rule
1406     */
1407    public String toString(Attribute classAttr) {
1408      StringBuffer text =  new StringBuffer();
1409      if(m_Antds.size() > 0){
1410        for(int j=0; j< (m_Antds.size()-1); j++)
1411          text.append("(" + ((Antd)(m_Antds.elementAt(j))).toString()+ ") and ");
1412        text.append("("+((Antd)(m_Antds.lastElement())).toString() + ")");
1413      }
1414      text.append(" => " + classAttr.name() +
1415                  "=" + classAttr.value((int)m_Consequent));
1416           
1417      return text.toString();
1418    }
1419   
1420    /**
1421     * Returns the revision string.
1422     *
1423     * @return          the revision
1424     */
1425    public String getRevision() {
1426      return RevisionUtils.extract("$Revision: 6041 $");
1427    }
1428  }
1429
1430  /**
1431   * Returns default capabilities of the classifier.
1432   *
1433   * @return      the capabilities of this classifier
1434   */
1435  public Capabilities getCapabilities() {
1436    Capabilities result = super.getCapabilities();
1437    result.disableAll();
1438
1439    // attributes
1440    result.enable(Capability.NOMINAL_ATTRIBUTES);
1441    result.enable(Capability.NUMERIC_ATTRIBUTES);
1442    result.enable(Capability.DATE_ATTRIBUTES);
1443    result.enable(Capability.MISSING_VALUES);
1444
1445    // class
1446    result.enable(Capability.NOMINAL_CLASS);
1447    result.enable(Capability.MISSING_CLASS_VALUES);
1448   
1449    // instances
1450    result.setMinimumNumberInstances(m_Folds);
1451   
1452    return result;
1453  }
1454   
1455  /**
1456   * Builds Ripper in the order of class frequencies.  For each class
1457   * it's built in two stages: building and optimization
1458   *
1459   * @param instances the training data
1460   * @throws Exception if classifier can't be built successfully
1461   */
1462  public void buildClassifier(Instances instances) throws Exception {
1463   
1464    // can classifier handle the data?
1465    getCapabilities().testWithFail(instances);
1466
1467    // remove instances with missing class
1468    instances = new Instances(instances);
1469    instances.deleteWithMissingClass();
1470   
1471    m_Random = instances.getRandomNumberGenerator(m_Seed); 
1472    m_Total = RuleStats.numAllConditions(instances);
1473    if(m_Debug)
1474      System.err.println("Number of all possible conditions = "+m_Total);
1475   
1476    Instances data = null;
1477    m_Filter = new ClassOrder();
1478    ((ClassOrder)m_Filter).setSeed(m_Random.nextInt()); 
1479    ((ClassOrder)m_Filter).setClassOrder(ClassOrder.FREQ_ASCEND);
1480    m_Filter.setInputFormat(instances);
1481    data = Filter.useFilter(instances, m_Filter);
1482       
1483    if(data == null)
1484      throw new Exception(" Unable to randomize the class orders.");
1485   
1486    m_Class = data.classAttribute();   
1487    m_Ruleset = new FastVector();
1488    m_RulesetStats = new FastVector();
1489    m_Distributions = new FastVector();
1490
1491    // Sort by class frequency
1492    double[] orderedClasses = ((ClassOrder)m_Filter).getClassCounts();
1493    if(m_Debug){
1494      System.err.println("Sorted classes:");
1495      for(int x=0; x < m_Class.numValues(); x++)
1496        System.err.println(x+": "+m_Class.value(x) + " has " +
1497                           orderedClasses[x] + " instances.");
1498    }
1499    // Iterate from less prevalent class to more frequent one
1500  oneClass:     
1501    for(int y=0; y < data.numClasses()-1; y++){ // For each class             
1502           
1503      double classIndex = (double)y;
1504      if(m_Debug){
1505        int ci = (int)classIndex;
1506        System.err.println("\n\nClass "+m_Class.value(ci)+"("+ci+"): "
1507                           + orderedClasses[y] + " instances\n"+
1508                           "=====================================\n");
1509      }
1510               
1511      if(Utils.eq(orderedClasses[y],0.0)) // No data for this class
1512        continue oneClass;             
1513           
1514      // The expected FP/err is the proportion of the class
1515      double all = 0;
1516      for(int i=y; i<orderedClasses.length; i++)
1517        all += orderedClasses[i];
1518      double expFPRate = orderedClasses[y] / all;           
1519               
1520      double classYWeights = 0, totalWeights = 0;
1521      for(int j=0; j < data.numInstances(); j++){
1522          Instance datum = data.instance(j);
1523          totalWeights += datum.weight();
1524          if((int)datum.classValue() == y){
1525              classYWeights += datum.weight();
1526          }               
1527      } 
1528         
1529      // DL of default rule, no theory DL, only data DL
1530      double defDL;
1531      if(classYWeights > 0)
1532          defDL = RuleStats.dataDL(expFPRate, 
1533                                   0.0,
1534                                   totalWeights,
1535                                   0.0,
1536                                   classYWeights);         
1537      else
1538          continue oneClass; // Subsumed by previous rules
1539
1540      if(Double.isNaN(defDL) || Double.isInfinite(defDL))
1541        throw new Exception("Should never happen: "+
1542                            "defDL NaN or infinite!");
1543      if(m_Debug)
1544        System.err.println("The default DL = "+defDL);

      data = rulesetForOneClass(expFPRate, data, classIndex, defDL);
    }

    // Set the default rule
    RipperRule defRule = new RipperRule();
    defRule.setConsequent((double) (data.numClasses() - 1));
    m_Ruleset.addElement(defRule);

    RuleStats defRuleStat = new RuleStats();
    defRuleStat.setData(data);
    defRuleStat.setNumAllConds(m_Total);
    defRuleStat.addAndUpdate(defRule);
    m_RulesetStats.addElement(defRuleStat);
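    // The default rule just added has an empty antecedent, so it covers
    // every instance that no earlier rule fires on and predicts the last
    // (most frequent) class index.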

    for (int z = 0; z < m_RulesetStats.size(); z++) {
      RuleStats oneClass = (RuleStats) m_RulesetStats.elementAt(z);
      for (int xyz = 0; xyz < oneClass.getRulesetSize(); xyz++) {
        double[] classDist = oneClass.getDistributions(xyz);
        if (classDist != null) { // Check for null before normalizing
          Utils.normalize(classDist);
          m_Distributions.addElement(((ClassOrder) m_Filter).distributionsByOriginalIndex(classDist));
        }
      }
    }

    // free up memory
    for (int i = 0; i < m_RulesetStats.size(); i++)
      ((RuleStats) m_RulesetStats.elementAt(i)).cleanUp();
  }

  /**
   * Classifies the test instance with the rule learner and provides
   * the class distribution.
   *
   * @param datum the instance to be classified
   * @return the distribution
   */
  public double[] distributionForInstance(Instance datum) {
    try {
      for (int i = 0; i < m_Ruleset.size(); i++) {
        RipperRule rule = (RipperRule) m_Ruleset.elementAt(i);
        if (rule.covers(datum))
          return (double[]) m_Distributions.elementAt(i);
      }
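      // Unreachable in practice: the last rule in m_Ruleset is the
      // default rule, whose empty antecedent covers every instance,
      // so the loop above always returns a distribution.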
    } catch (Exception e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
    }

    System.err.println("Should never happen!");
    return new double[datum.classAttribute().numValues()];
  }

  /**
   * Builds a ruleset for the given class according to the given data.
   *
   * @param expFPRate the expected FP/(FP+FN) used in DL calculation
   * @param data the given data
   * @param classIndex the given class index
   * @param defDL the default DL in the data
   * @return the data not covered by the generated ruleset
   * @throws Exception if the ruleset cannot be built properly
   */
  protected Instances rulesetForOneClass(double expFPRate,
                                         Instances data,
                                         double classIndex,
                                         double defDL)
    throws Exception {

    Instances newData = data, growData, pruneData;
    boolean stop = false;
    FastVector ruleset = new FastVector();

    double dl = defDL, minDL = defDL;
    RuleStats rstats = null;
    double[] rst;

    // Check whether data have positive examples
    boolean defHasPositive = true; // No longer used
    boolean hasPositive = defHasPositive;

    /********************** Building stage ***********************/
    if (m_Debug)
      System.err.println("\n*** Building stage ***");

    while ((!stop) && hasPositive) { // Generate new rules until
      // stopping criteria met
      RipperRule oneRule;
      if (m_UsePruning) {
        /* Split data into Grow and Prune */

        // We should have stratified the data, but RIPPER seems to have
        // a bug that makes it not do so. To simulate it more precisely,
        // we do the same thing.
        //newData.randomize(m_Random);
        newData = RuleStats.stratify(newData, m_Folds, m_Random);
        Instances[] part = RuleStats.partition(newData, m_Folds);
        growData = part[0];
        pruneData = part[1];
        //growData = newData.trainCV(m_Folds, m_Folds-1);
        //pruneData = newData.testCV(m_Folds, m_Folds-1);
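        // With m_Folds folds, the grow set gets (m_Folds-1)/m_Folds of
        // the data and the prune set the remaining 1/m_Folds; with the
        // default of 3 folds this is RIPPER's usual 2/3 grow, 1/3 prune
        // split.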

        oneRule = new RipperRule();
        oneRule.setConsequent(classIndex); // Must set first

        if (m_Debug)
          System.err.println("\nGrowing a rule ...");
        oneRule.grow(growData); // Build the rule
        if (m_Debug)
          System.err.println("One rule found before pruning:" +
                             oneRule.toString(m_Class));

        if (m_Debug)
          System.err.println("\nPruning the rule ...");
        oneRule.prune(pruneData, false); // Prune the rule
        if (m_Debug)
          System.err.println("One rule found after pruning:" +
                             oneRule.toString(m_Class));
      }
      else {
        oneRule = new RipperRule();
        oneRule.setConsequent(classIndex); // Must set first
        if (m_Debug)
          System.err.println("\nNo pruning: growing a rule ...");
        oneRule.grow(newData); // Build the rule
        if (m_Debug)
          System.err.println("No pruning: one rule found:\n" +
                             oneRule.toString(m_Class));
      }

      // Compute the DL of this ruleset
      if (rstats == null) { // First rule
        rstats = new RuleStats();
        rstats.setNumAllConds(m_Total);
        rstats.setData(newData);
      }

      rstats.addAndUpdate(oneRule);
      int last = rstats.getRuleset().size() - 1; // Index of the last rule
      dl += rstats.relativeDL(last, expFPRate, m_CheckErr);

      if (Double.isNaN(dl) || Double.isInfinite(dl))
        throw new Exception("Should never happen: dl in " +
                            "building stage NaN or infinite!");
      if (m_Debug)
        System.err.println("Before optimization(" + last +
                           "): the dl = " + dl + " | best: " + minDL);

      if (dl < minDL)
        minDL = dl; // The best dl so far

      rst = rstats.getSimpleStats(last);
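      // As the debug output below suggests, the simple-stats layout is:
      // rst[0] = weight covered, rst[1] = weight not covered,
      // rst[2] = true positives, rst[4] = false positives, and
      // rst[5] = positives not covered (used for the hasPositive check).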
      if (m_Debug)
        System.err.println("The rule covers: " + rst[0] +
                           " | pos = " + rst[2] +
                           " | neg = " + rst[4] +
                           "\nThe rule doesn't cover: " + rst[1] +
                           " | pos = " + rst[5]);

      stop = checkStop(rst, minDL, dl);

      if (!stop) {
        ruleset.addElement(oneRule);           // Accepted
        newData = rstats.getFiltered(last)[1]; // Data not covered
        hasPositive = Utils.gr(rst[5], 0.0);   // Positives remaining?
        if (m_Debug)
          System.err.println("One rule added: has positive? "
                             + hasPositive);
      }
      else {
        if (m_Debug)
          System.err.println("Quit rule");
        rstats.removeLast(); // Remove the last rule so it can be re-used
      }
    } // while !stop

    /******************** Optimization stage *******************/
    RuleStats finalRulesetStat = null;
    if (m_UsePruning) {
      for (int z = 0; z < m_Optimizations; z++) {
        if (m_Debug)
          System.err.println("\n*** Optimization: run #"
                             + z + " ***");

        newData = data;
        finalRulesetStat = new RuleStats();
        finalRulesetStat.setData(newData);
        finalRulesetStat.setNumAllConds(m_Total);
        int position = 0;
        stop = false;
        boolean isResidual = false;
        hasPositive = defHasPositive;
        dl = minDL = defDL;

      oneRule:
        while (!stop && hasPositive) {

          isResidual = (position >= ruleset.size()); // Cover residual positive examples
          // Re-do shuffling and stratification
          //newData.randomize(m_Random);
          newData = RuleStats.stratify(newData, m_Folds, m_Random);
          Instances[] part = RuleStats.partition(newData, m_Folds);
          growData = part[0];
          pruneData = part[1];
          //growData = newData.trainCV(m_Folds, m_Folds-1);
          //pruneData = newData.testCV(m_Folds, m_Folds-1);
          RipperRule finalRule;

          if (m_Debug)
            System.err.println("\nRule #" + position +
                               "| isResidual?" + isResidual +
                               "| data size: " + newData.sumOfWeights());

          if (isResidual) {
            RipperRule newRule = new RipperRule();
            newRule.setConsequent(classIndex);
            if (m_Debug)
              System.err.println("\nGrowing and pruning" +
                                 " a new rule ...");
            newRule.grow(growData);
            newRule.prune(pruneData, false);
            finalRule = newRule;
            if (m_Debug)
              System.err.println("\nNew rule found: " +
                                 newRule.toString(m_Class));
          }
          else {
            RipperRule oldRule = (RipperRule) ruleset.elementAt(position);
            boolean covers = false;
            // Test coverage of the next old rule
            for (int i = 0; i < newData.numInstances(); i++)
              if (oldRule.covers(newData.instance(i))) {
                covers = true;
                break;
              }

            if (!covers) { // Null coverage, no variants can be generated
              finalRulesetStat.addAndUpdate(oldRule);
              position++;
              continue oneRule;
            }

            // 2 variants
            if (m_Debug)
              System.err.println("\nGrowing and pruning" +
                                 " Replace ...");
            RipperRule replace = new RipperRule();
            replace.setConsequent(classIndex);
            replace.grow(growData);

            // Remove the pruning data covered by the following
            // rules, then simply compute the error rate of the
            // current rule to prune it. According to RIPPER,
            // it's equivalent to computing the error of the
            // whole ruleset -- is it true?
            pruneData = RuleStats.rmCoveredBySuccessives(pruneData, ruleset, position);
            replace.prune(pruneData, true);

            if (m_Debug)
              System.err.println("\nGrowing and pruning" +
                                 " Revision ...");
            RipperRule revision = (RipperRule) oldRule.copy();

            // For revision, restrict the grow data to the instances
            // covered by the old rule
            Instances newGrowData = new Instances(growData, 0);
            for (int b = 0; b < growData.numInstances(); b++) {
              Instance inst = growData.instance(b);
              if (revision.covers(inst))
                newGrowData.add(inst);
            }
            revision.grow(newGrowData);
            revision.prune(pruneData, true);
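            // Both optimization variants are now in place: "replace",
            // grown from scratch from an empty rule, and "revision",
            // grown by adding antecedents to a copy of the old rule.
            // Both were pruned against the whole-ruleset error (the
            // 'true' flag above); next their relative DLs are compared
            // with the old rule's and the smallest wins.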

            double[][] prevRuleStats = new double[position][6];
            for (int c = 0; c < position; c++)
              prevRuleStats[c] = finalRulesetStat.getSimpleStats(c);

            // Now compare the relative DL of the variants
            FastVector tempRules = (FastVector) ruleset.copyElements();
            tempRules.setElementAt(replace, position);

            RuleStats repStat = new RuleStats(data, tempRules);
            repStat.setNumAllConds(m_Total);
            repStat.countData(position, newData, prevRuleStats);
            //repStat.countData();
            rst = repStat.getSimpleStats(position);
            if (m_Debug)
              System.err.println("Replace rule covers: " + rst[0] +
                                 " | pos = " + rst[2] +
                                 " | neg = " + rst[4] +
                                 "\nThe rule doesn't cover: " + rst[1] +
                                 " | pos = " + rst[5]);

            double repDL = repStat.relativeDL(position, expFPRate,
                                              m_CheckErr);
            if (m_Debug)
              System.err.println("\nReplace: " +
                                 replace.toString(m_Class)
                                 + " |dl = " + repDL);

            if (Double.isNaN(repDL) || Double.isInfinite(repDL))
              throw new Exception("Should never happen: repDL " +
                                  "in optimization stage NaN or " +
                                  "infinite!");

            tempRules.setElementAt(revision, position);
            RuleStats revStat = new RuleStats(data, tempRules);
            revStat.setNumAllConds(m_Total);
            revStat.countData(position, newData, prevRuleStats);
            //revStat.countData();
            double revDL = revStat.relativeDL(position, expFPRate,
                                              m_CheckErr);

            if (m_Debug)
              System.err.println("Revision: "
                                 + revision.toString(m_Class)
                                 + " |dl = " + revDL);

            if (Double.isNaN(revDL) || Double.isInfinite(revDL))
              throw new Exception("Should never happen: revDL " +
                                  "in optimization stage NaN or " +
                                  "infinite!");

            rstats = new RuleStats(data, ruleset);
            rstats.setNumAllConds(m_Total);
            rstats.countData(position, newData, prevRuleStats);
            //rstats.countData();
            double oldDL = rstats.relativeDL(position, expFPRate,
                                             m_CheckErr);

            if (Double.isNaN(oldDL) || Double.isInfinite(oldDL))
              throw new Exception("Should never happen: oldDL " +
                                  "in optimization stage NaN or " +
                                  "infinite!");
            if (m_Debug)
              System.err.println("Old rule: " +
                                 oldRule.toString(m_Class)
                                 + " |dl = " + oldDL);

            if (m_Debug)
              System.err.println("\nrepDL: " + repDL +
                                 "\nrevDL: " + revDL +
                                 "\noldDL: " + oldDL);

            if ((oldDL <= revDL) && (oldDL <= repDL))
              finalRule = oldRule;  // Old rule is the best
            else if (revDL <= repDL)
              finalRule = revision; // Revision is the best
            else
              finalRule = replace;  // Replace is the best
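            // Ties favor stability: on equal DL the old rule beats both
            // variants, and revision beats replace, so a rule is only
            // swapped out when a variant is strictly better.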
          }

          finalRulesetStat.addAndUpdate(finalRule);
          rst = finalRulesetStat.getSimpleStats(position);

          if (isResidual) {

            dl += finalRulesetStat.relativeDL(position,
                                              expFPRate,
                                              m_CheckErr);
            if (m_Debug)
              System.err.println("After optimization: the dl"
                                 + "=" + dl + " | best: " + minDL);

            if (dl < minDL)
              minDL = dl; // The best dl so far

            stop = checkStop(rst, minDL, dl);
            if (!stop)
              ruleset.addElement(finalRule); // Accepted
            else {
              finalRulesetStat.removeLast(); // Remove the last rule so it can be re-used
              position--;
            }
          }
          else
            ruleset.setElementAt(finalRule, position); // Accepted

          if (m_Debug) {
            System.err.println("The rule covers: " + rst[0] +
                               " | pos = " + rst[2] +
                               " | neg = " + rst[4] +
                               "\nThe rule doesn't cover: " + rst[1] +
                               " | pos = " + rst[5]);
            System.err.println("\nRuleset so far: ");
            for (int x = 0; x < ruleset.size(); x++)
              System.err.println(x + ": " + ((RipperRule) ruleset.elementAt(x)).toString(m_Class));
            System.err.println();
          }

          // Data not covered
          if (finalRulesetStat.getRulesetSize() > 0) // If any rules
            newData = finalRulesetStat.getFiltered(position)[1];
          hasPositive = Utils.gr(rst[5], 0.0); // Positives remaining?
          position++;
        } // while !stop && hasPositive

        if (ruleset.size() > (position + 1)) { // Hasn't gone through yet
          for (int k = position + 1; k < ruleset.size(); k++)
            finalRulesetStat.addAndUpdate((Rule) ruleset.elementAt(k));
        }
        if (m_Debug)
          System.err.println("\nDeleting rules to decrease" +
                             " DL of the whole ruleset ...");
        finalRulesetStat.reduceDL(expFPRate, m_CheckErr);
        if (m_Debug) {
          int del = ruleset.size() -
            finalRulesetStat.getRulesetSize();
          System.err.println(del + " rules are deleted" +
                             " after DL reduction procedure");
        }
        ruleset = finalRulesetStat.getRuleset();
        rstats = finalRulesetStat;

      } // For each run of optimization
    } // if pruning is used

    // Concatenate the ruleset for this class to the whole ruleset
    if (m_Debug) {
      System.err.println("\nFinal ruleset: ");
      for (int x = 0; x < ruleset.size(); x++)
        System.err.println(x + ": " + ((RipperRule) ruleset.elementAt(x)).toString(m_Class));
      System.err.println();
    }

    m_Ruleset.appendElements(ruleset);
    m_RulesetStats.addElement(rstats);

    if (ruleset.size() > 0)                             // If any rules for this class
      return rstats.getFiltered(ruleset.size() - 1)[1]; // Data not covered
    else
      return data;
  }

  /**
   * Checks whether the stopping criterion is met.
   *
   * @param rst the statistics of the ruleset
   * @param minDL the minimum description length so far
   * @param dl the current description length of the ruleset
   * @return true if the stopping criterion is met, false otherwise
   */
  private boolean checkStop(double[] rst, double minDL, double dl) {

    if (dl > minDL + MAX_DL_SURPLUS) {
      if (m_Debug)
        System.err.println("DL too large: " + dl + " | " + minDL);
      return true;
    }
    else if (!Utils.gr(rst[2], 0.0)) { // Covered positives
      if (m_Debug)
        System.err.println("Too few positives.");
      return true;
    }
    else if ((rst[4] / rst[0]) >= 0.5) { // Error rate
      if (m_CheckErr) {
        if (m_Debug)
          System.err.println("Error too large: " +
                             rst[4] + "/" + rst[0]);
        return true;
      }
      else
        return false;
    }
    else { // Do not stop
      if (m_Debug)
        System.err.println("Continue.");
      return false;
    }
  }

  /**
   * Prints all the rules of the rule learner.
   *
   * @return a textual description of the classifier
   */
  public String toString() {
    if (m_Ruleset == null)
      return "JRIP: No model built yet.";

    StringBuffer sb = new StringBuffer("JRIP rules:\n" +
                                       "===========\n\n");
    for (int j = 0; j < m_RulesetStats.size(); j++) {
      RuleStats rs = (RuleStats) m_RulesetStats.elementAt(j);
      FastVector rules = rs.getRuleset();
      for (int k = 0; k < rules.size(); k++) {
        double[] simStats = rs.getSimpleStats(k);
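        // The pair printed after each rule is (coverage/false positives):
        // simStats[0] is the weight of instances the rule covers and
        // simStats[4] the weight of those it covers but misclassifies.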
        sb.append(((RipperRule) rules.elementAt(k)).toString(m_Class)
                  + " (" + simStats[0] + "/" + simStats[4] + ")\n");
      }
    }
    if (m_Debug) {
      System.err.println("Inside m_Ruleset");
      for (int i = 0; i < m_Ruleset.size(); i++)
        System.err.println(((RipperRule) m_Ruleset.elementAt(i)).toString(m_Class));
    }
    sb.append("\nNumber of Rules : "
              + m_Ruleset.size() + "\n");
    return sb.toString();
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 6041 $");
  }

  /**
   * Main method.
   *
   * @param args the options for the classifier
   */
  public static void main(String[] args) {
    runClassifier(new JRip(), args);
  }
}