source: src/main/java/weka/classifiers/rules/Ridor.java @ 7

Last change on this file since 7 was 4, checked in by gnappo, 14 years ago

Import of weka.

File size: 52.0 KB
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Ridor.java
 *    Copyright (C) 2001 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.rules;

import weka.classifiers.Classifier;
import weka.classifiers.AbstractClassifier;
import weka.core.AdditionalMeasureProducer;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.UnsupportedClassTypeException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.Capabilities.Capability;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * An implementation of a RIpple-DOwn Rule learner.<br/>
 * <br/>
 * It generates a default rule first and then the exceptions for the default rule with the least (weighted) error rate.  Then it generates the "best" exceptions for each exception and iterates until pure.  Thus it performs a tree-like expansion of exceptions. The exceptions are a set of rules that predict classes other than the default. IREP is used to generate the exceptions.<br/>
 * <br/>
 * For more information about Ripple-Down Rules, see:<br/>
 * <br/>
 * Brian R. Gaines, Paul Compton (1995). Induction of Ripple-Down Rules Applied to Modeling Large Databases. J. Intell. Inf. Syst.. 5(3):211-228.
 * <p/>
 <!-- globalinfo-end -->
 *
 * There are five inner classes defined in this class. <br>
 * The first is Ridor_node, which implements one node in the Ridor tree.  It's basically
 * composed of a default class and a set of exception rules to the default class.<br>
 * The second inner class is RidorRule, which implements a single exception rule
 * using REP.<br>
 * The last three inner classes are only used in RidorRule.  They are Antd, NumericAntd
 * and NominalAntd, which all implement a single antecedent in the RidorRule. <br>
 * The Antd class is an abstract class, which has two subclasses, NumericAntd and
 * NominalAntd, to implement the corresponding abstract functions.  These two subclasses
 * implement the functions related to an antecedent with a numeric attribute and a nominal
 * attribute respectively.<p>
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -F &lt;number of folds&gt;
 *  Set number of folds for IREP
 *  One fold is used as pruning set.
 *  (default 3)</pre>
 *
 * <pre> -S &lt;number of shuffles&gt;
 *  Set number of shuffles to randomize
 *  the data in order to get a better rule.
 *  (default 1)</pre>
 *
 * <pre> -A
 *  Set flag of whether to use the error rate
 *  of all the data to select the default class
 *  in each step. If not set, the learner will only use the error rate in the pruning data</pre>
 *
 * <pre> -M
 *  Set flag of whether to use the majority class as
 *  the default class in each step instead of
 *  choosing the default class based on the error rate
 *  (if the flag is not set)</pre>
 *
 * <pre> -N &lt;min. weights&gt;
 *  Set the minimal weights of instances
 *  within a split.
 *  (default 2.0)</pre>
 *
 <!-- options-end -->
 *
 * @author Xin XU (xx5@cs.waikato.ac.nz)
 * @version $Revision: 5928 $
 */
public class Ridor 
  extends AbstractClassifier
  implements AdditionalMeasureProducer, WeightedInstancesHandler {

  /** for serialization */
  static final long serialVersionUID = -7261533075088314436L;
 
  /** The number of folds to split data into Grow and Prune for IREP */
  private int m_Folds = 3;
   
  /** The number of shuffles performed on the data for randomization */
  private int m_Shuffle = 1;

  /** Random object for randomization */
  private Random m_Random = null;
   
  /** The seed to perform randomization */
  private int m_Seed = 1;

  /** Whether to use the error rate on all the data */
  private boolean m_IsAllErr = false;

  /** Whether to use the majority class as the default class */
  private boolean m_IsMajority = false;
   
  /** The root of Ridor */
  private Ridor_node m_Root = null;
   
  /** The class attribute of the data */
  private Attribute m_Class;

  /** Statistics of the data */
  private double m_Cover, m_Err;

  /** The minimal number of instance weights within a split*/
  private double m_MinNo = 2.0;

  /**
   * Returns a string describing classifier
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "An implementation of a RIpple-DOwn Rule learner.\n\n" 
      + "It generates a default rule first and then the exceptions for the default rule "
      + "with the least (weighted) error rate.  Then it generates the \"best\" exceptions for "
      + "each exception and iterates until pure.  Thus it performs a tree-like expansion of "
      + "exceptions. "
      + "The exceptions are a set of rules that predict classes other than the default. "
      + "IREP is used to generate the exceptions.\n\n"
      + "For more information about Ripple-Down Rules, see:\n\n";
  }
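
  /*
   * A minimal usage sketch (illustrative only, not part of the original
   * source).  It assumes an Instances object "train" with a nominal class
   * attribute has already been loaded and its class index set:
   *
   *   Ridor ridor = new Ridor();
   *   ridor.setFolds(3);       // -F, folds for IREP (one fold for pruning)
   *   ridor.setShuffle(1);     // -S, shuffles tried per rule
   *   ridor.buildClassifier(train);
   *   double label = ridor.classifyInstance(train.instance(0));
   */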
   
  /**
   * Private class implementing the single node of Ridor.
   * It consists of a default class label, a set of exceptions to the default rule
   * and the exceptions to each exception
   */
  private class Ridor_node 
    implements Serializable, RevisionHandler {
   
    /** for serialization */
    static final long serialVersionUID = -581370560157467677L;
       
    /** The set of exceptions of the default rule.
        Each element also has its own exceptions and the consequent of each rule
        is determined by its exceptions */
    private double defClass = Double.NaN;
       
    /** The set of exceptions of the default rule.
        Each element also has its own exceptions and the consequent of each rule
        is determined by its exceptions */
    private RidorRule[] rules = null;
       
    /** The exceptions of the exception rules */
    private Ridor_node[] excepts = null; 

    /** The level of this node */
    private int level;

    /**
     * Gets the default class label
     *
     * @return the default class label
     */
    public double getDefClass() { 
      return defClass; 
    }
   
    /**
     * Gets the set of exceptions
     *
     * @return the set of exceptions
     */
    public RidorRule[] getRules() { 
      return rules; 
    }
   
    /**
     * Gets the exceptions of the exception rules
     *
     * @return the exceptions of the exception rules
     */
    public Ridor_node[] getExcepts() { 
      return excepts; 
    }

    /**
     * Builds a rule learner in a ripple-down manner.
     *
     * @param dataByClass the data divided by their class labels. The real class
     * labels of the instances are all set to 0
     * @param lvl the level of the parent node
     * @throws Exception if the ruleset of this node cannot be built
     */
    public void findRules(Instances[] dataByClass, int lvl) throws Exception {
      Vector finalRules = null;
      int clas = -1;
      double[] isPure = new double[dataByClass.length];
      int numMajority = 0;
           
      level = lvl + 1;
           
      for(int h=0; h < dataByClass.length; h++){
        isPure[h] = dataByClass[h].sumOfWeights();
        if(Utils.grOrEq(isPure[h], m_Folds))
          numMajority++;  // Count how many class labels have enough instances
      }
           
      if(numMajority <= 1){                        // The data is pure or not enough
        defClass = (double)Utils.maxIndex(isPure);
        return;
      }
      double total = Utils.sum(isPure); 
           
      if(m_IsMajority){
        defClass = (double)Utils.maxIndex(isPure);
        Instances data = new Instances(dataByClass[(int)defClass]);
        int index = data.classIndex();
               
        for(int j=0; j<data.numInstances(); j++)
          data.instance(j).setClassValue(1);       // Set one class as default
               
        for(int k=0; k < dataByClass.length; k++)    // Merge into one dataset
          if(k != (int)defClass){
            if(data.numInstances() >= dataByClass[k].numInstances())
              data = append(data, dataByClass[k]);
            else data = append(dataByClass[k], data);
          }
               
        data.setClassIndex(index);           // Position new class label
               
        double classCount = total - isPure[(int)defClass];
        finalRules = new Vector();
        buildRuleset(data, classCount, finalRules);
        if(finalRules.size() == 0)           // No good rules built
          return;
      }
      else{
        double maxAcRt = isPure[Utils.maxIndex(isPure)] / total;
               
        // Find default class
        for(int i=0; i < dataByClass.length; i++){
          if(isPure[i] >= m_Folds){
            Instances data = new Instances(dataByClass[i]);
            int index = data.classIndex();
                       
            for(int j=0; j<data.numInstances(); j++)
              data.instance(j).setClassValue(1);       // Set one class as default
                       
            for(int k=0; k < dataByClass.length; k++)    // Merge into one dataset
              if(k != i){
                if(data.numInstances() >= dataByClass[k].numInstances())
                  data = append(data, dataByClass[k]);
                else data = append(dataByClass[k], data);
              }
                       
            data.setClassIndex(index);           // Position new class label
                       
            /* Build a set of rules */
            double classCount = data.sumOfWeights() - isPure[i];
            Vector ruleset = new Vector();
            double wAcRt = buildRuleset(data, classCount, ruleset); 
                       
            if(Utils.gr(wAcRt, maxAcRt)){
              finalRules = ruleset;
              maxAcRt = wAcRt;
              clas = i;
            }
          }
        }
               
        if(finalRules == null){ // No good rules found, set majority class as default
          defClass = (double)Utils.maxIndex(isPure);
          return;
        }
               
        defClass = (double)clas;
      }
                       
      /* Store the exception rules and default class in this node */
      int size = finalRules.size();
      rules = new RidorRule[size];
      excepts = new Ridor_node[size];
      for(int l=0; l < size; l++)
        rules[l] = (RidorRule)finalRules.elementAt(l);
           
      /* Build exceptions for each exception rule */
      Instances[] uncovered = dataByClass; 
      if(level == 1)  // The error of default rule
        m_Err = total - uncovered[(int)defClass].sumOfWeights();                       

      uncovered[(int)defClass] = new Instances(uncovered[(int)defClass], 0);   
           
      for(int m=0; m < size; m++){
        /* The data covered by this rule; it is also removed from the original data */
        Instances[][] dvdData = divide(rules[m], uncovered);
        Instances[] covered = dvdData[0];    // Data covered by the rule
        //uncovered = dvdData[1];            // Data not covered by the rule
        excepts[m] = new Ridor_node();
        excepts[m].findRules(covered, level);// Find exceptions on the covered data
      }
    }

    /**
     * Private function to build a rule set and return the weighted avg of accuracy
     * rate of the rules in the set.
     *
     * @param insts the data used to build the ruleset
     * @param classCount the counts of the instances with the predicted class but not
     *                   yet covered by the ruleset
     * @param ruleset the ruleset to be built
     * @return the weighted accuracy rate of the ruleset
     * @throws Exception if the rules cannot be built properly
     */
    private double buildRuleset(Instances insts, double classCount, Vector ruleset) 
      throws Exception {           
      Instances data = new Instances(insts);
      double wAcRt = 0;  // The weighted accuracy rate of this ruleset
      double total = data.sumOfWeights();
           
      while( classCount >= m_Folds ){      // Data is not pure
        RidorRule bestRule = null;
        double bestWorthRate= -1;        // The best worth achieved by
        double bestWorth = -1;           // randomization of the data
               
        RidorRule rule = new RidorRule();                               
        rule.setPredictedClass(0);       // Predict the classes other than default
               
        for(int j = 0; j < m_Shuffle; j++){
          if(m_Shuffle > 1)
            data.randomize(m_Random);
                   
          rule.buildClassifier(data);
                   
          double wr, w; // Worth rate and worth
          if(m_IsAllErr){
            wr = (rule.getWorth()+rule.getAccuG()) / 
              (rule.getCoverP()+rule.getCoverG());
            w = rule.getWorth() + rule.getAccuG();
          }
          else{
            wr = rule.getWorthRate();
            w = rule.getWorth(); 
          }
                   
          if(Utils.gr(wr, bestWorthRate) ||
             (Utils.eq(wr, bestWorthRate) && Utils.gr(w, bestWorth))){
            bestRule = rule;
            bestWorthRate = wr;
            bestWorth = w;
          }
        }
               
        if (bestRule == null)
          throw new Exception("Something wrong here inside findRule()!");
               
        if(Utils.sm(bestWorthRate, 0.5) || (!bestRule.hasAntds()))
          break;                       // No more good rules generated
               
        Instances newData = new Instances(data); 
        data = new Instances(newData, 0);// Empty the data
        classCount = 0;
        double cover = 0;                // Coverage of this rule on whole data
               
        for(int l=0; l<newData.numInstances(); l++){
          Instance datum = newData.instance(l);
          if(!bestRule.isCover(datum)){// Data not covered by the previous rule
            data.add(datum);
            if(Utils.eq(datum.classValue(), 0)) 
              classCount += datum.weight(); // The predicted class in the data
          }
          else cover += datum.weight();
        }                       
               
        wAcRt += computeWeightedAcRt(bestWorthRate, cover, total);
        ruleset.addElement(bestRule);                   
      } 
           
      /* The weighted def. accuracy */
      double wDefAcRt = (data.sumOfWeights()-classCount) / total;                   
      wAcRt += wDefAcRt;
           
      return wAcRt;
    }
       
    /**
     * Private function to combine two datasets
     *
     * @param data1 the data to which data2 is appended
     * @param data2 the data to be appended to data1
     * @return the merged data
     */
    private Instances append(Instances data1, Instances data2){
      Instances data = new Instances(data1);
      for(int i=0; i<data2.numInstances(); i++)
        data.add(data2.instance(i));
           
      return data;
    }
       
    /**
     * Compute the weighted average of the accuracy rate of a certain rule.
     * Each rule is weighted by its coverage proportion in the whole data. 
     * So the accuracy rate of one ruleset is actually
     *
     * (worth rate) * (coverage proportion)
     *
     *                               coverage of the rule on the whole data
     * where coverage proportion = -----------------------------------------
     *                              the whole data size fed into the ruleset
     *
     * @param worthRt the worth rate
     * @param cover the coverage of the rule on the whole data
     * @param total the total data size fed into the ruleset
     * @return the weighted accuracy rate of this rule
     */
    private double computeWeightedAcRt(double worthRt, double cover, double total){
         
      return (worthRt * (cover/total)); 
    }
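
    /* For example (illustrative numbers, not from the original source):
       a rule with worth rate 0.8 that covers 50 of 200 instances
       contributes 0.8 * (50/200) = 0.2 to the ruleset's weighted
       accuracy rate. */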
       
    /**
     * Builds an array of data according to their true class label
     * Each bag of data is filtered through the rule specified and
     * is totally covered by this rule. 
     * Both the data covered and uncovered by the rule will be returned
     * by the procedure. 
     *
     * @param rule the rule covering the data
     * @param dataByClass the array of data to be covered by the rule
     * @return the arrays of data both covered and not covered by the rule
     */
    private Instances[][] divide(RidorRule rule, Instances[] dataByClass){
      int len = dataByClass.length;
      Instances[][] dataBags = new Instances[2][len];
           
      for(int i=0; i < len; i++){
        Instances[] dvdData = rule.coveredByRule(dataByClass[i]);
        dataBags[0][i] = dvdData[0];     // Covered by the rule
        dataBags[1][i] = dvdData[1];     // Not covered by the rule
      }
           
      return dataBags;
    }

    /**
     * The size of a certain node of Ridor, i.e. the
     * number of rules generated within and below this node
     *
     * @return the size of this node
     */
    public int size(){
      int size = 0;
      if(rules != null){
        for(int i=0; i < rules.length; i++)
          size += excepts[i].size(); // The children's size
        size += rules.length;          // This node's size
      }
      return size;
    }
       
    /**
     * Prints all the rules of one node of Ridor.
     *
     * @return a textual description of one node of Ridor
     */
    public String toString(){
      StringBuffer text =  new StringBuffer();
           
      if(level == 1)
        text.append(m_Class.name() + " = " + m_Class.value((int)getDefClass())+
                    "  ("+m_Cover+"/"+m_Err+")\n");
      if(rules != null){
        for(int i=0; i < rules.length; i++){
          for(int j=0; j < level; j++)
            text.append("         ");
          String cl = m_Class.value((int)(excepts[i].getDefClass()));
          text.append("  Except " + 
                      rules[i].toString(m_Class.name(), cl)+
                      "\n" + excepts[i].toString());
        }
      }
           
      return text.toString();
    }
   
    /**
     * Returns the revision string.
     *
     * @return          the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 5928 $");
    }
  }   

  /**
   * This class implements a single rule that predicts the 2-class distribution. 
   *
   * A rule consists of antecedents "AND"ed together and the consequent (class value)
   * for the classification.  In this case, the consequent is the distribution of
   * the available classes (always 2 classes) in the dataset. 
   * In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to select
   * an antecedent and Reduced Error Pruning (REP) is used to prune the rule.
   *
   */
  private class RidorRule 
    implements WeightedInstancesHandler, Serializable, RevisionHandler {
       
    /** for serialization */
    static final long serialVersionUID = 4375199423973848157L;
   
    /** The internal representation of the class label to be predicted*/
    private double m_Class = -1;       
       
    /** The class attribute of the data*/
    private Attribute m_ClassAttribute;
       
    /** The vector of antecedents of this rule*/
    protected FastVector m_Antds = null;
       
    /** The worth rate of this rule, in this case, the accuracy rate in the pruning data*/
    private double m_WorthRate = 0;
       
    /** The worth value of this rule, in this case, the accurate # in the pruning data*/
    private double m_Worth = 0;
       
    /** The sum of weights of the data covered by this rule in the pruning data */
    private double m_CoverP = 0;   
       
    /** The accurate and covered data of this rule in the growing data */
    private double m_CoverG = 0, m_AccuG = 0;           
 
    /** The access functions for parameters */
    public void setPredictedClass(double cl){  m_Class = cl; }
    public double getPredictedClass(){ return m_Class; }
       
    /**
     * Builds a single rule learner with REP dealing with 2 classes.
     * This rule learner always tries to predict the class with label
     * m_Class.
     *
     * @param instances the training data
     * @throws Exception if classifier can't be built successfully
     */
    public void buildClassifier(Instances instances) throws Exception {
      m_ClassAttribute = instances.classAttribute();
      if (!m_ClassAttribute.isNominal()) 
        throw new UnsupportedClassTypeException(" Only nominal class, please.");
      if(instances.numClasses() != 2)
        throw new Exception(" Only 2 classes, please.");
           
      Instances data = new Instances(instances);
      if(Utils.eq(data.sumOfWeights(),0))
        throw new Exception(" No training data.");
           
      data.deleteWithMissingClass();
      if(Utils.eq(data.sumOfWeights(),0))
        throw new Exception(" The class labels of all the training data are missing."); 
           
      if(data.numInstances() < m_Folds)
        throw new Exception(" Not enough data for REP.");
           
      m_Antds = new FastVector();       
           
      /* Split data into Grow and Prune*/
      m_Random = new Random(m_Seed);
      data.randomize(m_Random);
      data.stratify(m_Folds);
      Instances growData=data.trainCV(m_Folds, m_Folds-1, m_Random);
      Instances pruneData=data.testCV(m_Folds, m_Folds-1);
           
      grow(growData);      // Build this rule
           
      prune(pruneData);    // Prune this rule
    }
       
    /**
     * Find all the instances in the dataset covered by this rule.
     * The instances not covered are also separated from the original data
     * and returned by this procedure.
     *
     * @param insts the dataset to be covered by this rule.
     * @return the instances covered and not covered by this rule
     */
    public Instances[] coveredByRule(Instances insts){
      Instances[] data = new Instances[2];
      data[0] = new Instances(insts, insts.numInstances());
      data[1] = new Instances(insts, insts.numInstances());
           
      for(int i=0; i<insts.numInstances(); i++){
        Instance datum = insts.instance(i);
        if(isCover(datum))
          data[0].add(datum);        // Covered by this rule
        else
          data[1].add(datum);        // Not covered by this rule
      }
           
      return data;
    }
       
    /**
     * Whether the instance is covered by this rule
     *
     * @param datum the instance in question
     * @return the boolean value indicating whether the instance is covered by this rule
     */
    public boolean isCover(Instance datum){
      boolean isCover=true;
           
      for(int i=0; i<m_Antds.size(); i++){
        Antd antd = (Antd)m_Antds.elementAt(i);
        if(!antd.isCover(datum)){
          isCover = false;
          break;
        }
      }
           
      return isCover;
    }       
       
    /**
     * Whether this rule has antecedents, i.e. whether it is more than
     * just a default rule
     *
     * @return the boolean value indicating whether the rule has antecedents
     */
    public boolean hasAntds(){
      if (m_Antds == null)
        return false;
      else
        return (m_Antds.size() > 0);
    }     
       
    /**
     * Build one rule using the growing data
     *
     * @param data the growing data used to build the rule
     */   
    private void grow(Instances data){
      Instances growData = new Instances(data);
           
      m_AccuG = computeDefAccu(growData);
      m_CoverG = growData.sumOfWeights();
      /* Compute the default accuracy rate of the growing data */
      double defAcRt= m_AccuG / m_CoverG; 
           
      /* Keep the record of which attributes have already been used*/   
      boolean[] used=new boolean [growData.numAttributes()];
      for (int k=0; k<used.length; k++)
        used[k]=false;
      int numUnused=used.length;
           
      double maxInfoGain;
      boolean isContinue = true; // The stopping criterion of this rule
           
      while (isContinue){   
        maxInfoGain = 0;       // We require that infoGain be positive
               
        /* Build a list of antecedents */
        Antd oneAntd=null;
        Instances coverData = null;
        Enumeration enumAttr=growData.enumerateAttributes();       
        int index=-1; 
               
        /* Build one condition based on all attributes not used yet*/
        while (enumAttr.hasMoreElements()){
          Attribute att= (Attribute)(enumAttr.nextElement());
          index++;
                   
          Antd antd =null;     
          if(att.isNumeric())
            antd = new NumericAntd(att);
          else
            antd = new NominalAntd(att);
                   
          if(!used[index]){
            /* Compute the best information gain for each attribute,
               it's stored in the antecedent formed by this attribute.
               This procedure returns the data covered by the antecedent*/
            Instances coveredData = computeInfoGain(growData, defAcRt, antd);
            if(coveredData != null){
              double infoGain = antd.getMaxInfoGain();                 
              if(Utils.gr(infoGain, maxInfoGain)){
                oneAntd=antd;
                coverData = coveredData; 
                maxInfoGain = infoGain;
              }             
            }
          }
        }
               
        if(oneAntd == null)      return;
               
        //Numeric attributes can be used more than once
        if(!oneAntd.getAttr().isNumeric()){ 
          used[oneAntd.getAttr().index()]=true;
          numUnused--;
        }
               
        m_Antds.addElement((Object)oneAntd);
        growData = coverData;// Grow data size is shrinking
               
        defAcRt = oneAntd.getAccuRate();
               
        /* Stop if no more data, rule perfect, no more attributes */
        if(Utils.eq(growData.sumOfWeights(), 0.0) || Utils.eq(defAcRt, 1.0) || (numUnused == 0))
          isContinue = false;
      }
    }
       
    /**
     * Compute the best information gain for the specified antecedent
     * 
     * @param instances the data based on which the infoGain is computed
     * @param defAcRt the default accuracy rate of the data
     * @param antd the specific antecedent
     * @return the data covered by the antecedent
     */
    private Instances computeInfoGain(Instances instances, double defAcRt, Antd antd){
      Instances data = new Instances(instances);
           
      /* Split the data into bags.
         The information gain of each bag is also calculated in this procedure */
      Instances[] splitData = antd.splitData(data, defAcRt, m_Class); 
           
      /* Get the bag of data to be used for the next antecedents */
      if(splitData != null)
        return splitData[(int)antd.getAttrValue()];
      else return null;
    }
       
    /**
     * Prune the rule using the pruning data and update the worth parameters for this rule
     * The accuracy rate is used to prune the rule.
     *
     * @param pruneData the pruning data used to prune the rule
     */   
    private void prune(Instances pruneData){
      Instances data=new Instances(pruneData);
           
      double total = data.sumOfWeights();
           
      /* The default accurate# and the accuracy rate on the pruning data */
      double defAccu=0, defAccuRate=0;
           
      int size=m_Antds.size();
      if(size == 0) return; // Default rule before pruning
           
      double[] worthRt = new double[size];
      double[] coverage = new double[size];
      double[] worthValue = new double[size];
      for(int w=0; w<size; w++){
        worthRt[w]=coverage[w]=worthValue[w]=0.0;
      }
           
      /* Calculate accuracy parameters for all the antecedents in this rule */
      for(int x=0; x<size; x++){
        Antd antd=(Antd)m_Antds.elementAt(x);
        Attribute attr= antd.getAttr();
        Instances newData = new Instances(data);
        data = new Instances(newData, newData.numInstances()); // Make data empty
               
        for(int y=0; y<newData.numInstances(); y++){
          Instance ins=newData.instance(y);
          if(!ins.isMissing(attr)){              // Attribute not missing
            if(antd.isCover(ins)){             // Covered by this antecedent
              coverage[x] += ins.weight();
              data.add(ins);                 // Add to data for further pruning
              if(Utils.eq(ins.classValue(), m_Class)) // Accurate prediction
                worthValue[x] += ins.weight();
            }
          }
        }
               
        if(coverage[x] != 0) 
          worthRt[x] = worthValue[x]/coverage[x];
      }
           
      /* Prune the antecedents according to the accuracy parameters */
      for(int z=(size-1); z > 0; z--)
        if(Utils.sm(worthRt[z], worthRt[z-1]))
          m_Antds.removeElementAt(z);
        else  break;
           
      /* Check whether this rule is a default rule */
      if(m_Antds.size() == 1){
        defAccu = computeDefAccu(pruneData);
        defAccuRate = defAccu/total;                // Compute def. accuracy
        if(Utils.sm(worthRt[0], defAccuRate)){      // Becomes a default rule
          m_Antds.removeAllElements();
        }
      }   
           
      /* Update the worth parameters of this rule*/
      int antdsSize = m_Antds.size();
      if(antdsSize != 0){                          // Not a default rule
        m_Worth = worthValue[antdsSize-1];       // WorthValues of the last antecedent
        m_WorthRate = worthRt[antdsSize-1];
        m_CoverP = coverage[antdsSize-1];
        Antd last = (Antd)m_Antds.lastElement();
        m_CoverG = last.getCover();
        m_AccuG = last.getAccu();
      }
      else{                                        // Default rule   
        m_Worth = defAccu;                       // Default WorthValues
        m_WorthRate = defAccuRate;
        m_CoverP = total;
      }
    }
       
    /**
     * Private function to compute the default number of accurate instances
     * in the specified data for m_Class
     *
     * @param data the data in question
     * @return the default accuracy number
     */
    private double computeDefAccu(Instances data){ 
      double defAccu=0;
      for(int i=0; i<data.numInstances(); i++){
        Instance inst = data.instance(i);
        if(Utils.eq(inst.classValue(), m_Class))
          defAccu += inst.weight();
      }
      return defAccu;
    }
       
    /** The following are get functions after prune() has set the value of worthRate and worth*/
    public double getWorthRate(){ return m_WorthRate; }
    public double getWorth(){ return m_Worth; }
    public double getCoverP(){ return m_CoverP; }
    public double getCoverG(){ return m_CoverG; }
    public double getAccuG(){ return m_AccuG; }

    /**
     * Prints this rule with the specified class label
     *
     * @param att the string standing for the attribute in the consequent of this rule
     * @param cl the string standing for the value in the consequent of this rule
     * @return a textual description of this rule with the specified class label
     */
    public String toString(String att, String cl) {
      StringBuffer text =  new StringBuffer();
      if(m_Antds.size() > 0){
        for(int j=0; j< (m_Antds.size()-1); j++)
          text.append("(" + ((Antd)(m_Antds.elementAt(j))).toString()+ ") and ");
        text.append("("+((Antd)(m_Antds.lastElement())).toString() + ")");
      }
      text.append(" => " + att + " = " + cl);
      text.append("  ("+m_CoverG+"/"+(m_CoverG - m_AccuG)+") ["+
                  m_CoverP+"/"+(m_CoverP - m_Worth)+"]");
      return text.toString();
    }
       
    /**
     * Prints this rule
     *
     * @return a textual description of this rule
     */
    public String toString() {
      return toString(m_ClassAttribute.name(), m_ClassAttribute.value((int)m_Class));
    }       
   
    /**
     * Returns the revision string.
     *
     * @return          the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 5928 $");
    }
  }
   
   
  /**
   * The single antecedent in the rule, which is composed of an attribute and
   * the corresponding value.  There are two inherited classes, namely NumericAntd
   * and NominalAntd, in which the attributes are numeric and nominal respectively.
   */
  private abstract class Antd 
    implements Serializable, RevisionHandler {

    /** for serialization */
    private static final long serialVersionUID = 5317379013858933369L;
   
    /** The attribute of the antecedent */
    protected Attribute att;
       
    /** The attribute value of the antecedent. 
       For a numeric attribute, the value is either 0 (1st bag) or 1 (2nd bag) */
    protected double value; 
       
    /** The maximum infoGain achieved by this antecedent test */
    protected double maxInfoGain;
       
    /** The accuracy rate of this antecedent test on the growing data */
    protected double accuRate;
       
    /** The coverage of this antecedent */
    protected double cover;
       
    /** The accurate data for this antecedent */
    protected double accu;
       
    /** Constructor*/
    public Antd(Attribute a){
      att=a;
      value=Double.NaN; 
      maxInfoGain = 0;
      accuRate = Double.NaN;
      cover = Double.NaN;
      accu = Double.NaN;
    }
       
    /* The abstract members for inheritance */
    public abstract Instances[] splitData(Instances data, double defAcRt, double cla);
    public abstract boolean isCover(Instance inst);
    public abstract String toString();
       
    /* Get functions of this antecedent */
    public Attribute getAttr(){ return att; }
    public double getAttrValue(){ return value; }
    public double getMaxInfoGain(){ return maxInfoGain; }
    public double getAccuRate(){ return accuRate; } 
    public double getAccu(){ return accu; } 
    public double getCover(){ return cover; } 
   
    /**
     * Returns the revision string.
     *
     * @return          the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 5928 $");
    }
  }
   
  /**
   * The antecedent with a numeric attribute
   */
  private class NumericAntd 
    extends Antd {
   
    /** for serialization */
    static final long serialVersionUID = 1968761518014492214L;
       
    /** The split point for this numeric antecedent */
    private double splitPoint;
       
    /** Constructor*/
    public NumericAntd(Attribute a){ 
      super(a);
      splitPoint = Double.NaN;
    }   
       
    /** Get split point of this numeric antecedent */
    public double getSplitPoint(){ return splitPoint; }
       
    /**
     * Implements the splitData function. 
     * This procedure splits the data into two bags according
     * to the information gain of the numeric attribute value.
     * The maximum infoGain is also calculated. 
     *
     * @param insts the data to be split
     * @param defAcRt the default accuracy rate for the data
     * @param cl the class label to be predicted
     * @return the array of data after the split
     */
    public Instances[] splitData(Instances insts, double defAcRt, double cl){
      Instances data = new Instances(insts);
      data.sort(att);
      int total=data.numInstances();// Total number of instances without
      // missing value for att
           
      int split=1;                  // Current split position
      int prev=0;                   // Previous split position
      int finalSplit=split;         // Final split position
      maxInfoGain = 0;
      value = 0;       

      // Compute minimum number of Instances required in each split
      double minSplit =  0.1 * (data.sumOfWeights()) / 2.0;
      if (Utils.smOrEq(minSplit,m_MinNo)) 
        minSplit = m_MinNo;
      else if (Utils.gr(minSplit,25)) 
        minSplit = 25;     
           
      double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0;
           
      for(int x=0; x<data.numInstances(); x++){
        Instance inst = data.instance(x);
        if(inst.isMissing(att)){
          total = x;
          break;
        }
               
        sndCover += inst.weight();
        if(Utils.eq(inst.classValue(), cl))
          sndAccu += inst.weight();
      }
           
      // Enough Instances with known values?
      if (Utils.sm(sndCover,(2*minSplit)))
        return null;
           
      if(total == 0) return null; // Data all missing for the attribute         
      splitPoint = data.instance(total-1).value(att);   
           
      for(; split < total; split++){
        if(!Utils.eq(data.instance(split).value(att), 
                     data.instance(prev).value(att))){ // Can't split within the same value
                   
          for(int y=prev; y<split; y++){
            Instance inst = data.instance(y);
            fstCover += inst.weight(); sndCover -= inst.weight(); 
            if(Utils.eq(data.instance(y).classValue(), cl)){
              fstAccu += inst.weight();  // First bag positive# ++
              sndAccu -= inst.weight();  // Second bag positive# --
            }                     
          }
                   
          if(Utils.sm(fstCover, minSplit) || Utils.sm(sndCover, minSplit)){
            prev=split;  // Cannot split because either
            continue;    // split has not enough data
          }
                   
          double fstAccuRate = 0, sndAccuRate = 0;
          if(!Utils.eq(fstCover,0))
            fstAccuRate = fstAccu/fstCover;             
          if(!Utils.eq(sndCover,0))
            sndAccuRate = sndAccu/sndCover;
                   
          /* Which bag has higher information gain? */
          boolean isFirst; 
          double fstInfoGain, sndInfoGain;
          double accRate, infoGain, coverage, accurate;
                   
          fstInfoGain = Utils.eq(fstAccuRate, 0) ? 
            0 : (fstAccu*(Utils.log2(fstAccuRate) - Utils.log2(defAcRt)));
          sndInfoGain = Utils.eq(sndAccuRate, 0) ? 
            0 : (sndAccu*(Utils.log2(sndAccuRate) - Utils.log2(defAcRt)));
          if(Utils.gr(fstInfoGain,sndInfoGain) || 
             (Utils.eq(fstInfoGain,sndInfoGain)&&(Utils.grOrEq(fstAccuRate,sndAccuRate)))){
            isFirst = true;
            infoGain = fstInfoGain;
            accRate = fstAccuRate;
            accurate = fstAccu;
            coverage = fstCover;
          }
          else{
            isFirst = false;
            infoGain = sndInfoGain;
            accRate = sndAccuRate;
            accurate = sndAccu;
            coverage = sndCover;
          }
                   
          boolean isUpdate = Utils.gr(infoGain, maxInfoGain);
                   
          /* Check whether this is so far the max infoGain */
          if(isUpdate){
            splitPoint = (data.instance(split).value(att) + 
                          data.instance(prev).value(att))/2;
            value = ((isFirst) ? 0 : 1);
            accuRate = accRate;
            accu = accurate;
            cover = coverage;
            maxInfoGain = infoGain;
            finalSplit = split;
          }
          prev=split;
        }
      }
           
      /* Split the data */
      Instances[] splitData = new Instances[2];
      splitData[0] = new Instances(data, 0, finalSplit);
      splitData[1] = new Instances(data, finalSplit, total-finalSplit);
           
      return splitData;
    }
       
    /**
     * Whether the instance is covered by this antecedent
     *
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is covered
     *         by this antecedent
     */
    public boolean isCover(Instance inst){
      boolean isCover=false;
      if(!inst.isMissing(att)){
        if(Utils.eq(value, 0)){
          if(Utils.smOrEq(inst.value(att), splitPoint))
            isCover=true;
        }
        else if(Utils.gr(inst.value(att), splitPoint))
          isCover=true;
      }
      return isCover;
    }
       
    /**
     * Prints this antecedent
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
      String symbol = Utils.eq(value, 0.0) ? " <= " : " > ";
      return (att.name() + symbol + Utils.doubleToString(splitPoint, 6));
    }   
   
    /**
     * Returns the revision string.
     *
     * @return          the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 5928 $");
    }
  }
   
   
  /**
   * The antecedent with a nominal attribute
   */
  private class NominalAntd 
    extends Antd {
   
    /** for serialization */
    static final long serialVersionUID = -256386137196078004L;
       
    /* The parameters of infoGain calculated for each attribute value */
    private double[] accurate;
    private double[] coverage;
    private double[] infoGain;
       
    /** Constructor*/
    public NominalAntd(Attribute a){ 
      super(a);
      int bag = att.numValues();
      accurate = new double[bag];
      coverage = new double[bag];
      infoGain = new double[bag];
    }   
       
    /**
     * Implements the splitData function. 
     * This procedure splits the data into bags according
     * to the nominal attribute value.
     * The infoGain for each bag is also calculated. 
     *
     * @param data the data to be split
     * @param defAcRt the default accuracy rate for the data
     * @param cl the class label to be predicted
     * @return the array of data after the split
     */
    public Instances[] splitData(Instances data, double defAcRt, double cl){
      int bag = att.numValues();
      Instances[] splitData = new Instances[bag];
           
      for(int x=0; x<bag; x++){
        accurate[x] = coverage[x] = infoGain[x] = 0;
        splitData[x] = new Instances(data, data.numInstances());
      }
           
      for(int x=0; x<data.numInstances(); x++){
        Instance inst=data.instance(x);
        if(!inst.isMissing(att)){
          int v = (int)inst.value(att);
          splitData[v].add(inst);
          coverage[v] += inst.weight();
          if(Utils.eq(inst.classValue(), cl))
            accurate[v] += inst.weight();
        }
      }
           
      // Check if >=2 splits have more than the minimal data
      int count=0; 
      for(int x=0; x<bag; x++){
        double t = coverage[x];
        if(Utils.grOrEq(t, m_MinNo)){
          double p = accurate[x];               
                   
          if(!Utils.eq(t, 0.0))
            infoGain[x] = p *((Utils.log2(p/t)) - (Utils.log2(defAcRt)));
          ++count;
        }
      }
               
      if(count < 2) // Don't split
        return null;
           
      value = (double)Utils.maxIndex(infoGain);
           
      cover = coverage[(int)value];
      accu = accurate[(int)value];
           
      if(!Utils.eq(cover,0))
        accuRate = accu / cover;
      else accuRate = 0;
           
      maxInfoGain = infoGain [(int)value];
           
      return splitData;
    }
       
    /**
     * Whether the instance is covered by this antecedent
     *
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is covered
     *         by this antecedent
     */
    public boolean isCover(Instance inst){
      boolean isCover=false;
      if(!inst.isMissing(att)){
        if(Utils.eq(inst.value(att), value))
          isCover=true;     
      }
      return isCover;
    }
       
    /**
     * Prints this antecedent
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
      return (att.name() + " = " +att.value((int)value));
    } 
   
    /**
     * Returns the revision string.
     *
     * @return          the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 5928 $");
    }
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return      the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);
   
    return result;
  }

  /**
   * Builds a rule learner in a ripple-down manner.
   *
   * @param instances the training data
   * @throws Exception if classifier can't be built successfully
   */
  public void buildClassifier(Instances instances) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    Instances data = new Instances(instances);
    data.deleteWithMissingClass();
   
    int numCl = data.numClasses();
    m_Root = new Ridor_node();
    m_Class = instances.classAttribute();     // The original class label
       
    int index = data.classIndex();
    m_Cover = data.sumOfWeights();
   
    m_Random = new Random(m_Seed);
       
    /* Create a binary attribute */
    FastVector binary_values = new FastVector(2);
    binary_values.addElement("otherClasses");
    binary_values.addElement("defClass");
    Attribute attr = new Attribute ("newClass", binary_values);
    data.insertAttributeAt(attr, index);       
    data.setClassIndex(index);                 // The new class label

    /* Partition the data into bags according to their original class values */
    Instances[] dataByClass = new Instances[numCl];
    for(int i=0; i < numCl; i++)
      dataByClass[i] = new Instances(data, data.numInstances()); // Empty bags
    for(int i=0; i < data.numInstances(); i++){ // Partitioning
      Instance inst = data.instance(i);
      inst.setClassValue(0);           // Set new class value to be 0
      dataByClass[(int)inst.value(index+1)].add(inst); 
    }   
       
    for(int i=0; i < numCl; i++)   
      dataByClass[i].deleteAttributeAt(index+1);   // Delete original class
       
    m_Root.findRules(dataByClass, 0);
   
  }
   
  /**
   * Classify the test instance with the rule learner
   *
   * @param datum the instance to be classified
   * @return the classification
   */
  public double classifyInstance(Instance datum){
    return classify(m_Root, datum);
  }
   
  /**
   * Classify the test instance with one node of Ridor
   *
   * @param node the node of Ridor to classify the test instance
   * @param datum the instance to be classified
   * @return the classification
   */
  private double classify(Ridor_node node, Instance datum){
    double classValue = node.getDefClass();
    RidorRule[] rules = node.getRules();

    if(rules != null){
      Ridor_node[] excepts = node.getExcepts(); 
      for(int i=0; i < excepts.length; i++){
        if(rules[i].isCover(datum)){
          classValue = classify(excepts[i], datum);
          break;
        }
      }
    }
       
    return classValue;
  }

  /**
   * Returns an enumeration describing the available options
   * Valid options are: <p>
   *
   * -F number <br>
   * Set number of folds for reduced error pruning. One fold is
   * used as the pruning set. (Default: 3) <p>
   *
   * -S number <br>
   * Set number of shuffles for randomization. (Default: 1) <p>
   *
   * -A <br>
   * Set flag of whether to use the error rate of all the data to select
   * the default class in each step. If not set, the learner will only use
   * the error rate in the pruning data <p>
   *
   * -M <br>
   * Set flag of whether to use the majority class as the default class
   * in each step instead of choosing the default class based on the error rate
   * (if the flag is not set) <p> 
   *
   * -N number <br>
   * Set the minimal weights of instances within a split.
   * (Default: 2) <p>
   *   
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(5);
       
    newVector.addElement(new Option("\tSet number of folds for IREP\n" +
                                    "\tOne fold is used as pruning set.\n" +
                                    "\t(default 3)","F", 1, "-F <number of folds>"));
    newVector.addElement(new Option("\tSet number of shuffles to randomize\n" +
                                    "\tthe data in order to get a better rule.\n" +
                                    "\t(default 1)","S", 1, "-S <number of shuffles>"));
    newVector.addElement(new Option("\tSet flag of whether to use the error rate\n"+
                                    "\tof all the data to select the default class\n"+
                                    "\tin each step. If not set, the learner will only use\n"+
                                    "\tthe error rate in the pruning data","A", 0, "-A"));
    newVector.addElement(new Option("\tSet flag of whether to use the majority class as\n"+
                                    "\tthe default class in each step instead of\n"+
                                    "\tchoosing the default class based on the error rate\n"+
                                    "\t(if the flag is not set)","M", 0, "-M"));
    newVector.addElement(new Option("\tSet the minimal weights of instances\n" +
                                    "\twithin a split.\n" +
                                    "\t(default 2.0)","N", 1, "-N <min. weights>"));           
    return newVector.elements();
  }
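   
  /*
   * An illustrative command line (not part of the original source; the
   * dataset name is a placeholder).  The -t option and the evaluation
   * output are handled by the standard Weka evaluation harness invoked
   * from main():
   *
   *   java weka.classifiers.rules.Ridor -F 3 -S 1 -N 2.0 -t train.arff
   */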
   
  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -F &lt;number of folds&gt;
   *  Set number of folds for IREP
   *  One fold is used as pruning set.
   *  (default 3)</pre>
   *
   * <pre> -S &lt;number of shuffles&gt;
   *  Set number of shuffles to randomize
   *  the data in order to get a better rule.
   *  (default 1)</pre>
   *
   * <pre> -A
   *  Set flag of whether to use the error rate
   *  of all the data to select the default class
   *  in each step. If not set, the learner will only use the error rate in the pruning data</pre>
   *
   * <pre> -M
   *  Set flag of whether to use the majority class as
   *  the default class in each step instead of
   *  choosing the default class based on the error rate
   *  (if the flag is not set)</pre>
   *
   * <pre> -N &lt;min. weights&gt;
   *  Set the minimal weights of instances
   *  within a split.
   *  (default 2.0)</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
       
    String numFoldsString = Utils.getOption('F', options);
    if (numFoldsString.length() != 0) 
      m_Folds = Integer.parseInt(numFoldsString);
    else 
      m_Folds = 3;
       
    String numShuffleString = Utils.getOption('S', options);
    if (numShuffleString.length() != 0) 
      m_Shuffle = Integer.parseInt(numShuffleString);
    else 
      m_Shuffle = 1;

    String seedString = Utils.getOption('s', options);
    if (seedString.length() != 0) 
      m_Seed = Integer.parseInt(seedString);
    else 
      m_Seed = 1;
       
    String minNoString = Utils.getOption('N', options);
    if (minNoString.length() != 0) 
      m_MinNo = Double.parseDouble(minNoString);
    else 
      m_MinNo = 2.0;
       
    m_IsAllErr = Utils.getFlag('A', options);
    m_IsMajority = Utils.getFlag('M', options);
  }
   
  /**
   * Gets the current settings of the Classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {
       
    String [] options = new String [8];
    int current = 0;
    options[current++] = "-F"; options[current++] = "" + m_Folds;
    options[current++] = "-S"; options[current++] = "" + m_Shuffle;
    options[current++] = "-N"; options[current++] = "" + m_MinNo;
       
    if(m_IsAllErr)
      options[current++] = "-A";
    if(m_IsMajority)
      options[current++] = "-M";       
    while (current < options.length) 
      options[current++] = "";
    return options;
  }
   
  /** Set and get members for parameters */

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String foldsTipText() {
    return "Determines the amount of data used for pruning. One fold is used for "
      + "pruning, the rest for growing the rules.";
  }

  public void setFolds(int fold){ m_Folds = fold; }
  public int getFolds(){ return m_Folds; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String shuffleTipText() {
    return "Determines how often the data is shuffled before a rule "
      + "is chosen. If > 1, a rule is learned multiple times and the "
      + "most accurate rule is chosen.";
  }

  public void setShuffle(int sh){ m_Shuffle = sh; }
  public int getShuffle(){ return m_Shuffle; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    return "The seed used for randomizing the data.";
  }

  public void setSeed(int s){ m_Seed = s; }
  public int getSeed(){ return m_Seed; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String wholeDataErrTipText() {
    return "Whether the worth of a rule is computed based on all the data "
      + "or just on the data covered by the rule.";
  }

  public void setWholeDataErr(boolean a){ m_IsAllErr = a; }
  public boolean getWholeDataErr(){ return m_IsAllErr; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String majorityClassTipText() {
    return "Whether the majority class is used as default.";
  }
  public void setMajorityClass(boolean m){ m_IsMajority = m; }
  public boolean getMajorityClass(){ return m_IsMajority; }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String minNoTipText() {
    return "The minimum total weight of the instances in a rule.";
  }

  public void setMinNo(double m){  m_MinNo = m; }
  public double getMinNo(){ return m_MinNo; }
   
  /**
   * Returns an enumeration of the additional measure names
   * @return an enumeration of the measure names
   */
  public Enumeration enumerateMeasures() {
    Vector newVector = new Vector(1);
    newVector.addElement("measureNumRules");
    return newVector.elements();
  }
   
  /**
   * Returns the value of the named measure
   * @param additionalMeasureName the name of the measure to query for its value
   * @return the value of the named measure
   * @throws IllegalArgumentException if the named measure is not supported
   */
  public double getMeasure(String additionalMeasureName) {
    if (additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0) 
      return numRules();
    else 
      throw new IllegalArgumentException(additionalMeasureName+" not supported (Ripple down rule learner)");
  } 
   
  /**
   * Measures the total number of rules in the model
   *
   * @return the number of rules
   */ 
  private double numRules(){
    int size = 0;
    if(m_Root != null)
      size = m_Root.size();
       
    return (double)(size+1); // Add the default rule
  }
   
  /**
   * Prints all the rules of the rule learner.
   *
   * @return a textual description of the classifier
   */
  public String toString() {
    if (m_Root == null) 
      return "RIpple DOwn Rule Learner(Ridor): No model built yet.";
       
    return ("RIpple DOwn Rule Learner(Ridor) rules\n"+
            "--------------------------------------\n\n" + 
            m_Root.toString() +
            "\nTotal number of rules (incl. the default rule): " + (int)numRules());
  }
 
  /**
   * Returns the revision string.
   *
   * @return            the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5928 $");
  }
   
  /**
   * Main method.
   *
   * @param args the options for the classifier
   */
  public static void main(String[] args) {     
    runClassifier(new Ridor(), args);
  }
}