Context Navigation

source: src/main/java/weka/attributeSelection/RankSearch.java @ 17

Last change on this file since 17 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 16.0 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* RankSearch.java
	19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23	package weka.attributeSelection;
	24
	25	import weka.core.Instances;
	26	import weka.core.Option;
	27	import weka.core.OptionHandler;
	28	import weka.core.RevisionUtils;
	29	import weka.core.Utils;
	30
	31	import java.util.BitSet;
	32	import java.util.Enumeration;
	33	import java.util.Vector;
	34
	35	/**
	36	<!-- globalinfo-start -->
	37	* RankSearch : <br/>
	38	* <br/>
	39	* Uses an attribute/subset evaluator to rank all attributes. If a subset evaluator is specified, then a forward selection search is used to generate a ranked list. From the ranked list of attributes, subsets of increasing size are evaluated, ie. The best attribute, the best attribute plus the next best attribute, etc.... The best attribute set is reported. RankSearch is linear in the number of attributes if a simple attribute evaluator is used such as GainRatioAttributeEval.<br/>
	40	* <p/>
	41	<!-- globalinfo-end -->
	42	*
	43	<!-- options-start -->
	44	* Valid options are: <p/>
	45	*
	46	* <pre> -A <attribute evaluator>
	47	* class name of attribute evaluator to use for ranking. Place any
	48	* evaluator options LAST on the command line following a "--".
	49	* eg.:
	50	* -A weka.attributeSelection.GainRatioAttributeEval ... -- -M
	51	* (default: weka.attributeSelection.GainRatioAttributeEval)</pre>
	52	*
	53	* <pre> -S <step size>
	54	* number of attributes to be added from the
	55	* ranking in each iteration (default = 1).</pre>
	56	*
	57	* <pre> -R <start point>
	58	* point in the ranking to start evaluating from.
	59	* (default = 0, ie. the head of the ranking).</pre>
	60	*
	61	* <pre>
	62	* Options specific to evaluator weka.attributeSelection.GainRatioAttributeEval:
	63	* </pre>
	64	*
	65	* <pre> -M
	66	* treat missing values as a seperate value.</pre>
	67	*
	68	<!-- options-end -->
	69	*
	70	* @author Mark Hall (mhall@cs.waikato.ac.nz)
	71	* @version $Revision: 4614 $
	72	*/
	73	public class RankSearch
	74	extends ASSearch
	75	implements OptionHandler {
	76
	77	/** for serialization */
	78	static final long serialVersionUID = -7992268736874353755L;
	79
	80	/** does the data have a class */
	81	private boolean m_hasClass;
	82
	83	/** holds the class index */
	84	private int m_classIndex;
	85
	86	/** number of attributes in the data */
	87	private int m_numAttribs;
	88
	89	/** the best subset found */
	90	private BitSet m_best_group;
	91
	92	/** the attribute evaluator to use for generating the ranking */
	93	private ASEvaluation m_ASEval;
	94
	95	/** the subset evaluator with which to evaluate the ranking */
	96	private ASEvaluation m_SubsetEval;
	97
	98	/** the training instances */
	99	private Instances m_Instances;
	100
	101	/** the merit of the best subset found */
	102	private double m_bestMerit;
	103
	104	/** will hold the attribute ranking */
	105	private int [] m_Ranking;
	106
	107	/** add this many attributes in each iteration from the ranking */
	108	protected int m_add = 1;
	109
	110	/** start from this point in the ranking */
	111	protected int m_startPoint = 0;
	112
	113	/**
	114	* Returns a string describing this search method
	115	* @return a description of the search method suitable for
	116	* displaying in the explorer/experimenter gui
	117	*/
	118	public String globalInfo() {
	119	return "RankSearch : \n\n"
	120	+"Uses an attribute/subset evaluator to rank all attributes. "
	121	+"If a subset evaluator is specified, then a forward selection "
	122	+"search is used to generate a ranked list. From the ranked "
	123	+"list of attributes, subsets of increasing size are evaluated, ie. "
	124	+"The best attribute, the best attribute plus the next best attribute, "
	125	+"etc.... The best attribute set is reported. RankSearch is linear in "
	126	+"the number of attributes if a simple attribute evaluator is used "
	127	+"such as GainRatioAttributeEval.\n";
	128	}
	129
	130	/**
	131	* Constructor
	132	*/
	133	public RankSearch () {
	134	resetOptions();
	135	}
	136
	137	/**
	138	* Returns the tip text for this property
	139	* @return tip text for this property suitable for
	140	* displaying in the explorer/experimenter gui
	141	*/
	142	public String attributeEvaluatorTipText() {
	143	return "Attribute evaluator to use for generating a ranking.";
	144	}
	145
	146	/**
	147	* Set the attribute evaluator to use for generating the ranking.
	148	* @param newEvaluator the attribute evaluator to use.
	149	*/
	150	public void setAttributeEvaluator(ASEvaluation newEvaluator) {
	151	m_ASEval = newEvaluator;
	152	}
	153
	154	/**
	155	* Get the attribute evaluator used to generate the ranking.
	156	* @return the evaluator used to generate the ranking.
	157	*/
	158	public ASEvaluation getAttributeEvaluator() {
	159	return m_ASEval;
	160	}
	161
	162	/**
	163	* Returns the tip text for this property
	164	* @return tip text for this property suitable for
	165	* displaying in the explorer/experimenter gui
	166	*/
	167	public String stepSizeTipText() {
	168	return "Add this many attributes from the ranking in each iteration.";
	169	}
	170
	171	/**
	172	* Set the number of attributes to add from the rankining
	173	* in each iteration
	174	* @param ss the number of attribes to add.
	175	*/
	176	public void setStepSize(int ss) {
	177	if (ss > 0) {
	178	m_add = ss;
	179	}
	180	}
	181
	182	/**
	183	* Get the number of attributes to add from the rankining
	184	* in each iteration
	185	* @return the number of attributes to add.
	186	*/
	187	public int getStepSize() {
	188	return m_add;
	189	}
	190
	191	/**
	192	* Returns the tip text for this property
	193	* @return tip text for this property suitable for
	194	* displaying in the explorer/experimenter gui
	195	*/
	196	public String startPointTipText() {
	197	return "Start evaluating from this point in the ranking.";
	198	}
	199
	200	/**
	201	* Set the point at which to start evaluating the ranking
	202	* @param sp the position in the ranking to start at
	203	*/
	204	public void setStartPoint(int sp) {
	205	if (sp >= 0) {
	206	m_startPoint = sp;
	207	}
	208	}
	209
	210	/**
	211	* Get the point at which to start evaluating the ranking
	212	* @return the position in the ranking to start at
	213	*/
	214	public int getStartPoint() {
	215	return m_startPoint;
	216	}
	217
	218	/**
	219	* Returns an enumeration describing the available options.
	220	* @return an enumeration of all the available options.
	221	**/
	222	public Enumeration listOptions () {
	223	Vector newVector = new Vector(4);
	224
	225	newVector.addElement(new Option(
	226	"\tclass name of attribute evaluator to use for ranking. Place any\n"
	227	+ "\tevaluator options LAST on the command line following a \"--\".\n"
	228	+ "\teg.:\n"
	229	+ "\t\t-A weka.attributeSelection.GainRatioAttributeEval ... -- -M\n"
	230	+ "\t(default: weka.attributeSelection.GainRatioAttributeEval)",
	231	"A", 1, "-A <attribute evaluator>"));
	232
	233	newVector.addElement(new Option(
	234	"\tnumber of attributes to be added from the"
	235	+"\n\tranking in each iteration (default = 1).",
	236	"S", 1,"-S <step size>"));
	237
	238	newVector.addElement(new Option(
	239	"\tpoint in the ranking to start evaluating from. "
	240	+"\n\t(default = 0, ie. the head of the ranking).",
	241	"R", 1,"-R <start point>"));
	242
	243	if ((m_ASEval != null) &&
	244	(m_ASEval instanceof OptionHandler)) {
	245	newVector.addElement(new Option("", "", 0, "\nOptions specific to "
	246	+ "evaluator "
	247	+ m_ASEval.getClass().getName()
	248	+ ":"));
	249	Enumeration enu = ((OptionHandler)m_ASEval).listOptions();
	250
	251	while (enu.hasMoreElements()) {
	252	newVector.addElement(enu.nextElement());
	253	}
	254	}
	255
	256	return newVector.elements();
	257	}
	258
	259
	260	/**
	261	* Parses a given list of options. <p/>
	262	*
	263	<!-- options-start -->
	264	* Valid options are: <p/>
	265	*
	266	* <pre> -A <attribute evaluator>
	267	* class name of attribute evaluator to use for ranking. Place any
	268	* evaluator options LAST on the command line following a "--".
	269	* eg.:
	270	* -A weka.attributeSelection.GainRatioAttributeEval ... -- -M
	271	* (default: weka.attributeSelection.GainRatioAttributeEval)</pre>
	272	*
	273	* <pre> -S <step size>
	274	* number of attributes to be added from the
	275	* ranking in each iteration (default = 1).</pre>
	276	*
	277	* <pre> -R <start point>
	278	* point in the ranking to start evaluating from.
	279	* (default = 0, ie. the head of the ranking).</pre>
	280	*
	281	* <pre>
	282	* Options specific to evaluator weka.attributeSelection.GainRatioAttributeEval:
	283	* </pre>
	284	*
	285	* <pre> -M
	286	* treat missing values as a seperate value.</pre>
	287	*
	288	<!-- options-end -->
	289	*
	290	* @param options the list of options as an array of strings
	291	* @throws Exception if an option is not supported
	292	*/
	293	public void setOptions (String[] options)
	294	throws Exception {
	295	String optionString;
	296	resetOptions();
	297
	298	optionString = Utils.getOption('S', options);
	299	if (optionString.length() != 0) {
	300	setStepSize(Integer.parseInt(optionString));
	301	}
	302
	303	optionString = Utils.getOption('R', options);
	304	if (optionString.length() != 0) {
	305	setStartPoint(Integer.parseInt(optionString));
	306	}
	307
	308	optionString = Utils.getOption('A', options);
	309	if (optionString.length() == 0)
	310	optionString = GainRatioAttributeEval.class.getName();
	311	setAttributeEvaluator(ASEvaluation.forName(optionString,
	312	Utils.partitionOptions(options)));
	313	}
	314
	315	/**
	316	* Gets the current settings of WrapperSubsetEval.
	317	*
	318	* @return an array of strings suitable for passing to setOptions()
	319	*/
	320	public String[] getOptions () {
	321	String[] evaluatorOptions = new String[0];
	322
	323	if ((m_ASEval != null) &&
	324	(m_ASEval instanceof OptionHandler)) {
	325	evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();
	326	}
	327
	328	String[] options = new String[8 + evaluatorOptions.length];
	329	int current = 0;
	330
	331	options[current++] = "-S"; options[current++] = ""+getStepSize();
	332
	333	options[current++] = "-R"; options[current++] = ""+getStartPoint();
	334
	335	if (getAttributeEvaluator() != null) {
	336	options[current++] = "-A";
	337	options[current++] = getAttributeEvaluator().getClass().getName();
	338	}
	339
	340	if (evaluatorOptions.length > 0) {
	341	options[current++] = "--";
	342	System.arraycopy(evaluatorOptions, 0, options, current,
	343	evaluatorOptions.length);
	344	current += evaluatorOptions.length;
	345	}
	346
	347	while (current < options.length) {
	348	options[current++] = "";
	349	}
	350
	351	return options;
	352	}
	353
	354	/**
	355	* Reset the search method.
	356	*/
	357	protected void resetOptions () {
	358	m_ASEval = new GainRatioAttributeEval();
	359	m_Ranking = null;
	360	}
	361
	362	/**
	363	* Ranks attributes using the specified attribute evaluator and then
	364	* searches the ranking using the supplied subset evaluator.
	365	*
	366	* @param ASEval the subset evaluator to guide the search
	367	* @param data the training instances.
	368	* @return an array (not necessarily ordered) of selected attribute indexes
	369	* @throws Exception if the search can't be completed
	370	*/
	371	public int[] search (ASEvaluation ASEval, Instances data)
	372	throws Exception {
	373
	374	double best_merit = -Double.MAX_VALUE;
	375	double temp_merit;
	376	BitSet temp_group, best_group=null;
	377
	378	if (!(ASEval instanceof SubsetEvaluator)) {
	379	throw new Exception(ASEval.getClass().getName()
	380	+ " is not a "
	381	+ "Subset evaluator!");
	382	}
	383
	384	m_SubsetEval = ASEval;
	385	m_Instances = data;
	386	m_numAttribs = m_Instances.numAttributes();
	387
	388	/* if (m_ASEval instanceof AttributeTransformer) {
	389	throw new Exception("Can't use an attribute transformer "
	390	+"with RankSearch");
	391	} */
	392	if (m_ASEval instanceof UnsupervisedAttributeEvaluator \|\|
	393	m_ASEval instanceof UnsupervisedSubsetEvaluator) {
	394	m_hasClass = false;
	395	/* if (!(m_SubsetEval instanceof UnsupervisedSubsetEvaluator)) {
	396	throw new Exception("Must use an unsupervised subset evaluator.");
	397	} */
	398	}
	399	else {
	400	m_hasClass = true;
	401	m_classIndex = m_Instances.classIndex();
	402	}
	403
	404	if (m_ASEval instanceof AttributeEvaluator) {
	405	// generate the attribute ranking first
	406	Ranker ranker = new Ranker();
	407	m_ASEval.buildEvaluator(m_Instances);
	408	if (m_ASEval instanceof AttributeTransformer) {
	409	// get the transformed data a rebuild the subset evaluator
	410	m_Instances = ((AttributeTransformer)m_ASEval).
	411	transformedData(m_Instances);
	412	((ASEvaluation)m_SubsetEval).buildEvaluator(m_Instances);
	413	}
	414	m_Ranking = ranker.search(m_ASEval, m_Instances);
	415	} else {
	416	GreedyStepwise fs = new GreedyStepwise();
	417	double [][]rankres;
	418	fs.setGenerateRanking(true);
	419	((ASEvaluation)m_ASEval).buildEvaluator(m_Instances);
	420	fs.search(m_ASEval, m_Instances);
	421	rankres = fs.rankedAttributes();
	422	m_Ranking = new int[rankres.length];
	423	for (int i=0;i<rankres.length;i++) {
	424	m_Ranking[i] = (int)rankres[i][0];
	425	}
	426	}
	427
	428	// now evaluate the attribute ranking
	429	for (int i=m_startPoint;i<m_Ranking.length;i+=m_add) {
	430	temp_group = new BitSet(m_numAttribs);
	431	for (int j=0;j<=i;j++) {
	432	temp_group.set(m_Ranking[j]);
	433	}
	434	temp_merit = ((SubsetEvaluator)m_SubsetEval).evaluateSubset(temp_group);
	435
	436	if (temp_merit > best_merit) {
	437	best_merit = temp_merit;;
	438	best_group = temp_group;
	439	}
	440	}
	441	m_bestMerit = best_merit;
	442	return attributeList(best_group);
	443	}
	444
	445	/**
	446	* converts a BitSet into a list of attribute indexes
	447	* @param group the BitSet to convert
	448	* @return an array of attribute indexes
	449	**/
	450	private int[] attributeList (BitSet group) {
	451	int count = 0;
	452
	453	// count how many were selected
	454	for (int i = 0; i < m_numAttribs; i++) {
	455	if (group.get(i)) {
	456	count++;
	457	}
	458	}
	459
	460	int[] list = new int[count];
	461	count = 0;
	462
	463	for (int i = 0; i < m_numAttribs; i++) {
	464	if (group.get(i)) {
	465	list[count++] = i;
	466	}
	467	}
	468
	469	return list;
	470	}
	471
	472	/**
	473	* returns a description of the search as a String
	474	* @return a description of the search
	475	*/
	476	public String toString () {
	477	StringBuffer text = new StringBuffer();
	478	text.append("\tRankSearch :\n");
	479	text.append("\tAttribute evaluator : "
	480	+ getAttributeEvaluator().getClass().getName() +" ");
	481	if (m_ASEval instanceof OptionHandler) {
	482	String[] evaluatorOptions = new String[0];
	483	evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();
	484	for (int i=0;i<evaluatorOptions.length;i++) {
	485	text.append(evaluatorOptions[i]+' ');
	486	}
	487	}
	488	text.append("\n");
	489	text.append("\tAttribute ranking : \n");
	490	int rlength = (int)(Math.log(m_Ranking.length) / Math.log(10) + 1);
	491	for (int i=0;i<m_Ranking.length;i++) {
	492	text.append("\t "+Utils.doubleToString((double)(m_Ranking[i]+1),
	493	rlength,0)
	494	+" "+m_Instances.attribute(m_Ranking[i]).name()+'\n');
	495	}
	496	text.append("\tMerit of best subset found : ");
	497	int fieldwidth = 3;
	498	double precision = (m_bestMerit - (int)m_bestMerit);
	499	if (Math.abs(m_bestMerit) > 0) {
	500	fieldwidth = (int)Math.abs((Math.log(Math.abs(m_bestMerit)) / Math.log(10)))+2;
	501	}
	502	if (Math.abs(precision) > 0) {
	503	precision = Math.abs((Math.log(Math.abs(precision)) / Math.log(10)))+3;
	504	} else {
	505	precision = 2;
	506	}
	507
	508	text.append(Utils.doubleToString(Math.abs(m_bestMerit),
	509	fieldwidth+(int)precision,
	510	(int)precision)+"\n");
	511	return text.toString();
	512	}
	513
	514	/**
	515	* Returns the revision string.
	516	*
	517	* @return the revision
	518	*/
	519	public String getRevision() {
	520	return RevisionUtils.extract("$Revision: 4614 $");
	521	}
	522	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: