[4] | 1 | /* |
---|
| 2 | * This program is free software; you can redistribute it and/or modify |
---|
| 3 | * it under the terms of the GNU General Public License as published by |
---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
---|
| 5 | * (at your option) any later version. |
---|
| 6 | * |
---|
| 7 | * This program is distributed in the hope that it will be useful, |
---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
| 10 | * GNU General Public License for more details. |
---|
| 11 | * |
---|
| 12 | * You should have received a copy of the GNU General Public License |
---|
| 13 | * along with this program; if not, write to the Free Software |
---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
| 15 | */ |
---|
| 16 | |
---|
| 17 | /* |
---|
| 18 | * LinearForwardSelection.java |
---|
| 19 | * Copyright (C) 2007 Martin Guetlein |
---|
| 20 | * |
---|
| 21 | */ |
---|
| 22 | |
---|
| 23 | package weka.attributeSelection; |
---|
| 24 | |
---|
| 25 | import weka.core.Instances; |
---|
| 26 | import weka.core.Option; |
---|
| 27 | import weka.core.OptionHandler; |
---|
| 28 | import weka.core.Range; |
---|
| 29 | import weka.core.RevisionUtils; |
---|
| 30 | import weka.core.SelectedTag; |
---|
| 31 | import weka.core.Tag; |
---|
| 32 | import weka.core.Utils; |
---|
| 33 | import weka.core.TechnicalInformation; |
---|
| 34 | import weka.core.TechnicalInformationHandler; |
---|
| 35 | import weka.core.TechnicalInformation.Field; |
---|
| 36 | import weka.core.TechnicalInformation.Type; |
---|
| 37 | import java.util.BitSet; |
---|
| 38 | import java.util.Enumeration; |
---|
| 39 | import java.util.Vector; |
---|
| 40 | |
---|
| 41 | |
---|
| 42 | /** |
---|
| 43 | <!-- globalinfo-start --> |
---|
| 44 | * LinearForwardSelection:<br/> |
---|
| 45 | * <br/> |
---|
| 46 | * Extension of BestFirst. Takes a restricted number of k attributes into account. Fixed-set selects a fixed number k of attributes, whereas k is increased in each step when fixed-width is selected. The search uses either the initial ordering to select the top k attributes, or performs a ranking (with the same evalutator the search uses later on). The search direction can be forward, or floating forward selection (with opitional backward search steps).<br/> |
---|
| 47 | * <br/> |
---|
| 48 | * For more information see:<br/> |
---|
| 49 | * <br/> |
---|
| 50 | * Martin Guetlein, Eibe Frank, Mark Hall, Andreas Karwath: Large Scale Attribute Selection Using Wrappers. In: Proc IEEE Symposium on Computational Intelligence and Data Mining, 332-339, 2009.<br/> |
---|
| 51 | * <br/> |
---|
| 52 | * Martin Guetlein (2006). Large Scale Attribute Selection Using Wrappers. Freiburg, Germany. |
---|
| 53 | * <p/> |
---|
| 54 | <!-- globalinfo-end --> |
---|
| 55 | * |
---|
| 56 | <!-- options-start --> |
---|
| 57 | * Valid options are: <p/> |
---|
| 58 | * |
---|
| 59 | * <pre> -P <start set> |
---|
| 60 | * Specify a starting set of attributes. |
---|
| 61 | * Eg. 1,3,5-7.</pre> |
---|
| 62 | * |
---|
| 63 | * <pre> -D <0 = forward selection | 1 = floating forward selection> |
---|
| 64 | * Forward selection method. (default = 0).</pre> |
---|
| 65 | * |
---|
| 66 | * <pre> -N <num> |
---|
| 67 | * Number of non-improving nodes to |
---|
| 68 | * consider before terminating search.</pre> |
---|
| 69 | * |
---|
| 70 | * <pre> -I |
---|
| 71 | * Perform initial ranking to select the |
---|
| 72 | * top-ranked attributes.</pre> |
---|
| 73 | * |
---|
| 74 | * <pre> -K <num> |
---|
| 75 | * Number of top-ranked attributes that are |
---|
| 76 | * taken into account by the search.</pre> |
---|
| 77 | * |
---|
| 78 | * <pre> -T <0 = fixed-set | 1 = fixed-width> |
---|
| 79 | * Type of Linear Forward Selection (default = 0).</pre> |
---|
| 80 | * |
---|
| 81 | * <pre> -S <num> |
---|
| 82 | * Size of lookup cache for evaluated subsets. |
---|
| 83 | * Expressed as a multiple of the number of |
---|
| 84 | * attributes in the data set. (default = 1)</pre> |
---|
| 85 | * |
---|
| 86 | * <pre> -Z |
---|
| 87 | * verbose on/off</pre> |
---|
| 88 | * |
---|
| 89 | <!-- options-end --> |
---|
| 90 | * |
---|
| 91 | * @author Martin Guetlein (martin.guetlein@gmail.com) |
---|
| 92 | * @version $Revision: 6160 $ |
---|
| 93 | */ |
---|
| 94 | public class LinearForwardSelection |
---|
| 95 | extends ASSearch |
---|
| 96 | implements OptionHandler, |
---|
| 97 | StartSetHandler, |
---|
| 98 | TechnicalInformationHandler { |
---|
| 99 | /** search directions */ |
---|
| 100 | protected static final int SEARCH_METHOD_FORWARD = 0; |
---|
| 101 | protected static final int SEARCH_METHOD_FLOATING = 1; |
---|
| 102 | public static final Tag[] TAGS_SEARCH_METHOD = { |
---|
| 103 | new Tag(SEARCH_METHOD_FORWARD, "Forward selection"), |
---|
| 104 | new Tag(SEARCH_METHOD_FLOATING, "Floating forward selection"), |
---|
| 105 | }; |
---|
| 106 | |
---|
| 107 | /** search directions */ |
---|
| 108 | protected static final int TYPE_FIXED_SET = 0; |
---|
| 109 | protected static final int TYPE_FIXED_WIDTH = 1; |
---|
| 110 | public static final Tag[] TAGS_TYPE = { |
---|
| 111 | new Tag(TYPE_FIXED_SET, "Fixed-set"), |
---|
| 112 | new Tag(TYPE_FIXED_WIDTH, "Fixed-width"), |
---|
| 113 | }; |
---|
| 114 | |
---|
| 115 | // member variables |
---|
| 116 | /** maximum number of stale nodes before terminating search */ |
---|
| 117 | protected int m_maxStale; |
---|
| 118 | |
---|
| 119 | /** 0 == forward selection, 1 == floating forward search */ |
---|
| 120 | protected int m_forwardSearchMethod; |
---|
| 121 | |
---|
| 122 | /** perform initial ranking to select top-ranked attributes */ |
---|
| 123 | protected boolean m_performRanking; |
---|
| 124 | |
---|
| 125 | /** |
---|
| 126 | * number of top-ranked attributes that are taken into account for the |
---|
| 127 | * search |
---|
| 128 | */ |
---|
| 129 | protected int m_numUsedAttributes; |
---|
| 130 | |
---|
| 131 | /** 0 == fixed-set, 1 == fixed-width */ |
---|
| 132 | protected int m_linearSelectionType; |
---|
| 133 | |
---|
| 134 | /** holds an array of starting attributes */ |
---|
| 135 | protected int[] m_starting; |
---|
| 136 | |
---|
| 137 | /** holds the start set for the search as a Range */ |
---|
| 138 | protected Range m_startRange; |
---|
| 139 | |
---|
| 140 | /** does the data have a class */ |
---|
| 141 | protected boolean m_hasClass; |
---|
| 142 | |
---|
| 143 | /** holds the class index */ |
---|
| 144 | protected int m_classIndex; |
---|
| 145 | |
---|
| 146 | /** number of attributes in the data */ |
---|
| 147 | protected int m_numAttribs; |
---|
| 148 | |
---|
| 149 | /** total number of subsets evaluated during a search */ |
---|
| 150 | protected int m_totalEvals; |
---|
| 151 | |
---|
| 152 | /** for debugging */ |
---|
| 153 | protected boolean m_verbose; |
---|
| 154 | |
---|
| 155 | /** holds the merit of the best subset found */ |
---|
| 156 | protected double m_bestMerit; |
---|
| 157 | |
---|
| 158 | /** holds the maximum size of the lookup cache for evaluated subsets */ |
---|
| 159 | protected int m_cacheSize; |
---|
| 160 | |
---|
| 161 | /** |
---|
| 162 | * Constructor |
---|
| 163 | */ |
---|
| 164 | public LinearForwardSelection() { |
---|
| 165 | resetOptions(); |
---|
| 166 | } |
---|
| 167 | |
---|
| 168 | /** |
---|
| 169 | * Returns a string describing this search method |
---|
| 170 | * |
---|
| 171 | * @return a description of the search method suitable for displaying in the |
---|
| 172 | * explorer/experimenter gui |
---|
| 173 | */ |
---|
| 174 | public String globalInfo() { |
---|
| 175 | return "LinearForwardSelection:\n\n" + |
---|
| 176 | "Extension of BestFirst. Takes a restricted number of k attributes " + |
---|
| 177 | "into account. Fixed-set selects a fixed number k of attributes, " + |
---|
| 178 | "whereas k is increased in each step when fixed-width is selected. " + |
---|
| 179 | "The search uses either the initial ordering to select the " + |
---|
| 180 | "top k attributes, or performs a ranking (with the same evalutator the " + |
---|
| 181 | "search uses later on). The search direction can be forward, " + |
---|
| 182 | "or floating forward selection (with opitional backward search steps).\n\n" |
---|
| 183 | + "For more information see:\n\n" |
---|
| 184 | + getTechnicalInformation().toString(); |
---|
| 185 | } |
---|
| 186 | |
---|
| 187 | /** |
---|
| 188 | * Returns an instance of a TechnicalInformation object, containing |
---|
| 189 | * detailed information about the technical background of this class, |
---|
| 190 | * e.g., paper reference or book this class is based on. |
---|
| 191 | * |
---|
| 192 | * @return the technical information about this class |
---|
| 193 | */ |
---|
| 194 | public TechnicalInformation getTechnicalInformation() { |
---|
| 195 | TechnicalInformation result; |
---|
| 196 | TechnicalInformation additional; |
---|
| 197 | |
---|
| 198 | result = new TechnicalInformation(Type.INPROCEEDINGS); |
---|
| 199 | result.setValue(Field.AUTHOR, "Martin Guetlein and Eibe Frank and Mark Hall and Andreas Karwath"); |
---|
| 200 | result.setValue(Field.YEAR, "2009"); |
---|
| 201 | result.setValue(Field.TITLE, "Large Scale Attribute Selection Using Wrappers"); |
---|
| 202 | result.setValue(Field.BOOKTITLE, "Proc IEEE Symposium on Computational Intelligence and Data Mining"); |
---|
| 203 | result.setValue(Field.PAGES, "332-339"); |
---|
| 204 | result.setValue(Field.PUBLISHER, "IEEE"); |
---|
| 205 | |
---|
| 206 | additional = result.add(Type.MASTERSTHESIS); |
---|
| 207 | additional.setValue(Field.AUTHOR, "Martin Guetlein"); |
---|
| 208 | additional.setValue(Field.YEAR, "2006"); |
---|
| 209 | additional.setValue(Field.TITLE, "Large Scale Attribute Selection Using Wrappers"); |
---|
| 210 | additional.setValue(Field.SCHOOL, "Albert-Ludwigs-Universitaet"); |
---|
| 211 | additional.setValue(Field.ADDRESS, "Freiburg, Germany"); |
---|
| 212 | |
---|
| 213 | return result; |
---|
| 214 | } |
---|
| 215 | |
---|
| 216 | /** |
---|
| 217 | * Returns an enumeration describing the available options. |
---|
| 218 | * |
---|
| 219 | * @return an enumeration of all the available options. |
---|
| 220 | * |
---|
| 221 | */ |
---|
| 222 | public Enumeration listOptions() { |
---|
| 223 | Vector newVector = new Vector(8); |
---|
| 224 | |
---|
| 225 | newVector.addElement(new Option("\tSpecify a starting set of attributes." + |
---|
| 226 | "\n\tEg. 1,3,5-7.", "P", 1, "-P <start set>")); |
---|
| 227 | newVector.addElement(new Option( |
---|
| 228 | "\tForward selection method. (default = 0).", "D", 1, |
---|
| 229 | "-D <0 = forward selection | 1 = floating forward selection>")); |
---|
| 230 | newVector.addElement(new Option("\tNumber of non-improving nodes to" + |
---|
| 231 | "\n\tconsider before terminating search.", "N", 1, "-N <num>")); |
---|
| 232 | newVector.addElement(new Option("\tPerform initial ranking to select the" + |
---|
| 233 | "\n\ttop-ranked attributes.", "I", 0, "-I")); |
---|
| 234 | newVector.addElement(new Option( |
---|
| 235 | "\tNumber of top-ranked attributes that are " + |
---|
| 236 | "\n\ttaken into account by the search.", "K", 1, "-K <num>")); |
---|
| 237 | newVector.addElement(new Option( |
---|
| 238 | "\tType of Linear Forward Selection (default = 0).", "T", 1, |
---|
| 239 | "-T <0 = fixed-set | 1 = fixed-width>")); |
---|
| 240 | newVector.addElement(new Option( |
---|
| 241 | "\tSize of lookup cache for evaluated subsets." + |
---|
| 242 | "\n\tExpressed as a multiple of the number of" + |
---|
| 243 | "\n\tattributes in the data set. (default = 1)", "S", 1, "-S <num>")); |
---|
| 244 | newVector.addElement(new Option("\tverbose on/off", "Z", 0, "-Z")); |
---|
| 245 | |
---|
| 246 | return newVector.elements(); |
---|
| 247 | } |
---|
| 248 | |
---|
| 249 | /** |
---|
| 250 | * Parses a given list of options. |
---|
| 251 | * |
---|
| 252 | * Valid options are: |
---|
| 253 | * <p> |
---|
| 254 | * |
---|
| 255 | * -P <start set> <br> |
---|
| 256 | * Specify a starting set of attributes. Eg 1,4,7-9. |
---|
| 257 | * <p> |
---|
| 258 | * |
---|
| 259 | * -D <0 = forward selection | 1 = floating forward selection> <br> |
---|
| 260 | * Forward selection method of the search. (default = 0). |
---|
| 261 | * <p> |
---|
| 262 | * |
---|
| 263 | * -N <num> <br> |
---|
| 264 | * Number of non improving nodes to consider before terminating search. |
---|
| 265 | * (default = 5). |
---|
| 266 | * <p> |
---|
| 267 | * |
---|
| 268 | * -I <br> |
---|
| 269 | * Perform initial ranking to select top-ranked attributes. |
---|
| 270 | * <p> |
---|
| 271 | * |
---|
| 272 | * -K <num> <br> |
---|
| 273 | * Number of top-ranked attributes that are taken into account. |
---|
| 274 | * <p> |
---|
| 275 | * |
---|
| 276 | * -T <0 = fixed-set | 1 = fixed-width> <br> |
---|
| 277 | * Typ of Linear Forward Selection (default = 0). |
---|
| 278 | * <p> |
---|
| 279 | * |
---|
| 280 | * -S <num> <br> |
---|
| 281 | * Size of lookup cache for evaluated subsets. Expressed as a multiple of |
---|
| 282 | * the number of attributes in the data set. (default = 1). |
---|
| 283 | * <p> |
---|
| 284 | * |
---|
| 285 | * -Z <br> |
---|
| 286 | * verbose on/off. |
---|
| 287 | * <p> |
---|
| 288 | * |
---|
| 289 | * @param options |
---|
| 290 | * the list of options as an array of strings |
---|
| 291 | * @exception Exception |
---|
| 292 | * if an option is not supported |
---|
| 293 | * |
---|
| 294 | */ |
---|
| 295 | public void setOptions(String[] options) throws Exception { |
---|
| 296 | String optionString; |
---|
| 297 | resetOptions(); |
---|
| 298 | |
---|
| 299 | optionString = Utils.getOption('P', options); |
---|
| 300 | |
---|
| 301 | if (optionString.length() != 0) { |
---|
| 302 | setStartSet(optionString); |
---|
| 303 | } |
---|
| 304 | |
---|
| 305 | optionString = Utils.getOption('D', options); |
---|
| 306 | |
---|
| 307 | if (optionString.length() != 0) { |
---|
| 308 | setForwardSelectionMethod(new SelectedTag(Integer.parseInt(optionString), |
---|
| 309 | TAGS_SEARCH_METHOD)); |
---|
| 310 | } else { |
---|
| 311 | setForwardSelectionMethod(new SelectedTag(SEARCH_METHOD_FORWARD, |
---|
| 312 | TAGS_SEARCH_METHOD)); |
---|
| 313 | } |
---|
| 314 | |
---|
| 315 | optionString = Utils.getOption('N', options); |
---|
| 316 | |
---|
| 317 | if (optionString.length() != 0) { |
---|
| 318 | setSearchTermination(Integer.parseInt(optionString)); |
---|
| 319 | } |
---|
| 320 | |
---|
| 321 | setPerformRanking(Utils.getFlag('I', options)); |
---|
| 322 | |
---|
| 323 | optionString = Utils.getOption('K', options); |
---|
| 324 | |
---|
| 325 | if (optionString.length() != 0) { |
---|
| 326 | setNumUsedAttributes(Integer.parseInt(optionString)); |
---|
| 327 | } |
---|
| 328 | |
---|
| 329 | optionString = Utils.getOption('T', options); |
---|
| 330 | |
---|
| 331 | if (optionString.length() != 0) { |
---|
| 332 | setType(new SelectedTag(Integer.parseInt(optionString), TAGS_TYPE)); |
---|
| 333 | } else { |
---|
| 334 | setType(new SelectedTag(TYPE_FIXED_SET, TAGS_TYPE)); |
---|
| 335 | } |
---|
| 336 | |
---|
| 337 | optionString = Utils.getOption('S', options); |
---|
| 338 | |
---|
| 339 | if (optionString.length() != 0) { |
---|
| 340 | setLookupCacheSize(Integer.parseInt(optionString)); |
---|
| 341 | } |
---|
| 342 | |
---|
| 343 | m_verbose = Utils.getFlag('Z', options); |
---|
| 344 | } |
---|
| 345 | |
---|
| 346 | /** |
---|
| 347 | * Set the maximum size of the evaluated subset cache (hashtable). This is |
---|
| 348 | * expressed as a multiplier for the number of attributes in the data set. |
---|
| 349 | * (default = 1). |
---|
| 350 | * |
---|
| 351 | * @param size |
---|
| 352 | * the maximum size of the hashtable |
---|
| 353 | */ |
---|
| 354 | public void setLookupCacheSize(int size) { |
---|
| 355 | if (size >= 0) { |
---|
| 356 | m_cacheSize = size; |
---|
| 357 | } |
---|
| 358 | } |
---|
| 359 | |
---|
| 360 | /** |
---|
| 361 | * Return the maximum size of the evaluated subset cache (expressed as a |
---|
| 362 | * multiplier for the number of attributes in a data set. |
---|
| 363 | * |
---|
| 364 | * @return the maximum size of the hashtable. |
---|
| 365 | */ |
---|
| 366 | public int getLookupCacheSize() { |
---|
| 367 | return m_cacheSize; |
---|
| 368 | } |
---|
| 369 | |
---|
| 370 | /** |
---|
| 371 | * Returns the tip text for this property |
---|
| 372 | * |
---|
| 373 | * @return tip text for this property suitable for displaying in the |
---|
| 374 | * explorer/experimenter gui |
---|
| 375 | */ |
---|
| 376 | public String lookupCacheSizeTipText() { |
---|
| 377 | return "Set the maximum size of the lookup cache of evaluated subsets. This is " + |
---|
| 378 | "expressed as a multiplier of the number of attributes in the data set. " + |
---|
| 379 | "(default = 1)."; |
---|
| 380 | } |
---|
| 381 | |
---|
| 382 | /** |
---|
| 383 | * Returns the tip text for this property |
---|
| 384 | * |
---|
| 385 | * @return tip text for this property suitable for displaying in the |
---|
| 386 | * explorer/experimenter gui |
---|
| 387 | */ |
---|
| 388 | public String startSetTipText() { |
---|
| 389 | return "Set the start point for the search. This is specified as a comma " + |
---|
| 390 | "seperated list off attribute indexes starting at 1. It can include " + |
---|
| 391 | "ranges. Eg. 1,2,5-9,17."; |
---|
| 392 | } |
---|
| 393 | |
---|
| 394 | /** |
---|
| 395 | * Sets a starting set of attributes for the search. It is the search |
---|
| 396 | * method's responsibility to report this start set (if any) in its |
---|
| 397 | * toString() method. |
---|
| 398 | * |
---|
| 399 | * @param startSet |
---|
| 400 | * a string containing a list of attributes (and or ranges), eg. |
---|
| 401 | * 1,2,6,10-15. |
---|
| 402 | * @exception Exception |
---|
| 403 | * if start set can't be set. |
---|
| 404 | */ |
---|
| 405 | public void setStartSet(String startSet) throws Exception { |
---|
| 406 | m_startRange.setRanges(startSet); |
---|
| 407 | } |
---|
| 408 | |
---|
| 409 | /** |
---|
| 410 | * Returns a list of attributes (and or attribute ranges) as a String |
---|
| 411 | * |
---|
| 412 | * @return a list of attributes (and or attribute ranges) |
---|
| 413 | */ |
---|
| 414 | public String getStartSet() { |
---|
| 415 | return m_startRange.getRanges(); |
---|
| 416 | } |
---|
| 417 | |
---|
| 418 | /** |
---|
| 419 | * Returns the tip text for this property |
---|
| 420 | * |
---|
| 421 | * @return tip text for this property suitable for displaying in the |
---|
| 422 | * explorer/experimenter gui |
---|
| 423 | */ |
---|
| 424 | public String searchTerminationTipText() { |
---|
| 425 | return "Set the amount of backtracking. Specify the number of "; |
---|
| 426 | } |
---|
| 427 | |
---|
| 428 | /** |
---|
| 429 | * Set the numnber of non-improving nodes to consider before terminating |
---|
| 430 | * search. |
---|
| 431 | * |
---|
| 432 | * @param t |
---|
| 433 | * the number of non-improving nodes |
---|
| 434 | * @exception Exception |
---|
| 435 | * if t is less than 1 |
---|
| 436 | */ |
---|
| 437 | public void setSearchTermination(int t) throws Exception { |
---|
| 438 | if (t < 1) { |
---|
| 439 | throw new Exception("Value of -N must be > 0."); |
---|
| 440 | } |
---|
| 441 | |
---|
| 442 | m_maxStale = t; |
---|
| 443 | } |
---|
| 444 | |
---|
| 445 | /** |
---|
| 446 | * Get the termination criterion (number of non-improving nodes). |
---|
| 447 | * |
---|
| 448 | * @return the number of non-improving nodes |
---|
| 449 | */ |
---|
| 450 | public int getSearchTermination() { |
---|
| 451 | return m_maxStale; |
---|
| 452 | } |
---|
| 453 | |
---|
| 454 | /** |
---|
| 455 | * Returns the tip text for this property |
---|
| 456 | * |
---|
| 457 | * @return tip text for this property suitable for displaying in the |
---|
| 458 | * explorer/experimenter gui |
---|
| 459 | */ |
---|
| 460 | public String performRankingTipText() { |
---|
| 461 | return "Perform initial ranking to select top-ranked attributes."; |
---|
| 462 | } |
---|
| 463 | |
---|
| 464 | /** |
---|
| 465 | * Perform initial ranking to select top-ranked attributes. |
---|
| 466 | * |
---|
| 467 | * @param b |
---|
| 468 | * true if initial ranking should be performed |
---|
| 469 | */ |
---|
| 470 | public void setPerformRanking(boolean b) { |
---|
| 471 | m_performRanking = b; |
---|
| 472 | } |
---|
| 473 | |
---|
| 474 | /** |
---|
| 475 | * Get boolean if initial ranking should be performed to select the |
---|
| 476 | * top-ranked attributes |
---|
| 477 | * |
---|
| 478 | * @return true if initial ranking should be performed |
---|
| 479 | */ |
---|
| 480 | public boolean getPerformRanking() { |
---|
| 481 | return m_performRanking; |
---|
| 482 | } |
---|
| 483 | |
---|
| 484 | /** |
---|
| 485 | * Returns the tip text for this property |
---|
| 486 | * |
---|
| 487 | * @return tip text for this property suitable for displaying in the |
---|
| 488 | * explorer/experimenter gui |
---|
| 489 | */ |
---|
| 490 | public String numUsedAttributesTipText() { |
---|
| 491 | return "Set the amount of top-ranked attributes that are taken into account by the search process."; |
---|
| 492 | } |
---|
| 493 | |
---|
| 494 | /** |
---|
| 495 | * Set the number of top-ranked attributes that taken into account by the |
---|
| 496 | * search process. |
---|
| 497 | * |
---|
| 498 | * @param k |
---|
| 499 | * the number of attributes |
---|
| 500 | * @exception Exception |
---|
| 501 | * if k is less than 2 |
---|
| 502 | */ |
---|
| 503 | public void setNumUsedAttributes(int k) throws Exception { |
---|
| 504 | if (k < 2) { |
---|
| 505 | throw new Exception("Value of -K must be >= 2."); |
---|
| 506 | } |
---|
| 507 | |
---|
| 508 | m_numUsedAttributes = k; |
---|
| 509 | } |
---|
| 510 | |
---|
| 511 | /** |
---|
| 512 | * Get the number of top-ranked attributes that taken into account by the |
---|
| 513 | * search process. |
---|
| 514 | * |
---|
| 515 | * @return the number of top-ranked attributes that taken into account |
---|
| 516 | */ |
---|
| 517 | public int getNumUsedAttributes() { |
---|
| 518 | return m_numUsedAttributes; |
---|
| 519 | } |
---|
| 520 | |
---|
| 521 | /** |
---|
| 522 | * Returns the tip text for this property |
---|
| 523 | * |
---|
| 524 | * @return tip text for this property suitable for displaying in the |
---|
| 525 | * explorer/experimenter gui |
---|
| 526 | */ |
---|
| 527 | public String forwardSelectionMethodTipText() { |
---|
| 528 | return "Set the direction of the search."; |
---|
| 529 | } |
---|
| 530 | |
---|
| 531 | /** |
---|
| 532 | * Set the search direction |
---|
| 533 | * |
---|
| 534 | * @param d |
---|
| 535 | * the direction of the search |
---|
| 536 | */ |
---|
| 537 | public void setForwardSelectionMethod(SelectedTag d) { |
---|
| 538 | if (d.getTags() == TAGS_SEARCH_METHOD) { |
---|
| 539 | m_forwardSearchMethod = d.getSelectedTag().getID(); |
---|
| 540 | } |
---|
| 541 | } |
---|
| 542 | |
---|
| 543 | /** |
---|
| 544 | * Get the search direction |
---|
| 545 | * |
---|
| 546 | * @return the direction of the search |
---|
| 547 | */ |
---|
| 548 | public SelectedTag getForwardSelectionMethod() { |
---|
| 549 | return new SelectedTag(m_forwardSearchMethod, TAGS_SEARCH_METHOD); |
---|
| 550 | } |
---|
| 551 | |
---|
| 552 | /** |
---|
| 553 | * Returns the tip text for this property |
---|
| 554 | * |
---|
| 555 | * @return tip text for this property suitable for displaying in the |
---|
| 556 | * explorer/experimenter gui |
---|
| 557 | */ |
---|
| 558 | public String typeTipText() { |
---|
| 559 | return "Set the type of the search."; |
---|
| 560 | } |
---|
| 561 | |
---|
| 562 | /** |
---|
| 563 | * Set the type |
---|
| 564 | * |
---|
| 565 | * @param t |
---|
| 566 | * the Linear Forward Selection type |
---|
| 567 | */ |
---|
| 568 | public void setType(SelectedTag t) { |
---|
| 569 | if (t.getTags() == TAGS_TYPE) { |
---|
| 570 | m_linearSelectionType = t.getSelectedTag().getID(); |
---|
| 571 | } |
---|
| 572 | } |
---|
| 573 | |
---|
| 574 | /** |
---|
| 575 | * Get the type |
---|
| 576 | * |
---|
| 577 | * @return the Linear Forward Selection type |
---|
| 578 | */ |
---|
| 579 | public SelectedTag getType() { |
---|
| 580 | return new SelectedTag(m_linearSelectionType, TAGS_TYPE); |
---|
| 581 | } |
---|
| 582 | |
---|
| 583 | /** |
---|
| 584 | * Returns the tip text for this property |
---|
| 585 | * |
---|
| 586 | * @return tip text for this property suitable for displaying in the |
---|
| 587 | * explorer/experimenter gui |
---|
| 588 | */ |
---|
| 589 | public String verboseTipText() { |
---|
| 590 | return "Turn on verbose output for monitoring the search's progress."; |
---|
| 591 | } |
---|
| 592 | |
---|
| 593 | /** |
---|
| 594 | * Set whether verbose output should be generated. |
---|
| 595 | * |
---|
| 596 | * @param b |
---|
| 597 | * true if output is to be verbose. |
---|
| 598 | */ |
---|
| 599 | public void setVerbose(boolean b) { |
---|
| 600 | m_verbose = b; |
---|
| 601 | } |
---|
| 602 | |
---|
| 603 | /** |
---|
| 604 | * Get whether output is to be verbose |
---|
| 605 | * |
---|
| 606 | * @return true if output will be verbose |
---|
| 607 | */ |
---|
| 608 | public boolean getVerbose() { |
---|
| 609 | return m_verbose; |
---|
| 610 | } |
---|
| 611 | |
---|
| 612 | /** |
---|
| 613 | * Gets the current settings of LinearForwardSelection. |
---|
| 614 | * |
---|
| 615 | * @return an array of strings suitable for passing to setOptions() |
---|
| 616 | */ |
---|
| 617 | public String[] getOptions() { |
---|
| 618 | String[] options = new String[13]; |
---|
| 619 | int current = 0; |
---|
| 620 | |
---|
| 621 | if (!(getStartSet().equals(""))) { |
---|
| 622 | options[current++] = "-P"; |
---|
| 623 | options[current++] = "" + startSetToString(); |
---|
| 624 | } |
---|
| 625 | |
---|
| 626 | options[current++] = "-D"; |
---|
| 627 | options[current++] = "" + m_forwardSearchMethod; |
---|
| 628 | options[current++] = "-N"; |
---|
| 629 | options[current++] = "" + m_maxStale; |
---|
| 630 | |
---|
| 631 | if (m_performRanking) { |
---|
| 632 | options[current++] = "-I"; |
---|
| 633 | } |
---|
| 634 | |
---|
| 635 | options[current++] = "-K"; |
---|
| 636 | options[current++] = "" + m_numUsedAttributes; |
---|
| 637 | options[current++] = "-T"; |
---|
| 638 | options[current++] = "" + m_linearSelectionType; |
---|
| 639 | |
---|
| 640 | if (m_verbose) |
---|
| 641 | options[current++] = "-Z"; |
---|
| 642 | |
---|
| 643 | while (current < options.length) { |
---|
| 644 | options[current++] = ""; |
---|
| 645 | } |
---|
| 646 | |
---|
| 647 | return options; |
---|
| 648 | } |
---|
| 649 | |
---|
| 650 | /** |
---|
| 651 | * converts the array of starting attributes to a string. This is used by |
---|
| 652 | * getOptions to return the actual attributes specified as the starting set. |
---|
| 653 | * This is better than using m_startRanges.getRanges() as the same start set |
---|
| 654 | * can be specified in different ways from the command line---eg 1,2,3 == |
---|
| 655 | * 1-3. This is to ensure that stuff that is stored in a database is |
---|
| 656 | * comparable. |
---|
| 657 | * |
---|
| 658 | * @return a comma seperated list of individual attribute numbers as a |
---|
| 659 | * String |
---|
| 660 | */ |
---|
| 661 | private String startSetToString() { |
---|
| 662 | StringBuffer FString = new StringBuffer(); |
---|
| 663 | boolean didPrint; |
---|
| 664 | |
---|
| 665 | if (m_starting == null) { |
---|
| 666 | return getStartSet(); |
---|
| 667 | } |
---|
| 668 | |
---|
| 669 | for (int i = 0; i < m_starting.length; i++) { |
---|
| 670 | didPrint = false; |
---|
| 671 | |
---|
| 672 | if ((m_hasClass == false) || |
---|
| 673 | ((m_hasClass == true) && (i != m_classIndex))) { |
---|
| 674 | FString.append((m_starting[i] + 1)); |
---|
| 675 | didPrint = true; |
---|
| 676 | } |
---|
| 677 | |
---|
| 678 | if (i == (m_starting.length - 1)) { |
---|
| 679 | FString.append(""); |
---|
| 680 | } else { |
---|
| 681 | if (didPrint) { |
---|
| 682 | FString.append(","); |
---|
| 683 | } |
---|
| 684 | } |
---|
| 685 | } |
---|
| 686 | |
---|
| 687 | return FString.toString(); |
---|
| 688 | } |
---|
| 689 | |
---|
| 690 | /** |
---|
| 691 | * returns a description of the search as a String |
---|
| 692 | * |
---|
| 693 | * @return a description of the search |
---|
| 694 | */ |
---|
| 695 | public String toString() { |
---|
| 696 | StringBuffer LFSString = new StringBuffer(); |
---|
| 697 | |
---|
| 698 | LFSString.append("\tLinear Forward Selection.\n\tStart set: "); |
---|
| 699 | |
---|
| 700 | if (m_starting == null) { |
---|
| 701 | LFSString.append("no attributes\n"); |
---|
| 702 | } else { |
---|
| 703 | LFSString.append(startSetToString() + "\n"); |
---|
| 704 | } |
---|
| 705 | |
---|
| 706 | LFSString.append("\tForward selection method: "); |
---|
| 707 | |
---|
| 708 | if (m_forwardSearchMethod == SEARCH_METHOD_FORWARD) { |
---|
| 709 | LFSString.append("forward selection\n"); |
---|
| 710 | } else { |
---|
| 711 | LFSString.append("floating forward selection\n"); |
---|
| 712 | } |
---|
| 713 | |
---|
| 714 | LFSString.append("\tStale search after " + m_maxStale + |
---|
| 715 | " node expansions\n"); |
---|
| 716 | |
---|
| 717 | LFSString.append("\tLinear Forward Selection Type: "); |
---|
| 718 | |
---|
| 719 | if (m_linearSelectionType == TYPE_FIXED_SET) { |
---|
| 720 | LFSString.append("fixed-set\n"); |
---|
| 721 | } else { |
---|
| 722 | LFSString.append("fixed-width\n"); |
---|
| 723 | } |
---|
| 724 | |
---|
| 725 | LFSString.append("\tNumber of top-ranked attributes that are used: " + |
---|
| 726 | m_numUsedAttributes + "\n"); |
---|
| 727 | |
---|
| 728 | LFSString.append("\tTotal number of subsets evaluated: " + m_totalEvals + |
---|
| 729 | "\n"); |
---|
| 730 | LFSString.append("\tMerit of best subset found: " + |
---|
| 731 | Utils.doubleToString(Math.abs(m_bestMerit), 8, 3) + "\n"); |
---|
| 732 | |
---|
| 733 | return LFSString.toString(); |
---|
| 734 | } |
---|
| 735 | |
---|
| 736 | /** |
---|
| 737 | * Searches the attribute subset space by linear forward selection |
---|
| 738 | * |
---|
| 739 | * @param ASEval |
---|
| 740 | * the attribute evaluator to guide the search |
---|
| 741 | * @param data |
---|
| 742 | * the training instances. |
---|
| 743 | * @return an array (not necessarily ordered) of selected attribute indexes |
---|
| 744 | * @exception Exception |
---|
| 745 | * if the search can't be completed |
---|
| 746 | */ |
---|
| 747 | public int[] search(ASEvaluation ASEval, Instances data) |
---|
| 748 | throws Exception { |
---|
| 749 | m_totalEvals = 0; |
---|
| 750 | |
---|
| 751 | if (!(ASEval instanceof SubsetEvaluator)) { |
---|
| 752 | throw new Exception(ASEval.getClass().getName() + " is not a " + |
---|
| 753 | "Subset evaluator!"); |
---|
| 754 | } |
---|
| 755 | |
---|
| 756 | if (ASEval instanceof UnsupervisedSubsetEvaluator) { |
---|
| 757 | m_hasClass = false; |
---|
| 758 | } else { |
---|
| 759 | m_hasClass = true; |
---|
| 760 | m_classIndex = data.classIndex(); |
---|
| 761 | } |
---|
| 762 | |
---|
| 763 | ((ASEvaluation) ASEval).buildEvaluator(data); |
---|
| 764 | |
---|
| 765 | m_numAttribs = data.numAttributes(); |
---|
| 766 | |
---|
| 767 | if (m_numUsedAttributes > m_numAttribs) { |
---|
| 768 | System.out.println( |
---|
| 769 | "Decreasing number of top-ranked attributes to total number of attributes: " + |
---|
| 770 | data.numAttributes()); |
---|
| 771 | m_numUsedAttributes = m_numAttribs; |
---|
| 772 | } |
---|
| 773 | |
---|
| 774 | BitSet start_group = new BitSet(m_numAttribs); |
---|
| 775 | m_startRange.setUpper(m_numAttribs - 1); |
---|
| 776 | |
---|
| 777 | if (!(getStartSet().equals(""))) { |
---|
| 778 | m_starting = m_startRange.getSelection(); |
---|
| 779 | } |
---|
| 780 | |
---|
| 781 | // If a starting subset has been supplied, then initialise the bitset |
---|
| 782 | if (m_starting != null) { |
---|
| 783 | for (int i = 0; i < m_starting.length; i++) { |
---|
| 784 | if ((m_starting[i]) != m_classIndex) { |
---|
| 785 | start_group.set(m_starting[i]); |
---|
| 786 | } |
---|
| 787 | } |
---|
| 788 | } |
---|
| 789 | |
---|
| 790 | LFSMethods LFS = new LFSMethods(); |
---|
| 791 | |
---|
| 792 | int[] ranking; |
---|
| 793 | |
---|
| 794 | if (m_performRanking) { |
---|
| 795 | ranking = LFS.rankAttributes(data, (SubsetEvaluator) ASEval, m_verbose); |
---|
| 796 | } else { |
---|
| 797 | ranking = new int[m_numAttribs]; |
---|
| 798 | |
---|
| 799 | for (int i = 0; i < ranking.length; i++) { |
---|
| 800 | ranking[i] = i; |
---|
| 801 | } |
---|
| 802 | } |
---|
| 803 | |
---|
| 804 | if (m_forwardSearchMethod == SEARCH_METHOD_FORWARD) { |
---|
| 805 | LFS.forwardSearch(m_cacheSize, start_group, ranking, m_numUsedAttributes, |
---|
| 806 | m_linearSelectionType == TYPE_FIXED_WIDTH, m_maxStale, -1, data, |
---|
| 807 | (SubsetEvaluator) ASEval, m_verbose); |
---|
| 808 | } else if (m_forwardSearchMethod == SEARCH_METHOD_FLOATING) { |
---|
| 809 | LFS.floatingForwardSearch(m_cacheSize, start_group, ranking, |
---|
| 810 | m_numUsedAttributes, m_linearSelectionType == TYPE_FIXED_WIDTH, |
---|
| 811 | m_maxStale, data, (SubsetEvaluator) ASEval, m_verbose); |
---|
| 812 | } |
---|
| 813 | |
---|
| 814 | m_totalEvals = LFS.getNumEvalsTotal(); |
---|
| 815 | m_bestMerit = LFS.getBestMerit(); |
---|
| 816 | |
---|
| 817 | return attributeList(LFS.getBestGroup()); |
---|
| 818 | } |
---|
| 819 | |
---|
| 820 | /** |
---|
| 821 | * Reset options to default values |
---|
| 822 | */ |
---|
| 823 | protected void resetOptions() { |
---|
| 824 | m_maxStale = 5; |
---|
| 825 | m_forwardSearchMethod = SEARCH_METHOD_FORWARD; |
---|
| 826 | m_performRanking = true; |
---|
| 827 | m_numUsedAttributes = 50; |
---|
| 828 | m_linearSelectionType = TYPE_FIXED_SET; |
---|
| 829 | m_starting = null; |
---|
| 830 | m_startRange = new Range(); |
---|
| 831 | m_classIndex = -1; |
---|
| 832 | m_totalEvals = 0; |
---|
| 833 | m_cacheSize = 1; |
---|
| 834 | m_verbose = false; |
---|
| 835 | } |
---|
| 836 | |
---|
| 837 | /** |
---|
| 838 | * converts a BitSet into a list of attribute indexes |
---|
| 839 | * |
---|
| 840 | * @param group |
---|
| 841 | * the BitSet to convert |
---|
| 842 | * @return an array of attribute indexes |
---|
| 843 | */ |
---|
| 844 | protected int[] attributeList(BitSet group) { |
---|
| 845 | int count = 0; |
---|
| 846 | |
---|
| 847 | // count how many were selected |
---|
| 848 | for (int i = 0; i < m_numAttribs; i++) { |
---|
| 849 | if (group.get(i)) { |
---|
| 850 | count++; |
---|
| 851 | } |
---|
| 852 | } |
---|
| 853 | |
---|
| 854 | int[] list = new int[count]; |
---|
| 855 | count = 0; |
---|
| 856 | |
---|
| 857 | for (int i = 0; i < m_numAttribs; i++) { |
---|
| 858 | if (group.get(i)) { |
---|
| 859 | list[count++] = i; |
---|
| 860 | } |
---|
| 861 | } |
---|
| 862 | |
---|
| 863 | return list; |
---|
| 864 | } |
---|
| 865 | |
---|
| 866 | /** |
---|
| 867 | * Returns the revision string. |
---|
| 868 | * |
---|
| 869 | * @return the revision |
---|
| 870 | */ |
---|
| 871 | public String getRevision() { |
---|
| 872 | return RevisionUtils.extract("$Revision: 6160 $"); |
---|
| 873 | } |
---|
| 874 | } |
---|
| 875 | |
---|