source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/instance/SubsetByExpression.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 11.4 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * SubsetByExpression.java
19 * Copyright (C) 2008 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.filters.unsupervised.instance;
23
24import weka.core.Capabilities;
25import weka.core.Instances;
26import weka.core.Option;
27import weka.core.RevisionUtils;
28import weka.core.Utils;
29import weka.core.Capabilities.Capability;
30import weka.filters.SimpleBatchFilter;
31import weka.filters.unsupervised.instance.subsetbyexpression.Parser;
32
33import java.util.Enumeration;
34import java.util.Vector;
35
36/**
37 <!-- globalinfo-start -->
38 * Filters instances according to a user-specified expression.<br/>
39 * <br/>
40 * Grammar:<br/>
41 * <br/>
42 * boolexpr_list ::= boolexpr_list boolexpr_part | boolexpr_part;<br/>
43 * <br/>
44 * boolexpr_part ::= boolexpr:e {: parser.setResult(e); :} ;<br/>
45 * <br/>
46 * boolexpr ::=    BOOLEAN <br/>
47 *               | true<br/>
48 *               | false<br/>
49 *               | expr &lt; expr<br/>
50 *               | expr &lt;= expr<br/>
51 *               | expr &gt; expr<br/>
52 *               | expr &gt;= expr<br/>
53 *               | expr = expr<br/>
54 *               | ( boolexpr )<br/>
55 *               | not boolexpr<br/>
56 *               | boolexpr and boolexpr<br/>
57 *               | boolexpr or boolexpr<br/>
58 *               | ATTRIBUTE is STRING<br/>
59 *               ;<br/>
60 * <br/>
61 * expr      ::=   NUMBER<br/>
62 *               | ATTRIBUTE<br/>
63 *               | ( expr )<br/>
64 *               | opexpr<br/>
65 *               | funcexpr<br/>
66 *               ;<br/>
67 * <br/>
68 * opexpr    ::=   expr + expr<br/>
69 *               | expr - expr<br/>
70 *               | expr * expr<br/>
71 *               | expr / expr<br/>
72 *               ;<br/>
73 * <br/>
74 * funcexpr ::=    abs ( expr )<br/>
75 *               | sqrt ( expr )<br/>
76 *               | log ( expr )<br/>
77 *               | exp ( expr )<br/>
78 *               | sin ( expr )<br/>
79 *               | cos ( expr )<br/>
80 *               | tan ( expr )<br/>
81 *               | rint ( expr )<br/>
82 *               | floor ( expr )<br/>
83 *               | pow ( expr for base , expr for exponent )<br/>
84 *               | ceil ( expr )<br/>
85 *               ;<br/>
86 * <br/>
87 * Notes:<br/>
88 * - NUMBER<br/>
89 *   any integer or floating point number <br/>
90 *   (but not in scientific notation!)<br/>
91 * - STRING<br/>
92 *   any string surrounded by single quotes; <br/>
93 *   the string may not contain a single quote though.<br/>
94 * - ATTRIBUTE<br/>
95 *   the following placeholders are recognized for <br/>
96 *   attribute values:<br/>
97 *   - CLASS for the class value in case a class attribute is set.<br/>
98 *   - ATTxyz with xyz a number from 1 to # of attributes in the<br/>
99 *     dataset, representing the value of indexed attribute.<br/>
100 * <br/>
101 * Examples:<br/>
102 * - extracting only mammals and birds from the 'zoo' UCI dataset:<br/>
103 *   (CLASS is 'mammal') or (CLASS is 'bird')<br/>
104 * - extracting only animals with at least 2 legs from the 'zoo' UCI dataset:<br/>
105 *   (ATT14 &gt;= 2)<br/>
106 * - extracting only instances with non-missing 'wage-increase-second-year'<br/>
107 *   from the 'labor' UCI dataset:<br/>
108 *   not ismissing(ATT3)<br/>
109 * <p/>
110 <!-- globalinfo-end -->
111 *
112 <!-- options-start -->
113 * Valid options are: <p/>
114 *
115 * <pre> -E &lt;expr&gt;
116 *  The expression to use for filtering
117 *  (default: true).</pre>
118 *
119 <!-- options-end -->
120 *
121 * @author  fracpete (fracpete at waikato dot ac dot nz)
122 * @version $Revision: 6113 $
123 */
124public class SubsetByExpression
125  extends SimpleBatchFilter {
126
127  /** for serialization. */
128  private static final long serialVersionUID = 5628686110979589602L;
129 
130  /** the expresion to use for filtering. */
131  protected String m_Expression = "true";
132 
133  /**
134   * Returns a string describing this filter.
135   *
136   * @return            a description of the filter suitable for
137   *                    displaying in the explorer/experimenter gui
138   */
139  public String globalInfo() {
140    return 
141        "Filters instances according to a user-specified expression.\n\n"
142      + "Grammar:\n\n"
143      + "boolexpr_list ::= boolexpr_list boolexpr_part | boolexpr_part;\n"
144      + "\n"
145      + "boolexpr_part ::= boolexpr:e {: parser.setResult(e); :} ;\n"
146      + "\n"
147      + "boolexpr ::=    BOOLEAN \n"
148      + "              | true\n"
149      + "              | false\n"
150      + "              | expr < expr\n"
151      + "              | expr <= expr\n"
152      + "              | expr > expr\n"
153      + "              | expr >= expr\n"
154      + "              | expr = expr\n"
155      + "              | ( boolexpr )\n"
156      + "              | not boolexpr\n"
157      + "              | boolexpr and boolexpr\n"
158      + "              | boolexpr or boolexpr\n"
159      + "              | ATTRIBUTE is STRING\n"
160      + "              ;\n"
161      + "\n"
162      + "expr      ::=   NUMBER\n"
163      + "              | ATTRIBUTE\n"
164      + "              | ( expr )\n"
165      + "              | opexpr\n"
166      + "              | funcexpr\n"
167      + "              ;\n"
168      + "\n"
169      + "opexpr    ::=   expr + expr\n"
170      + "              | expr - expr\n"
171      + "              | expr * expr\n"
172      + "              | expr / expr\n"
173      + "              ;\n"
174      + "\n"
175      + "funcexpr ::=    abs ( expr )\n"
176      + "              | sqrt ( expr )\n"
177      + "              | log ( expr )\n"
178      + "              | exp ( expr )\n"
179      + "              | sin ( expr )\n"
180      + "              | cos ( expr )\n"
181      + "              | tan ( expr )\n"
182      + "              | rint ( expr )\n"
183      + "              | floor ( expr )\n"
184      + "              | pow ( expr for base , expr for exponent )\n"
185      + "              | ceil ( expr )\n"
186      + "              ;\n"
187      + "\n"
188      + "Notes:\n"
189      + "- NUMBER\n"
190      + "  any integer or floating point number \n"
191      + "  (but not in scientific notation!)\n"
192      + "- STRING\n"
193      + "  any string surrounded by single quotes; \n"
194      + "  the string may not contain a single quote though.\n"
195      + "- ATTRIBUTE\n"
196      + "  the following placeholders are recognized for \n"
197      + "  attribute values:\n"
198      + "  - CLASS for the class value in case a class attribute is set.\n"
199      + "  - ATTxyz with xyz a number from 1 to # of attributes in the\n"
200      + "    dataset, representing the value of indexed attribute.\n"
201      + "\n"
202      + "Examples:\n"
203      + "- extracting only mammals and birds from the 'zoo' UCI dataset:\n"
204      + "  (CLASS is 'mammal') or (CLASS is 'bird')\n"
205      + "- extracting only animals with at least 2 legs from the 'zoo' UCI dataset:\n"
206      + "  (ATT14 >= 2)\n"
207      + "- extracting only instances with non-missing 'wage-increase-second-year'\n"
208      + "  from the 'labor' UCI dataset:\n"
209      + "  not ismissing(ATT3)\n"
210      ;
211  }
212
213  /**
214   * Returns an enumeration describing the available options.
215   *
216   * @return an enumeration of all the available options.
217   */
218  public Enumeration listOptions() {
219    Vector      result;
220   
221    result = new Vector();
222
223    result.addElement(new Option(
224        "\tThe expression to use for filtering\n"
225        + "\t(default: true).",
226        "E", 1, "-E <expr>"));
227
228    return result.elements();
229  }
230
231
232  /**
233   * Parses a given list of options. <p/>
234   *
235   <!-- options-start -->
236   * Valid options are: <p/>
237   *
238   * <pre> -E &lt;expr&gt;
239   *  The expression to use for filtering
240   *  (default: true).</pre>
241   *
242   <!-- options-end -->
243   *
244   * @param options the list of options as an array of strings
245   * @throws Exception if an option is not supported
246   */
247  public void setOptions(String[] options) throws Exception {
248    String      tmpStr;
249   
250    tmpStr = Utils.getOption('E', options);
251    if (tmpStr.length() != 0)
252      setExpression(tmpStr);
253    else
254      setExpression("true");
255   
256    if (getInputFormat() != null)
257      setInputFormat(getInputFormat());
258  }
259
260  /**
261   * Gets the current settings of the filter.
262   *
263   * @return an array of strings suitable for passing to setOptions
264   */
265  public String[] getOptions() {
266    Vector<String>      result;
267   
268    result = new Vector();
269
270    result.add("-E");
271    result.add("" + getExpression());
272
273    return result.toArray(new String[result.size()]);
274  }
275
276  /**
277   * Returns the Capabilities of this filter.
278   *
279   * @return            the capabilities of this object
280   * @see               Capabilities
281   */
282  public Capabilities getCapabilities() {
283    Capabilities result = super.getCapabilities();
284    result.disableAll();
285
286    // attributes
287    result.enable(Capability.NOMINAL_ATTRIBUTES);
288    result.enable(Capability.NUMERIC_ATTRIBUTES);
289    result.enable(Capability.DATE_ATTRIBUTES);
290    result.enable(Capability.MISSING_VALUES);
291   
292    // class
293    result.enable(Capability.NOMINAL_CLASS);
294    result.enable(Capability.NUMERIC_CLASS);
295    result.enable(Capability.DATE_CLASS);
296    result.enable(Capability.MISSING_CLASS_VALUES);
297    result.enable(Capability.NO_CLASS);
298   
299    return result;
300  }
301
302  /**
303   * Sets the expression used for filtering.
304   *
305   * @param value       the expression
306   */
307  public void setExpression(String value) {
308    m_Expression = value;
309  }
310
311  /**
312   * Returns the expression used for filtering.
313   *
314   * @return            the expression
315   */
316  public String getExpression() {
317    return m_Expression;
318  }
319
320  /**
321   * Returns the tip text for this property.
322   *
323   * @return            tip text for this property suitable for
324   *                    displaying in the explorer/experimenter gui
325   */
326  public String expressionTipText() {
327    return "The expression to used for filtering the dataset.";
328  }
329
330  /**
331   * Determines the output format based on the input format and returns
332   * this.
333   *
334   * @param inputFormat     the input format to base the output format on
335   * @return                the output format
336   * @throws Exception      in case the determination goes wrong
337   */
338  protected Instances determineOutputFormat(Instances inputFormat)
339      throws Exception {
340   
341    return new Instances(inputFormat, 0);
342  }
343
344  /**
345   * Processes the given data (may change the provided dataset) and returns
346   * the modified version. This method is called in batchFinished().
347   *
348   * @param instances   the data to process
349   * @return            the modified data
350   * @throws Exception  in case the processing goes wrong
351   * @see               #batchFinished()
352   */
353  protected Instances process(Instances instances) throws Exception {
354    if (!isFirstBatchDone())
355      return Parser.filter(m_Expression, instances);
356    else
357      return instances;
358  }
359
360  /**
361   * Returns the revision string.
362   *
363   * @return            the revision
364   */
365  public String getRevision() {
366    return RevisionUtils.extract("$Revision: 6113 $");
367  }
368
369  /**
370   * Main method for running this filter.
371   *
372   * @param args        arguments for the filter: use -h for help
373   */
374  public static void main(String[] args) {
375    runFilter(new SubsetByExpression(), args);
376  }
377}
Note: See TracBrowser for help on using the repository browser.