source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/instance/subsetbyexpression/Parser.cup

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 11.5 KB
Line 
1/*
2 * STANDARD ML OF NEW JERSEY COPYRIGHT NOTICE, LICENSE AND DISCLAIMER.
3 *
4 * Copyright (c) 1989-1998 by Lucent Technologies
5 *
6 * Permission to use, copy, modify, and distribute this software and its
7 * documentation for any purpose and without fee is hereby granted, provided
8 * that the above copyright notice appear in all copies and that both the
9 * copyright notice and this permission notice and warranty disclaimer appear
10 * in supporting documentation, and that the name of Lucent Technologies, Bell
11 * Labs or any Lucent entity not be used in advertising or publicity pertaining
12 * to distribution of the software without specific, written prior permission.
13 *
14 * Lucent disclaims all warranties with regard to this software, including all
15 * implied warranties of merchantability and fitness. In no event shall Lucent
16 * be liable for any special, indirect or consequential damages or any damages
17 * whatsoever resulting from loss of use, data or profits, whether in an action
18 * of contract, negligence or other tortious action, arising out of or in
19 * connection with the use or performance of this software.
20 *
21 * Taken from this URL:
22 * http://www.smlnj.org/license.html
23 *
24 * This license is compatible with the GNU GPL (see section "Standard ML of New
25 * Jersey Copyright License"):
26 * http://www.gnu.org/licenses/license-list.html#StandardMLofNJ
27 */
28
29/*
30 * Copyright 1996-1999 by Scott Hudson, Frank Flannery, C. Scott Ananian
31 */
32
33package weka.filters.unsupervised.instance.subsetbyexpression;
34
35import weka.core.*;
36import java_cup.runtime.*;
37
38import java.io.*;
39import java.util.*;
40
41/**
42 * A parser for evaluating whether an Instance complies with a boolean expression
43 * or not.
44 *
45 * @author FracPete (fracpete at waikato dot ac dot nz)
46 * @version $Revision: 4939 $
47 */
48
49parser code {:
  /**
   * variable - value relation. Keys are symbol names as they appear in the
   * expression; the grammar actions look attribute references up here.
   * Raw type: values are whatever the caller stored (filter() stores the
   * objects returned by getValue()).
   */
  protected HashMap m_Symbols = new HashMap();

  /** attribute - attribute-type (constants from weka.core.Attribute) relation. */
  protected Hashtable<String,Integer> m_AttributeTypes = new Hashtable<String,Integer>();

  /**
   * for storing the result of the expression. Stays null until the
   * boolexpr_part action has called setResult().
   */
  protected Boolean m_Result = null;
58
59  /**
60   * Sets the variable - value relation to use.
61   *
62   * @param value the variable-value relation
63   */
64  public void setSymbols(HashMap value) {
65    m_Symbols = value;
66  }
67
68  /**
69   * Returns the current variable - value relation in use.
70   *
71   * @return the variable-value relation
72   */
73  public HashMap getSymbols() {
74    return m_Symbols;
75  }
76
77  /**
78   * Sets the attribute - attribute-type relation to use.
79   *
80   * @param value the att - att-type relation
81   */
82  public void setAttributeTypes(Hashtable value) {
83    m_AttributeTypes = value;
84  }
85
86  /**
87   * Returns the current attribute - attribute-type relation in use.
88   *
89   * @return the att - att-type relation
90   */
91  public Hashtable getAttributeTypes() {
92    return m_AttributeTypes;
93  }
94
95  /**
96   * Sets the result of the evaluation.
97   *
98   * @param value the result
99   */
100  public void setResult(Boolean value) {
101    m_Result = value;
102  }
103
104  /**
105   * Returns the result of the evaluation.
106   *
107   * @return the result
108   */
109  public Boolean getResult() {
110    return m_Result;
111  }
112
113  /**
114   * Returns either a String object for nominal attributes or a Double for numeric
115   * ones. For all other attribute types this method throws an Exception.
116   * It also returns a Double object in case of a missing value (for all
117   * attribute types!).
118   *
119   * @param instance the instance to work on
120   * @param index the index of the attribute to return
121   * @return the converted value
122   */
123  public static Object getValue(Instance instance, int index) {
124    if (instance.isMissing(index))
125      return new Double(Instance.missingValue());
126    else if (instance.attribute(index).isNominal())
127      return new String(instance.stringValue(index));
128    else if (instance.attribute(index).isNumeric())
129      return new Double(instance.value(index));
130    else
131      throw new IllegalArgumentException(
132          "Unhandled attribute type '" + instance.attribute(index).type() + "'!");
133  }
134
135  /**
136   * Filters the input dataset against the provided expression.
137   *
138   * @param expression the expression used for filtering
139   * @param input the input data
140   * @return the filtered data
141   * @throws Exception if parsing fails
142   */
143  public static Instances filter(String expression, Instances input) throws Exception {
144    // setup output
145    Instances output = new Instances(input, 0);
146   
147    // setup attribute - attribute-type relation
148    Hashtable<String,Integer> attTypes = new Hashtable<String,Integer>();
149    for (int i = 0; i < input.numAttributes(); i++)
150       attTypes.put("ATT" + (i+1), input.attribute(i).type());
151    if (input.classIndex() > -1)
152      attTypes.put("CLASS", input.classAttribute().type());
153   
154    // filter dataset
155    SymbolFactory sf = new DefaultSymbolFactory();
156    HashMap symbols = new HashMap();
157    ByteArrayInputStream parserInput = new ByteArrayInputStream(expression.getBytes());
158    for (int i = 0; i < input.numInstances(); i++) {
159      Instance instance = input.instance(i);
160
161      // setup symbols
162      for (int n = 0; n < instance.numAttributes(); n++) {
163        if (n == instance.classIndex())
164          symbols.put("CLASS", getValue(instance, n));
165        symbols.put("ATT" + (n+1), getValue(instance, n));
166      }
167
168      // evaluate expression
169      parserInput.reset();
170      Parser parser = new Parser(new Scanner(parserInput,sf), sf);
171      parser.setSymbols(symbols);
172      parser.parse();
173      if (parser.getResult())
174        output.add((Instance) instance.copy());
175    }
176
177    return output;
178  }
179
180  /**
181   * Runs the parser from commandline. Takes the following arguments:
182   * <ol>
183   *   <li>expression</li>
184   *   <li>input file</li>
185   *   <li>class index (first|last|num), use 0 to ignore</li>
186   *   <li>output file</li>
187   * </ol>
188   *
189   * @param args the commandline arguments
190   * @throws Exception if something goes wrong
191   */
192  public static void main(String args[]) throws Exception {
193    // get expression
194    String expression = args[0];
195
196    // read input data
197    BufferedReader reader = new BufferedReader(new FileReader(args[1]));
198    Instances input = new Instances(reader);
199    reader.close();
200    if (args[2].equals("first"))
201      input.setClassIndex(0);
202    else if (args[2].equals("last"))
203      input.setClassIndex(input.numAttributes() - 1);
204    else
205      input.setClassIndex(Integer.parseInt(args[2]) - 1);
206
207    // process dataset
208    Instances output = filter(expression, input);
209
210    // save output file
211    BufferedWriter writer = new BufferedWriter(new FileWriter(args[3]));
212    writer.write(new Instances(output, 0).toString());
213    writer.newLine();
214    for (int i = 0; i < output.numInstances(); i++) {
215      writer.write(output.instance(i).toString());
216      writer.newLine();
217    }
218    writer.flush();
219    writer.close();
220  }
221:}
222
/* Terminals without a declared value type: punctuation, operators, function names, keywords. */
terminal COMMA, LPAREN, RPAREN, ISMISSING;
terminal MINUS, PLUS, TIMES, DIVISION;
terminal ABS, SQRT, LOG, EXP, SIN, COS, TAN, RINT, FLOOR, POW, CEIL;
terminal TRUE, FALSE, LT, LE, GT, GE, EQ, NOT, AND, OR, IS;
/* Terminals that carry a typed value. */
terminal Double NUMBER;
terminal Boolean BOOLEAN;
terminal String ATTRIBUTE, STRING;

/* boolexpr_list and boolexpr_part carry no value; the outcome is passed to the parser via setResult(). */
non terminal boolexpr_list, boolexpr_part;
/* Numeric expressions evaluate to Double, boolean expressions to Boolean. */
non terminal Double expr;
non terminal Double opexpr;
non terminal Double funcexpr;
non terminal Boolean boolexpr;

/*
 * Precedence and associativity; all levels are left-associative. Per the
 * CUP manual, a declaration further down binds tighter than one above it,
 * so the order of these lines is significant.
 * NOTE(review): AND and OR share a single level, so "a or b and c" groups
 * as "(a or b) and c" - confirm this is intended.
 * Terminals not listed here (e.g. LT, LE, GT, GE, EQ, IS) have no declared
 * precedence.
 */
precedence left PLUS, MINUS;
precedence left TIMES, DIVISION;
precedence left LPAREN, RPAREN;
precedence left ABS, SQRT, LOG, EXP, SIN, COS, TAN, RINT, FLOOR, POW, CEIL;
precedence left AND, OR;
precedence left NOT;
243
/*
 * One or more boolean expressions. Each completed boolexpr_part stores its
 * value via parser.setResult(), so the last one in the list determines the
 * result the caller reads.
 */
boolexpr_list ::= boolexpr_list boolexpr_part | boolexpr_part;
boolexpr_part ::= boolexpr:e {: parser.setResult(e); :} ;

/*
 * Boolean expressions: literals, numeric comparisons, parentheses, the
 * logical operators, a nominal-value test (ATTRIBUTE IS STRING) and a
 * missing-value test (ISMISSING(ATTRIBUTE)). The order of the alternatives
 * is kept unchanged.
 */
boolexpr ::=    BOOLEAN:b
                {: RESULT = b; :}
              | TRUE
                {: RESULT = Boolean.TRUE; :}
              | FALSE
                {: RESULT = Boolean.FALSE; :}
              | expr:l LT expr:r
                {: RESULT = Boolean.valueOf(l.doubleValue() < r.doubleValue()); :}
              | expr:l LE expr:r
                {: RESULT = Boolean.valueOf(l.doubleValue() <= r.doubleValue()); :}
              | expr:l GT expr:r
                {: RESULT = Boolean.valueOf(l.doubleValue() > r.doubleValue()); :}
              | expr:l GE expr:r
                {: RESULT = Boolean.valueOf(l.doubleValue() >= r.doubleValue()); :}
              | expr:l EQ expr:r
                {: RESULT = Boolean.valueOf(l.doubleValue() == r.doubleValue()); :}
              | LPAREN boolexpr:b RPAREN
                {: RESULT = b; :}
              | NOT boolexpr:b
                {: RESULT = !b; :}
              | boolexpr:l AND boolexpr:r
                {: RESULT = l && r; :}
              | boolexpr:l OR boolexpr:r
                {: RESULT = l || r; :}
              | ATTRIBUTE:a IS STRING:s
                {: if (!parser.getSymbols().containsKey(a))
                     throw new IllegalStateException("Unknown symbol '" + a + "'!");
                   // single lookup; only String-valued (nominal) symbols can match
                   Object symbolValue = parser.getSymbols().get(a);
                   RESULT = Boolean.valueOf((symbolValue instanceof String) && symbolValue.equals(s));
                :}
              | ISMISSING LPAREN ATTRIBUTE:a RPAREN
                {: if (!parser.getSymbols().containsKey(a))
                     throw new IllegalStateException("Unknown symbol '" + a + "'!");
                   // a missing value is stored as a Double, whatever the attribute type
                   Object symbolValue = parser.getSymbols().get(a);
                   RESULT = Boolean.valueOf((symbolValue instanceof Double)
                                            && Instance.isMissingValue(((Double) symbolValue).doubleValue()));
                :}
              ;
283
/*
 * Numeric expressions; every alternative yields a Double. An attribute
 * reference is resolved through the parser's symbol table and must hold a
 * Double (a numeric or missing value).
 */
expr      ::=   NUMBER:n
                {: RESULT = n; :}
              | ATTRIBUTE:a
                {: if (!parser.getSymbols().containsKey(a))
                     throw new IllegalStateException("Unknown symbol '" + a + "'!");
                   Object symbolValue = parser.getSymbols().get(a);
                   // nominal values are stored as String; report that clearly
                   // instead of failing with a bare ClassCastException
                   if (!(symbolValue instanceof Double))
                     throw new IllegalStateException(
                         "Symbol '" + a + "' is not numeric and cannot be used in a numeric expression!");
                   RESULT = (Double) symbolValue;
                :}
              | LPAREN expr:e RPAREN
                {: RESULT = e; :}
              | opexpr:o
                {: RESULT = o; :}
              | funcexpr:f
                {: RESULT = f; :}
              ;
299
/* Binary arithmetic on two numeric sub-expressions; operands are unboxed and the result re-boxed. */
opexpr    ::=   expr:l PLUS expr:r
                {: RESULT = l + r; :}
              | expr:l MINUS expr:r
                {: RESULT = l - r; :}
              | expr:l TIMES expr:r
                {: RESULT = l * r; :}
              | expr:l DIVISION expr:r
                {: RESULT = l / r; :}
              ;
309
/* Function calls: each one delegates to the java.lang.Math method of the same name. */
funcexpr ::=    ABS LPAREN expr:e RPAREN
                {: RESULT = Double.valueOf(Math.abs(e.doubleValue())); :}
              | SQRT LPAREN expr:e RPAREN
                {: RESULT = Double.valueOf(Math.sqrt(e.doubleValue())); :}
              | LOG LPAREN expr:e RPAREN
                {: RESULT = Double.valueOf(Math.log(e.doubleValue())); :}
              | EXP LPAREN expr:e RPAREN
                {: RESULT = Double.valueOf(Math.exp(e.doubleValue())); :}
              | SIN LPAREN expr:e RPAREN
                {: RESULT = Double.valueOf(Math.sin(e.doubleValue())); :}
              | COS LPAREN expr:e RPAREN
                {: RESULT = Double.valueOf(Math.cos(e.doubleValue())); :}
              | TAN LPAREN expr:e RPAREN
                {: RESULT = Double.valueOf(Math.tan(e.doubleValue())); :}
              | RINT LPAREN expr:e RPAREN
                {: RESULT = Double.valueOf(Math.rint(e.doubleValue())); :}
              | FLOOR LPAREN expr:e RPAREN
                {: RESULT = Double.valueOf(Math.floor(e.doubleValue())); :}
              | POW LPAREN expr:base COMMA expr:exponent RPAREN
                {: RESULT = Double.valueOf(Math.pow(base.doubleValue(), exponent.doubleValue())); :}
              | CEIL LPAREN expr:e RPAREN
                {: RESULT = Double.valueOf(Math.ceil(e.doubleValue())); :}
              ;
333
Note: See TracBrowser for help on using the repository browser.