1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * PartitionedMultiFilter.java |
---|
19 | * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand |
---|
20 | * |
---|
21 | */ |
---|
22 | |
---|
23 | package weka.filters.unsupervised.attribute; |
---|
24 | |
---|
25 | import weka.core.Attribute; |
---|
26 | import weka.core.Capabilities; |
---|
27 | import weka.core.FastVector; |
---|
28 | import weka.core.Instance; |
---|
29 | import weka.core.DenseInstance; |
---|
30 | import weka.core.Instances; |
---|
31 | import weka.core.Option; |
---|
32 | import weka.core.OptionHandler; |
---|
33 | import weka.core.Range; |
---|
34 | import weka.core.RevisionUtils; |
---|
35 | import weka.core.SparseInstance; |
---|
36 | import weka.core.Utils; |
---|
37 | import weka.core.Capabilities.Capability; |
---|
38 | import weka.filters.AllFilter; |
---|
39 | import weka.filters.Filter; |
---|
40 | import weka.filters.SimpleBatchFilter; |
---|
41 | |
---|
42 | import java.util.Enumeration; |
---|
43 | import java.util.Vector; |
---|
44 | |
---|
45 | /** |
---|
46 | <!-- globalinfo-start --> |
---|
47 | * A filter that applies filters on subsets of attributes and assembles the output into a new dataset. Attributes that are not covered by any of the ranges can be either retained or removed from the output. |
---|
48 | * <p/> |
---|
49 | <!-- globalinfo-end --> |
---|
50 | * |
---|
51 | <!-- options-start --> |
---|
52 | * Valid options are: <p/> |
---|
53 | * |
---|
54 | * <pre> -D |
---|
55 | * Turns on output of debugging information.</pre> |
---|
56 | * |
---|
57 | * <pre> -F <classname [options]> |
---|
58 | * A filter to apply (can be specified multiple times).</pre> |
---|
59 | * |
---|
60 | * <pre> -R <range> |
---|
61 | * An attribute range (can be specified multiple times). |
---|
62 | * For each filter a range must be supplied. 'first' and 'last' |
---|
63 | * are valid indices.</pre> |
---|
64 | * |
---|
65 | * <pre> -U |
---|
66 | * Flag for leaving unused attributes out of the output, by default |
---|
67 | * these are included in the filter output.</pre> |
---|
68 | * |
---|
69 | <!-- options-end --> |
---|
70 | * |
---|
71 | * @author FracPete (fracpete at waikato dot ac dot nz) |
---|
72 | * @version $Revision: 5987 $ |
---|
73 | * @see weka.filters.StreamableFilter |
---|
74 | */ |
---|
75 | public class PartitionedMultiFilter |
---|
76 | extends SimpleBatchFilter { |
---|
77 | |
---|
78 | /** for serialization */ |
---|
79 | private static final long serialVersionUID = -6293720886005713120L; |
---|
80 | |
---|
81 | /** The filters */ |
---|
82 | protected Filter m_Filters[] = {new AllFilter()}; |
---|
83 | |
---|
84 | /** The attribute ranges */ |
---|
85 | protected Range m_Ranges[] = {new Range("first-last")}; |
---|
86 | |
---|
87 | /** Whether unused attributes are left out of the output */ |
---|
88 | protected boolean m_RemoveUnused = false; |
---|
89 | |
---|
90 | /** the indices of the unused attributes */ |
---|
91 | protected int[] m_IndicesUnused = new int[0]; |
---|
92 | |
---|
93 | /** |
---|
94 | * Returns a string describing this filter |
---|
95 | * @return a description of the filter suitable for |
---|
96 | * displaying in the explorer/experimenter gui |
---|
97 | */ |
---|
98 | public String globalInfo() { |
---|
99 | return |
---|
100 | "A filter that applies filters on subsets of attributes and " |
---|
101 | + "assembles the output into a new dataset. Attributes that are " |
---|
102 | + "not covered by any of the ranges can be either retained or removed " |
---|
103 | + "from the output."; |
---|
104 | } |
---|
105 | |
---|
106 | /** |
---|
107 | * Returns an enumeration describing the available options. |
---|
108 | * |
---|
109 | * @return an enumeration of all the available options. |
---|
110 | */ |
---|
111 | public Enumeration listOptions() { |
---|
112 | Vector result = new Vector(); |
---|
113 | Enumeration enm = super.listOptions(); |
---|
114 | while (enm.hasMoreElements()) |
---|
115 | result.add(enm.nextElement()); |
---|
116 | |
---|
117 | result.addElement(new Option( |
---|
118 | "\tA filter to apply (can be specified multiple times).", |
---|
119 | "F", 1, "-F <classname [options]>")); |
---|
120 | |
---|
121 | result.addElement(new Option( |
---|
122 | "\tAn attribute range (can be specified multiple times).\n" |
---|
123 | + "\tFor each filter a range must be supplied. 'first' and 'last'\n" |
---|
124 | + "\tare valid indices.", |
---|
125 | "R", 1, "-R <range>")); |
---|
126 | |
---|
127 | result.addElement(new Option( |
---|
128 | "\tFlag for leaving unused attributes out of the output, by default\n" |
---|
129 | + "\tthese are included in the filter output.", |
---|
130 | "U", 0, "-U")); |
---|
131 | |
---|
132 | return result.elements(); |
---|
133 | } |
---|
134 | |
---|
135 | /** |
---|
136 | * Parses a list of options for this object. <p/> |
---|
137 | * |
---|
138 | <!-- options-start --> |
---|
139 | * Valid options are: <p/> |
---|
140 | * |
---|
141 | * <pre> -D |
---|
142 | * Turns on output of debugging information.</pre> |
---|
143 | * |
---|
144 | * <pre> -F <classname [options]> |
---|
145 | * A filter to apply (can be specified multiple times).</pre> |
---|
146 | * |
---|
147 | * <pre> -R <range> |
---|
148 | * An attribute range (can be specified multiple times). |
---|
149 | * For each filter a range must be supplied. 'first' and 'last' |
---|
150 | * are valid indices.</pre> |
---|
151 | * |
---|
152 | * <pre> -U |
---|
153 | * Flag for leaving unused attributes out of the output, by default |
---|
154 | * these are included in the filter output.</pre> |
---|
155 | * |
---|
156 | <!-- options-end --> |
---|
157 | * |
---|
158 | * @param options the list of options as an array of strings |
---|
159 | * @throws Exception if an option is not supported |
---|
160 | */ |
---|
161 | public void setOptions(String[] options) throws Exception { |
---|
162 | String tmpStr; |
---|
163 | String classname; |
---|
164 | String[] options2; |
---|
165 | Vector objects; |
---|
166 | |
---|
167 | super.setOptions(options); |
---|
168 | |
---|
169 | setRemoveUnused(Utils.getFlag("U", options)); |
---|
170 | |
---|
171 | objects = new Vector(); |
---|
172 | while ((tmpStr = Utils.getOption("F", options)).length() != 0) { |
---|
173 | options2 = Utils.splitOptions(tmpStr); |
---|
174 | classname = options2[0]; |
---|
175 | options2[0] = ""; |
---|
176 | objects.add(Utils.forName(Filter.class, classname, options2)); |
---|
177 | } |
---|
178 | |
---|
179 | // at least one filter |
---|
180 | if (objects.size() == 0) |
---|
181 | objects.add(new AllFilter()); |
---|
182 | |
---|
183 | setFilters((Filter[]) objects.toArray(new Filter[objects.size()])); |
---|
184 | |
---|
185 | objects = new Vector(); |
---|
186 | while ((tmpStr = Utils.getOption("R", options)).length() != 0) { |
---|
187 | objects.add(new Range(tmpStr)); |
---|
188 | } |
---|
189 | |
---|
190 | // at least one Range |
---|
191 | if (objects.size() == 0) |
---|
192 | objects.add(new Range("first-last")); |
---|
193 | |
---|
194 | setRanges((Range[]) objects.toArray(new Range[objects.size()])); |
---|
195 | |
---|
196 | // is number of filters the same as ranges? |
---|
197 | checkDimensions(); |
---|
198 | } |
---|
199 | |
---|
200 | /** |
---|
201 | * Gets the current settings of the filter. |
---|
202 | * |
---|
203 | * @return an array of strings suitable for passing to setOptions |
---|
204 | */ |
---|
205 | public String[] getOptions() { |
---|
206 | Vector result; |
---|
207 | String[] options; |
---|
208 | int i; |
---|
209 | |
---|
210 | result = new Vector(); |
---|
211 | |
---|
212 | options = super.getOptions(); |
---|
213 | for (i = 0; i < options.length; i++) |
---|
214 | result.add(options[i]); |
---|
215 | |
---|
216 | if (getRemoveUnused()) |
---|
217 | result.add("-U"); |
---|
218 | |
---|
219 | for (i = 0; i < getFilters().length; i++) { |
---|
220 | result.add("-F"); |
---|
221 | result.add(getFilterSpec(getFilter(i))); |
---|
222 | } |
---|
223 | |
---|
224 | for (i = 0; i < getRanges().length; i++) { |
---|
225 | result.add("-R"); |
---|
226 | result.add("" + getRange(i).getRanges()); |
---|
227 | } |
---|
228 | |
---|
229 | return (String[]) result.toArray(new String[result.size()]); |
---|
230 | } |
---|
231 | |
---|
232 | /** |
---|
233 | * checks whether the dimensions of filters and ranges fit together |
---|
234 | * |
---|
235 | * @throws Exception if dimensions differ |
---|
236 | */ |
---|
237 | protected void checkDimensions() throws Exception { |
---|
238 | if (getFilters().length != getRanges().length) |
---|
239 | throw new IllegalArgumentException( |
---|
240 | "Number of filters (= " + getFilters().length + ") " |
---|
241 | + "and ranges (= " + getRanges().length + ") don't match!"); |
---|
242 | } |
---|
243 | |
---|
244 | /** |
---|
245 | * Returns the Capabilities of this filter. |
---|
246 | * |
---|
247 | * @return the capabilities of this object |
---|
248 | * @see Capabilities |
---|
249 | */ |
---|
250 | public Capabilities getCapabilities() { |
---|
251 | Capabilities result; |
---|
252 | |
---|
253 | if (getFilters().length == 0) { |
---|
254 | result = super.getCapabilities(); |
---|
255 | result.disableAll(); |
---|
256 | } else { |
---|
257 | result = getFilters()[0].getCapabilities(); |
---|
258 | } |
---|
259 | |
---|
260 | // disable attributes |
---|
261 | result.disable(Capability.STRING_ATTRIBUTES); |
---|
262 | result.disableDependency(Capability.STRING_ATTRIBUTES); |
---|
263 | result.disable(Capability.RELATIONAL_ATTRIBUTES); |
---|
264 | result.disableDependency(Capability.RELATIONAL_ATTRIBUTES); |
---|
265 | |
---|
266 | return result; |
---|
267 | } |
---|
268 | |
---|
269 | /** |
---|
270 | * Sets whether unused attributes (ones that are not covered by any of the |
---|
271 | * ranges) are removed from the output. |
---|
272 | * |
---|
273 | * @param value if true then the unused attributes get removed |
---|
274 | */ |
---|
275 | public void setRemoveUnused(boolean value) { |
---|
276 | m_RemoveUnused = value; |
---|
277 | } |
---|
278 | |
---|
279 | /** |
---|
280 | * Gets whether unused attributes (ones that are not covered by any of the |
---|
281 | * ranges) are removed from the output. |
---|
282 | * |
---|
283 | * @return true if unused attributes are removed |
---|
284 | */ |
---|
285 | public boolean getRemoveUnused() { |
---|
286 | return m_RemoveUnused; |
---|
287 | } |
---|
288 | |
---|
289 | /** |
---|
290 | * Returns the tip text for this property |
---|
291 | * |
---|
292 | * @return tip text for this property suitable for |
---|
293 | * displaying in the explorer/experimenter gui |
---|
294 | */ |
---|
295 | public String removeUnusedTipText() { |
---|
296 | return |
---|
297 | "If true then unused attributes (ones that are not covered by any " |
---|
298 | + "of the ranges) will be removed from the output."; |
---|
299 | } |
---|
300 | |
---|
301 | /** |
---|
302 | * Sets the list of possible filters to choose from. |
---|
303 | * Also resets the state of the filter (this reset doesn't affect the |
---|
304 | * options). |
---|
305 | * |
---|
306 | * @param filters an array of filters with all options set. |
---|
307 | * @see #reset() |
---|
308 | */ |
---|
309 | public void setFilters(Filter[] filters) { |
---|
310 | m_Filters = filters; |
---|
311 | reset(); |
---|
312 | } |
---|
313 | |
---|
314 | /** |
---|
315 | * Gets the list of possible filters to choose from. |
---|
316 | * |
---|
317 | * @return the array of Filters |
---|
318 | */ |
---|
319 | public Filter[] getFilters() { |
---|
320 | return m_Filters; |
---|
321 | } |
---|
322 | |
---|
323 | /** |
---|
324 | * Returns the tip text for this property |
---|
325 | * |
---|
326 | * @return tip text for this property suitable for |
---|
327 | * displaying in the explorer/experimenter gui |
---|
328 | */ |
---|
329 | public String filtersTipText() { |
---|
330 | return "The base filters to be used."; |
---|
331 | } |
---|
332 | |
---|
333 | /** |
---|
334 | * Gets a single filter from the set of available filters. |
---|
335 | * |
---|
336 | * @param index the index of the filter wanted |
---|
337 | * @return the Filter |
---|
338 | */ |
---|
339 | public Filter getFilter(int index) { |
---|
340 | return m_Filters[index]; |
---|
341 | } |
---|
342 | |
---|
343 | /** |
---|
344 | * returns the filter classname and the options as one string |
---|
345 | * |
---|
346 | * @param filter the filter to get the specs for |
---|
347 | * @return the classname plus options |
---|
348 | */ |
---|
349 | protected String getFilterSpec(Filter filter) { |
---|
350 | String result; |
---|
351 | |
---|
352 | if (filter == null) { |
---|
353 | result = ""; |
---|
354 | } |
---|
355 | else { |
---|
356 | result = filter.getClass().getName(); |
---|
357 | if (filter instanceof OptionHandler) |
---|
358 | result += " " |
---|
359 | + Utils.joinOptions(((OptionHandler) filter).getOptions()); |
---|
360 | } |
---|
361 | |
---|
362 | return result; |
---|
363 | } |
---|
364 | |
---|
365 | /** |
---|
366 | * Sets the list of possible Ranges to choose from. |
---|
367 | * Also resets the state of the Range (this reset doesn't affect the |
---|
368 | * options). |
---|
369 | * |
---|
370 | * @param Ranges an array of Ranges with all options set. |
---|
371 | * @see #reset() |
---|
372 | */ |
---|
373 | public void setRanges(Range[] Ranges) { |
---|
374 | m_Ranges = Ranges; |
---|
375 | reset(); |
---|
376 | } |
---|
377 | |
---|
378 | /** |
---|
379 | * Gets the list of possible Ranges to choose from. |
---|
380 | * |
---|
381 | * @return the array of Ranges |
---|
382 | */ |
---|
383 | public Range[] getRanges() { |
---|
384 | return m_Ranges; |
---|
385 | } |
---|
386 | |
---|
387 | /** |
---|
388 | * Returns the tip text for this property |
---|
389 | * |
---|
390 | * @return tip text for this property suitable for |
---|
391 | * displaying in the explorer/experimenter gui |
---|
392 | */ |
---|
393 | public String rangesTipText() { |
---|
394 | return "The attribute ranges to be used."; |
---|
395 | } |
---|
396 | |
---|
397 | /** |
---|
398 | * Gets a single Range from the set of available Ranges. |
---|
399 | * |
---|
400 | * @param index the index of the Range wanted |
---|
401 | * @return the Range |
---|
402 | */ |
---|
403 | public Range getRange(int index) { |
---|
404 | return m_Ranges[index]; |
---|
405 | } |
---|
406 | |
---|
407 | /** |
---|
408 | * determines the indices of unused attributes (ones that are not covered |
---|
409 | * by any of the range) |
---|
410 | * |
---|
411 | * @param data the data to base the determination on |
---|
412 | * @see #m_IndicesUnused |
---|
413 | */ |
---|
414 | protected void determineUnusedIndices(Instances data) { |
---|
415 | Vector<Integer> indices; |
---|
416 | int i; |
---|
417 | int n; |
---|
418 | boolean covered; |
---|
419 | |
---|
420 | // traverse all ranges |
---|
421 | indices = new Vector<Integer>(); |
---|
422 | for (i = 0; i < data.numAttributes(); i++) { |
---|
423 | if (i == data.classIndex()) |
---|
424 | continue; |
---|
425 | |
---|
426 | covered = false; |
---|
427 | for (n = 0; n < getRanges().length; n++) { |
---|
428 | if (getRanges()[n].isInRange(i)) { |
---|
429 | covered = true; |
---|
430 | break; |
---|
431 | } |
---|
432 | } |
---|
433 | |
---|
434 | if (!covered) |
---|
435 | indices.add(new Integer(i)); |
---|
436 | } |
---|
437 | |
---|
438 | // create array |
---|
439 | m_IndicesUnused = new int[indices.size()]; |
---|
440 | for (i = 0; i < indices.size(); i++) |
---|
441 | m_IndicesUnused[i] = indices.get(i).intValue(); |
---|
442 | |
---|
443 | if (getDebug()) |
---|
444 | System.out.println( |
---|
445 | "Unused indices: " + Utils.arrayToString(m_IndicesUnused)); |
---|
446 | } |
---|
447 | |
---|
448 | /** |
---|
449 | * generates a subset of the dataset with only the attributes from the range |
---|
450 | * (class is always added if present) |
---|
451 | * |
---|
452 | * @param data the data to work on |
---|
453 | * @param range the range of attribute to use |
---|
454 | * @return the generated subset |
---|
455 | * @throws Exception if creation fails |
---|
456 | */ |
---|
457 | protected Instances generateSubset(Instances data, Range range) throws Exception { |
---|
458 | Remove filter; |
---|
459 | String atts; |
---|
460 | Instances result; |
---|
461 | |
---|
462 | // determine attributes |
---|
463 | atts = range.getRanges(); |
---|
464 | if ((data.classIndex() > -1) && (!range.isInRange(data.classIndex()))) |
---|
465 | atts += "," + (data.classIndex() + 1); |
---|
466 | |
---|
467 | // setup filter |
---|
468 | filter = new Remove(); |
---|
469 | filter.setAttributeIndices(atts); |
---|
470 | filter.setInvertSelection(true); |
---|
471 | filter.setInputFormat(data); |
---|
472 | |
---|
473 | // generate output |
---|
474 | result = Filter.useFilter(data, filter); |
---|
475 | |
---|
476 | return result; |
---|
477 | } |
---|
478 | |
---|
479 | /** |
---|
480 | * renames all the attributes in the dataset (excluding the class if present) |
---|
481 | * by adding the prefix to the name. |
---|
482 | * |
---|
483 | * @param data the data to work on |
---|
484 | * @param prefix the prefix for the attributes |
---|
485 | * @return a copy of the data with the attributes renamed |
---|
486 | * @throws Exception if renaming fails |
---|
487 | */ |
---|
488 | protected Instances renameAttributes(Instances data, String prefix) throws Exception { |
---|
489 | Instances result; |
---|
490 | int i; |
---|
491 | FastVector atts; |
---|
492 | |
---|
493 | // rename attributes |
---|
494 | atts = new FastVector(); |
---|
495 | for (i = 0; i < data.numAttributes(); i++) { |
---|
496 | if (i == data.classIndex()) |
---|
497 | atts.addElement(data.attribute(i).copy()); |
---|
498 | else |
---|
499 | atts.addElement(data.attribute(i).copy(prefix + data.attribute(i).name())); |
---|
500 | } |
---|
501 | |
---|
502 | // create new dataset |
---|
503 | result = new Instances(data.relationName(), atts, data.numInstances()); |
---|
504 | for (i = 0; i < data.numInstances(); i++) { |
---|
505 | result.add((Instance) data.instance(i).copy()); |
---|
506 | } |
---|
507 | |
---|
508 | // set class if present |
---|
509 | if (data.classIndex() > -1) |
---|
510 | result.setClassIndex(data.classIndex()); |
---|
511 | |
---|
512 | return result; |
---|
513 | } |
---|
514 | |
---|
515 | /** |
---|
516 | * Determines the output format based only on the full input dataset and |
---|
517 | * returns this otherwise null is returned. In case the output format cannot |
---|
518 | * be returned immediately, i.e., immediateOutputFormat() returns false, |
---|
519 | * then this method will be called from batchFinished(). |
---|
520 | * |
---|
521 | * @param inputFormat the input format to base the output format on |
---|
522 | * @return the output format |
---|
523 | * @throws Exception in case the determination goes wrong |
---|
524 | * @see #hasImmediateOutputFormat() |
---|
525 | * @see #batchFinished() |
---|
526 | */ |
---|
527 | protected Instances determineOutputFormat(Instances inputFormat) throws Exception { |
---|
528 | Instances result; |
---|
529 | Instances processed; |
---|
530 | int i; |
---|
531 | int n; |
---|
532 | FastVector atts; |
---|
533 | Attribute att; |
---|
534 | |
---|
535 | if (!isFirstBatchDone()) { |
---|
536 | // we need the full dataset here, see process(Instances) |
---|
537 | if (inputFormat.numInstances() == 0) |
---|
538 | return null; |
---|
539 | |
---|
540 | checkDimensions(); |
---|
541 | |
---|
542 | // determine unused indices |
---|
543 | determineUnusedIndices(inputFormat); |
---|
544 | |
---|
545 | atts = new FastVector(); |
---|
546 | for (i = 0; i < getFilters().length; i++) { |
---|
547 | if (!isFirstBatchDone()) { |
---|
548 | // generate subset |
---|
549 | processed = generateSubset(inputFormat, getRange(i)); |
---|
550 | // set input format |
---|
551 | if (!getFilter(i).setInputFormat(processed)) |
---|
552 | Filter.useFilter(processed, getFilter(i)); |
---|
553 | } |
---|
554 | |
---|
555 | // get output format |
---|
556 | processed = getFilter(i).getOutputFormat(); |
---|
557 | |
---|
558 | // rename attributes |
---|
559 | processed = renameAttributes(processed, "filtered-" + i + "-"); |
---|
560 | |
---|
561 | // add attributes |
---|
562 | for (n = 0; n < processed.numAttributes(); n++) { |
---|
563 | if (n == processed.classIndex()) |
---|
564 | continue; |
---|
565 | atts.addElement(processed.attribute(n).copy()); |
---|
566 | } |
---|
567 | } |
---|
568 | |
---|
569 | // add unused attributes |
---|
570 | if (!getRemoveUnused()) { |
---|
571 | for (i = 0; i < m_IndicesUnused.length; i++) { |
---|
572 | att = inputFormat.attribute(m_IndicesUnused[i]); |
---|
573 | atts.addElement(att.copy("unfiltered-" + att.name())); |
---|
574 | } |
---|
575 | } |
---|
576 | |
---|
577 | // add class if present |
---|
578 | if (inputFormat.classIndex() > -1) |
---|
579 | atts.addElement(inputFormat.classAttribute().copy()); |
---|
580 | |
---|
581 | // generate new dataset |
---|
582 | result = new Instances(inputFormat.relationName(), atts, 0); |
---|
583 | if (inputFormat.classIndex() > -1) |
---|
584 | result.setClassIndex(result.numAttributes() - 1); |
---|
585 | } |
---|
586 | else { |
---|
587 | result = getOutputFormat(); |
---|
588 | } |
---|
589 | |
---|
590 | return result; |
---|
591 | } |
---|
592 | |
---|
593 | /** |
---|
594 | * Processes the given data (may change the provided dataset) and returns |
---|
595 | * the modified version. This method is called in batchFinished(). |
---|
596 | * |
---|
597 | * @param instances the data to process |
---|
598 | * @return the modified data |
---|
599 | * @throws Exception in case the processing goes wrong |
---|
600 | * @see #batchFinished() |
---|
601 | */ |
---|
602 | protected Instances process(Instances instances) throws Exception { |
---|
603 | Instances result; |
---|
604 | int i; |
---|
605 | int n; |
---|
606 | int m; |
---|
607 | int index; |
---|
608 | Instances[] processed; |
---|
609 | Instance inst; |
---|
610 | Instance newInst; |
---|
611 | double[] values; |
---|
612 | Vector errors; |
---|
613 | |
---|
614 | if (!isFirstBatchDone()) { |
---|
615 | checkDimensions(); |
---|
616 | |
---|
617 | // set upper limits |
---|
618 | for (i = 0; i < m_Ranges.length; i++) |
---|
619 | m_Ranges[i].setUpper(instances.numAttributes() - 1); |
---|
620 | |
---|
621 | // determine unused indices |
---|
622 | determineUnusedIndices(instances); |
---|
623 | } |
---|
624 | |
---|
625 | // pass data through all filters |
---|
626 | processed = new Instances[getFilters().length]; |
---|
627 | for (i = 0; i < getFilters().length; i++) { |
---|
628 | processed[i] = generateSubset(instances, getRange(i)); |
---|
629 | if (!isFirstBatchDone()) |
---|
630 | getFilter(i).setInputFormat(processed[i]); |
---|
631 | processed[i] = Filter.useFilter(processed[i], getFilter(i)); |
---|
632 | } |
---|
633 | |
---|
634 | // set output format (can only be determined with full dataset, hence here) |
---|
635 | if (!isFirstBatchDone()) { |
---|
636 | result = determineOutputFormat(instances); |
---|
637 | setOutputFormat(result); |
---|
638 | } |
---|
639 | else { |
---|
640 | result = getOutputFormat(); |
---|
641 | } |
---|
642 | |
---|
643 | // check whether all filters didn't change the number of instances |
---|
644 | errors = new Vector(); |
---|
645 | for (i = 0; i < processed.length; i++) { |
---|
646 | if (processed[i].numInstances() != instances.numInstances()) |
---|
647 | errors.add(new Integer(i)); |
---|
648 | } |
---|
649 | if (errors.size() > 0) |
---|
650 | throw new IllegalStateException( |
---|
651 | "The following filter(s) changed the number of instances: " + errors); |
---|
652 | |
---|
653 | // assemble data |
---|
654 | for (i = 0; i < instances.numInstances(); i++) { |
---|
655 | inst = instances.instance(i); |
---|
656 | values = new double[result.numAttributes()]; |
---|
657 | |
---|
658 | // filtered data |
---|
659 | index = 0; |
---|
660 | for (n = 0; n < processed.length; n++) { |
---|
661 | for (m = 0; m < processed[n].numAttributes(); m++) { |
---|
662 | if (m == processed[n].classIndex()) |
---|
663 | continue; |
---|
664 | values[index] = processed[n].instance(i).value(m); |
---|
665 | index++; |
---|
666 | } |
---|
667 | } |
---|
668 | |
---|
669 | // unused attributes |
---|
670 | if (!getRemoveUnused()) { |
---|
671 | for (n = 0; n < m_IndicesUnused.length; n++) { |
---|
672 | values[index] = inst.value(m_IndicesUnused[n]); |
---|
673 | index++; |
---|
674 | } |
---|
675 | } |
---|
676 | |
---|
677 | // class |
---|
678 | if (instances.classIndex() > -1) |
---|
679 | values[values.length - 1] = inst.value(instances.classIndex()); |
---|
680 | |
---|
681 | // generate and add instance |
---|
682 | if (inst instanceof SparseInstance) |
---|
683 | newInst = new SparseInstance(instances.instance(i).weight(), values); |
---|
684 | else |
---|
685 | newInst = new DenseInstance(instances.instance(i).weight(), values); |
---|
686 | result.add(newInst); |
---|
687 | } |
---|
688 | |
---|
689 | return result; |
---|
690 | } |
---|
691 | |
---|
692 | /** |
---|
693 | * Returns the revision string. |
---|
694 | * |
---|
695 | * @return the revision |
---|
696 | */ |
---|
697 | public String getRevision() { |
---|
698 | return RevisionUtils.extract("$Revision: 5987 $"); |
---|
699 | } |
---|
700 | |
---|
701 | /** |
---|
702 | * Main method for executing this class. |
---|
703 | * |
---|
704 | * @param args should contain arguments for the filter: use -h for help |
---|
705 | */ |
---|
706 | public static void main(String[] args) { |
---|
707 | runFilter(new PartitionedMultiFilter(), args); |
---|
708 | } |
---|
709 | } |
---|