/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
16 | |
---|
/*
 * NumericCleaner.java
 * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
 */
21 | |
---|
22 | package weka.filters.unsupervised.attribute; |
---|
23 | |
---|
24 | import weka.core.Capabilities; |
---|
25 | import weka.core.Instance; |
---|
26 | import weka.core.DenseInstance; |
---|
27 | import weka.core.Instances; |
---|
28 | import weka.core.Option; |
---|
29 | import weka.core.Range; |
---|
30 | import weka.core.RevisionUtils; |
---|
31 | import weka.core.Utils; |
---|
32 | import weka.core.Capabilities.Capability; |
---|
33 | import weka.filters.SimpleStreamFilter; |
---|
34 | |
---|
35 | import java.util.Enumeration; |
---|
36 | import java.util.Vector; |
---|
37 | |
---|
38 | |
---|
39 | /** |
---|
40 | <!-- globalinfo-start --> |
---|
41 | * A filter that 'cleanses' the numeric data from values that are too small, too big or very close to a certain value (e.g., 0) and sets these values to a pre-defined default. |
---|
42 | * <p/> |
---|
43 | <!-- globalinfo-end --> |
---|
44 | * |
---|
45 | <!-- options-start --> |
---|
46 | * Valid options are: <p/> |
---|
47 | * |
---|
48 | * <pre> -D |
---|
49 | * Turns on output of debugging information.</pre> |
---|
50 | * |
---|
51 | * <pre> -min <double> |
---|
52 | * The minimum threshold. (default -Double.MAX_VALUE)</pre> |
---|
53 | * |
---|
54 | * <pre> -min-default <double> |
---|
55 | * The replacement for values smaller than the minimum threshold. |
---|
56 | * (default -Double.MAX_VALUE)</pre> |
---|
57 | * |
---|
58 | * <pre> -max <double> |
---|
59 | * The maximum threshold. (default Double.MAX_VALUE)</pre> |
---|
60 | * |
---|
61 | * <pre> -max-default <double> |
---|
62 | * The replacement for values larger than the maximum threshold. |
---|
63 | * (default Double.MAX_VALUE)</pre> |
---|
64 | * |
---|
65 | * <pre> -closeto <double> |
---|
66 | * The number values are checked for closeness. (default 0)</pre> |
---|
67 | * |
---|
68 | * <pre> -closeto-default <double> |
---|
69 | * The replacement for values that are close to '-closeto'. |
---|
70 | * (default 0)</pre> |
---|
71 | * |
---|
72 | * <pre> -closeto-tolerance <double> |
---|
73 | * The tolerance below which numbers are considered being close to |
---|
74 | * to each other. (default 1E-6)</pre> |
---|
75 | * |
---|
76 | * <pre> -decimals <int> |
---|
77 | * The number of decimals to round to, -1 means no rounding at all. |
---|
78 | * (default -1)</pre> |
---|
79 | * |
---|
80 | * <pre> -R <col1,col2,...> |
---|
81 | * The list of columns to cleanse, e.g., first-last or first-3,5-last. |
---|
82 | * (default first-last)</pre> |
---|
83 | * |
---|
84 | * <pre> -V |
---|
85 | * Inverts the matching sense.</pre> |
---|
86 | * |
---|
87 | * <pre> -include-class |
---|
88 | * Whether to include the class in the cleansing. |
---|
89 | * The class column will always be skipped, if this flag is not |
---|
90 | * present. (default no)</pre> |
---|
91 | * |
---|
92 | <!-- options-end --> |
---|
93 | * |
---|
94 | * @author fracpete (fracpete at waikato dot ac dot nz) |
---|
95 | * @version $Revision: 5987 $ |
---|
96 | */ |
---|
97 | public class NumericCleaner |
---|
98 | extends SimpleStreamFilter { |
---|
99 | |
---|
100 | /** for serialization */ |
---|
101 | private static final long serialVersionUID = -352890679895066592L; |
---|
102 | |
---|
103 | /** the minimum threshold */ |
---|
104 | protected double m_MinThreshold = -Double.MAX_VALUE; |
---|
105 | |
---|
106 | /** the minimum default replacement value */ |
---|
107 | protected double m_MinDefault = -Double.MAX_VALUE; |
---|
108 | |
---|
109 | /** the maximum threshold */ |
---|
110 | protected double m_MaxThreshold = Double.MAX_VALUE; |
---|
111 | |
---|
112 | /** the maximum default replacement value */ |
---|
113 | protected double m_MaxDefault = Double.MAX_VALUE; |
---|
114 | |
---|
115 | /** the number the values are checked for closeness to */ |
---|
116 | protected double m_CloseTo = 0; |
---|
117 | |
---|
118 | /** the default replacement value for numbers "close-to" */ |
---|
119 | protected double m_CloseToDefault = 0; |
---|
120 | |
---|
121 | /** the tolerance distance, below which numbers are considered being "close-to" */ |
---|
122 | protected double m_CloseToTolerance = 1E-6; |
---|
123 | |
---|
124 | /** Stores which columns to cleanse */ |
---|
125 | protected Range m_Cols = new Range("first-last"); |
---|
126 | |
---|
127 | /** whether to include the class attribute */ |
---|
128 | protected boolean m_IncludeClass = false; |
---|
129 | |
---|
130 | /** the number of decimals to round to (-1 means no rounding) */ |
---|
131 | protected int m_Decimals = -1; |
---|
132 | |
---|
133 | /** |
---|
134 | * Returns a string describing this filter. |
---|
135 | * |
---|
136 | * @return a description of the filter suitable for |
---|
137 | * displaying in the explorer/experimenter gui |
---|
138 | */ |
---|
139 | public String globalInfo() { |
---|
140 | return |
---|
141 | "A filter that 'cleanses' the numeric data from values that are too " |
---|
142 | + "small, too big or very close to a certain value (e.g., 0) and sets " |
---|
143 | + "these values to a pre-defined default."; |
---|
144 | } |
---|
145 | |
---|
146 | /** |
---|
147 | * Returns an enumeration describing the available options. |
---|
148 | * |
---|
149 | * @return an enumeration of all the available options. |
---|
150 | */ |
---|
151 | public Enumeration listOptions() { |
---|
152 | Vector result; |
---|
153 | Enumeration enm; |
---|
154 | |
---|
155 | result = new Vector(); |
---|
156 | |
---|
157 | enm = super.listOptions(); |
---|
158 | while (enm.hasMoreElements()) |
---|
159 | result.addElement(enm.nextElement()); |
---|
160 | |
---|
161 | result.addElement(new Option( |
---|
162 | "\tThe minimum threshold. (default -Double.MAX_VALUE)", |
---|
163 | "min", 1, "-min <double>")); |
---|
164 | |
---|
165 | result.addElement(new Option( |
---|
166 | "\tThe replacement for values smaller than the minimum threshold.\n" |
---|
167 | + "\t(default -Double.MAX_VALUE)", |
---|
168 | "min-default", 1, "-min-default <double>")); |
---|
169 | |
---|
170 | result.addElement(new Option( |
---|
171 | "\tThe maximum threshold. (default Double.MAX_VALUE)", |
---|
172 | "max", 1, "-max <double>")); |
---|
173 | |
---|
174 | result.addElement(new Option( |
---|
175 | "\tThe replacement for values larger than the maximum threshold.\n" |
---|
176 | + "\t(default Double.MAX_VALUE)", |
---|
177 | "max-default", 1, "-max-default <double>")); |
---|
178 | |
---|
179 | result.addElement(new Option( |
---|
180 | "\tThe number values are checked for closeness. (default 0)", |
---|
181 | "closeto", 1, "-closeto <double>")); |
---|
182 | |
---|
183 | result.addElement(new Option( |
---|
184 | "\tThe replacement for values that are close to '-closeto'.\n" |
---|
185 | + "\t(default 0)", |
---|
186 | "closeto-default", 1, "-closeto-default <double>")); |
---|
187 | |
---|
188 | result.addElement(new Option( |
---|
189 | "\tThe tolerance below which numbers are considered being close to \n" |
---|
190 | + "\tto each other. (default 1E-6)", |
---|
191 | "closeto-tolerance", 1, "-closeto-tolerance <double>")); |
---|
192 | |
---|
193 | result.addElement(new Option( |
---|
194 | "\tThe number of decimals to round to, -1 means no rounding at all.\n" |
---|
195 | + "\t(default -1)", |
---|
196 | "decimals", 1, "-decimals <int>")); |
---|
197 | |
---|
198 | result.addElement(new Option( |
---|
199 | "\tThe list of columns to cleanse, e.g., first-last or first-3,5-last.\n" |
---|
200 | + "\t(default first-last)", |
---|
201 | "R", 1, "-R <col1,col2,...>")); |
---|
202 | |
---|
203 | result.addElement(new Option( |
---|
204 | "\tInverts the matching sense.", |
---|
205 | "V", 0, "-V")); |
---|
206 | |
---|
207 | result.addElement(new Option( |
---|
208 | "\tWhether to include the class in the cleansing.\n" |
---|
209 | + "\tThe class column will always be skipped, if this flag is not\n" |
---|
210 | + "\tpresent. (default no)", |
---|
211 | "include-class", 0, "-include-class")); |
---|
212 | |
---|
213 | return result.elements(); |
---|
214 | } |
---|
215 | |
---|
216 | /** |
---|
217 | * Gets the current settings of the filter. |
---|
218 | * |
---|
219 | * @return an array of strings suitable for passing to setOptions |
---|
220 | */ |
---|
221 | public String[] getOptions() { |
---|
222 | int i; |
---|
223 | Vector result; |
---|
224 | String[] options; |
---|
225 | |
---|
226 | result = new Vector(); |
---|
227 | options = super.getOptions(); |
---|
228 | for (i = 0; i < options.length; i++) |
---|
229 | result.add(options[i]); |
---|
230 | |
---|
231 | result.add("-min"); |
---|
232 | result.add("" + m_MinThreshold); |
---|
233 | |
---|
234 | result.add("-min-default"); |
---|
235 | result.add("" + m_MinDefault); |
---|
236 | |
---|
237 | result.add("-max"); |
---|
238 | result.add("" + m_MaxThreshold); |
---|
239 | |
---|
240 | result.add("-max-default"); |
---|
241 | result.add("" + m_MaxDefault); |
---|
242 | |
---|
243 | result.add("-closeto"); |
---|
244 | result.add("" + m_CloseTo); |
---|
245 | |
---|
246 | result.add("-closeto-default"); |
---|
247 | result.add("" + m_CloseToDefault); |
---|
248 | |
---|
249 | result.add("-closeto-tolerance"); |
---|
250 | result.add("" + m_CloseToTolerance); |
---|
251 | |
---|
252 | result.add("-R"); |
---|
253 | result.add("" + m_Cols.getRanges()); |
---|
254 | |
---|
255 | if (m_Cols.getInvert()) |
---|
256 | result.add("-V"); |
---|
257 | |
---|
258 | if (m_IncludeClass) |
---|
259 | result.add("-include-class"); |
---|
260 | |
---|
261 | result.add("-decimals"); |
---|
262 | result.add("" + getDecimals()); |
---|
263 | |
---|
264 | return (String[]) result.toArray(new String[result.size()]); |
---|
265 | } |
---|
266 | |
---|
267 | /** |
---|
268 | * Parses a given list of options. <p/> |
---|
269 | * |
---|
270 | <!-- options-start --> |
---|
271 | * Valid options are: <p/> |
---|
272 | * |
---|
273 | * <pre> -D |
---|
274 | * Turns on output of debugging information.</pre> |
---|
275 | * |
---|
276 | * <pre> -min <double> |
---|
277 | * The minimum threshold. (default -Double.MAX_VALUE)</pre> |
---|
278 | * |
---|
279 | * <pre> -min-default <double> |
---|
280 | * The replacement for values smaller than the minimum threshold. |
---|
281 | * (default -Double.MAX_VALUE)</pre> |
---|
282 | * |
---|
283 | * <pre> -max <double> |
---|
284 | * The maximum threshold. (default Double.MAX_VALUE)</pre> |
---|
285 | * |
---|
286 | * <pre> -max-default <double> |
---|
287 | * The replacement for values larger than the maximum threshold. |
---|
288 | * (default Double.MAX_VALUE)</pre> |
---|
289 | * |
---|
290 | * <pre> -closeto <double> |
---|
291 | * The number values are checked for closeness. (default 0)</pre> |
---|
292 | * |
---|
293 | * <pre> -closeto-default <double> |
---|
294 | * The replacement for values that are close to '-closeto'. |
---|
295 | * (default 0)</pre> |
---|
296 | * |
---|
297 | * <pre> -closeto-tolerance <double> |
---|
298 | * The tolerance below which numbers are considered being close to |
---|
299 | * to each other. (default 1E-6)</pre> |
---|
300 | * |
---|
301 | * <pre> -decimals <int> |
---|
302 | * The number of decimals to round to, -1 means no rounding at all. |
---|
303 | * (default -1)</pre> |
---|
304 | * |
---|
305 | * <pre> -R <col1,col2,...> |
---|
306 | * The list of columns to cleanse, e.g., first-last or first-3,5-last. |
---|
307 | * (default first-last)</pre> |
---|
308 | * |
---|
309 | * <pre> -V |
---|
310 | * Inverts the matching sense.</pre> |
---|
311 | * |
---|
312 | * <pre> -include-class |
---|
313 | * Whether to include the class in the cleansing. |
---|
314 | * The class column will always be skipped, if this flag is not |
---|
315 | * present. (default no)</pre> |
---|
316 | * |
---|
317 | <!-- options-end --> |
---|
318 | * |
---|
319 | * @param options the list of options as an array of strings |
---|
320 | * @throws Exception if an option is not supported |
---|
321 | */ |
---|
322 | public void setOptions(String[] options) throws Exception { |
---|
323 | String tmpStr; |
---|
324 | |
---|
325 | tmpStr = Utils.getOption("min", options); |
---|
326 | if (tmpStr.length() != 0) |
---|
327 | setMinThreshold(Double.parseDouble(tmpStr)); |
---|
328 | else |
---|
329 | setMinThreshold(-Double.MAX_VALUE); |
---|
330 | |
---|
331 | tmpStr = Utils.getOption("min-default", options); |
---|
332 | if (tmpStr.length() != 0) |
---|
333 | setMinDefault(Double.parseDouble(tmpStr)); |
---|
334 | else |
---|
335 | setMinDefault(-Double.MAX_VALUE); |
---|
336 | |
---|
337 | tmpStr = Utils.getOption("max", options); |
---|
338 | if (tmpStr.length() != 0) |
---|
339 | setMaxThreshold(Double.parseDouble(tmpStr)); |
---|
340 | else |
---|
341 | setMaxThreshold(Double.MAX_VALUE); |
---|
342 | |
---|
343 | tmpStr = Utils.getOption("max-default", options); |
---|
344 | if (tmpStr.length() != 0) |
---|
345 | setMaxDefault(Double.parseDouble(tmpStr)); |
---|
346 | else |
---|
347 | setMaxDefault(Double.MAX_VALUE); |
---|
348 | |
---|
349 | tmpStr = Utils.getOption("closeto", options); |
---|
350 | if (tmpStr.length() != 0) |
---|
351 | setCloseTo(Double.parseDouble(tmpStr)); |
---|
352 | else |
---|
353 | setCloseTo(0); |
---|
354 | |
---|
355 | tmpStr = Utils.getOption("closeto-default", options); |
---|
356 | if (tmpStr.length() != 0) |
---|
357 | setCloseToDefault(Double.parseDouble(tmpStr)); |
---|
358 | else |
---|
359 | setCloseToDefault(0); |
---|
360 | |
---|
361 | tmpStr = Utils.getOption("closeto-tolerance", options); |
---|
362 | if (tmpStr.length() != 0) |
---|
363 | setCloseToTolerance(Double.parseDouble(tmpStr)); |
---|
364 | else |
---|
365 | setCloseToTolerance(1E-6); |
---|
366 | |
---|
367 | tmpStr = Utils.getOption("R", options); |
---|
368 | if (tmpStr.length() != 0) |
---|
369 | setAttributeIndices(tmpStr); |
---|
370 | else |
---|
371 | setAttributeIndices("first-last"); |
---|
372 | |
---|
373 | setInvertSelection(Utils.getFlag("V", options)); |
---|
374 | |
---|
375 | setIncludeClass(Utils.getFlag("include-class", options)); |
---|
376 | |
---|
377 | tmpStr = Utils.getOption("decimals", options); |
---|
378 | if (tmpStr.length() != 0) |
---|
379 | setDecimals(Integer.parseInt(tmpStr)); |
---|
380 | else |
---|
381 | setDecimals(-1); |
---|
382 | |
---|
383 | super.setOptions(options); |
---|
384 | } |
---|
385 | |
---|
386 | /** |
---|
387 | * Returns the Capabilities of this filter. |
---|
388 | * |
---|
389 | * @return the capabilities of this object |
---|
390 | * @see Capabilities |
---|
391 | */ |
---|
392 | public Capabilities getCapabilities() { |
---|
393 | Capabilities result = super.getCapabilities(); |
---|
394 | result.disableAll(); |
---|
395 | |
---|
396 | // attributes |
---|
397 | result.enableAllAttributes(); |
---|
398 | result.enable(Capability.MISSING_VALUES); |
---|
399 | |
---|
400 | // class |
---|
401 | result.enableAllClasses(); |
---|
402 | result.enable(Capability.MISSING_CLASS_VALUES); |
---|
403 | result.enable(Capability.NO_CLASS); |
---|
404 | |
---|
405 | return result; |
---|
406 | } |
---|
407 | |
---|
408 | /** |
---|
409 | * Determines the output format based on the input format and returns |
---|
410 | * this. In case the output format cannot be returned immediately, i.e., |
---|
411 | * immediateOutputFormat() returns false, then this method will be called |
---|
412 | * from batchFinished(). |
---|
413 | * |
---|
414 | * @param inputFormat the input format to base the output format on |
---|
415 | * @return the output format |
---|
416 | * @throws Exception in case the determination goes wrong |
---|
417 | * @see #hasImmediateOutputFormat() |
---|
418 | * @see #batchFinished() |
---|
419 | */ |
---|
420 | protected Instances determineOutputFormat(Instances inputFormat) |
---|
421 | throws Exception { |
---|
422 | |
---|
423 | m_Cols.setUpper(inputFormat.numAttributes() - 1); |
---|
424 | |
---|
425 | return new Instances(inputFormat); |
---|
426 | } |
---|
427 | |
---|
428 | /** |
---|
429 | * processes the given instance (may change the provided instance) and |
---|
430 | * returns the modified version. |
---|
431 | * |
---|
432 | * @param instance the instance to process |
---|
433 | * @return the modified data |
---|
434 | * @throws Exception in case the processing goes wrong |
---|
435 | */ |
---|
436 | protected Instance process(Instance instance) throws Exception { |
---|
437 | Instance result; |
---|
438 | int i; |
---|
439 | double val; |
---|
440 | double factor; |
---|
441 | |
---|
442 | result = (Instance) instance.copy(); |
---|
443 | |
---|
444 | if (m_Decimals > -1) |
---|
445 | factor = StrictMath.pow(10, m_Decimals); |
---|
446 | else |
---|
447 | factor = 1; |
---|
448 | |
---|
449 | for (i = 0; i < result.numAttributes(); i++) { |
---|
450 | // only numeric attributes |
---|
451 | if (!result.attribute(i).isNumeric()) |
---|
452 | continue; |
---|
453 | |
---|
454 | // out of range? |
---|
455 | if (!m_Cols.isInRange(i)) |
---|
456 | continue; |
---|
457 | |
---|
458 | // skip class? |
---|
459 | if ( (result.classIndex() == i) && (!m_IncludeClass) ) |
---|
460 | continue; |
---|
461 | |
---|
462 | // too small? |
---|
463 | if (result.value(i) < m_MinThreshold) { |
---|
464 | if (getDebug()) |
---|
465 | System.out.println("Too small: " + result.value(i) + " -> " + m_MinDefault); |
---|
466 | result.setValue(i, m_MinDefault); |
---|
467 | } |
---|
468 | // too big? |
---|
469 | else if (result.value(i) > m_MaxThreshold) { |
---|
470 | if (getDebug()) |
---|
471 | System.out.println("Too big: " + result.value(i) + " -> " + m_MaxDefault); |
---|
472 | result.setValue(i, m_MaxDefault); |
---|
473 | } |
---|
474 | // too close? |
---|
475 | else if ( (result.value(i) - m_CloseTo < m_CloseToTolerance) |
---|
476 | && (m_CloseTo - result.value(i) < m_CloseToTolerance) |
---|
477 | && (result.value(i) != m_CloseTo) ) { |
---|
478 | if (getDebug()) |
---|
479 | System.out.println("Too close: " + result.value(i) + " -> " + m_CloseToDefault); |
---|
480 | result.setValue(i, m_CloseToDefault); |
---|
481 | } |
---|
482 | |
---|
483 | // decimals? |
---|
484 | if (m_Decimals > -1) { |
---|
485 | val = result.value(i); |
---|
486 | val = StrictMath.round(val * factor) / factor; |
---|
487 | result.setValue(i, val); |
---|
488 | } |
---|
489 | } |
---|
490 | |
---|
491 | return result; |
---|
492 | } |
---|
493 | |
---|
494 | /** |
---|
495 | * Returns the tip text for this property |
---|
496 | * |
---|
497 | * @return tip text for this property suitable for |
---|
498 | * displaying in the explorer/experimenter gui |
---|
499 | */ |
---|
500 | public String minThresholdTipText() { |
---|
501 | return "The minimum threshold below values are replaced by a default."; |
---|
502 | } |
---|
503 | |
---|
504 | /** |
---|
505 | * Get the minimum threshold. |
---|
506 | * |
---|
507 | * @return the minimum threshold. |
---|
508 | */ |
---|
509 | public double getMinThreshold() { |
---|
510 | return m_MinThreshold; |
---|
511 | } |
---|
512 | |
---|
513 | /** |
---|
514 | * Set the minimum threshold. |
---|
515 | * |
---|
516 | * @param value the minimum threshold to use. |
---|
517 | */ |
---|
518 | public void setMinThreshold(double value) { |
---|
519 | m_MinThreshold = value; |
---|
520 | } |
---|
521 | |
---|
522 | /** |
---|
523 | * Returns the tip text for this property |
---|
524 | * |
---|
525 | * @return tip text for this property suitable for |
---|
526 | * displaying in the explorer/experimenter gui |
---|
527 | */ |
---|
528 | public String minDefaultTipText() { |
---|
529 | return "The default value to replace values that are below the minimum threshold."; |
---|
530 | } |
---|
531 | |
---|
532 | /** |
---|
533 | * Get the minimum default. |
---|
534 | * |
---|
535 | * @return the minimum default. |
---|
536 | */ |
---|
537 | public double getMinDefault() { |
---|
538 | return m_MinDefault; |
---|
539 | } |
---|
540 | |
---|
541 | /** |
---|
542 | * Set the minimum default. |
---|
543 | * |
---|
544 | * @param value the minimum default to use. |
---|
545 | */ |
---|
546 | public void setMinDefault(double value) { |
---|
547 | m_MinDefault = value; |
---|
548 | } |
---|
549 | |
---|
550 | /** |
---|
551 | * Returns the tip text for this property |
---|
552 | * |
---|
553 | * @return tip text for this property suitable for |
---|
554 | * displaying in the explorer/experimenter gui |
---|
555 | */ |
---|
556 | public String maxThresholdTipText() { |
---|
557 | return "The maximum threshold above values are replaced by a default."; |
---|
558 | } |
---|
559 | |
---|
560 | /** |
---|
561 | * Get the maximum threshold. |
---|
562 | * |
---|
563 | * @return the maximum threshold. |
---|
564 | */ |
---|
565 | public double getMaxThreshold() { |
---|
566 | return m_MaxThreshold; |
---|
567 | } |
---|
568 | |
---|
569 | /** |
---|
570 | * Set the maximum threshold. |
---|
571 | * |
---|
572 | * @param value the maximum threshold to use. |
---|
573 | */ |
---|
574 | public void setMaxThreshold(double value) { |
---|
575 | m_MaxThreshold = value; |
---|
576 | } |
---|
577 | |
---|
578 | /** |
---|
579 | * Returns the tip text for this property |
---|
580 | * |
---|
581 | * @return tip text for this property suitable for |
---|
582 | * displaying in the explorer/experimenter gui |
---|
583 | */ |
---|
584 | public String maxDefaultTipText() { |
---|
585 | return "The default value to replace values that are above the maximum threshold."; |
---|
586 | } |
---|
587 | |
---|
588 | /** |
---|
589 | * Get the maximum default. |
---|
590 | * |
---|
591 | * @return the maximum default. |
---|
592 | */ |
---|
593 | public double getMaxDefault() { |
---|
594 | return m_MaxDefault; |
---|
595 | } |
---|
596 | |
---|
597 | /** |
---|
598 | * Set the naximum default. |
---|
599 | * |
---|
600 | * @param value the maximum default to use. |
---|
601 | */ |
---|
602 | public void setMaxDefault(double value) { |
---|
603 | m_MaxDefault = value; |
---|
604 | } |
---|
605 | |
---|
606 | /** |
---|
607 | * Returns the tip text for this property |
---|
608 | * |
---|
609 | * @return tip text for this property suitable for |
---|
610 | * displaying in the explorer/experimenter gui |
---|
611 | */ |
---|
612 | public String closeToTipText() { |
---|
613 | return |
---|
614 | "The number values are checked for whether they are too close to " |
---|
615 | + "and get replaced by a default."; |
---|
616 | } |
---|
617 | |
---|
618 | /** |
---|
619 | * Get the "close to" number. |
---|
620 | * |
---|
621 | * @return the "close to" number. |
---|
622 | */ |
---|
623 | public double getCloseTo() { |
---|
624 | return m_CloseTo; |
---|
625 | } |
---|
626 | |
---|
627 | /** |
---|
628 | * Set the "close to" number. |
---|
629 | * |
---|
630 | * @param value the number to use for checking closeness. |
---|
631 | */ |
---|
632 | public void setCloseTo(double value) { |
---|
633 | m_CloseTo = value; |
---|
634 | } |
---|
635 | |
---|
636 | /** |
---|
637 | * Returns the tip text for this property |
---|
638 | * |
---|
639 | * @return tip text for this property suitable for |
---|
640 | * displaying in the explorer/experimenter gui |
---|
641 | */ |
---|
642 | public String closeToDefaultTipText() { |
---|
643 | return "The default value to replace values with that are too close."; |
---|
644 | } |
---|
645 | |
---|
646 | /** |
---|
647 | * Get the "close to" default. |
---|
648 | * |
---|
649 | * @return the "close to" default. |
---|
650 | */ |
---|
651 | public double getCloseToDefault() { |
---|
652 | return m_CloseToDefault; |
---|
653 | } |
---|
654 | |
---|
655 | /** |
---|
656 | * Set the "close to" default. |
---|
657 | * |
---|
658 | * @param value the "close to" default to use. |
---|
659 | */ |
---|
660 | public void setCloseToDefault(double value) { |
---|
661 | m_CloseToDefault = value; |
---|
662 | } |
---|
663 | |
---|
664 | /** |
---|
665 | * Returns the tip text for this property |
---|
666 | * |
---|
667 | * @return tip text for this property suitable for |
---|
668 | * displaying in the explorer/experimenter gui |
---|
669 | */ |
---|
670 | public String closeToToleranceTipText() { |
---|
671 | return "The value below which values are considered close to."; |
---|
672 | } |
---|
673 | |
---|
674 | /** |
---|
675 | * Get the "close to" Tolerance. |
---|
676 | * |
---|
677 | * @return the "close to" Tolerance. |
---|
678 | */ |
---|
679 | public double getCloseToTolerance() { |
---|
680 | return m_CloseToTolerance; |
---|
681 | } |
---|
682 | |
---|
683 | /** |
---|
684 | * Set the "close to" Tolerance. |
---|
685 | * |
---|
686 | * @param value the "close to" Tolerance to use. |
---|
687 | */ |
---|
688 | public void setCloseToTolerance(double value) { |
---|
689 | m_CloseToTolerance = value; |
---|
690 | } |
---|
691 | |
---|
692 | /** |
---|
693 | * Returns the tip text for this property |
---|
694 | * |
---|
695 | * @return tip text for this property suitable for |
---|
696 | * displaying in the explorer/experimenter gui |
---|
697 | */ |
---|
698 | public String attributeIndicesTipText() { |
---|
699 | return "The selection of columns to use in the cleansing processs, first and last are valid indices."; |
---|
700 | } |
---|
701 | |
---|
702 | /** |
---|
703 | * Gets the selection of the columns, e.g., first-last or first-3,5-last |
---|
704 | * |
---|
705 | * @return the selected indices |
---|
706 | */ |
---|
707 | public String getAttributeIndices() { |
---|
708 | return m_Cols.getRanges(); |
---|
709 | } |
---|
710 | |
---|
711 | /** |
---|
712 | * Sets the columns to use, e.g., first-last or first-3,5-last |
---|
713 | * |
---|
714 | * @param value the columns to use |
---|
715 | */ |
---|
716 | public void setAttributeIndices(String value) { |
---|
717 | m_Cols.setRanges(value); |
---|
718 | } |
---|
719 | |
---|
720 | /** |
---|
721 | * Returns the tip text for this property |
---|
722 | * |
---|
723 | * @return tip text for this property suitable for |
---|
724 | * displaying in the explorer/experimenter gui |
---|
725 | */ |
---|
726 | public String invertSelectionTipText() { |
---|
727 | return "If enabled the selection of the columns is inverted."; |
---|
728 | } |
---|
729 | |
---|
730 | /** |
---|
731 | * Gets whether the selection of the columns is inverted |
---|
732 | * |
---|
733 | * @return true if the selection is inverted |
---|
734 | */ |
---|
735 | public boolean getInvertSelection() { |
---|
736 | return m_Cols.getInvert(); |
---|
737 | } |
---|
738 | |
---|
739 | /** |
---|
740 | * Sets whether the selection of the indices is inverted or not |
---|
741 | * |
---|
742 | * @param value the new invert setting |
---|
743 | */ |
---|
744 | public void setInvertSelection(boolean value) { |
---|
745 | m_Cols.setInvert(value); |
---|
746 | } |
---|
747 | |
---|
748 | /** |
---|
749 | * Returns the tip text for this property |
---|
750 | * |
---|
751 | * @return tip text for this property suitable for |
---|
752 | * displaying in the explorer/experimenter gui |
---|
753 | */ |
---|
754 | public String includeClassTipText() { |
---|
755 | return "If disabled, the class attribute will be always left out of the cleaning process."; |
---|
756 | } |
---|
757 | |
---|
758 | /** |
---|
759 | * Gets whether the class is included in the cleaning process or always |
---|
760 | * skipped. |
---|
761 | * |
---|
762 | * @return true if the class can be considered for cleaning. |
---|
763 | */ |
---|
764 | public boolean getIncludeClass() { |
---|
765 | return m_IncludeClass; |
---|
766 | } |
---|
767 | |
---|
768 | /** |
---|
769 | * Sets whether the class can be cleaned, too. |
---|
770 | * |
---|
771 | * @param value true if the class can be cleansed, too |
---|
772 | */ |
---|
773 | public void setIncludeClass(boolean value) { |
---|
774 | m_IncludeClass = value; |
---|
775 | } |
---|
776 | |
---|
777 | /** |
---|
778 | * Returns the tip text for this property |
---|
779 | * |
---|
780 | * @return tip text for this property suitable for |
---|
781 | * displaying in the explorer/experimenter gui |
---|
782 | */ |
---|
783 | public String decimalsTipText() { |
---|
784 | return "The number of decimals to round to, -1 means no rounding at all."; |
---|
785 | } |
---|
786 | |
---|
787 | /** |
---|
788 | * Get the number of decimals to round to. |
---|
789 | * |
---|
790 | * @return the number of decimals. |
---|
791 | */ |
---|
792 | public int getDecimals() { |
---|
793 | return m_Decimals; |
---|
794 | } |
---|
795 | |
---|
796 | /** |
---|
797 | * Set the number of decimals to round to. |
---|
798 | * |
---|
799 | * @param value the number of decimals. |
---|
800 | */ |
---|
801 | public void setDecimals(int value) { |
---|
802 | m_Decimals = value; |
---|
803 | } |
---|
804 | |
---|
805 | /** |
---|
806 | * Returns the revision string. |
---|
807 | * |
---|
808 | * @return the revision |
---|
809 | */ |
---|
810 | public String getRevision() { |
---|
811 | return RevisionUtils.extract("$Revision: 5987 $"); |
---|
812 | } |
---|
813 | |
---|
814 | /** |
---|
815 | * Runs the filter from commandline, use "-h" to see all options. |
---|
816 | * |
---|
817 | * @param args the commandline options for the filter |
---|
818 | */ |
---|
819 | public static void main(String[] args) { |
---|
820 | runFilter(new NumericCleaner(), args); |
---|
821 | } |
---|
822 | } |
---|