1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * PairedTTester.java |
---|
19 | * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand |
---|
20 | * |
---|
21 | */ |
---|
22 | |
---|
23 | |
---|
24 | package weka.experiment; |
---|
25 | |
---|
26 | import weka.core.Attribute; |
---|
27 | import weka.core.FastVector; |
---|
28 | import weka.core.Instance; |
---|
29 | import weka.core.Instances; |
---|
30 | import weka.core.Option; |
---|
31 | import weka.core.OptionHandler; |
---|
32 | import weka.core.Range; |
---|
33 | import weka.core.RevisionHandler; |
---|
34 | import weka.core.RevisionUtils; |
---|
35 | import weka.core.Utils; |
---|
36 | |
---|
37 | import java.io.BufferedReader; |
---|
38 | import java.io.FileReader; |
---|
39 | import java.io.Serializable; |
---|
40 | import java.text.SimpleDateFormat; |
---|
41 | import java.util.Date; |
---|
42 | import java.util.Enumeration; |
---|
43 | import java.util.Vector; |
---|
44 | |
---|
45 | /** |
---|
46 | * Calculates T-Test statistics on data stored in a set of instances. <p/> |
---|
47 | * |
---|
48 | <!-- options-start --> |
---|
49 | * Valid options are: <p/> |
---|
50 | * |
---|
51 | * <pre> -D <index,index2-index4,...> |
---|
52 | * Specify list of columns that specify a unique |
---|
53 | * dataset. |
---|
54 | * First and last are valid indexes. (default none)</pre> |
---|
55 | * |
---|
56 | * <pre> -R <index> |
---|
57 | * Set the index of the column containing the run number</pre> |
---|
58 | * |
---|
59 | * <pre> -F <index> |
---|
60 | * Set the index of the column containing the fold number</pre> |
---|
61 | * |
---|
62 | * <pre> -G <index1,index2-index4,...> |
---|
63 | * Specify list of columns that specify a unique |
---|
64 | * 'result generator' (eg: classifier name and options). |
---|
65 | * First and last are valid indexes. (default none)</pre> |
---|
66 | * |
---|
67 | * <pre> -S <significance level> |
---|
68 | * Set the significance level for comparisons (default 0.05)</pre> |
---|
69 | * |
---|
70 | * <pre> -V |
---|
71 | * Show standard deviations</pre> |
---|
72 | * |
---|
73 | * <pre> -L |
---|
74 | * Produce table comparisons in Latex table format</pre> |
---|
75 | * |
---|
76 | * <pre> -csv |
---|
77 | * Produce table comparisons in CSV table format</pre> |
---|
78 | * |
---|
79 | * <pre> -html |
---|
80 | * Produce table comparisons in HTML table format</pre> |
---|
81 | * |
---|
82 | * <pre> -significance |
---|
83 | * Produce table comparisons with only the significance values</pre> |
---|
84 | * |
---|
85 | * <pre> -gnuplot |
---|
86 | * Produce table comparisons output suitable for GNUPlot</pre> |
---|
87 | * |
---|
88 | <!-- options-end --> |
---|
89 | * |
---|
90 | * @author Len Trigg (trigg@cs.waikato.ac.nz) |
---|
91 | * @version $Revision: 5415 $ |
---|
92 | */ |
---|
93 | public class PairedTTester |
---|
94 | implements OptionHandler, Tester, RevisionHandler { |
---|
95 | |
---|
96 | /** for serialization */ |
---|
97 | static final long serialVersionUID = 8370014624008728610L; |
---|
98 | |
---|
99 | /** The set of instances we will analyse */ |
---|
100 | protected Instances m_Instances; |
---|
101 | |
---|
102 | /** The index of the column containing the run number (resolved from m_RunColumnSet in prepareData) */ |
---|
103 | protected int m_RunColumn = 0; |
---|
104 | |
---|
105 | /** The option setting for the run number column (-1 means use the last attribute) */ |
---|
106 | protected int m_RunColumnSet = -1; |
---|
107 | |
---|
108 | /** The option setting for the fold number column (-1 means none) */ |
---|
109 | protected int m_FoldColumn = -1; |
---|
110 | |
---|
111 | /** The column to sort on (-1 means default sorting) */ |
---|
112 | protected int m_SortColumn = -1; |
---|
113 | |
---|
114 | /** The sorting of the datasets (according to the sort column) */ |
---|
115 | protected int[] m_SortOrder = null; |
---|
116 | |
---|
117 | /** The sorting of the columns (test base is always first) */ |
---|
118 | protected int[] m_ColOrder = null; |
---|
119 | |
---|
120 | /** The significance level for comparisons */ |
---|
121 | protected double m_SignificanceLevel = 0.05; |
---|
122 | |
---|
123 | /** |
---|
124 | * The range of columns that specify a unique "dataset" |
---|
125 | * (eg: scheme plus configuration) |
---|
126 | */ |
---|
127 | protected Range m_DatasetKeyColumnsRange = new Range(); |
---|
128 | |
---|
129 | /** An array containing the indexes of just the selected columns */ |
---|
130 | protected int [] m_DatasetKeyColumns; |
---|
131 | |
---|
132 | /** The list of dataset specifiers */ |
---|
133 | protected DatasetSpecifiers m_DatasetSpecifiers = |
---|
134 | new DatasetSpecifiers(); |
---|
135 | |
---|
136 | /** |
---|
137 | * The range of columns that specify a unique result set |
---|
138 | * (eg: scheme plus configuration) |
---|
139 | */ |
---|
140 | protected Range m_ResultsetKeyColumnsRange = new Range(); |
---|
141 | |
---|
142 | /** An array containing the indexes of just the selected columns */ |
---|
143 | protected int [] m_ResultsetKeyColumns; |
---|
144 | |
---|
145 | /** An array containing the indexes of the resultsets to display (null means every resultset is displayed) */ |
---|
146 | protected int[] m_DisplayedResultsets = null; |
---|
147 | |
---|
148 | /** Holds one Resultset object for each distinct resultset found in the instances */ |
---|
149 | protected FastVector m_Resultsets = new FastVector(); |
---|
150 | |
---|
151 | /** Indicates whether the instances have been partitioned (set to true at the end of prepareData) */ |
---|
152 | protected boolean m_ResultsetsValid; |
---|
153 | |
---|
154 | /** Indicates whether standard deviations should be displayed */ |
---|
155 | protected boolean m_ShowStdDevs = false; |
---|
156 | |
---|
157 | /** the instance of the class to produce the output. */ |
---|
158 | protected ResultMatrix m_ResultMatrix = new ResultMatrixPlainText(); |
---|
159 | |
---|
160 | /** A list of unique "dataset" specifiers that have been observed */ |
---|
161 | protected class DatasetSpecifiers |
---|
162 | implements RevisionHandler, Serializable { |
---|
163 | |
---|
164 | /** for serialization. */ |
---|
165 | private static final long serialVersionUID = -9020938059902723401L; |
---|
166 | |
---|
167 | /** the specifiers that have been observed */ |
---|
168 | FastVector m_Specifiers = new FastVector(); |
---|
169 | |
---|
170 | /** |
---|
171 | * Removes all specifiers. |
---|
172 | */ |
---|
173 | protected void removeAllSpecifiers() { |
---|
174 | |
---|
175 | m_Specifiers.removeAllElements(); |
---|
176 | } |
---|
177 | |
---|
178 | /** |
---|
179 | * Add an instance to the list of specifiers (if necessary) |
---|
180 | * |
---|
181 | * @param inst the instance to add |
---|
182 | */ |
---|
183 | protected void add(Instance inst) { |
---|
184 | |
---|
185 | for (int i = 0; i < m_Specifiers.size(); i++) { |
---|
186 | Instance specifier = (Instance)m_Specifiers.elementAt(i); |
---|
187 | boolean found = true; |
---|
188 | for (int j = 0; j < m_DatasetKeyColumns.length; j++) { |
---|
189 | if (inst.value(m_DatasetKeyColumns[j]) != |
---|
190 | specifier.value(m_DatasetKeyColumns[j])) { |
---|
191 | found = false; |
---|
192 | } |
---|
193 | } |
---|
194 | if (found) { |
---|
195 | return; |
---|
196 | } |
---|
197 | } |
---|
198 | m_Specifiers.addElement(inst); |
---|
199 | } |
---|
200 | |
---|
201 | /** |
---|
202 | * Get the template at the given position. |
---|
203 | * |
---|
204 | * @param i the index |
---|
205 | * @return the template |
---|
206 | */ |
---|
207 | protected Instance specifier(int i) { |
---|
208 | |
---|
209 | return (Instance)m_Specifiers.elementAt(i); |
---|
210 | } |
---|
211 | |
---|
212 | /** |
---|
213 | * Gets the number of specifiers. |
---|
214 | * |
---|
215 | * @return the current number of specifiers |
---|
216 | */ |
---|
217 | protected int numSpecifiers() { |
---|
218 | |
---|
219 | return m_Specifiers.size(); |
---|
220 | } |
---|
221 | |
---|
222 | /** |
---|
223 | * Returns the revision string. |
---|
224 | * |
---|
225 | * @return the revision |
---|
226 | */ |
---|
227 | public String getRevision() { |
---|
228 | return RevisionUtils.extract("$Revision: 5415 $"); |
---|
229 | } |
---|
230 | } |
---|
231 | |
---|
232 | /** Utility class to store the instances pertaining to a dataset */ |
---|
233 | protected class Dataset |
---|
234 | implements RevisionHandler, Serializable { |
---|
235 | |
---|
236 | /** for serialization. */ |
---|
237 | private static final long serialVersionUID = -2801397601839433282L; |
---|
238 | |
---|
239 | /** the template */ |
---|
240 | Instance m_Template; |
---|
241 | |
---|
242 | /** the dataset */ |
---|
243 | FastVector m_Dataset; |
---|
244 | |
---|
245 | /** |
---|
246 | * Constructor |
---|
247 | * |
---|
248 | * @param template the template |
---|
249 | */ |
---|
250 | public Dataset(Instance template) { |
---|
251 | |
---|
252 | m_Template = template; |
---|
253 | m_Dataset = new FastVector(); |
---|
254 | add(template); |
---|
255 | } |
---|
256 | |
---|
257 | /** |
---|
258 | * Returns true if the two instances match on those attributes that have |
---|
259 | * been designated key columns (eg: scheme name and scheme options) |
---|
260 | * |
---|
261 | * @param first the first instance |
---|
262 | * @return true if first and second match on the currently set key columns |
---|
263 | */ |
---|
264 | protected boolean matchesTemplate(Instance first) { |
---|
265 | |
---|
266 | for (int i = 0; i < m_DatasetKeyColumns.length; i++) { |
---|
267 | if (first.value(m_DatasetKeyColumns[i]) != |
---|
268 | m_Template.value(m_DatasetKeyColumns[i])) { |
---|
269 | return false; |
---|
270 | } |
---|
271 | } |
---|
272 | return true; |
---|
273 | } |
---|
274 | |
---|
275 | /** |
---|
276 | * Adds the given instance to the dataset |
---|
277 | * |
---|
278 | * @param inst the instance to add |
---|
279 | */ |
---|
280 | protected void add(Instance inst) { |
---|
281 | |
---|
282 | m_Dataset.addElement(inst); |
---|
283 | } |
---|
284 | |
---|
285 | /** |
---|
286 | * Returns a vector containing the instances in the dataset |
---|
287 | * |
---|
288 | * @return the current contents |
---|
289 | */ |
---|
290 | protected FastVector contents() { |
---|
291 | |
---|
292 | return m_Dataset; |
---|
293 | } |
---|
294 | |
---|
295 | /** |
---|
296 | * Sorts the instances in the dataset by the run number. |
---|
297 | * |
---|
298 | * @param runColumn a value of type 'int' |
---|
299 | */ |
---|
300 | public void sort(int runColumn) { |
---|
301 | |
---|
302 | double [] runNums = new double [m_Dataset.size()]; |
---|
303 | for (int j = 0; j < runNums.length; j++) { |
---|
304 | runNums[j] = ((Instance) m_Dataset.elementAt(j)).value(runColumn); |
---|
305 | } |
---|
306 | int [] index = Utils.stableSort(runNums); |
---|
307 | FastVector newDataset = new FastVector(runNums.length); |
---|
308 | for (int j = 0; j < index.length; j++) { |
---|
309 | newDataset.addElement(m_Dataset.elementAt(index[j])); |
---|
310 | } |
---|
311 | m_Dataset = newDataset; |
---|
312 | } |
---|
313 | |
---|
314 | /** |
---|
315 | * Returns the revision string. |
---|
316 | * |
---|
317 | * @return the revision |
---|
318 | */ |
---|
319 | public String getRevision() { |
---|
320 | return RevisionUtils.extract("$Revision: 5415 $"); |
---|
321 | } |
---|
322 | } |
---|
323 | |
---|
324 | /** Utility class to store the instances in a resultset */ |
---|
325 | protected class Resultset |
---|
326 | implements RevisionHandler, Serializable { |
---|
327 | |
---|
328 | /** for serialization. */ |
---|
329 | private static final long serialVersionUID = 1543786683821339978L; |
---|
330 | |
---|
331 | /** the template */ |
---|
332 | Instance m_Template; |
---|
333 | |
---|
334 | /** the dataset */ |
---|
335 | FastVector m_Datasets; |
---|
336 | |
---|
337 | /** |
---|
338 | * Constructir |
---|
339 | * |
---|
340 | * @param template the template |
---|
341 | */ |
---|
342 | public Resultset(Instance template) { |
---|
343 | |
---|
344 | m_Template = template; |
---|
345 | m_Datasets = new FastVector(); |
---|
346 | add(template); |
---|
347 | } |
---|
348 | |
---|
349 | /** |
---|
350 | * Returns true if the two instances match on those attributes that have |
---|
351 | * been designated key columns (eg: scheme name and scheme options) |
---|
352 | * |
---|
353 | * @param first the first instance |
---|
354 | * @return true if first and second match on the currently set key columns |
---|
355 | */ |
---|
356 | protected boolean matchesTemplate(Instance first) { |
---|
357 | |
---|
358 | for (int i = 0; i < m_ResultsetKeyColumns.length; i++) { |
---|
359 | if (first.value(m_ResultsetKeyColumns[i]) != |
---|
360 | m_Template.value(m_ResultsetKeyColumns[i])) { |
---|
361 | return false; |
---|
362 | } |
---|
363 | } |
---|
364 | return true; |
---|
365 | } |
---|
366 | |
---|
367 | /** |
---|
368 | * Returns a string descriptive of the resultset key column values |
---|
369 | * for this resultset |
---|
370 | * |
---|
371 | * @return a value of type 'String' |
---|
372 | */ |
---|
373 | protected String templateString() { |
---|
374 | |
---|
375 | String result = ""; |
---|
376 | String tempResult = ""; |
---|
377 | for (int i = 0; i < m_ResultsetKeyColumns.length; i++) { |
---|
378 | tempResult = m_Template.toString(m_ResultsetKeyColumns[i]) + ' '; |
---|
379 | |
---|
380 | // compact the string |
---|
381 | tempResult = Utils.removeSubstring(tempResult, "weka.classifiers."); |
---|
382 | tempResult = Utils.removeSubstring(tempResult, "weka.filters."); |
---|
383 | tempResult = Utils.removeSubstring(tempResult, "weka.attributeSelection."); |
---|
384 | result += tempResult; |
---|
385 | } |
---|
386 | return result.trim(); |
---|
387 | } |
---|
388 | |
---|
389 | /** |
---|
390 | * Returns a vector containing all instances belonging to one dataset. |
---|
391 | * |
---|
392 | * @param inst a template instance |
---|
393 | * @return a value of type 'FastVector' |
---|
394 | */ |
---|
395 | public FastVector dataset(Instance inst) { |
---|
396 | |
---|
397 | for (int i = 0; i < m_Datasets.size(); i++) { |
---|
398 | if (((Dataset)m_Datasets.elementAt(i)).matchesTemplate(inst)) { |
---|
399 | return ((Dataset)m_Datasets.elementAt(i)).contents(); |
---|
400 | } |
---|
401 | } |
---|
402 | return null; |
---|
403 | } |
---|
404 | |
---|
405 | /** |
---|
406 | * Adds an instance to this resultset |
---|
407 | * |
---|
408 | * @param newInst a value of type 'Instance' |
---|
409 | */ |
---|
410 | public void add(Instance newInst) { |
---|
411 | |
---|
412 | for (int i = 0; i < m_Datasets.size(); i++) { |
---|
413 | if (((Dataset)m_Datasets.elementAt(i)).matchesTemplate(newInst)) { |
---|
414 | ((Dataset)m_Datasets.elementAt(i)).add(newInst); |
---|
415 | return; |
---|
416 | } |
---|
417 | } |
---|
418 | Dataset newDataset = new Dataset(newInst); |
---|
419 | m_Datasets.addElement(newDataset); |
---|
420 | } |
---|
421 | |
---|
422 | /** |
---|
423 | * Sorts the instances in each dataset by the run number. |
---|
424 | * |
---|
425 | * @param runColumn a value of type 'int' |
---|
426 | */ |
---|
427 | public void sort(int runColumn) { |
---|
428 | |
---|
429 | for (int i = 0; i < m_Datasets.size(); i++) { |
---|
430 | ((Dataset)m_Datasets.elementAt(i)).sort(runColumn); |
---|
431 | } |
---|
432 | } |
---|
433 | |
---|
434 | /** |
---|
435 | * Returns the revision string. |
---|
436 | * |
---|
437 | * @return the revision |
---|
438 | */ |
---|
439 | public String getRevision() { |
---|
440 | return RevisionUtils.extract("$Revision: 5415 $"); |
---|
441 | } |
---|
442 | } // Resultset |
---|
443 | |
---|
444 | |
---|
445 | /** |
---|
446 | * Returns a string descriptive of the key column values for |
---|
447 | * the "datasets |
---|
448 | * |
---|
449 | * @param template the template |
---|
450 | * @return a value of type 'String' |
---|
451 | */ |
---|
452 | protected String templateString(Instance template) { |
---|
453 | |
---|
454 | String result = ""; |
---|
455 | for (int i = 0; i < m_DatasetKeyColumns.length; i++) { |
---|
456 | result += template.toString(m_DatasetKeyColumns[i]) + ' '; |
---|
457 | } |
---|
458 | if (result.startsWith("weka.classifiers.")) { |
---|
459 | result = result.substring("weka.classifiers.".length()); |
---|
460 | } |
---|
461 | return result.trim(); |
---|
462 | } |
---|
463 | |
---|
464 | /** |
---|
465 | * Sets the matrix to use to produce the output. |
---|
466 | * @param matrix the instance to use to produce the output |
---|
467 | * @see ResultMatrix |
---|
468 | */ |
---|
469 | public void setResultMatrix(ResultMatrix matrix) { |
---|
470 | m_ResultMatrix = matrix; |
---|
471 | } |
---|
472 | |
---|
473 | /** |
---|
474 | * Gets the instance that produces the output. |
---|
475 | * @return the instance to produce the output |
---|
476 | */ |
---|
477 | public ResultMatrix getResultMatrix() { |
---|
478 | return m_ResultMatrix; |
---|
479 | } |
---|
480 | |
---|
481 | /** |
---|
482 | * Set whether standard deviations are displayed or not. |
---|
483 | * @param s true if standard deviations are to be displayed |
---|
484 | */ |
---|
485 | public void setShowStdDevs(boolean s) { |
---|
486 | m_ShowStdDevs = s; |
---|
487 | } |
---|
488 | |
---|
489 | /** |
---|
490 | * Returns true if standard deviations have been requested. |
---|
491 | * @return true if standard deviations are to be displayed. |
---|
492 | */ |
---|
493 | public boolean getShowStdDevs() { |
---|
494 | return m_ShowStdDevs; |
---|
495 | } |
---|
496 | |
---|
497 | /** |
---|
498 | * Separates the instances into resultsets and by dataset/run. |
---|
499 | * |
---|
500 | * @throws Exception if the TTest parameters have not been set. |
---|
501 | */ |
---|
502 | protected void prepareData() throws Exception { |
---|
503 | |
---|
504 | if (m_Instances == null) { |
---|
505 | throw new Exception("No instances have been set"); |
---|
506 | } |
---|
507 | if (m_RunColumnSet == -1) { |
---|
508 | m_RunColumn = m_Instances.numAttributes() - 1; |
---|
509 | } else { |
---|
510 | m_RunColumn = m_RunColumnSet; |
---|
511 | } |
---|
512 | |
---|
513 | if (m_ResultsetKeyColumnsRange == null) { |
---|
514 | throw new Exception("No result specifier columns have been set"); |
---|
515 | } |
---|
516 | m_ResultsetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1); |
---|
517 | m_ResultsetKeyColumns = m_ResultsetKeyColumnsRange.getSelection(); |
---|
518 | |
---|
519 | if (m_DatasetKeyColumnsRange == null) { |
---|
520 | throw new Exception("No dataset specifier columns have been set"); |
---|
521 | } |
---|
522 | m_DatasetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1); |
---|
523 | m_DatasetKeyColumns = m_DatasetKeyColumnsRange.getSelection(); |
---|
524 | |
---|
525 | // Split the data up into result sets |
---|
526 | m_Resultsets.removeAllElements(); |
---|
527 | m_DatasetSpecifiers.removeAllSpecifiers(); |
---|
528 | for (int i = 0; i < m_Instances.numInstances(); i++) { |
---|
529 | Instance current = m_Instances.instance(i); |
---|
530 | if (current.isMissing(m_RunColumn)) { |
---|
531 | throw new Exception("Instance has missing value in run " |
---|
532 | + "column!\n" + current); |
---|
533 | } |
---|
534 | for (int j = 0; j < m_ResultsetKeyColumns.length; j++) { |
---|
535 | if (current.isMissing(m_ResultsetKeyColumns[j])) { |
---|
536 | throw new Exception("Instance has missing value in resultset key " |
---|
537 | + "column " + (m_ResultsetKeyColumns[j] + 1) |
---|
538 | + "!\n" + current); |
---|
539 | } |
---|
540 | } |
---|
541 | for (int j = 0; j < m_DatasetKeyColumns.length; j++) { |
---|
542 | if (current.isMissing(m_DatasetKeyColumns[j])) { |
---|
543 | throw new Exception("Instance has missing value in dataset key " |
---|
544 | + "column " + (m_DatasetKeyColumns[j] + 1) |
---|
545 | + "!\n" + current); |
---|
546 | } |
---|
547 | } |
---|
548 | boolean found = false; |
---|
549 | for (int j = 0; j < m_Resultsets.size(); j++) { |
---|
550 | Resultset resultset = (Resultset) m_Resultsets.elementAt(j); |
---|
551 | if (resultset.matchesTemplate(current)) { |
---|
552 | resultset.add(current); |
---|
553 | found = true; |
---|
554 | break; |
---|
555 | } |
---|
556 | } |
---|
557 | if (!found) { |
---|
558 | Resultset resultset = new Resultset(current); |
---|
559 | m_Resultsets.addElement(resultset); |
---|
560 | } |
---|
561 | |
---|
562 | m_DatasetSpecifiers.add(current); |
---|
563 | } |
---|
564 | |
---|
565 | // Tell each resultset to sort on the run column |
---|
566 | for (int j = 0; j < m_Resultsets.size(); j++) { |
---|
567 | Resultset resultset = (Resultset) m_Resultsets.elementAt(j); |
---|
568 | if (m_FoldColumn >= 0) { |
---|
569 | // sort on folds first in case they are out of order |
---|
570 | resultset.sort(m_FoldColumn); |
---|
571 | } |
---|
572 | resultset.sort(m_RunColumn); |
---|
573 | } |
---|
574 | |
---|
575 | m_ResultsetsValid = true; |
---|
576 | } |
---|
577 | |
---|
578 | /** |
---|
579 | * Gets the number of datasets in the resultsets |
---|
580 | * |
---|
581 | * @return the number of datasets in the resultsets |
---|
582 | */ |
---|
583 | public int getNumDatasets() { |
---|
584 | |
---|
585 | if (!m_ResultsetsValid) { |
---|
586 | try { |
---|
587 | prepareData(); |
---|
588 | } catch (Exception ex) { |
---|
589 | ex.printStackTrace(); |
---|
590 | return 0; |
---|
591 | } |
---|
592 | } |
---|
593 | return m_DatasetSpecifiers.numSpecifiers(); |
---|
594 | } |
---|
595 | |
---|
596 | /** |
---|
597 | * Gets the number of resultsets in the data. |
---|
598 | * |
---|
599 | * @return the number of resultsets in the data |
---|
600 | */ |
---|
601 | public int getNumResultsets() { |
---|
602 | |
---|
603 | if (!m_ResultsetsValid) { |
---|
604 | try { |
---|
605 | prepareData(); |
---|
606 | } catch (Exception ex) { |
---|
607 | ex.printStackTrace(); |
---|
608 | return 0; |
---|
609 | } |
---|
610 | } |
---|
611 | return m_Resultsets.size(); |
---|
612 | } |
---|
613 | |
---|
614 | /** |
---|
615 | * Gets a string descriptive of the specified resultset. |
---|
616 | * |
---|
617 | * @param index the index of the resultset |
---|
618 | * @return a descriptive string for the resultset |
---|
619 | */ |
---|
620 | public String getResultsetName(int index) { |
---|
621 | |
---|
622 | if (!m_ResultsetsValid) { |
---|
623 | try { |
---|
624 | prepareData(); |
---|
625 | } catch (Exception ex) { |
---|
626 | ex.printStackTrace(); |
---|
627 | return null; |
---|
628 | } |
---|
629 | } |
---|
630 | return ((Resultset) m_Resultsets.elementAt(index)).templateString(); |
---|
631 | } |
---|
632 | |
---|
633 | /** |
---|
634 | * Checks whether the resultset with the given index shall be displayed. |
---|
635 | * |
---|
636 | * @param index the index of the resultset to check whether it shall be displayed |
---|
637 | * @return whether the specified resultset is displayed |
---|
638 | */ |
---|
639 | public boolean displayResultset(int index) { |
---|
640 | boolean result; |
---|
641 | int i; |
---|
642 | |
---|
643 | result = true; |
---|
644 | |
---|
645 | if (m_DisplayedResultsets != null) { |
---|
646 | result = false; |
---|
647 | for (i = 0; i < m_DisplayedResultsets.length; i++) { |
---|
648 | if (m_DisplayedResultsets[i] == index) { |
---|
649 | result = true; |
---|
650 | break; |
---|
651 | } |
---|
652 | } |
---|
653 | } |
---|
654 | |
---|
655 | return result; |
---|
656 | } |
---|
657 | |
---|
658 | /** |
---|
659 | * Computes a paired t-test comparison for a specified dataset between |
---|
660 | * two resultsets. |
---|
661 | * |
---|
662 | * @param datasetSpecifier the dataset specifier |
---|
663 | * @param resultset1Index the index of the first resultset |
---|
664 | * @param resultset2Index the index of the second resultset |
---|
665 | * @param comparisonColumn the column containing values to compare |
---|
666 | * @return the results of the paired comparison |
---|
667 | * @throws Exception if an error occurs |
---|
668 | */ |
---|
669 | public PairedStats calculateStatistics(Instance datasetSpecifier, |
---|
670 | int resultset1Index, |
---|
671 | int resultset2Index, |
---|
672 | int comparisonColumn) throws Exception { |
---|
673 | |
---|
674 | if (m_Instances.attribute(comparisonColumn).type() |
---|
675 | != Attribute.NUMERIC) { |
---|
676 | throw new Exception("Comparison column " + (comparisonColumn + 1) |
---|
677 | + " (" |
---|
678 | + m_Instances.attribute(comparisonColumn).name() |
---|
679 | + ") is not numeric"); |
---|
680 | } |
---|
681 | if (!m_ResultsetsValid) { |
---|
682 | prepareData(); |
---|
683 | } |
---|
684 | |
---|
685 | Resultset resultset1 = (Resultset) m_Resultsets.elementAt(resultset1Index); |
---|
686 | Resultset resultset2 = (Resultset) m_Resultsets.elementAt(resultset2Index); |
---|
687 | FastVector dataset1 = resultset1.dataset(datasetSpecifier); |
---|
688 | FastVector dataset2 = resultset2.dataset(datasetSpecifier); |
---|
689 | String datasetName = templateString(datasetSpecifier); |
---|
690 | if (dataset1 == null) { |
---|
691 | throw new Exception("No results for dataset=" + datasetName |
---|
692 | + " for resultset=" + resultset1.templateString()); |
---|
693 | } else if (dataset2 == null) { |
---|
694 | throw new Exception("No results for dataset=" + datasetName |
---|
695 | + " for resultset=" + resultset2.templateString()); |
---|
696 | } else if (dataset1.size() != dataset2.size()) { |
---|
697 | throw new Exception("Results for dataset=" + datasetName |
---|
698 | + " differ in size for resultset=" |
---|
699 | + resultset1.templateString() |
---|
700 | + " and resultset=" |
---|
701 | + resultset2.templateString() |
---|
702 | ); |
---|
703 | } |
---|
704 | |
---|
705 | PairedStats pairedStats = new PairedStats(m_SignificanceLevel); |
---|
706 | |
---|
707 | for (int k = 0; k < dataset1.size(); k ++) { |
---|
708 | Instance current1 = (Instance) dataset1.elementAt(k); |
---|
709 | Instance current2 = (Instance) dataset2.elementAt(k); |
---|
710 | if (current1.isMissing(comparisonColumn)) { |
---|
711 | System.err.println("Instance has missing value in comparison " |
---|
712 | + "column!\n" + current1); |
---|
713 | continue; |
---|
714 | } |
---|
715 | if (current2.isMissing(comparisonColumn)) { |
---|
716 | System.err.println("Instance has missing value in comparison " |
---|
717 | + "column!\n" + current2); |
---|
718 | continue; |
---|
719 | } |
---|
720 | if (current1.value(m_RunColumn) != current2.value(m_RunColumn)) { |
---|
721 | System.err.println("Run numbers do not match!\n" |
---|
722 | + current1 + current2); |
---|
723 | } |
---|
724 | if (m_FoldColumn != -1) { |
---|
725 | if (current1.value(m_FoldColumn) != current2.value(m_FoldColumn)) { |
---|
726 | System.err.println("Fold numbers do not match!\n" |
---|
727 | + current1 + current2); |
---|
728 | } |
---|
729 | } |
---|
730 | double value1 = current1.value(comparisonColumn); |
---|
731 | double value2 = current2.value(comparisonColumn); |
---|
732 | pairedStats.add(value1, value2); |
---|
733 | } |
---|
734 | pairedStats.calculateDerived(); |
---|
735 | //System.err.println("Differences stats:\n" + pairedStats.differencesStats); |
---|
736 | return pairedStats; |
---|
737 | |
---|
738 | } |
---|
739 | |
---|
740 | /** |
---|
741 | * Creates a key that maps resultset numbers to their descriptions. |
---|
742 | * |
---|
743 | * @return a value of type 'String' |
---|
744 | */ |
---|
745 | public String resultsetKey() { |
---|
746 | |
---|
747 | if (!m_ResultsetsValid) { |
---|
748 | try { |
---|
749 | prepareData(); |
---|
750 | } catch (Exception ex) { |
---|
751 | ex.printStackTrace(); |
---|
752 | return ex.getMessage(); |
---|
753 | } |
---|
754 | } |
---|
755 | String result = ""; |
---|
756 | for (int j = 0; j < getNumResultsets(); j++) { |
---|
757 | result += "(" + (j + 1) + ") " + getResultsetName(j) + '\n'; |
---|
758 | } |
---|
759 | return result + '\n'; |
---|
760 | } |
---|
761 | |
---|
762 | /** |
---|
763 | * Creates a "header" string describing the current resultsets. |
---|
764 | * |
---|
765 | * @param comparisonColumn a value of type 'int' |
---|
766 | * @return a value of type 'String' |
---|
767 | */ |
---|
768 | public String header(int comparisonColumn) { |
---|
769 | |
---|
770 | if (!m_ResultsetsValid) { |
---|
771 | try { |
---|
772 | prepareData(); |
---|
773 | } catch (Exception ex) { |
---|
774 | ex.printStackTrace(); |
---|
775 | return ex.getMessage(); |
---|
776 | } |
---|
777 | } |
---|
778 | |
---|
779 | initResultMatrix(); |
---|
780 | m_ResultMatrix.addHeader("Tester", getClass().getName()); |
---|
781 | m_ResultMatrix.addHeader("Analysing", m_Instances.attribute(comparisonColumn).name()); |
---|
782 | m_ResultMatrix.addHeader("Datasets", Integer.toString(getNumDatasets())); |
---|
783 | m_ResultMatrix.addHeader("Resultsets", Integer.toString(getNumResultsets())); |
---|
784 | m_ResultMatrix.addHeader("Confidence", getSignificanceLevel() + " (two tailed)"); |
---|
785 | m_ResultMatrix.addHeader("Sorted by", getSortColumnName()); |
---|
786 | m_ResultMatrix.addHeader("Date", (new SimpleDateFormat()).format(new Date())); |
---|
787 | |
---|
788 | return m_ResultMatrix.toStringHeader() + "\n"; |
---|
789 | } |
---|
790 | |
---|
791 | /** |
---|
792 | * Carries out a comparison between all resultsets, counting the number |
---|
793 | * of datsets where one resultset outperforms the other. |
---|
794 | * |
---|
795 | * @param comparisonColumn the index of the comparison column |
---|
796 | * @param nonSigWin for storing the non-significant wins |
---|
797 | * @return a 2d array where element [i][j] is the number of times resultset |
---|
798 | * j performed significantly better than resultset i. |
---|
799 | * @throws Exception if an error occurs |
---|
800 | */ |
---|
801 | public int [][] multiResultsetWins(int comparisonColumn, int [][] nonSigWin) |
---|
802 | throws Exception { |
---|
803 | |
---|
804 | int numResultsets = getNumResultsets(); |
---|
805 | int [][] win = new int [numResultsets][numResultsets]; |
---|
806 | // int [][] nonSigWin = new int [numResultsets][numResultsets]; |
---|
807 | for (int i = 0; i < numResultsets; i++) { |
---|
808 | for (int j = i + 1; j < numResultsets; j++) { |
---|
809 | System.err.print("Comparing (" + (i + 1) + ") with (" |
---|
810 | + (j + 1) + ")\r"); |
---|
811 | System.err.flush(); |
---|
812 | for (int k = 0; k < getNumDatasets(); k++) { |
---|
813 | try { |
---|
814 | PairedStats pairedStats = |
---|
815 | calculateStatistics(m_DatasetSpecifiers.specifier(k), i, j, |
---|
816 | comparisonColumn); |
---|
817 | if (pairedStats.differencesSignificance < 0) { |
---|
818 | win[i][j]++; |
---|
819 | } else if (pairedStats.differencesSignificance > 0) { |
---|
820 | win[j][i]++; |
---|
821 | } |
---|
822 | |
---|
823 | if (pairedStats.differencesStats.mean < 0) { |
---|
824 | nonSigWin[i][j]++; |
---|
825 | } else if (pairedStats.differencesStats.mean > 0) { |
---|
826 | nonSigWin[j][i]++; |
---|
827 | } |
---|
828 | } catch (Exception ex) { |
---|
829 | //ex.printStackTrace(); |
---|
830 | System.err.println(ex.getMessage()); |
---|
831 | } |
---|
832 | } |
---|
833 | } |
---|
834 | } |
---|
835 | return win; |
---|
836 | } |
---|
837 | |
---|
838 | /** |
---|
839 | * clears the content and fills the column and row names according to the |
---|
840 | * given sorting |
---|
841 | */ |
---|
842 | protected void initResultMatrix() { |
---|
843 | m_ResultMatrix.setSize(getNumResultsets(), getNumDatasets()); |
---|
844 | m_ResultMatrix.setShowStdDev(m_ShowStdDevs); |
---|
845 | |
---|
846 | for (int i = 0; i < getNumDatasets(); i++) |
---|
847 | m_ResultMatrix.setRowName(i, |
---|
848 | templateString(m_DatasetSpecifiers.specifier(i))); |
---|
849 | |
---|
850 | for (int j = 0; j < getNumResultsets(); j++) { |
---|
851 | m_ResultMatrix.setColName(j, getResultsetName(j)); |
---|
852 | m_ResultMatrix.setColHidden(j, !displayResultset(j)); |
---|
853 | } |
---|
854 | } |
---|
855 | |
---|
856 | /** |
---|
857 | * Carries out a comparison between all resultsets, counting the number |
---|
858 | * of datsets where one resultset outperforms the other. The results |
---|
859 | * are summarized in a table. |
---|
860 | * |
---|
861 | * @param comparisonColumn the index of the comparison column |
---|
862 | * @return the results in a string |
---|
863 | * @throws Exception if an error occurs |
---|
864 | */ |
---|
865 | public String multiResultsetSummary(int comparisonColumn) |
---|
866 | throws Exception { |
---|
867 | |
---|
868 | int[][] nonSigWin = new int [getNumResultsets()][getNumResultsets()]; |
---|
869 | int[][] win = multiResultsetWins(comparisonColumn, nonSigWin); |
---|
870 | |
---|
871 | initResultMatrix(); |
---|
872 | m_ResultMatrix.setSummary(nonSigWin, win); |
---|
873 | |
---|
874 | return m_ResultMatrix.toStringSummary(); |
---|
875 | } |
---|
876 | |
---|
877 | /** |
---|
878 | * returns a ranking of the resultsets |
---|
879 | * |
---|
880 | * @param comparisonColumn the column to compare with |
---|
881 | * @return the ranking |
---|
882 | * @throws Exception if something goes wrong |
---|
883 | */ |
---|
884 | public String multiResultsetRanking(int comparisonColumn) |
---|
885 | throws Exception { |
---|
886 | |
---|
887 | int[][] nonSigWin = new int [getNumResultsets()][getNumResultsets()]; |
---|
888 | int[][] win = multiResultsetWins(comparisonColumn, nonSigWin); |
---|
889 | |
---|
890 | initResultMatrix(); |
---|
891 | m_ResultMatrix.setRanking(win); |
---|
892 | |
---|
893 | return m_ResultMatrix.toStringRanking(); |
---|
894 | } |
---|
895 | |
---|
896 | /** |
---|
897 | * Creates a comparison table where a base resultset is compared to the |
---|
898 | * other resultsets. Results are presented for every dataset. |
---|
899 | * |
---|
900 | * @param baseResultset the index of the base resultset |
---|
901 | * @param comparisonColumn the index of the column to compare over |
---|
902 | * @return the comparison table string |
---|
903 | * @throws Exception if an error occurs |
---|
904 | */ |
---|
905 | public String multiResultsetFull(int baseResultset, |
---|
906 | int comparisonColumn) throws Exception { |
---|
907 | |
---|
908 | int maxWidthMean = 2; |
---|
909 | int maxWidthStdDev = 2; |
---|
910 | |
---|
911 | double[] sortValues = new double[getNumDatasets()]; |
---|
912 | |
---|
913 | // determine max field width |
---|
914 | for (int i = 0; i < getNumDatasets(); i++) { |
---|
915 | sortValues[i] = Double.POSITIVE_INFINITY; // sorts skipped cols to end |
---|
916 | |
---|
917 | for (int j = 0; j < getNumResultsets(); j++) { |
---|
918 | if (!displayResultset(j)) |
---|
919 | continue; |
---|
920 | try { |
---|
921 | PairedStats pairedStats = |
---|
922 | calculateStatistics(m_DatasetSpecifiers.specifier(i), |
---|
923 | baseResultset, j, comparisonColumn); |
---|
924 | if (!Double.isInfinite(pairedStats.yStats.mean) && |
---|
925 | !Double.isNaN(pairedStats.yStats.mean)) { |
---|
926 | double width = ((Math.log(Math.abs(pairedStats.yStats.mean)) / |
---|
927 | Math.log(10))+1); |
---|
928 | if (width > maxWidthMean) { |
---|
929 | maxWidthMean = (int)width; |
---|
930 | } |
---|
931 | } |
---|
932 | |
---|
933 | if (j == baseResultset) { |
---|
934 | if (getSortColumn() != -1) |
---|
935 | sortValues[i] = calculateStatistics( |
---|
936 | m_DatasetSpecifiers.specifier(i), |
---|
937 | baseResultset, j, getSortColumn()).xStats.mean; |
---|
938 | else |
---|
939 | sortValues[i] = i; |
---|
940 | } |
---|
941 | |
---|
942 | if (m_ShowStdDevs && |
---|
943 | !Double.isInfinite(pairedStats.yStats.stdDev) && |
---|
944 | !Double.isNaN(pairedStats.yStats.stdDev)) { |
---|
945 | double width = ((Math.log(Math.abs(pairedStats.yStats.stdDev)) / |
---|
946 | Math.log(10))+1); |
---|
947 | if (width > maxWidthStdDev) { |
---|
948 | maxWidthStdDev = (int)width; |
---|
949 | } |
---|
950 | } |
---|
951 | } catch (Exception ex) { |
---|
952 | //ex.printStackTrace(); |
---|
953 | System.err.println(ex); |
---|
954 | } |
---|
955 | } |
---|
956 | } |
---|
957 | |
---|
958 | // sort rows according to sort column |
---|
959 | m_SortOrder = Utils.sort(sortValues); |
---|
960 | |
---|
961 | // determine column order |
---|
962 | m_ColOrder = new int[getNumResultsets()]; |
---|
963 | m_ColOrder[0] = baseResultset; |
---|
964 | int index = 1; |
---|
965 | for (int i = 0; i < getNumResultsets(); i++) { |
---|
966 | if (i == baseResultset) |
---|
967 | continue; |
---|
968 | m_ColOrder[index] = i; |
---|
969 | index++; |
---|
970 | } |
---|
971 | |
---|
972 | // setup matrix |
---|
973 | initResultMatrix(); |
---|
974 | m_ResultMatrix.setRowOrder(m_SortOrder); |
---|
975 | m_ResultMatrix.setColOrder(m_ColOrder); |
---|
976 | m_ResultMatrix.setMeanWidth(maxWidthMean); |
---|
977 | m_ResultMatrix.setStdDevWidth(maxWidthStdDev); |
---|
978 | m_ResultMatrix.setSignificanceWidth(1); |
---|
979 | |
---|
980 | // make sure that test base is displayed, even though it might not be |
---|
981 | // selected |
---|
982 | for (int i = 0; i < m_ResultMatrix.getColCount(); i++) { |
---|
983 | if ( (i == baseResultset) |
---|
984 | && (m_ResultMatrix.getColHidden(i)) ) { |
---|
985 | m_ResultMatrix.setColHidden(i, false); |
---|
986 | System.err.println("Note: test base was hidden - set visible!"); |
---|
987 | } |
---|
988 | } |
---|
989 | |
---|
990 | // the data |
---|
991 | for (int i = 0; i < getNumDatasets(); i++) { |
---|
992 | m_ResultMatrix.setRowName(i, |
---|
993 | templateString(m_DatasetSpecifiers.specifier(i))); |
---|
994 | |
---|
995 | for (int j = 0; j < getNumResultsets(); j++) { |
---|
996 | try { |
---|
997 | // calc stats |
---|
998 | PairedStats pairedStats = |
---|
999 | calculateStatistics(m_DatasetSpecifiers.specifier(i), |
---|
1000 | baseResultset, j, comparisonColumn); |
---|
1001 | |
---|
1002 | // count |
---|
1003 | m_ResultMatrix.setCount(i, pairedStats.count); |
---|
1004 | |
---|
1005 | // mean |
---|
1006 | m_ResultMatrix.setMean(j, i, pairedStats.yStats.mean); |
---|
1007 | |
---|
1008 | // std dev |
---|
1009 | m_ResultMatrix.setStdDev(j, i, pairedStats.yStats.stdDev); |
---|
1010 | |
---|
1011 | // significance |
---|
1012 | if (pairedStats.differencesSignificance < 0) |
---|
1013 | m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_WIN); |
---|
1014 | else if (pairedStats.differencesSignificance > 0) |
---|
1015 | m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_LOSS); |
---|
1016 | else |
---|
1017 | m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_TIE); |
---|
1018 | } |
---|
1019 | catch (Exception e) { |
---|
1020 | //e.printStackTrace(); |
---|
1021 | System.err.println(e); |
---|
1022 | } |
---|
1023 | } |
---|
1024 | } |
---|
1025 | |
---|
1026 | // generate output |
---|
1027 | StringBuffer result = new StringBuffer(1000); |
---|
1028 | try { |
---|
1029 | result.append(m_ResultMatrix.toStringMatrix()); |
---|
1030 | } |
---|
1031 | catch (Exception e) { |
---|
1032 | e.printStackTrace(); |
---|
1033 | } |
---|
1034 | |
---|
1035 | // append a key so that we can tell the difference between long |
---|
1036 | // scheme+option names |
---|
1037 | result.append("\n\n" + m_ResultMatrix.toStringKey()); |
---|
1038 | |
---|
1039 | return result.toString(); |
---|
1040 | } |
---|
1041 | |
---|
1042 | /** |
---|
1043 | * Lists options understood by this object. |
---|
1044 | * |
---|
1045 | * @return an enumeration of Options. |
---|
1046 | */ |
---|
1047 | public Enumeration listOptions() { |
---|
1048 | |
---|
1049 | Vector newVector = new Vector(); |
---|
1050 | |
---|
1051 | newVector.addElement(new Option( |
---|
1052 | "\tSpecify list of columns that specify a unique\n" |
---|
1053 | + "\tdataset.\n" |
---|
1054 | + "\tFirst and last are valid indexes. (default none)", |
---|
1055 | "D", 1, "-D <index,index2-index4,...>")); |
---|
1056 | newVector.addElement(new Option( |
---|
1057 | "\tSet the index of the column containing the run number", |
---|
1058 | "R", 1, "-R <index>")); |
---|
1059 | newVector.addElement(new Option( |
---|
1060 | "\tSet the index of the column containing the fold number", |
---|
1061 | "F", 1, "-F <index>")); |
---|
1062 | newVector.addElement(new Option( |
---|
1063 | "\tSpecify list of columns that specify a unique\n" |
---|
1064 | + "\t'result generator' (eg: classifier name and options).\n" |
---|
1065 | + "\tFirst and last are valid indexes. (default none)", |
---|
1066 | "G", 1, "-G <index1,index2-index4,...>")); |
---|
1067 | newVector.addElement(new Option( |
---|
1068 | "\tSet the significance level for comparisons (default 0.05)", |
---|
1069 | "S", 1, "-S <significance level>")); |
---|
1070 | newVector.addElement(new Option( |
---|
1071 | "\tShow standard deviations", |
---|
1072 | "V", 0, "-V")); |
---|
1073 | newVector.addElement(new Option( |
---|
1074 | "\tProduce table comparisons in Latex table format", |
---|
1075 | "L", 0, "-L")); |
---|
1076 | newVector.addElement(new Option( |
---|
1077 | "\tProduce table comparisons in CSV table format", |
---|
1078 | "csv", 0, "-csv")); |
---|
1079 | newVector.addElement(new Option( |
---|
1080 | "\tProduce table comparisons in HTML table format", |
---|
1081 | "html", 0, "-html")); |
---|
1082 | newVector.addElement(new Option( |
---|
1083 | "\tProduce table comparisons with only the significance values", |
---|
1084 | "significance", 0, "-significance")); |
---|
1085 | newVector.addElement(new Option( |
---|
1086 | "\tProduce table comparisons output suitable for GNUPlot", |
---|
1087 | "gnuplot", 0, "-gnuplot")); |
---|
1088 | |
---|
1089 | return newVector.elements(); |
---|
1090 | } |
---|
1091 | |
---|
1092 | /** |
---|
1093 | * Parses a given list of options. <p/> |
---|
1094 | * |
---|
1095 | <!-- options-start --> |
---|
1096 | * Valid options are: <p/> |
---|
1097 | * |
---|
1098 | * <pre> -D <index,index2-index4,...> |
---|
1099 | * Specify list of columns that specify a unique |
---|
1100 | * dataset. |
---|
1101 | * First and last are valid indexes. (default none)</pre> |
---|
1102 | * |
---|
1103 | * <pre> -R <index> |
---|
1104 | * Set the index of the column containing the run number</pre> |
---|
1105 | * |
---|
1106 | * <pre> -F <index> |
---|
1107 | * Set the index of the column containing the fold number</pre> |
---|
1108 | * |
---|
1109 | * <pre> -G <index1,index2-index4,...> |
---|
1110 | * Specify list of columns that specify a unique |
---|
1111 | * 'result generator' (eg: classifier name and options). |
---|
1112 | * First and last are valid indexes. (default none)</pre> |
---|
1113 | * |
---|
1114 | * <pre> -S <significance level> |
---|
1115 | * Set the significance level for comparisons (default 0.05)</pre> |
---|
1116 | * |
---|
1117 | * <pre> -V |
---|
1118 | * Show standard deviations</pre> |
---|
1119 | * |
---|
1120 | * <pre> -L |
---|
1121 | * Produce table comparisons in Latex table format</pre> |
---|
1122 | * |
---|
1123 | * <pre> -csv |
---|
1124 | * Produce table comparisons in CSV table format</pre> |
---|
1125 | * |
---|
1126 | * <pre> -html |
---|
1127 | * Produce table comparisons in HTML table format</pre> |
---|
1128 | * |
---|
1129 | * <pre> -significance |
---|
1130 | * Produce table comparisons with only the significance values</pre> |
---|
1131 | * |
---|
1132 | * <pre> -gnuplot |
---|
1133 | * Produce table comparisons output suitable for GNUPlot</pre> |
---|
1134 | * |
---|
1135 | <!-- options-end --> |
---|
1136 | * |
---|
1137 | * @param options an array containing options to set. |
---|
1138 | * @throws Exception if invalid options are given |
---|
1139 | */ |
---|
1140 | public void setOptions(String[] options) throws Exception { |
---|
1141 | |
---|
1142 | setShowStdDevs(Utils.getFlag('V', options)); |
---|
1143 | if (Utils.getFlag('L', options)) |
---|
1144 | setResultMatrix(new ResultMatrixLatex()); |
---|
1145 | if (Utils.getFlag("csv", options)) |
---|
1146 | setResultMatrix(new ResultMatrixCSV()); |
---|
1147 | if (Utils.getFlag("html", options)) |
---|
1148 | setResultMatrix(new ResultMatrixHTML()); |
---|
1149 | if (Utils.getFlag("significance", options)) |
---|
1150 | setResultMatrix(new ResultMatrixSignificance()); |
---|
1151 | |
---|
1152 | String datasetList = Utils.getOption('D', options); |
---|
1153 | Range datasetRange = new Range(); |
---|
1154 | if (datasetList.length() != 0) { |
---|
1155 | datasetRange.setRanges(datasetList); |
---|
1156 | } |
---|
1157 | setDatasetKeyColumns(datasetRange); |
---|
1158 | |
---|
1159 | String indexStr = Utils.getOption('R', options); |
---|
1160 | if (indexStr.length() != 0) { |
---|
1161 | if (indexStr.equals("first")) { |
---|
1162 | setRunColumn(0); |
---|
1163 | } else if (indexStr.equals("last")) { |
---|
1164 | setRunColumn(-1); |
---|
1165 | } else { |
---|
1166 | setRunColumn(Integer.parseInt(indexStr) - 1); |
---|
1167 | } |
---|
1168 | } else { |
---|
1169 | setRunColumn(-1); |
---|
1170 | } |
---|
1171 | |
---|
1172 | String foldStr = Utils.getOption('F', options); |
---|
1173 | if (foldStr.length() != 0) { |
---|
1174 | setFoldColumn(Integer.parseInt(foldStr) - 1); |
---|
1175 | } else { |
---|
1176 | setFoldColumn(-1); |
---|
1177 | } |
---|
1178 | |
---|
1179 | String sigStr = Utils.getOption('S', options); |
---|
1180 | if (sigStr.length() != 0) { |
---|
1181 | setSignificanceLevel((new Double(sigStr)).doubleValue()); |
---|
1182 | } else { |
---|
1183 | setSignificanceLevel(0.05); |
---|
1184 | } |
---|
1185 | |
---|
1186 | String resultsetList = Utils.getOption('G', options); |
---|
1187 | Range generatorRange = new Range(); |
---|
1188 | if (resultsetList.length() != 0) { |
---|
1189 | generatorRange.setRanges(resultsetList); |
---|
1190 | } |
---|
1191 | setResultsetKeyColumns(generatorRange); |
---|
1192 | } |
---|
1193 | |
---|
1194 | /** |
---|
1195 | * Gets current settings of the PairedTTester. |
---|
1196 | * |
---|
1197 | * @return an array of strings containing current options. |
---|
1198 | */ |
---|
1199 | public String[] getOptions() { |
---|
1200 | |
---|
1201 | String [] options = new String [11]; |
---|
1202 | int current = 0; |
---|
1203 | |
---|
1204 | if (!getResultsetKeyColumns().getRanges().equals("")) { |
---|
1205 | options[current++] = "-G"; |
---|
1206 | options[current++] = getResultsetKeyColumns().getRanges(); |
---|
1207 | } |
---|
1208 | if (!getDatasetKeyColumns().getRanges().equals("")) { |
---|
1209 | options[current++] = "-D"; |
---|
1210 | options[current++] = getDatasetKeyColumns().getRanges(); |
---|
1211 | } |
---|
1212 | options[current++] = "-R"; |
---|
1213 | options[current++] = "" + (getRunColumn() + 1); |
---|
1214 | options[current++] = "-S"; |
---|
1215 | options[current++] = "" + getSignificanceLevel(); |
---|
1216 | |
---|
1217 | if (getShowStdDevs()) { |
---|
1218 | options[current++] = "-V"; |
---|
1219 | } |
---|
1220 | |
---|
1221 | if (getResultMatrix().equals(ResultMatrixLatex.class)) |
---|
1222 | options[current++] = "-L"; |
---|
1223 | |
---|
1224 | if (getResultMatrix().equals(ResultMatrixCSV.class)) |
---|
1225 | options[current++] = "-csv"; |
---|
1226 | |
---|
1227 | if (getResultMatrix().equals(ResultMatrixHTML.class)) |
---|
1228 | options[current++] = "-html"; |
---|
1229 | |
---|
1230 | if (getResultMatrix().equals(ResultMatrixSignificance.class)) |
---|
1231 | options[current++] = "-significance"; |
---|
1232 | |
---|
1233 | while (current < options.length) { |
---|
1234 | options[current++] = ""; |
---|
1235 | } |
---|
1236 | return options; |
---|
1237 | } |
---|
1238 | |
---|
1239 | /** |
---|
1240 | * Get the value of ResultsetKeyColumns. |
---|
1241 | * |
---|
1242 | * @return Value of ResultsetKeyColumns. |
---|
1243 | */ |
---|
1244 | public Range getResultsetKeyColumns() { |
---|
1245 | |
---|
1246 | return m_ResultsetKeyColumnsRange; |
---|
1247 | } |
---|
1248 | |
---|
1249 | /** |
---|
1250 | * Set the value of ResultsetKeyColumns. |
---|
1251 | * |
---|
1252 | * @param newResultsetKeyColumns Value to assign to ResultsetKeyColumns. |
---|
1253 | */ |
---|
1254 | public void setResultsetKeyColumns(Range newResultsetKeyColumns) { |
---|
1255 | |
---|
1256 | m_ResultsetKeyColumnsRange = newResultsetKeyColumns; |
---|
1257 | m_ResultsetsValid = false; |
---|
1258 | } |
---|
1259 | |
---|
1260 | /** |
---|
1261 | * Gets the indices of the the datasets that are displayed (if <code>null</code> |
---|
1262 | * then all are displayed). The base is always displayed. |
---|
1263 | * |
---|
1264 | * @return the indices of the datasets to display |
---|
1265 | */ |
---|
1266 | public int[] getDisplayedResultsets() { |
---|
1267 | return m_DisplayedResultsets; |
---|
1268 | } |
---|
1269 | |
---|
1270 | /** |
---|
1271 | * Sets the indicies of the datasets to display (<code>null</code> means all). |
---|
1272 | * The base is always displayed. |
---|
1273 | * |
---|
1274 | * @param cols the indices of the datasets to display |
---|
1275 | */ |
---|
1276 | public void setDisplayedResultsets(int[] cols) { |
---|
1277 | m_DisplayedResultsets = cols; |
---|
1278 | } |
---|
1279 | |
---|
1280 | /** |
---|
1281 | * Get the value of SignificanceLevel. |
---|
1282 | * |
---|
1283 | * @return Value of SignificanceLevel. |
---|
1284 | */ |
---|
1285 | public double getSignificanceLevel() { |
---|
1286 | |
---|
1287 | return m_SignificanceLevel; |
---|
1288 | } |
---|
1289 | |
---|
1290 | /** |
---|
1291 | * Set the value of SignificanceLevel. |
---|
1292 | * |
---|
1293 | * @param newSignificanceLevel Value to assign to SignificanceLevel. |
---|
1294 | */ |
---|
1295 | public void setSignificanceLevel(double newSignificanceLevel) { |
---|
1296 | |
---|
1297 | m_SignificanceLevel = newSignificanceLevel; |
---|
1298 | } |
---|
1299 | |
---|
1300 | /** |
---|
1301 | * Get the value of DatasetKeyColumns. |
---|
1302 | * |
---|
1303 | * @return Value of DatasetKeyColumns. |
---|
1304 | */ |
---|
1305 | public Range getDatasetKeyColumns() { |
---|
1306 | |
---|
1307 | return m_DatasetKeyColumnsRange; |
---|
1308 | } |
---|
1309 | |
---|
1310 | /** |
---|
1311 | * Set the value of DatasetKeyColumns. |
---|
1312 | * |
---|
1313 | * @param newDatasetKeyColumns Value to assign to DatasetKeyColumns. |
---|
1314 | */ |
---|
1315 | public void setDatasetKeyColumns(Range newDatasetKeyColumns) { |
---|
1316 | |
---|
1317 | m_DatasetKeyColumnsRange = newDatasetKeyColumns; |
---|
1318 | m_ResultsetsValid = false; |
---|
1319 | } |
---|
1320 | |
---|
1321 | /** |
---|
1322 | * Get the value of RunColumn. |
---|
1323 | * |
---|
1324 | * @return Value of RunColumn. |
---|
1325 | */ |
---|
1326 | public int getRunColumn() { |
---|
1327 | |
---|
1328 | return m_RunColumnSet; |
---|
1329 | } |
---|
1330 | |
---|
1331 | /** |
---|
1332 | * Set the value of RunColumn. |
---|
1333 | * |
---|
1334 | * @param newRunColumn Value to assign to RunColumn. |
---|
1335 | */ |
---|
1336 | public void setRunColumn(int newRunColumn) { |
---|
1337 | |
---|
1338 | m_RunColumnSet = newRunColumn; |
---|
1339 | m_ResultsetsValid = false; |
---|
1340 | } |
---|
1341 | |
---|
1342 | /** |
---|
1343 | * Get the value of FoldColumn. |
---|
1344 | * |
---|
1345 | * @return Value of FoldColumn. |
---|
1346 | */ |
---|
1347 | public int getFoldColumn() { |
---|
1348 | |
---|
1349 | return m_FoldColumn; |
---|
1350 | } |
---|
1351 | |
---|
1352 | /** |
---|
1353 | * Set the value of FoldColumn. |
---|
1354 | * |
---|
1355 | * @param newFoldColumn Value to assign to FoldColumn. |
---|
1356 | */ |
---|
1357 | public void setFoldColumn(int newFoldColumn) { |
---|
1358 | |
---|
1359 | m_FoldColumn = newFoldColumn; |
---|
1360 | m_ResultsetsValid = false; |
---|
1361 | } |
---|
1362 | |
---|
1363 | /** |
---|
1364 | * Returns the name of the column to sort on. |
---|
1365 | * |
---|
1366 | * @return the name of the column to sort on. |
---|
1367 | */ |
---|
1368 | public String getSortColumnName() { |
---|
1369 | if (getSortColumn() == -1) |
---|
1370 | return "-"; |
---|
1371 | else |
---|
1372 | return m_Instances.attribute(getSortColumn()).name(); |
---|
1373 | } |
---|
1374 | |
---|
1375 | /** |
---|
1376 | * Returns the column to sort on, -1 means the default sorting. |
---|
1377 | * |
---|
1378 | * @return the column to sort on. |
---|
1379 | */ |
---|
1380 | public int getSortColumn() { |
---|
1381 | return m_SortColumn; |
---|
1382 | } |
---|
1383 | |
---|
1384 | /** |
---|
1385 | * Set the column to sort on, -1 means the default sorting. |
---|
1386 | * |
---|
1387 | * @param newSortColumn the new sort column. |
---|
1388 | */ |
---|
1389 | public void setSortColumn(int newSortColumn) { |
---|
1390 | if (newSortColumn >= -1) |
---|
1391 | m_SortColumn = newSortColumn; |
---|
1392 | } |
---|
1393 | |
---|
1394 | /** |
---|
1395 | * Get the value of Instances. |
---|
1396 | * |
---|
1397 | * @return Value of Instances. |
---|
1398 | */ |
---|
1399 | public Instances getInstances() { |
---|
1400 | |
---|
1401 | return m_Instances; |
---|
1402 | } |
---|
1403 | |
---|
1404 | /** |
---|
1405 | * Set the value of Instances. |
---|
1406 | * |
---|
1407 | * @param newInstances Value to assign to Instances. |
---|
1408 | */ |
---|
1409 | public void setInstances(Instances newInstances) { |
---|
1410 | |
---|
1411 | m_Instances = newInstances; |
---|
1412 | m_ResultsetsValid = false; |
---|
1413 | } |
---|
1414 | |
---|
1415 | /** |
---|
1416 | * retrieves all the settings from the given Tester |
---|
1417 | * |
---|
1418 | * @param tester the Tester to get the settings from |
---|
1419 | */ |
---|
1420 | public void assign(Tester tester) { |
---|
1421 | setInstances(tester.getInstances()); |
---|
1422 | setResultMatrix(tester.getResultMatrix()); |
---|
1423 | setShowStdDevs(tester.getShowStdDevs()); |
---|
1424 | setResultsetKeyColumns(tester.getResultsetKeyColumns()); |
---|
1425 | setDisplayedResultsets(tester.getDisplayedResultsets()); |
---|
1426 | setSignificanceLevel(tester.getSignificanceLevel()); |
---|
1427 | setDatasetKeyColumns(tester.getDatasetKeyColumns()); |
---|
1428 | setRunColumn(tester.getRunColumn()); |
---|
1429 | setFoldColumn(tester.getFoldColumn()); |
---|
1430 | setSortColumn(tester.getSortColumn()); |
---|
1431 | } |
---|
1432 | |
---|
1433 | /** |
---|
1434 | * returns a string that is displayed as tooltip on the "perform test" |
---|
1435 | * button in the experimenter |
---|
1436 | * |
---|
1437 | * @return the tool tip |
---|
1438 | */ |
---|
1439 | public String getToolTipText() { |
---|
1440 | return "Performs test using t-test statistic"; |
---|
1441 | } |
---|
1442 | |
---|
1443 | /** |
---|
1444 | * returns the name of the tester |
---|
1445 | * |
---|
1446 | * @return the display name |
---|
1447 | */ |
---|
1448 | public String getDisplayName() { |
---|
1449 | return "Paired T-Tester"; |
---|
1450 | } |
---|
1451 | |
---|
1452 | /** |
---|
1453 | * Returns the revision string. |
---|
1454 | * |
---|
1455 | * @return the revision |
---|
1456 | */ |
---|
1457 | public String getRevision() { |
---|
1458 | return RevisionUtils.extract("$Revision: 5415 $"); |
---|
1459 | } |
---|
1460 | |
---|
1461 | /** |
---|
1462 | * Test the class from the command line. |
---|
1463 | * |
---|
1464 | * @param args contains options for the instance ttests |
---|
1465 | */ |
---|
1466 | public static void main(String args[]) { |
---|
1467 | |
---|
1468 | try { |
---|
1469 | PairedTTester tt = new PairedTTester(); |
---|
1470 | String datasetName = Utils.getOption('t', args); |
---|
1471 | String compareColStr = Utils.getOption('c', args); |
---|
1472 | String baseColStr = Utils.getOption('b', args); |
---|
1473 | boolean summaryOnly = Utils.getFlag('s', args); |
---|
1474 | boolean rankingOnly = Utils.getFlag('r', args); |
---|
1475 | try { |
---|
1476 | if ((datasetName.length() == 0) |
---|
1477 | || (compareColStr.length() == 0)) { |
---|
1478 | throw new Exception("-t and -c options are required"); |
---|
1479 | } |
---|
1480 | tt.setOptions(args); |
---|
1481 | Utils.checkForRemainingOptions(args); |
---|
1482 | } catch (Exception ex) { |
---|
1483 | String result = ""; |
---|
1484 | Enumeration enu = tt.listOptions(); |
---|
1485 | while (enu.hasMoreElements()) { |
---|
1486 | Option option = (Option) enu.nextElement(); |
---|
1487 | result += option.synopsis() + '\n' |
---|
1488 | + option.description() + '\n'; |
---|
1489 | } |
---|
1490 | throw new Exception( |
---|
1491 | "Usage:\n\n" |
---|
1492 | + "-t <file>\n" |
---|
1493 | + "\tSet the dataset containing data to evaluate\n" |
---|
1494 | + "-b <index>\n" |
---|
1495 | + "\tSet the resultset to base comparisons against (optional)\n" |
---|
1496 | + "-c <index>\n" |
---|
1497 | + "\tSet the column to perform a comparison on\n" |
---|
1498 | + "-s\n" |
---|
1499 | + "\tSummarize wins over all resultset pairs\n\n" |
---|
1500 | + "-r\n" |
---|
1501 | + "\tGenerate a resultset ranking\n\n" |
---|
1502 | + result); |
---|
1503 | } |
---|
1504 | Instances data = new Instances(new BufferedReader( |
---|
1505 | new FileReader(datasetName))); |
---|
1506 | tt.setInstances(data); |
---|
1507 | // tt.prepareData(); |
---|
1508 | int compareCol = Integer.parseInt(compareColStr) - 1; |
---|
1509 | System.out.println(tt.header(compareCol)); |
---|
1510 | if (rankingOnly) { |
---|
1511 | System.out.println(tt.multiResultsetRanking(compareCol)); |
---|
1512 | } else if (summaryOnly) { |
---|
1513 | System.out.println(tt.multiResultsetSummary(compareCol)); |
---|
1514 | } else { |
---|
1515 | System.out.println(tt.resultsetKey()); |
---|
1516 | if (baseColStr.length() == 0) { |
---|
1517 | for (int i = 0; i < tt.getNumResultsets(); i++) { |
---|
1518 | if (!tt.displayResultset(i)) |
---|
1519 | continue; |
---|
1520 | System.out.println(tt.multiResultsetFull(i, compareCol)); |
---|
1521 | } |
---|
1522 | } else { |
---|
1523 | int baseCol = Integer.parseInt(baseColStr) - 1; |
---|
1524 | System.out.println(tt.multiResultsetFull(baseCol, compareCol)); |
---|
1525 | } |
---|
1526 | } |
---|
1527 | } catch(Exception e) { |
---|
1528 | e.printStackTrace(); |
---|
1529 | System.err.println(e.getMessage()); |
---|
1530 | } |
---|
1531 | } |
---|
1532 | } |
---|