Context Navigation

source: src/main/java/weka/experiment/PairedTTester.java @ 13

Last change on this file since 13 was 4, checked in by gnappo, 15 years ago
Import di weka.
File size: 43.6 KB

Rev	Line
[4]	1	/*
	2	* This program is free software; you can redistribute it and/or modify
	3	* it under the terms of the GNU General Public License as published by
	4	* the Free Software Foundation; either version 2 of the License, or
	5	* (at your option) any later version.
	6	*
	7	* This program is distributed in the hope that it will be useful,
	8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	* GNU General Public License for more details.
	11	*
	12	* You should have received a copy of the GNU General Public License
	13	* along with this program; if not, write to the Free Software
	14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	15	*/
	16
	17	/*
	18	* PairedTTester.java
	19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
	20	*
	21	*/
	22
	23
	24	package weka.experiment;
	25
	26	import weka.core.Attribute;
	27	import weka.core.FastVector;
	28	import weka.core.Instance;
	29	import weka.core.Instances;
	30	import weka.core.Option;
	31	import weka.core.OptionHandler;
	32	import weka.core.Range;
	33	import weka.core.RevisionHandler;
	34	import weka.core.RevisionUtils;
	35	import weka.core.Utils;
	36
	37	import java.io.BufferedReader;
	38	import java.io.FileReader;
	39	import java.io.Serializable;
	40	import java.text.SimpleDateFormat;
	41	import java.util.Date;
	42	import java.util.Enumeration;
	43	import java.util.Vector;
	44
	45	/**
	46	* Calculates T-Test statistics on data stored in a set of instances. <p/>
	47	*
	48	<!-- options-start -->
	49	* Valid options are: <p/>
	50	*
	51	* <pre> -D <index,index2-index4,...>
	52	* Specify list of columns that specify a unique
	53	* dataset.
	54	* First and last are valid indexes. (default none)</pre>
	55	*
	56	* <pre> -R <index>
	57	* Set the index of the column containing the run number</pre>
	58	*
	59	* <pre> -F <index>
	60	* Set the index of the column containing the fold number</pre>
	61	*
	62	* <pre> -G <index1,index2-index4,...>
	63	* Specify list of columns that specify a unique
	64	* 'result generator' (eg: classifier name and options).
	65	* First and last are valid indexes. (default none)</pre>
	66	*
	67	* <pre> -S <significance level>
	68	* Set the significance level for comparisons (default 0.05)</pre>
	69	*
	70	* <pre> -V
	71	* Show standard deviations</pre>
	72	*
	73	* <pre> -L
	74	* Produce table comparisons in Latex table format</pre>
	75	*
	76	* <pre> -csv
	77	* Produce table comparisons in CSV table format</pre>
	78	*
	79	* <pre> -html
	80	* Produce table comparisons in HTML table format</pre>
	81	*
	82	* <pre> -significance
	83	* Produce table comparisons with only the significance values</pre>
	84	*
	85	* <pre> -gnuplot
	86	* Produce table comparisons output suitable for GNUPlot</pre>
	87	*
	88	<!-- options-end -->
	89	*
	90	* @author Len Trigg (trigg@cs.waikato.ac.nz)
	91	* @version $Revision: 5415 $
	92	*/
	93	public class PairedTTester
	94	implements OptionHandler, Tester, RevisionHandler {
	95
	96	/** for serialization */
	97	static final long serialVersionUID = 8370014624008728610L;
	98
	99	/** The set of instances we will analyse */
	100	protected Instances m_Instances;
	101
	102	/** The index of the column containing the run number */
	103	protected int m_RunColumn = 0;
	104
	105	/** The option setting for the run number column (-1 means last) */
	106	protected int m_RunColumnSet = -1;
	107
	108	/** The option setting for the fold number column (-1 means none) */
	109	protected int m_FoldColumn = -1;
	110
	111	/** The column to sort on (-1 means default sorting) */
	112	protected int m_SortColumn = -1;
	113
	114	/** The sorting of the datasets (according to the sort column) */
	115	protected int[] m_SortOrder = null;
	116
	117	/** The sorting of the columns (test base is always first) */
	118	protected int[] m_ColOrder = null;
	119
	120	/** The significance level for comparisons */
	121	protected double m_SignificanceLevel = 0.05;
	122
	123	/**
	124	* The range of columns that specify a unique "dataset"
	125	* (eg: scheme plus configuration)
	126	*/
	127	protected Range m_DatasetKeyColumnsRange = new Range();
	128
	129	/** An array containing the indexes of just the selected columns */
	130	protected int [] m_DatasetKeyColumns;
	131
	132	/** The list of dataset specifiers */
	133	protected DatasetSpecifiers m_DatasetSpecifiers =
	134	new DatasetSpecifiers();
	135
	136	/**
	137	* The range of columns that specify a unique result set
	138	* (eg: scheme plus configuration)
	139	*/
	140	protected Range m_ResultsetKeyColumnsRange = new Range();
	141
	142	/** An array containing the indexes of just the selected columns */
	143	protected int [] m_ResultsetKeyColumns;
	144
	145	/** An array containing the indexes of the resultsets to display (null means all) */
	146	protected int[] m_DisplayedResultsets = null;
	147
	148	/** Stores a vector for each resultset holding all instances in each set */
	149	protected FastVector m_Resultsets = new FastVector();
	150
	151	/** Indicates whether the instances have been partitioned */
	152	protected boolean m_ResultsetsValid;
	153
	154	/** Indicates whether standard deviations should be displayed */
	155	protected boolean m_ShowStdDevs = false;
	156
	157	/** the instance of the class to produce the output. */
	158	protected ResultMatrix m_ResultMatrix = new ResultMatrixPlainText();
	159
	160	/** A list of unique "dataset" specifiers that have been observed */
	161	protected class DatasetSpecifiers
	162	implements RevisionHandler, Serializable {
	163
	164	/** for serialization. */
	165	private static final long serialVersionUID = -9020938059902723401L;
	166
	167	/** the specifiers that have been observed */
	168	FastVector m_Specifiers = new FastVector();
	169
	170	/**
	171	* Removes all specifiers.
	172	*/
	173	protected void removeAllSpecifiers() {
	174
	175	m_Specifiers.removeAllElements();
	176	}
	177
	178	/**
	179	* Add an instance to the list of specifiers (if necessary)
	180	*
	181	* @param inst the instance to add
	182	*/
	183	protected void add(Instance inst) {
	184
	185	for (int i = 0; i < m_Specifiers.size(); i++) {
	186	Instance specifier = (Instance)m_Specifiers.elementAt(i);
	187	boolean found = true;
	188	for (int j = 0; j < m_DatasetKeyColumns.length; j++) {
	189	if (inst.value(m_DatasetKeyColumns[j]) !=
	190	specifier.value(m_DatasetKeyColumns[j])) {
	191	found = false;
	192	}
	193	}
	194	if (found) {
	195	return;
	196	}
	197	}
	198	m_Specifiers.addElement(inst);
	199	}
	200
	201	/**
	202	* Get the template at the given position.
	203	*
	204	* @param i the index
	205	* @return the template
	206	*/
	207	protected Instance specifier(int i) {
	208
	209	return (Instance)m_Specifiers.elementAt(i);
	210	}
	211
	212	/**
	213	* Gets the number of specifiers.
	214	*
	215	* @return the current number of specifiers
	216	*/
	217	protected int numSpecifiers() {
	218
	219	return m_Specifiers.size();
	220	}
	221
	222	/**
	223	* Returns the revision string.
	224	*
	225	* @return the revision
	226	*/
	227	public String getRevision() {
	228	return RevisionUtils.extract("$Revision: 5415 $");
	229	}
	230	}
	231
	232	/** Utility class to store the instances pertaining to a dataset */
	233	protected class Dataset
	234	implements RevisionHandler, Serializable {
	235
	236	/** for serialization. */
	237	private static final long serialVersionUID = -2801397601839433282L;
	238
	239	/** the template */
	240	Instance m_Template;
	241
	242	/** the dataset */
	243	FastVector m_Dataset;
	244
	245	/**
	246	* Constructor
	247	*
	248	* @param template the template
	249	*/
	250	public Dataset(Instance template) {
	251
	252	m_Template = template;
	253	m_Dataset = new FastVector();
	254	add(template);
	255	}
	256
	257	/**
	258	* Returns true if the two instances match on those attributes that have
	259	* been designated key columns (eg: scheme name and scheme options)
	260	*
	261	* @param first the first instance
	262	* @return true if first and second match on the currently set key columns
	263	*/
	264	protected boolean matchesTemplate(Instance first) {
	265
	266	for (int i = 0; i < m_DatasetKeyColumns.length; i++) {
	267	if (first.value(m_DatasetKeyColumns[i]) !=
	268	m_Template.value(m_DatasetKeyColumns[i])) {
	269	return false;
	270	}
	271	}
	272	return true;
	273	}
	274
	275	/**
	276	* Adds the given instance to the dataset
	277	*
	278	* @param inst the instance to add
	279	*/
	280	protected void add(Instance inst) {
	281
	282	m_Dataset.addElement(inst);
	283	}
	284
	285	/**
	286	* Returns a vector containing the instances in the dataset
	287	*
	288	* @return the current contents
	289	*/
	290	protected FastVector contents() {
	291
	292	return m_Dataset;
	293	}
	294
	295	/**
	296	* Sorts the instances in the dataset by the run number.
	297	*
	298	* @param runColumn a value of type 'int'
	299	*/
	300	public void sort(int runColumn) {
	301
	302	double [] runNums = new double [m_Dataset.size()];
	303	for (int j = 0; j < runNums.length; j++) {
	304	runNums[j] = ((Instance) m_Dataset.elementAt(j)).value(runColumn);
	305	}
	306	int [] index = Utils.stableSort(runNums);
	307	FastVector newDataset = new FastVector(runNums.length);
	308	for (int j = 0; j < index.length; j++) {
	309	newDataset.addElement(m_Dataset.elementAt(index[j]));
	310	}
	311	m_Dataset = newDataset;
	312	}
	313
	314	/**
	315	* Returns the revision string.
	316	*
	317	* @return the revision
	318	*/
	319	public String getRevision() {
	320	return RevisionUtils.extract("$Revision: 5415 $");
	321	}
	322	}
	323
	324	/** Utility class to store the instances in a resultset */
	325	protected class Resultset
	326	implements RevisionHandler, Serializable {
	327
	328	/** for serialization. */
	329	private static final long serialVersionUID = 1543786683821339978L;
	330
	331	/** the template */
	332	Instance m_Template;
	333
	334	/** the dataset */
	335	FastVector m_Datasets;
	336
	337	/**
	338	* Constructir
	339	*
	340	* @param template the template
	341	*/
	342	public Resultset(Instance template) {
	343
	344	m_Template = template;
	345	m_Datasets = new FastVector();
	346	add(template);
	347	}
	348
	349	/**
	350	* Returns true if the two instances match on those attributes that have
	351	* been designated key columns (eg: scheme name and scheme options)
	352	*
	353	* @param first the first instance
	354	* @return true if first and second match on the currently set key columns
	355	*/
	356	protected boolean matchesTemplate(Instance first) {
	357
	358	for (int i = 0; i < m_ResultsetKeyColumns.length; i++) {
	359	if (first.value(m_ResultsetKeyColumns[i]) !=
	360	m_Template.value(m_ResultsetKeyColumns[i])) {
	361	return false;
	362	}
	363	}
	364	return true;
	365	}
	366
	367	/**
	368	* Returns a string descriptive of the resultset key column values
	369	* for this resultset
	370	*
	371	* @return a value of type 'String'
	372	*/
	373	protected String templateString() {
	374
	375	String result = "";
	376	String tempResult = "";
	377	for (int i = 0; i < m_ResultsetKeyColumns.length; i++) {
	378	tempResult = m_Template.toString(m_ResultsetKeyColumns[i]) + ' ';
	379
	380	// compact the string
	381	tempResult = Utils.removeSubstring(tempResult, "weka.classifiers.");
	382	tempResult = Utils.removeSubstring(tempResult, "weka.filters.");
	383	tempResult = Utils.removeSubstring(tempResult, "weka.attributeSelection.");
	384	result += tempResult;
	385	}
	386	return result.trim();
	387	}
	388
	389	/**
	390	* Returns a vector containing all instances belonging to one dataset.
	391	*
	392	* @param inst a template instance
	393	* @return a value of type 'FastVector'
	394	*/
	395	public FastVector dataset(Instance inst) {
	396
	397	for (int i = 0; i < m_Datasets.size(); i++) {
	398	if (((Dataset)m_Datasets.elementAt(i)).matchesTemplate(inst)) {
	399	return ((Dataset)m_Datasets.elementAt(i)).contents();
	400	}
	401	}
	402	return null;
	403	}
	404
	405	/**
	406	* Adds an instance to this resultset
	407	*
	408	* @param newInst a value of type 'Instance'
	409	*/
	410	public void add(Instance newInst) {
	411
	412	for (int i = 0; i < m_Datasets.size(); i++) {
	413	if (((Dataset)m_Datasets.elementAt(i)).matchesTemplate(newInst)) {
	414	((Dataset)m_Datasets.elementAt(i)).add(newInst);
	415	return;
	416	}
	417	}
	418	Dataset newDataset = new Dataset(newInst);
	419	m_Datasets.addElement(newDataset);
	420	}
	421
	422	/**
	423	* Sorts the instances in each dataset by the run number.
	424	*
	425	* @param runColumn a value of type 'int'
	426	*/
	427	public void sort(int runColumn) {
	428
	429	for (int i = 0; i < m_Datasets.size(); i++) {
	430	((Dataset)m_Datasets.elementAt(i)).sort(runColumn);
	431	}
	432	}
	433
	434	/**
	435	* Returns the revision string.
	436	*
	437	* @return the revision
	438	*/
	439	public String getRevision() {
	440	return RevisionUtils.extract("$Revision: 5415 $");
	441	}
	442	} // Resultset
	443
	444
	445	/**
	446	* Returns a string descriptive of the key column values for
	447	* the "datasets
	448	*
	449	* @param template the template
	450	* @return a value of type 'String'
	451	*/
	452	protected String templateString(Instance template) {
	453
	454	String result = "";
	455	for (int i = 0; i < m_DatasetKeyColumns.length; i++) {
	456	result += template.toString(m_DatasetKeyColumns[i]) + ' ';
	457	}
	458	if (result.startsWith("weka.classifiers.")) {
	459	result = result.substring("weka.classifiers.".length());
	460	}
	461	return result.trim();
	462	}
	463
	464	/**
	465	* Sets the matrix to use to produce the output.
	466	* @param matrix the instance to use to produce the output
	467	* @see ResultMatrix
	468	*/
	469	public void setResultMatrix(ResultMatrix matrix) {
	470	m_ResultMatrix = matrix;
	471	}
	472
	473	/**
	474	* Gets the instance that produces the output.
	475	* @return the instance to produce the output
	476	*/
	477	public ResultMatrix getResultMatrix() {
	478	return m_ResultMatrix;
	479	}
	480
	481	/**
	482	* Set whether standard deviations are displayed or not.
	483	* @param s true if standard deviations are to be displayed
	484	*/
	485	public void setShowStdDevs(boolean s) {
	486	m_ShowStdDevs = s;
	487	}
	488
	489	/**
	490	* Returns true if standard deviations have been requested.
	491	* @return true if standard deviations are to be displayed.
	492	*/
	493	public boolean getShowStdDevs() {
	494	return m_ShowStdDevs;
	495	}
	496
	497	/**
	498	* Separates the instances into resultsets and by dataset/run.
	499	*
	500	* @throws Exception if the TTest parameters have not been set.
	501	*/
	502	protected void prepareData() throws Exception {
	503
	504	if (m_Instances == null) {
	505	throw new Exception("No instances have been set");
	506	}
	507	if (m_RunColumnSet == -1) {
	508	m_RunColumn = m_Instances.numAttributes() - 1;
	509	} else {
	510	m_RunColumn = m_RunColumnSet;
	511	}
	512
	513	if (m_ResultsetKeyColumnsRange == null) {
	514	throw new Exception("No result specifier columns have been set");
	515	}
	516	m_ResultsetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1);
	517	m_ResultsetKeyColumns = m_ResultsetKeyColumnsRange.getSelection();
	518
	519	if (m_DatasetKeyColumnsRange == null) {
	520	throw new Exception("No dataset specifier columns have been set");
	521	}
	522	m_DatasetKeyColumnsRange.setUpper(m_Instances.numAttributes() - 1);
	523	m_DatasetKeyColumns = m_DatasetKeyColumnsRange.getSelection();
	524
	525	// Split the data up into result sets
	526	m_Resultsets.removeAllElements();
	527	m_DatasetSpecifiers.removeAllSpecifiers();
	528	for (int i = 0; i < m_Instances.numInstances(); i++) {
	529	Instance current = m_Instances.instance(i);
	530	if (current.isMissing(m_RunColumn)) {
	531	throw new Exception("Instance has missing value in run "
	532	+ "column!\n" + current);
	533	}
	534	for (int j = 0; j < m_ResultsetKeyColumns.length; j++) {
	535	if (current.isMissing(m_ResultsetKeyColumns[j])) {
	536	throw new Exception("Instance has missing value in resultset key "
	537	+ "column " + (m_ResultsetKeyColumns[j] + 1)
	538	+ "!\n" + current);
	539	}
	540	}
	541	for (int j = 0; j < m_DatasetKeyColumns.length; j++) {
	542	if (current.isMissing(m_DatasetKeyColumns[j])) {
	543	throw new Exception("Instance has missing value in dataset key "
	544	+ "column " + (m_DatasetKeyColumns[j] + 1)
	545	+ "!\n" + current);
	546	}
	547	}
	548	boolean found = false;
	549	for (int j = 0; j < m_Resultsets.size(); j++) {
	550	Resultset resultset = (Resultset) m_Resultsets.elementAt(j);
	551	if (resultset.matchesTemplate(current)) {
	552	resultset.add(current);
	553	found = true;
	554	break;
	555	}
	556	}
	557	if (!found) {
	558	Resultset resultset = new Resultset(current);
	559	m_Resultsets.addElement(resultset);
	560	}
	561
	562	m_DatasetSpecifiers.add(current);
	563	}
	564
	565	// Tell each resultset to sort on the run column
	566	for (int j = 0; j < m_Resultsets.size(); j++) {
	567	Resultset resultset = (Resultset) m_Resultsets.elementAt(j);
	568	if (m_FoldColumn >= 0) {
	569	// sort on folds first in case they are out of order
	570	resultset.sort(m_FoldColumn);
	571	}
	572	resultset.sort(m_RunColumn);
	573	}
	574
	575	m_ResultsetsValid = true;
	576	}
	577
	578	/**
	579	* Gets the number of datasets in the resultsets
	580	*
	581	* @return the number of datasets in the resultsets
	582	*/
	583	public int getNumDatasets() {
	584
	585	if (!m_ResultsetsValid) {
	586	try {
	587	prepareData();
	588	} catch (Exception ex) {
	589	ex.printStackTrace();
	590	return 0;
	591	}
	592	}
	593	return m_DatasetSpecifiers.numSpecifiers();
	594	}
	595
	596	/**
	597	* Gets the number of resultsets in the data.
	598	*
	599	* @return the number of resultsets in the data
	600	*/
	601	public int getNumResultsets() {
	602
	603	if (!m_ResultsetsValid) {
	604	try {
	605	prepareData();
	606	} catch (Exception ex) {
	607	ex.printStackTrace();
	608	return 0;
	609	}
	610	}
	611	return m_Resultsets.size();
	612	}
	613
	614	/**
	615	* Gets a string descriptive of the specified resultset.
	616	*
	617	* @param index the index of the resultset
	618	* @return a descriptive string for the resultset
	619	*/
	620	public String getResultsetName(int index) {
	621
	622	if (!m_ResultsetsValid) {
	623	try {
	624	prepareData();
	625	} catch (Exception ex) {
	626	ex.printStackTrace();
	627	return null;
	628	}
	629	}
	630	return ((Resultset) m_Resultsets.elementAt(index)).templateString();
	631	}
	632
	633	/**
	634	* Checks whether the resultset with the given index shall be displayed.
	635	*
	636	* @param index the index of the resultset to check whether it shall be displayed
	637	* @return whether the specified resultset is displayed
	638	*/
	639	public boolean displayResultset(int index) {
	640	boolean result;
	641	int i;
	642
	643	result = true;
	644
	645	if (m_DisplayedResultsets != null) {
	646	result = false;
	647	for (i = 0; i < m_DisplayedResultsets.length; i++) {
	648	if (m_DisplayedResultsets[i] == index) {
	649	result = true;
	650	break;
	651	}
	652	}
	653	}
	654
	655	return result;
	656	}
	657
	658	/**
	659	* Computes a paired t-test comparison for a specified dataset between
	660	* two resultsets.
	661	*
	662	* @param datasetSpecifier the dataset specifier
	663	* @param resultset1Index the index of the first resultset
	664	* @param resultset2Index the index of the second resultset
	665	* @param comparisonColumn the column containing values to compare
	666	* @return the results of the paired comparison
	667	* @throws Exception if an error occurs
	668	*/
	669	public PairedStats calculateStatistics(Instance datasetSpecifier,
	670	int resultset1Index,
	671	int resultset2Index,
	672	int comparisonColumn) throws Exception {
	673
	674	if (m_Instances.attribute(comparisonColumn).type()
	675	!= Attribute.NUMERIC) {
	676	throw new Exception("Comparison column " + (comparisonColumn + 1)
	677	+ " ("
	678	+ m_Instances.attribute(comparisonColumn).name()
	679	+ ") is not numeric");
	680	}
	681	if (!m_ResultsetsValid) {
	682	prepareData();
	683	}
	684
	685	Resultset resultset1 = (Resultset) m_Resultsets.elementAt(resultset1Index);
	686	Resultset resultset2 = (Resultset) m_Resultsets.elementAt(resultset2Index);
	687	FastVector dataset1 = resultset1.dataset(datasetSpecifier);
	688	FastVector dataset2 = resultset2.dataset(datasetSpecifier);
	689	String datasetName = templateString(datasetSpecifier);
	690	if (dataset1 == null) {
	691	throw new Exception("No results for dataset=" + datasetName
	692	+ " for resultset=" + resultset1.templateString());
	693	} else if (dataset2 == null) {
	694	throw new Exception("No results for dataset=" + datasetName
	695	+ " for resultset=" + resultset2.templateString());
	696	} else if (dataset1.size() != dataset2.size()) {
	697	throw new Exception("Results for dataset=" + datasetName
	698	+ " differ in size for resultset="
	699	+ resultset1.templateString()
	700	+ " and resultset="
	701	+ resultset2.templateString()
	702	);
	703	}
	704
	705	PairedStats pairedStats = new PairedStats(m_SignificanceLevel);
	706
	707	for (int k = 0; k < dataset1.size(); k ++) {
	708	Instance current1 = (Instance) dataset1.elementAt(k);
	709	Instance current2 = (Instance) dataset2.elementAt(k);
	710	if (current1.isMissing(comparisonColumn)) {
	711	System.err.println("Instance has missing value in comparison "
	712	+ "column!\n" + current1);
	713	continue;
	714	}
	715	if (current2.isMissing(comparisonColumn)) {
	716	System.err.println("Instance has missing value in comparison "
	717	+ "column!\n" + current2);
	718	continue;
	719	}
	720	if (current1.value(m_RunColumn) != current2.value(m_RunColumn)) {
	721	System.err.println("Run numbers do not match!\n"
	722	+ current1 + current2);
	723	}
	724	if (m_FoldColumn != -1) {
	725	if (current1.value(m_FoldColumn) != current2.value(m_FoldColumn)) {
	726	System.err.println("Fold numbers do not match!\n"
	727	+ current1 + current2);
	728	}
	729	}
	730	double value1 = current1.value(comparisonColumn);
	731	double value2 = current2.value(comparisonColumn);
	732	pairedStats.add(value1, value2);
	733	}
	734	pairedStats.calculateDerived();
	735	//System.err.println("Differences stats:\n" + pairedStats.differencesStats);
	736	return pairedStats;
	737
	738	}
	739
	740	/**
	741	* Creates a key that maps resultset numbers to their descriptions.
	742	*
	743	* @return a value of type 'String'
	744	*/
	745	public String resultsetKey() {
	746
	747	if (!m_ResultsetsValid) {
	748	try {
	749	prepareData();
	750	} catch (Exception ex) {
	751	ex.printStackTrace();
	752	return ex.getMessage();
	753	}
	754	}
	755	String result = "";
	756	for (int j = 0; j < getNumResultsets(); j++) {
	757	result += "(" + (j + 1) + ") " + getResultsetName(j) + '\n';
	758	}
	759	return result + '\n';
	760	}
	761
	762	/**
	763	* Creates a "header" string describing the current resultsets.
	764	*
	765	* @param comparisonColumn a value of type 'int'
	766	* @return a value of type 'String'
	767	*/
	768	public String header(int comparisonColumn) {
	769
	770	if (!m_ResultsetsValid) {
	771	try {
	772	prepareData();
	773	} catch (Exception ex) {
	774	ex.printStackTrace();
	775	return ex.getMessage();
	776	}
	777	}
	778
	779	initResultMatrix();
	780	m_ResultMatrix.addHeader("Tester", getClass().getName());
	781	m_ResultMatrix.addHeader("Analysing", m_Instances.attribute(comparisonColumn).name());
	782	m_ResultMatrix.addHeader("Datasets", Integer.toString(getNumDatasets()));
	783	m_ResultMatrix.addHeader("Resultsets", Integer.toString(getNumResultsets()));
	784	m_ResultMatrix.addHeader("Confidence", getSignificanceLevel() + " (two tailed)");
	785	m_ResultMatrix.addHeader("Sorted by", getSortColumnName());
	786	m_ResultMatrix.addHeader("Date", (new SimpleDateFormat()).format(new Date()));
	787
	788	return m_ResultMatrix.toStringHeader() + "\n";
	789	}
	790
	791	/**
	792	* Carries out a comparison between all resultsets, counting the number
	793	* of datsets where one resultset outperforms the other.
	794	*
	795	* @param comparisonColumn the index of the comparison column
	796	* @param nonSigWin for storing the non-significant wins
	797	* @return a 2d array where element [i][j] is the number of times resultset
	798	* j performed significantly better than resultset i.
	799	* @throws Exception if an error occurs
	800	*/
	801	public int [][] multiResultsetWins(int comparisonColumn, int [][] nonSigWin)
	802	throws Exception {
	803
	804	int numResultsets = getNumResultsets();
	805	int [][] win = new int [numResultsets][numResultsets];
	806	// int [][] nonSigWin = new int [numResultsets][numResultsets];
	807	for (int i = 0; i < numResultsets; i++) {
	808	for (int j = i + 1; j < numResultsets; j++) {
	809	System.err.print("Comparing (" + (i + 1) + ") with ("
	810	+ (j + 1) + ")\r");
	811	System.err.flush();
	812	for (int k = 0; k < getNumDatasets(); k++) {
	813	try {
	814	PairedStats pairedStats =
	815	calculateStatistics(m_DatasetSpecifiers.specifier(k), i, j,
	816	comparisonColumn);
	817	if (pairedStats.differencesSignificance < 0) {
	818	win[i][j]++;
	819	} else if (pairedStats.differencesSignificance > 0) {
	820	win[j][i]++;
	821	}
	822
	823	if (pairedStats.differencesStats.mean < 0) {
	824	nonSigWin[i][j]++;
	825	} else if (pairedStats.differencesStats.mean > 0) {
	826	nonSigWin[j][i]++;
	827	}
	828	} catch (Exception ex) {
	829	//ex.printStackTrace();
	830	System.err.println(ex.getMessage());
	831	}
	832	}
	833	}
	834	}
	835	return win;
	836	}
	837
	838	/**
	839	* clears the content and fills the column and row names according to the
	840	* given sorting
	841	*/
	842	protected void initResultMatrix() {
	843	m_ResultMatrix.setSize(getNumResultsets(), getNumDatasets());
	844	m_ResultMatrix.setShowStdDev(m_ShowStdDevs);
	845
	846	for (int i = 0; i < getNumDatasets(); i++)
	847	m_ResultMatrix.setRowName(i,
	848	templateString(m_DatasetSpecifiers.specifier(i)));
	849
	850	for (int j = 0; j < getNumResultsets(); j++) {
	851	m_ResultMatrix.setColName(j, getResultsetName(j));
	852	m_ResultMatrix.setColHidden(j, !displayResultset(j));
	853	}
	854	}
	855
	856	/**
	857	* Carries out a comparison between all resultsets, counting the number
	858	* of datsets where one resultset outperforms the other. The results
	859	* are summarized in a table.
	860	*
	861	* @param comparisonColumn the index of the comparison column
	862	* @return the results in a string
	863	* @throws Exception if an error occurs
	864	*/
	865	public String multiResultsetSummary(int comparisonColumn)
	866	throws Exception {
	867
	868	int[][] nonSigWin = new int [getNumResultsets()][getNumResultsets()];
	869	int[][] win = multiResultsetWins(comparisonColumn, nonSigWin);
	870
	871	initResultMatrix();
	872	m_ResultMatrix.setSummary(nonSigWin, win);
	873
	874	return m_ResultMatrix.toStringSummary();
	875	}
	876
	877	/**
	878	* returns a ranking of the resultsets
	879	*
	880	* @param comparisonColumn the column to compare with
	881	* @return the ranking
	882	* @throws Exception if something goes wrong
	883	*/
	884	public String multiResultsetRanking(int comparisonColumn)
	885	throws Exception {
	886
	887	int[][] nonSigWin = new int [getNumResultsets()][getNumResultsets()];
	888	int[][] win = multiResultsetWins(comparisonColumn, nonSigWin);
	889
	890	initResultMatrix();
	891	m_ResultMatrix.setRanking(win);
	892
	893	return m_ResultMatrix.toStringRanking();
	894	}
	895
	896	/**
	897	* Creates a comparison table where a base resultset is compared to the
	898	* other resultsets. Results are presented for every dataset.
	899	*
	900	* @param baseResultset the index of the base resultset
	901	* @param comparisonColumn the index of the column to compare over
	902	* @return the comparison table string
	903	* @throws Exception if an error occurs
	904	*/
	905	public String multiResultsetFull(int baseResultset,
	906	int comparisonColumn) throws Exception {
	907
	908	int maxWidthMean = 2;
	909	int maxWidthStdDev = 2;
	910
	911	double[] sortValues = new double[getNumDatasets()];
	912
	913	// determine max field width
	914	for (int i = 0; i < getNumDatasets(); i++) {
	915	sortValues[i] = Double.POSITIVE_INFINITY; // sorts skipped cols to end
	916
	917	for (int j = 0; j < getNumResultsets(); j++) {
	918	if (!displayResultset(j))
	919	continue;
	920	try {
	921	PairedStats pairedStats =
	922	calculateStatistics(m_DatasetSpecifiers.specifier(i),
	923	baseResultset, j, comparisonColumn);
	924	if (!Double.isInfinite(pairedStats.yStats.mean) &&
	925	!Double.isNaN(pairedStats.yStats.mean)) {
	926	double width = ((Math.log(Math.abs(pairedStats.yStats.mean)) /
	927	Math.log(10))+1);
	928	if (width > maxWidthMean) {
	929	maxWidthMean = (int)width;
	930	}
	931	}
	932
	933	if (j == baseResultset) {
	934	if (getSortColumn() != -1)
	935	sortValues[i] = calculateStatistics(
	936	m_DatasetSpecifiers.specifier(i),
	937	baseResultset, j, getSortColumn()).xStats.mean;
	938	else
	939	sortValues[i] = i;
	940	}
	941
	942	if (m_ShowStdDevs &&
	943	!Double.isInfinite(pairedStats.yStats.stdDev) &&
	944	!Double.isNaN(pairedStats.yStats.stdDev)) {
	945	double width = ((Math.log(Math.abs(pairedStats.yStats.stdDev)) /
	946	Math.log(10))+1);
	947	if (width > maxWidthStdDev) {
	948	maxWidthStdDev = (int)width;
	949	}
	950	}
	951	} catch (Exception ex) {
	952	//ex.printStackTrace();
	953	System.err.println(ex);
	954	}
	955	}
	956	}
	957
	958	// sort rows according to sort column
	959	m_SortOrder = Utils.sort(sortValues);
	960
	961	// determine column order
	962	m_ColOrder = new int[getNumResultsets()];
	963	m_ColOrder[0] = baseResultset;
	964	int index = 1;
	965	for (int i = 0; i < getNumResultsets(); i++) {
	966	if (i == baseResultset)
	967	continue;
	968	m_ColOrder[index] = i;
	969	index++;
	970	}
	971
	972	// setup matrix
	973	initResultMatrix();
	974	m_ResultMatrix.setRowOrder(m_SortOrder);
	975	m_ResultMatrix.setColOrder(m_ColOrder);
	976	m_ResultMatrix.setMeanWidth(maxWidthMean);
	977	m_ResultMatrix.setStdDevWidth(maxWidthStdDev);
	978	m_ResultMatrix.setSignificanceWidth(1);
	979
	980	// make sure that test base is displayed, even though it might not be
	981	// selected
	982	for (int i = 0; i < m_ResultMatrix.getColCount(); i++) {
	983	if ( (i == baseResultset)
	984	&& (m_ResultMatrix.getColHidden(i)) ) {
	985	m_ResultMatrix.setColHidden(i, false);
	986	System.err.println("Note: test base was hidden - set visible!");
	987	}
	988	}
	989
	990	// the data
	991	for (int i = 0; i < getNumDatasets(); i++) {
	992	m_ResultMatrix.setRowName(i,
	993	templateString(m_DatasetSpecifiers.specifier(i)));
	994
	995	for (int j = 0; j < getNumResultsets(); j++) {
	996	try {
	997	// calc stats
	998	PairedStats pairedStats =
	999	calculateStatistics(m_DatasetSpecifiers.specifier(i),
	1000	baseResultset, j, comparisonColumn);
	1001
	1002	// count
	1003	m_ResultMatrix.setCount(i, pairedStats.count);
	1004
	1005	// mean
	1006	m_ResultMatrix.setMean(j, i, pairedStats.yStats.mean);
	1007
	1008	// std dev
	1009	m_ResultMatrix.setStdDev(j, i, pairedStats.yStats.stdDev);
	1010
	1011	// significance
	1012	if (pairedStats.differencesSignificance < 0)
	1013	m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_WIN);
	1014	else if (pairedStats.differencesSignificance > 0)
	1015	m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_LOSS);
	1016	else
	1017	m_ResultMatrix.setSignificance(j, i, ResultMatrix.SIGNIFICANCE_TIE);
	1018	}
	1019	catch (Exception e) {
	1020	//e.printStackTrace();
	1021	System.err.println(e);
	1022	}
	1023	}
	1024	}
	1025
	1026	// generate output
	1027	StringBuffer result = new StringBuffer(1000);
	1028	try {
	1029	result.append(m_ResultMatrix.toStringMatrix());
	1030	}
	1031	catch (Exception e) {
	1032	e.printStackTrace();
	1033	}
	1034
	1035	// append a key so that we can tell the difference between long
	1036	// scheme+option names
	1037	result.append("\n\n" + m_ResultMatrix.toStringKey());
	1038
	1039	return result.toString();
	1040	}
	1041
	1042	/**
	1043	* Lists options understood by this object.
	1044	*
	1045	* @return an enumeration of Options.
	1046	*/
	1047	public Enumeration listOptions() {
	1048
	1049	Vector newVector = new Vector();
	1050
	1051	newVector.addElement(new Option(
	1052	"\tSpecify list of columns that specify a unique\n"
	1053	+ "\tdataset.\n"
	1054	+ "\tFirst and last are valid indexes. (default none)",
	1055	"D", 1, "-D <index,index2-index4,...>"));
	1056	newVector.addElement(new Option(
	1057	"\tSet the index of the column containing the run number",
	1058	"R", 1, "-R <index>"));
	1059	newVector.addElement(new Option(
	1060	"\tSet the index of the column containing the fold number",
	1061	"F", 1, "-F <index>"));
	1062	newVector.addElement(new Option(
	1063	"\tSpecify list of columns that specify a unique\n"
	1064	+ "\t'result generator' (eg: classifier name and options).\n"
	1065	+ "\tFirst and last are valid indexes. (default none)",
	1066	"G", 1, "-G <index1,index2-index4,...>"));
	1067	newVector.addElement(new Option(
	1068	"\tSet the significance level for comparisons (default 0.05)",
	1069	"S", 1, "-S <significance level>"));
	1070	newVector.addElement(new Option(
	1071	"\tShow standard deviations",
	1072	"V", 0, "-V"));
	1073	newVector.addElement(new Option(
	1074	"\tProduce table comparisons in Latex table format",
	1075	"L", 0, "-L"));
	1076	newVector.addElement(new Option(
	1077	"\tProduce table comparisons in CSV table format",
	1078	"csv", 0, "-csv"));
	1079	newVector.addElement(new Option(
	1080	"\tProduce table comparisons in HTML table format",
	1081	"html", 0, "-html"));
	1082	newVector.addElement(new Option(
	1083	"\tProduce table comparisons with only the significance values",
	1084	"significance", 0, "-significance"));
	1085	newVector.addElement(new Option(
	1086	"\tProduce table comparisons output suitable for GNUPlot",
	1087	"gnuplot", 0, "-gnuplot"));
	1088
	1089	return newVector.elements();
	1090	}
	1091
	1092	/**
	1093	* Parses a given list of options. <p/>
	1094	*
	1095	<!-- options-start -->
	1096	* Valid options are: <p/>
	1097	*
	1098	* <pre> -D <index,index2-index4,...>
	1099	* Specify list of columns that specify a unique
	1100	* dataset.
	1101	* First and last are valid indexes. (default none)</pre>
	1102	*
	1103	* <pre> -R <index>
	1104	* Set the index of the column containing the run number</pre>
	1105	*
	1106	* <pre> -F <index>
	1107	* Set the index of the column containing the fold number</pre>
	1108	*
	1109	* <pre> -G <index1,index2-index4,...>
	1110	* Specify list of columns that specify a unique
	1111	* 'result generator' (eg: classifier name and options).
	1112	* First and last are valid indexes. (default none)</pre>
	1113	*
	1114	* <pre> -S <significance level>
	1115	* Set the significance level for comparisons (default 0.05)</pre>
	1116	*
	1117	* <pre> -V
	1118	* Show standard deviations</pre>
	1119	*
	1120	* <pre> -L
	1121	* Produce table comparisons in Latex table format</pre>
	1122	*
	1123	* <pre> -csv
	1124	* Produce table comparisons in CSV table format</pre>
	1125	*
	1126	* <pre> -html
	1127	* Produce table comparisons in HTML table format</pre>
	1128	*
	1129	* <pre> -significance
	1130	* Produce table comparisons with only the significance values</pre>
	1131	*
	1132	* <pre> -gnuplot
	1133	* Produce table comparisons output suitable for GNUPlot</pre>
	1134	*
	1135	<!-- options-end -->
	1136	*
	1137	* @param options an array containing options to set.
	1138	* @throws Exception if invalid options are given
	1139	*/
	1140	public void setOptions(String[] options) throws Exception {
	1141
	1142	setShowStdDevs(Utils.getFlag('V', options));
	1143	if (Utils.getFlag('L', options))
	1144	setResultMatrix(new ResultMatrixLatex());
	1145	if (Utils.getFlag("csv", options))
	1146	setResultMatrix(new ResultMatrixCSV());
	1147	if (Utils.getFlag("html", options))
	1148	setResultMatrix(new ResultMatrixHTML());
	1149	if (Utils.getFlag("significance", options))
	1150	setResultMatrix(new ResultMatrixSignificance());
	1151
	1152	String datasetList = Utils.getOption('D', options);
	1153	Range datasetRange = new Range();
	1154	if (datasetList.length() != 0) {
	1155	datasetRange.setRanges(datasetList);
	1156	}
	1157	setDatasetKeyColumns(datasetRange);
	1158
	1159	String indexStr = Utils.getOption('R', options);
	1160	if (indexStr.length() != 0) {
	1161	if (indexStr.equals("first")) {
	1162	setRunColumn(0);
	1163	} else if (indexStr.equals("last")) {
	1164	setRunColumn(-1);
	1165	} else {
	1166	setRunColumn(Integer.parseInt(indexStr) - 1);
	1167	}
	1168	} else {
	1169	setRunColumn(-1);
	1170	}
	1171
	1172	String foldStr = Utils.getOption('F', options);
	1173	if (foldStr.length() != 0) {
	1174	setFoldColumn(Integer.parseInt(foldStr) - 1);
	1175	} else {
	1176	setFoldColumn(-1);
	1177	}
	1178
	1179	String sigStr = Utils.getOption('S', options);
	1180	if (sigStr.length() != 0) {
	1181	setSignificanceLevel((new Double(sigStr)).doubleValue());
	1182	} else {
	1183	setSignificanceLevel(0.05);
	1184	}
	1185
	1186	String resultsetList = Utils.getOption('G', options);
	1187	Range generatorRange = new Range();
	1188	if (resultsetList.length() != 0) {
	1189	generatorRange.setRanges(resultsetList);
	1190	}
	1191	setResultsetKeyColumns(generatorRange);
	1192	}
	1193
	1194	/**
	1195	* Gets current settings of the PairedTTester.
	1196	*
	1197	* @return an array of strings containing current options.
	1198	*/
	1199	public String[] getOptions() {
	1200
	1201	String [] options = new String [11];
	1202	int current = 0;
	1203
	1204	if (!getResultsetKeyColumns().getRanges().equals("")) {
	1205	options[current++] = "-G";
	1206	options[current++] = getResultsetKeyColumns().getRanges();
	1207	}
	1208	if (!getDatasetKeyColumns().getRanges().equals("")) {
	1209	options[current++] = "-D";
	1210	options[current++] = getDatasetKeyColumns().getRanges();
	1211	}
	1212	options[current++] = "-R";
	1213	options[current++] = "" + (getRunColumn() + 1);
	1214	options[current++] = "-S";
	1215	options[current++] = "" + getSignificanceLevel();
	1216
	1217	if (getShowStdDevs()) {
	1218	options[current++] = "-V";
	1219	}
	1220
	1221	if (getResultMatrix().equals(ResultMatrixLatex.class))
	1222	options[current++] = "-L";
	1223
	1224	if (getResultMatrix().equals(ResultMatrixCSV.class))
	1225	options[current++] = "-csv";
	1226
	1227	if (getResultMatrix().equals(ResultMatrixHTML.class))
	1228	options[current++] = "-html";
	1229
	1230	if (getResultMatrix().equals(ResultMatrixSignificance.class))
	1231	options[current++] = "-significance";
	1232
	1233	while (current < options.length) {
	1234	options[current++] = "";
	1235	}
	1236	return options;
	1237	}
	1238
	1239	/**
	1240	* Get the value of ResultsetKeyColumns.
	1241	*
	1242	* @return Value of ResultsetKeyColumns.
	1243	*/
	1244	public Range getResultsetKeyColumns() {
	1245
	1246	return m_ResultsetKeyColumnsRange;
	1247	}
	1248
	1249	/**
	1250	* Set the value of ResultsetKeyColumns.
	1251	*
	1252	* @param newResultsetKeyColumns Value to assign to ResultsetKeyColumns.
	1253	*/
	1254	public void setResultsetKeyColumns(Range newResultsetKeyColumns) {
	1255
	1256	m_ResultsetKeyColumnsRange = newResultsetKeyColumns;
	1257	m_ResultsetsValid = false;
	1258	}
	1259
	1260	/**
	1261	* Gets the indices of the the datasets that are displayed (if <code>null</code>
	1262	* then all are displayed). The base is always displayed.
	1263	*
	1264	* @return the indices of the datasets to display
	1265	*/
	1266	public int[] getDisplayedResultsets() {
	1267	return m_DisplayedResultsets;
	1268	}
	1269
	1270	/**
	1271	* Sets the indicies of the datasets to display (<code>null</code> means all).
	1272	* The base is always displayed.
	1273	*
	1274	* @param cols the indices of the datasets to display
	1275	*/
	1276	public void setDisplayedResultsets(int[] cols) {
	1277	m_DisplayedResultsets = cols;
	1278	}
	1279
	1280	/**
	1281	* Get the value of SignificanceLevel.
	1282	*
	1283	* @return Value of SignificanceLevel.
	1284	*/
	1285	public double getSignificanceLevel() {
	1286
	1287	return m_SignificanceLevel;
	1288	}
	1289
	1290	/**
	1291	* Set the value of SignificanceLevel.
	1292	*
	1293	* @param newSignificanceLevel Value to assign to SignificanceLevel.
	1294	*/
	1295	public void setSignificanceLevel(double newSignificanceLevel) {
	1296
	1297	m_SignificanceLevel = newSignificanceLevel;
	1298	}
	1299
	1300	/**
	1301	* Get the value of DatasetKeyColumns.
	1302	*
	1303	* @return Value of DatasetKeyColumns.
	1304	*/
	1305	public Range getDatasetKeyColumns() {
	1306
	1307	return m_DatasetKeyColumnsRange;
	1308	}
	1309
	1310	/**
	1311	* Set the value of DatasetKeyColumns.
	1312	*
	1313	* @param newDatasetKeyColumns Value to assign to DatasetKeyColumns.
	1314	*/
	1315	public void setDatasetKeyColumns(Range newDatasetKeyColumns) {
	1316
	1317	m_DatasetKeyColumnsRange = newDatasetKeyColumns;
	1318	m_ResultsetsValid = false;
	1319	}
	1320
	1321	/**
	1322	* Get the value of RunColumn.
	1323	*
	1324	* @return Value of RunColumn.
	1325	*/
	1326	public int getRunColumn() {
	1327
	1328	return m_RunColumnSet;
	1329	}
	1330
	1331	/**
	1332	* Set the value of RunColumn.
	1333	*
	1334	* @param newRunColumn Value to assign to RunColumn.
	1335	*/
	1336	public void setRunColumn(int newRunColumn) {
	1337
	1338	m_RunColumnSet = newRunColumn;
	1339	m_ResultsetsValid = false;
	1340	}
	1341
	1342	/**
	1343	* Get the value of FoldColumn.
	1344	*
	1345	* @return Value of FoldColumn.
	1346	*/
	1347	public int getFoldColumn() {
	1348
	1349	return m_FoldColumn;
	1350	}
	1351
	1352	/**
	1353	* Set the value of FoldColumn.
	1354	*
	1355	* @param newFoldColumn Value to assign to FoldColumn.
	1356	*/
	1357	public void setFoldColumn(int newFoldColumn) {
	1358
	1359	m_FoldColumn = newFoldColumn;
	1360	m_ResultsetsValid = false;
	1361	}
	1362
	1363	/**
	1364	* Returns the name of the column to sort on.
	1365	*
	1366	* @return the name of the column to sort on.
	1367	*/
	1368	public String getSortColumnName() {
	1369	if (getSortColumn() == -1)
	1370	return "-";
	1371	else
	1372	return m_Instances.attribute(getSortColumn()).name();
	1373	}
	1374
	1375	/**
	1376	* Returns the column to sort on, -1 means the default sorting.
	1377	*
	1378	* @return the column to sort on.
	1379	*/
	1380	public int getSortColumn() {
	1381	return m_SortColumn;
	1382	}
	1383
	1384	/**
	1385	* Set the column to sort on, -1 means the default sorting.
	1386	*
	1387	* @param newSortColumn the new sort column.
	1388	*/
	1389	public void setSortColumn(int newSortColumn) {
	1390	if (newSortColumn >= -1)
	1391	m_SortColumn = newSortColumn;
	1392	}
	1393
	1394	/**
	1395	* Get the value of Instances.
	1396	*
	1397	* @return Value of Instances.
	1398	*/
	1399	public Instances getInstances() {
	1400
	1401	return m_Instances;
	1402	}
	1403
	1404	/**
	1405	* Set the value of Instances.
	1406	*
	1407	* @param newInstances Value to assign to Instances.
	1408	*/
	1409	public void setInstances(Instances newInstances) {
	1410
	1411	m_Instances = newInstances;
	1412	m_ResultsetsValid = false;
	1413	}
	1414
	1415	/**
	1416	* retrieves all the settings from the given Tester
	1417	*
	1418	* @param tester the Tester to get the settings from
	1419	*/
	1420	public void assign(Tester tester) {
	1421	setInstances(tester.getInstances());
	1422	setResultMatrix(tester.getResultMatrix());
	1423	setShowStdDevs(tester.getShowStdDevs());
	1424	setResultsetKeyColumns(tester.getResultsetKeyColumns());
	1425	setDisplayedResultsets(tester.getDisplayedResultsets());
	1426	setSignificanceLevel(tester.getSignificanceLevel());
	1427	setDatasetKeyColumns(tester.getDatasetKeyColumns());
	1428	setRunColumn(tester.getRunColumn());
	1429	setFoldColumn(tester.getFoldColumn());
	1430	setSortColumn(tester.getSortColumn());
	1431	}
	1432
	1433	/**
	1434	* returns a string that is displayed as tooltip on the "perform test"
	1435	* button in the experimenter
	1436	*
	1437	* @return the tool tip
	1438	*/
	1439	public String getToolTipText() {
	1440	return "Performs test using t-test statistic";
	1441	}
	1442
	1443	/**
	1444	* returns the name of the tester
	1445	*
	1446	* @return the display name
	1447	*/
	1448	public String getDisplayName() {
	1449	return "Paired T-Tester";
	1450	}
	1451
	1452	/**
	1453	* Returns the revision string.
	1454	*
	1455	* @return the revision
	1456	*/
	1457	public String getRevision() {
	1458	return RevisionUtils.extract("$Revision: 5415 $");
	1459	}
	1460
	1461	/**
	1462	* Test the class from the command line.
	1463	*
	1464	* @param args contains options for the instance ttests
	1465	*/
	1466	public static void main(String args[]) {
	1467
	1468	try {
	1469	PairedTTester tt = new PairedTTester();
	1470	String datasetName = Utils.getOption('t', args);
	1471	String compareColStr = Utils.getOption('c', args);
	1472	String baseColStr = Utils.getOption('b', args);
	1473	boolean summaryOnly = Utils.getFlag('s', args);
	1474	boolean rankingOnly = Utils.getFlag('r', args);
	1475	try {
	1476	if ((datasetName.length() == 0)
	1477	\|\| (compareColStr.length() == 0)) {
	1478	throw new Exception("-t and -c options are required");
	1479	}
	1480	tt.setOptions(args);
	1481	Utils.checkForRemainingOptions(args);
	1482	} catch (Exception ex) {
	1483	String result = "";
	1484	Enumeration enu = tt.listOptions();
	1485	while (enu.hasMoreElements()) {
	1486	Option option = (Option) enu.nextElement();
	1487	result += option.synopsis() + '\n'
	1488	+ option.description() + '\n';
	1489	}
	1490	throw new Exception(
	1491	"Usage:\n\n"
	1492	+ "-t <file>\n"
	1493	+ "\tSet the dataset containing data to evaluate\n"
	1494	+ "-b <index>\n"
	1495	+ "\tSet the resultset to base comparisons against (optional)\n"
	1496	+ "-c <index>\n"
	1497	+ "\tSet the column to perform a comparison on\n"
	1498	+ "-s\n"
	1499	+ "\tSummarize wins over all resultset pairs\n\n"
	1500	+ "-r\n"
	1501	+ "\tGenerate a resultset ranking\n\n"
	1502	+ result);
	1503	}
	1504	Instances data = new Instances(new BufferedReader(
	1505	new FileReader(datasetName)));
	1506	tt.setInstances(data);
	1507	// tt.prepareData();
	1508	int compareCol = Integer.parseInt(compareColStr) - 1;
	1509	System.out.println(tt.header(compareCol));
	1510	if (rankingOnly) {
	1511	System.out.println(tt.multiResultsetRanking(compareCol));
	1512	} else if (summaryOnly) {
	1513	System.out.println(tt.multiResultsetSummary(compareCol));
	1514	} else {
	1515	System.out.println(tt.resultsetKey());
	1516	if (baseColStr.length() == 0) {
	1517	for (int i = 0; i < tt.getNumResultsets(); i++) {
	1518	if (!tt.displayResultset(i))
	1519	continue;
	1520	System.out.println(tt.multiResultsetFull(i, compareCol));
	1521	}
	1522	} else {
	1523	int baseCol = Integer.parseInt(baseColStr) - 1;
	1524	System.out.println(tt.multiResultsetFull(baseCol, compareCol));
	1525	}
	1526	}
	1527	} catch(Exception e) {
	1528	e.printStackTrace();
	1529	System.err.println(e.getMessage());
	1530	}
	1531	}
	1532	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: