Context Navigation

source: src/main/java/weka/classifiers/misc/monotone/InstancesUtil.java @ 14

Last change on this file since 14 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 29.8 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* InstancesUtil.java
19	* Copyright (C) 2004 Stijn Lievens
20	*
21	*/
22
23	package weka.classifiers.misc.monotone;
24
25	import weka.classifiers.Classifier;
26	import weka.core.Instance;
27	import weka.core.DenseInstance;
28	import weka.core.Instances;
29	import weka.core.RevisionHandler;
30	import weka.core.RevisionUtils;
31	import weka.core.Utils;
32	import weka.estimators.DiscreteEstimator;
33
34	import java.io.BufferedWriter;
35	import java.io.IOException;
36	import java.util.HashMap;
37	import java.util.Iterator;
38	import java.util.Map;
39	import java.util.Random;
40
41	/**
42	* This class contains some methods for working with objects of
43	* type <code> Instance </code> and <code> Instances, </code> not
44	* provided by there respective classes.
45	* <p>
46	* This implementation is part of the master's thesis: "Studie
47	* en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd
48	* rangschikken", Stijn Lievens, Ghent University, 2004.
49	* </p>
50	*
51	* @author Stijn Lievens (stijn.lievens@ugent.be)
52	* @version $Revision: 5987 $
53	*/
54	public class InstancesUtil
55	implements RevisionHandler {
56
57	/**
58	* Compares two instances, ignoring the class attribute (if any)
59	*
60	* @param i1 the first instance
61	* @param i2 the second instance
62	* @return true if both instances are equal (ignoring the class
63	* attribute), false otherwise
64	*/
65	public static boolean equalIgnoreClass(Instance i1, Instance i2) {
66	int n = i1.numAttributes();
67	int classIndex = i1.classIndex();
68	if (i2.numAttributes() != n \|\| classIndex != i2.classIndex()) {
69	return false;
70	}
71	int i = 0;
72	while(i < n && (i == classIndex
73	\|\| Utils.eq(i1.value(i), i2.value(i)))) {
74	i++;
75	}
76	return i == n;
77	}
78
79	/**
80	* Get the index of an instance in a set of instances, where
81	* instances are compared ignoring the class attribute.
82	*
83	* @param instances the set of instances
84	* @param instance to instance to be found in the given set of instances
85	* @return the index of the first instance that equals the given instance
86	* (ignoring the class attribute), -1 if the instance was not found
87	*/
88	public static int containsIgnoreClass(Instances instances, Instance instance) {
89	double[] dd = instance.toDoubleArray();
90	int classIndex = instances.classIndex();
91	int n = instances.numAttributes();
92	Iterator it =
93	new EnumerationIterator(instances.enumerateInstances());
94	int index = 0;
95	while(it.hasNext()) {
96	Instance tmp = (Instance) it.next();
97	int i = 0;
98	while(i < n &&
99	(i == classIndex \|\| Utils.eq(dd[i], tmp.value(i)))) {
100	i++;
101	}
102	if (i == n) {
103	return index; // found it
104	}
105	index++;
106	}
107	return -1;
108	}
109
110	/**
111	* Find the next occurence of an instance, ignoring the class,
112	* for which the index in the dataset is at least <code> index. </code>
113	*
114	* @param instances the set of instances to be searched
115	* @param instance the instance to be found
116	* @param index the minimum index that might be returned
117	* @return the index of the first instance with index at
118	* least <code> index </code> that equals the given instance
119	* (ignoring the class attribute), -1 if the instance was not found
120	*/
121	public static int nextOccurenceIgnoreClass(Instances instances, Instance instance, int index) {
122	double[] dd = instance.toDoubleArray();
123	int classIndex = instances.classIndex();
124	int n = instances.numAttributes();
125	int numInstances = instances.numInstances();
126	int currentIndex = index;
127	while(currentIndex < numInstances) {
128	Instance tmp = instances.instance(currentIndex);
129	int i = 0;
130	while(i < n &&
131	(i == classIndex \|\| Utils.eq(dd[i], tmp.value(i)))) {
132	i++;
133	}
134	if (i == n) {
135	return currentIndex; // found it
136	}
137	currentIndex++;
138	}
139	return -1; // not present
140	}
141
142	/**
143	* Check if all instances have the same class value.
144	*
145	* @param instances the instances to be checked for homogeneity
146	* @return true if the instances have the same class value, false otherwise
147	*/
148	public static boolean isHomogeneous(Instances instances) {
149	Iterator it = new EnumerationIterator(instances.enumerateInstances());
150	if (it.hasNext()) {
151	double classValue = ((Instance) it.next()).classValue();
152	while(it.hasNext()) {
153	if (((Instance) it.next()).classValue() != classValue) {
154	return false;
155	}
156	}
157	}
158	return true; // empty or all identical
159	}
160
161	/**
162	* Compares two instances in the data space, this is ignoring the class
163	* attribute. An instance is strictly smaller than another instance
164	* if the same holds for the <code> Coordinates </code> based on
165	* these instances.
166	*
167	* @param i1 the first instance
168	* @param i2 the second instance
169	* @return <code> true </code> if the first instance is strictly smaller
170	* than the second instance, <code> false </code> otherwise
171	*/
172	public static boolean strictlySmaller(Instance i1, Instance i2) {
173	// XXX implementation can be done faster
174	Coordinates c1 = new Coordinates(i1);
175	Coordinates c2 = new Coordinates(i2);
176
177	return c1.strictlySmaller(c2);
178	}
179
180	/**
181	* Compares two instances in the data space, this is, ignoring the class
182	* attribute. An instance is smaller or equal than another instance
183	* if the same holds for the <code> Coordinates </code> based on
184	* these instances.
185	*
186	* @param i1 the first instance
187	* @param i2 the second instance
188	* @return <code> true </code> if the first instance is smaller or equal
189	* than the second instance, <code> false </code> otherwise
190	*/
191	public static boolean smallerOrEqual(Instance i1,Instance i2) {
192	// XXX implementation can be done faster
193	Coordinates c1 = new Coordinates(i1);
194	Coordinates c2 = new Coordinates(i2);
195
196	return c1.smallerOrEqual(c2);
197	}
198
199	/**
200	* Checks if two instances are comparable in the data space, this is
201	* ignoring the class attribute. Two instances are comparable if the
202	* first is smaller or equal than the second, or the other way around.
203	*
204	* @param i1 the first instance
205	* @param i2 the second instance
206	* @return <code> true </code> if the given instances are comparable,
207	* <code> false </code> otherwise
208	* @throws IllegalArgumentException if the two instances don't have the
209	* same length
210	*/
211	public static boolean comparable(Instance i1, Instance i2) throws IllegalArgumentException {
212	// XXX maybe we should think about using 'equalHeaders' of Instance
213	// to obtain a fool proof implementation
214	Coordinates c1 = new Coordinates(i1);
215	Coordinates c2 = new Coordinates(i2);
216
217	return c1.smallerOrEqual(c2) \|\| c2.smallerOrEqual(c1);
218	}
219
220	/**
221	* Checks it two instances give rise to doubt. There is doubt between
222	* two instances if their <code> Coordinates </code> are equal, but
223	* their class value is different.
224	*
225	* @param i1 the first instance
226	* @param i2 the second instance
227	* @return <code> true </code> if there is doubt between the two
228	* given instances, <code> false </code> otherwise
229	*/
230	public static boolean doubt(Instance i1, Instance i2) {
231	// XXX use equalHeaders ?
232	if (i1.classValue() == i2.classValue()) {
233	return false;
234	}
235	Coordinates c1 = new Coordinates(i1);
236	Coordinates c2 = new Coordinates(i2);
237
238	return c1.equals(c2);
239	}
240
241	/**
242	* Checks if two instances give rise to reversed preference.
243	* Two instances give rise to reversed preference in the data space,
244	* if their <code> Coordinates </code> are comparable but different,
245	* and their class values are not related in the same way.
246	*
247	* @param i1 the first instance
248	* @param i2 the second instance
249	* @return <code> true </code> if <code> i1 </code> and <code> i2 </code>
250	* give rise to reversed preference, <code> false </code> otherwise
251	* @throws IllegalArgumentException if the two instances don't have
252	* the same length
253	*/
254	public static boolean reversedPreference(Instance i1, Instance i2) throws IllegalArgumentException {
255	// XXX should the implementation be made fool proof by use of
256	// 'equalHeaders'? It can also be speeded up I think.
257
258	if (i1.classValue() == i2.classValue()) {
259	return false;
260	}
261	Coordinates c1 = new Coordinates(i1);
262	Coordinates c2 = new Coordinates(i2);
263
264	if (i1.classValue() > i2.classValue() && c1.strictlySmaller(c2)) {
265	return true;
266	}
267	if (i2.classValue() > i1.classValue() && c2.strictlySmaller(c1)) {
268	return true;
269	}
270
271	return false;
272	}
273
274	/**
275	* Checks if the given data set is monotone. We say that a data set
276	* is monotone if it contains doubt nor reversed preferences.
277	*
278	* @param instances the data set to be checked
279	* @return <code> true </code> if the given data set if monotone,
280	* <code> false </code> otherwise
281	*/
282	public static boolean isMonotone(Instances instances) {
283	int n = instances.numInstances();
284	for (int i = 0; i < n; i++) {
285	Instance i1 = instances.instance(i);
286	for (int j = i + 1; j < n; j++) {
287	if ( doubt(i1, instances.instance(j)) \|\|
288	reversedPreference(i1, instances.instance(j))) {
289	return false;
290	}
291	}
292	}
293	return true;
294	}
295
296	/**
297	* Test if a set of instances is quasi monotone. We say that a set
298	* of instances <code> S </code> is quasi monotone with respect to
299	* a set of instances <code> D </code> iff
300	* <code> [x,y] \cap D \neq \emptyset \implies class(x) \leq class(y).
301	* </code> This implies that <code> D </code> itself is monotone.
302	*
303	* @param ground the instances playing the role of <code> D </code>
304	* @param other the instances playing the role of <code> S </code>
305	* @return true if the instances are quasi monotone, false otherwise
306	*/
307	public static boolean isQuasiMonotone(Instances ground, Instances other) {
308	if (!isMonotone(ground)) {
309	return false;
310	}
311	Iterator it1 = new EnumerationIterator(ground.enumerateInstances());
312	while(it1.hasNext()) {
313	Instance inst1 = (Instance) it1.next();
314	Iterator it2 = new EnumerationIterator(other.enumerateInstances());
315	while(it2.hasNext()) {
316	Instance inst2 = (Instance) it2.next();
317	if (doubt(inst1, inst2) \|\| reversedPreference(inst1, inst2)) {
318	return false;
319	}
320	}
321	}
322	return true;
323	}
324
325	/**
326	* Gather some statistics regarding reversed preferences.
327	*
328	* @param instances the instances to be examined
329	* @return array of length 3; position 0 indicates the number of
330	* couples that have reversed preference, position 1 the number of
331	* couples that are comparable, and position 2 the total
332	* number of couples
333	* @see #reversedPreference(Instance, Instance)
334	*/
335	public static int[] nrOfReversedPreferences(Instances instances) {
336	int[] stats = new int[3];
337	int n = instances.numInstances();
338	stats[0] = 0;
339	stats[1] = 0;
340	// number of couples
341	stats[2] = n * (n - 1) / 2;
342	for (int i = 0; i < n; i++) {
343	Instance i1 = instances.instance(i);
344	for (int j = i + 1; j < n; j++) {
345	Instance j1 = instances.instance(j);
346	if (comparable(i1, j1)) {
347	stats[1]++; // comparable
348	if (reversedPreference(i1, j1)) {
349	stats[0]++; // reversed preference
350	}
351	}
352	}
353	}
354	return stats;
355	}
356
357	/**
358	* Find the number of stochastic reversed preferences in the dataset.
359	*
360	* @param instances the instances to be examined
361	* @return an array of integers containing at position
362	* <ul>
363	* <li> 0: number of different coordinates, this is the size of S_X </li>
364	* <li> 1: number of couples showing reversed preference:<br>
365	* <code> x < y </code> and
366	* <code> not (F_x leqstoch F_y) </code> </li>
367	* <li> 2: number of couples having<br>
368	* <code> x < y </code> and <code> F_y leqstoch F_x </code>
369	* and <code> F_x neq F_y </code> </li>
370	* <li> 3: number of couples that are comparable <br>
371	* <code> \|\{ (x,y)\in S_X \times S_x \| x < y\}\| </code> </li>
372	* <li> 4: number of couples in S_X </li>
373	* </ul>
374	* @throws IllegalArgumentException if there are no instances with
375	* a non-missing class value, or if the class is not set
376	*/
377	public static int[] nrStochasticReversedPreference(Instances instances)
378	throws IllegalArgumentException {
379
380	if (instances.classIndex() < 0) {
381	throw new IllegalArgumentException("Class is not set");
382	}
383
384	// copy the dataset
385	Instances data = new Instances(instances);
386
387	// new dataset where examples with missing class value are removed
388	data.deleteWithMissingClass();
389	if (data.numInstances() == 0) {
390	throw new IllegalArgumentException
391	("No instances with a class value!");
392	}
393
394	// build the Map for the estimatedDistributions
395	Map distributions = new HashMap(data.numInstances()/2);
396
397	// cycle through all instances
398	Iterator i =
399	new EnumerationIterator(instances.enumerateInstances());
400
401	while (i.hasNext()) {
402	Instance instance = (Instance) i.next();
403	Coordinates c = new Coordinates(instance);
404
405	// get DiscreteEstimator from the map
406	DiscreteEstimator df =
407	(DiscreteEstimator) distributions.get(c);
408
409	// if no DiscreteEstimator is present in the map, create one
410	if (df == null) {
411	df = new DiscreteEstimator(instances.numClasses(), 0);
412	}
413	df.addValue(instance.classValue(),instance.weight()); // update
414	distributions.put(c,df); // put back in map
415	}
416
417
418	// build the map of cumulative distribution functions
419	Map cumulativeDistributions =
420	new HashMap(distributions.size());
421
422	// Cycle trough the map of discrete distributions, and create a new
423	// one containing cumulative discrete distributions
424	for (Iterator it=distributions.keySet().iterator();
425	it.hasNext();) {
426	Coordinates c = (Coordinates) it.next();
427	DiscreteEstimator df =
428	(DiscreteEstimator) distributions.get(c);
429	cumulativeDistributions.put
430	(c, new CumulativeDiscreteDistribution(df));
431	}
432	int[] revPref = new int[5];
433	revPref[0] = cumulativeDistributions.size();
434	Iterator it = cumulativeDistributions.keySet().iterator();
435	while (it.hasNext()) {
436	Coordinates c1 = (Coordinates) it.next();
437	CumulativeDiscreteDistribution cdf1 =
438	(CumulativeDiscreteDistribution)
439	cumulativeDistributions.get(c1);
440	Iterator it2 = cumulativeDistributions.keySet().iterator();
441	while(it2.hasNext()) {
442	Coordinates c2 = (Coordinates) it2.next();
443	CumulativeDiscreteDistribution cdf2 =
444	(CumulativeDiscreteDistribution)
445	cumulativeDistributions.get(c2);
446	if (c2.equals(c1)) {
447	continue;
448	}
449
450	revPref[4]++;
451
452	if (c1.strictlySmaller(c2) == true) {
453	revPref[3]++; //vergelijkbaar
454	if (cdf1.stochasticDominatedBy(cdf2) == false ) {
455	revPref[1]++;
456	if (cdf2.stochasticDominatedBy(cdf1) == true) {
457	revPref[2]++;
458	}
459	}
460	}
461	}
462	}
463	revPref[4] /= 2;
464	return revPref;
465	}
466
467	/**
468	* Counts the number of redundant pairs in the sense of OLM.
469	* Two instances are redundant if they are comparable and have the same
470	* class value.
471	*
472	* @param instances the instances to be checked
473	* @return the number of redundant pairs in the given set of instances
474	*/
475	public static int nrOfRedundant(Instances instances) {
476	int n = instances.numInstances();
477	int nrRedundant = 0;
478	for (int i = 0; i < n; i++) {
479	Instance i1 = instances.instance(i);
480	for (int j = i + 1; j < n; j++) {
481	Instance j1 = instances.instance(j);
482	if (j1.classValue() == i1.classValue() && comparable(i1, j1) ) {
483	nrRedundant++;
484	}
485	}
486	}
487
488	return nrRedundant;
489	}
490
491	/**
492	* Calulates the total loss over the <code> instances </code>,
493	* using the trained <code> classifier </code> and the
494	* specified <code> lossFunction. </code> The instances
495	* should not contain missing values in the class attribute.
496	*
497	* @param classifier the trained classifier to use
498	* @param instances the test instances
499	* @param lossFunction the loss function to use
500	* @return the total loss of all the instances using the given classifier and loss function
501	*/
502	public static double totalLoss(Classifier classifier, Instances instances,
503	NominalLossFunction lossFunction) {
504
505	double loss = 0;
506	int n = instances.numInstances();
507	for (int i = 0; i < n; i++) {
508	try {
509	loss += lossFunction.loss(instances.instance(i).classValue(),
510	classifier.classifyInstance(instances.instance(i)));
511	} catch (Exception e) {
512	// what should we do here ??
513	}
514	}
515	return loss;
516	}
517
518
519	/**
520	* Classify a set of instances using a given classifier. The class value
521	* of the instances are set.
522	*
523	* @param instances the instances to be classified
524	* @param classifier a built classifier
525	* @throws Exception if one of the instances could no be classified
526	*/
527	public static void classifyInstances(Instances instances, Classifier classifier)
528	throws Exception {
529
530	Iterator it = new EnumerationIterator(instances.enumerateInstances());
531	while(it.hasNext()) {
532	Instance instance = (Instance) it.next();
533	instance.setClassValue(classifier.classifyInstance(instance));
534	}
535	}
536
537
538	/**
539	* Calculates the relation (poset) formed by the instances.
540	*
541	* @param instances the instances for which the poset is to be formed
542	* @return a <code> BooleanBitMatrix </code> for which position
543	* <code> bm.get(i,j) == true </code> iff <code>
544	* InstancesUtil.strictlySmaller(instances.instance(i),
545	* instances.instance(j)) == true </code>
546	*/
547	public static BooleanBitMatrix getBitMatrix(Instances instances) {
548	int numInstances = instances.numInstances();
549	BooleanBitMatrix bm =
550	new BooleanBitMatrix(numInstances, numInstances);
551	for (int i = 0; i < numInstances; i++ ) {
552	Instance instance1 = instances.instance(i);
553	for (int j = 0; j < numInstances; j++) {
554	Instance instance2 = instances.instance(j);
555	if (InstancesUtil.strictlySmaller(instance1, instance2)) {
556	bm.set(i, j); // arc from instance1 to instance2
557	}
558	}
559	}
560	return bm;
561	}
562
563	/**
564	* Calculatus the number of elements in the closed interval
565	* <code> [low,up]. </code> If the class index is set, then
566	* the class attribute does not play part in the calculations,
567	* this is we work in the data space. The code also works with
568	* numeric attributes, but is primarily intended for ordinal attributes.
569	*
570	* @param low the lower bound of the interval
571	* @param up the upper bound of the interval
572	* @return the size of the interval (in floating point format)
573	* @throws IllegalArgumentException if the given instances do not
574	* constitute an interval.
575	*/
576	public static double numberInInterval(Instance low, Instance up)
577	throws IllegalArgumentException {
578
579	Coordinates cLow = new Coordinates(low);
580	Coordinates cUp = new Coordinates(up);
581	if (cLow.smallerOrEqual(cUp) == false) {
582	throw new IllegalArgumentException
583	("The given instances are not the bounds of an interval");
584	}
585	double number = 1;
586	int dim = cLow.dimension();
587	for (int i = 0; i < dim; i++) {
588	number *= (cUp.getValue(i) - cLow.getValue(i) + 1);
589	}
590	return number;
591	}
592
593
594	/**
595	* Calculatutes the number of vectors in the data space that are smaller
596	* or equal than the given instance.
597	*
598	* @param instance the given instance
599	* @return the number of vectors in the data space smaller or equal
600	* than the given instance
601	* @throws IllegalArgumentException if there are numeric attributes
602	*/
603	public static double numberOfSmallerVectors(Instance instance)
604	throws IllegalArgumentException {
605
606	double[] values = InstancesUtil.toDataDouble(instance);
607	double nr = 1;
608
609	for (int i = 0; i < values.length; i++) {
610	if (instance.attribute(i).isNumeric()) {
611	throw new IllegalArgumentException
612	("Numeric attributes are not supported");
613	}
614	nr *= (values[i] + 1);
615	}
616
617	return nr;
618	}
619
620	/**
621	* Calculatutes the number of vectors in the data space that are
622	* greater or equal than the given instance.
623	*
624	* @param instance the given instance
625	* @return the number of vectors in the data space greater of equal
626	* than the given instance
627	* @throws IllegalArgumentException if there are numeric attributes
628	*/
629	public static double numberOfGreaterVectors(Instance instance)
630	throws IllegalArgumentException {
631
632	double[] values = InstancesUtil.toDataDouble(instance);
633	double nr = 1;
634
635	for (int i = 0; i < values.length; i++) {
636	if (instance.attribute(i).isNumeric()) {
637	throw new IllegalArgumentException
638	("Numeric attributes are not supported");
639	}
640	nr *= (instance.attribute(i).numValues() - values[i]);
641	}
642
643	return nr;
644	}
645
646	/**
647	* Write the instances in ARFF-format to the indicated
648	* <code> BufferedWriter </code>.
649	* @param instances the instances to write
650	* @param file the <code> BufferedWriter </code> to write to
651	* @throws IOException if something goes wrong while writing the instances
652	*/
653	public static void write(Instances instances, BufferedWriter file)
654	throws IOException{
655
656	file.write(instances.toString()); // XXX can probably be done better
657	}
658
659
660	/**
661	* Return a histogram of the values for the specified attribute.
662	*
663	* @param instances the instances
664	* @param attributeIndex the attribute to consider
665	* @return a <code> DiscreteEstimator </code> where the <code>i</code>th
666	* @throws IllegalArgumentException if the attribute at the specified
667	* index is numeric
668	*/
669	public static DiscreteEstimator countValues(Instances instances, int attributeIndex)
670	throws IllegalArgumentException{
671
672	int numValues = instances.attribute(attributeIndex).numValues();
673	if (numValues == 0) {
674	throw new IllegalArgumentException
675	("Can't create histogram for numeric attribute");
676	}
677
678	DiscreteEstimator de = new DiscreteEstimator(numValues, false);
679	Iterator it = new EnumerationIterator(instances.enumerateInstances());
680	while (it.hasNext()) {
681	Instance instance = (Instance) it.next();
682	if (!instance.isMissing(attributeIndex)) {
683	de.addValue(instance.value(attributeIndex), instance.weight());
684	}
685	}
686	return de;
687	}
688
689	/**
690	* Create, without replacement, a random subsample of the given size
691	* from the given instances.
692	*
693	* @param instances the instances to sample from
694	* @param size the requested size of the sample
695	* @param random the random generator to use
696	* @return a sample of the requested size, drawn from the given
697	* instances without replacement
698	* @throws IllegalArgumentException if the size exceeds the number
699	* of instances
700	*/
701	public static Instances sampleWithoutReplacement(
702	Instances instances, int size, Random random) {
703
704	if (size > instances.numInstances()) {
705	throw new IllegalArgumentException
706	("Size of requested sample exceeds number of instances");
707	}
708
709	int numInstances = instances.numInstances();
710	int[] indices = new int[instances.numInstances()];
711	for (int i = 0; i < numInstances; i++) {
712	indices[i] = i;
713	}
714
715	Instances sample = new Instances(instances, size);
716	int index;
717	for (int i = 0; i < size; i++) {
718	index = random.nextInt(numInstances--);
719	sample.add(instances.instance(indices[index]));
720	swap(indices, index, numInstances);
721	}
722	return sample;
723	}
724
725	/**
726	* Swaps two elements of the given array.
727	*
728	* @param aa the array
729	* @param i the index of the first element
730	* @param j the index of the second element
731	*/
732	final private static void swap(int[] aa, int i, int j) {
733	int tmp = aa[i];
734	aa[i] = aa[j];
735	aa[j] = tmp;
736	}
737	/**
738	* Generates a random sample of instances. Each attribute must be nominal, and the
739	* class labels are not set.
740	*
741	* @param headerInfo Instances whose header information is used to determine how the
742	* set of returned instances will look
743	* @param numberOfExamples the desired size of the returned set
744	* @param random the random number generator to use
745	* @return a set of Instances containing the random sample.
746	* @throws IllegalArgumentException if numeric attributes are given
747	*/
748	public static Instances generateRandomSample(
749	Instances headerInfo, int numberOfExamples, Random random)
750	throws IllegalArgumentException {
751
752	int n = headerInfo.numAttributes();
753	double[] info = new double[n];
754	int classIndex = headerInfo.classIndex();
755	for (int i = 0; i < n; i++) {
756	info[i] = headerInfo.attribute(i).numValues();
757	if (i != classIndex && info[i] == 0) {
758	throw new IllegalArgumentException
759	("Numeric attributes are currently not supported");
760	}
761	}
762	Instances sample = new Instances(headerInfo, numberOfExamples);
763	sample.setRelationName(headerInfo.relationName() +
764	".random.sample.of." + numberOfExamples);
765	for (int i = 0; i < numberOfExamples; i++) {
766	sample.add(randomSample(info, classIndex, random));
767	}
768	return sample;
769	}
770	/**
771	* Generates a random instance.
772	*
773	* @param info array that gives for each attribute the number of possible values
774	* @param classIndex the index of the class attribute
775	* @param random the random number generator used
776	* @return a random instance
777	*/
778	private static Instance randomSample(double[] info,
779	int classIndex, Random random) {
780
781	double[] attValues = new double[info.length];
782	for (int i = 0; i < attValues.length; i++) {
783	if (i != classIndex) {
784	attValues[i] = random.nextInt( (int) info[i]);
785	}
786	}
787	return new DenseInstance(1, attValues);
788	}
789
790
791
792	/**
793	* Returns an array containing the attribute values (in internal floating
794	* point format) of the given instance in data space, this is, the class
795	* attribute (if any) is removed.
796	*
797	* @param instance the instance to get the attribute values from
798	* @return array of doubles containing the attribute values
799	*/
800	public static double[] toDataDouble(Instance instance) {
801	double[] vector = null;
802	int classIndex = instance.classIndex();
803	if(classIndex >= 0) {
804	vector = new double[instance.numAttributes() - 1];
805	} else {
806	vector = new double[instance.numAttributes()];
807	}
808	int index = 0;
809	for (int i = 0; i < instance.numAttributes(); i++) {
810	if(i != classIndex) {
811	vector[index++] = instance.value(i);
812	}
813	}
814	return vector;
815	}
816
817	/**
818	* Computes the minimal extension for a given instance.
819	*
820	* @param instances the set of instances
821	* @param instance the instance for which the minimal extension is to be
822	* calculated
823	* @return the value of the minimal extension, in internal floating point
824	* format
825	*/
826	public static double minimalExtension(Instances instances, Instance instance) {
827	return minimalExtension(instances, instance, 0);
828	}
829
830	/**
831	* Computes the minimal extension of a given instance, but the
832	* minimal value returned is <code> minValue. </code> This method
833	* may have its applications when the training set is divided into
834	* multiple Instances objects.
835	*
836	* @param instances the set of instances
837	* @param instance the instance for which the minimal extension is to
838	* be calculated
839	* @param minValue a double indicating the minimal value that should
840	* be returned
841	* @return the label of the minimal extension, in internal floating point format
842	*/
843	public static double minimalExtension(
844	Instances instances, Instance instance, double minValue) {
845
846	double value = minValue;
847
848	Iterator it =
849	new EnumerationIterator(instances.enumerateInstances());
850	while(it.hasNext()) {
851	Instance tmp = (Instance) it.next();
852	if (tmp.classValue() > value
853	&& InstancesUtil.smallerOrEqual(tmp, instance) ) {
854	value = tmp.classValue();
855	}
856	}
857	return value;
858	}
859
860	/**
861	* Computes the maximal extension for a given instance.
862	*
863	* @param instances the set of instances
864	* @param instance the instance for which the minimal extension is to be
865	* calculated
866	* @return the value of the minimal extension, in internal floating point
867	* format
868	*/
869	public static double maximalExtension(Instances instances, Instance instance) {
870	return maximalExtension(instances, instance, instances.numClasses() - 1);
871	}
872
873	/**
874	* Computes the maximal extension of a given instance, but the
875	* maximal value returned is <code> maxValue. </code> This method
876	* may have its applications when the training set is divided into
877	* multiple Instances objects.
878	*
879	* @param instances the set of instances
880	* @param instance the instance for which the maximal extension is to
881	* be calculated
882	* @param maxValue a double indicating the maximal value that should
883	* be returned
884	* @return the value of the minimal extension, in internal floating point
885	* format
886	*/
887	public static double maximalExtension(
888	Instances instances, Instance instance, double maxValue) {
889
890	double value = maxValue;
891
892	Iterator it =
893	new EnumerationIterator(instances.enumerateInstances());
894	while(it.hasNext()) {
895	Instance tmp = (Instance) it.next();
896	if (tmp.classValue() < value
897	&& InstancesUtil.smallerOrEqual(instance, tmp) ) {
898	value = tmp.classValue();
899	}
900	}
901	return value;
902	}
903
904	/**
905	* Returns the revision string.
906	*
907	* @return the revision
908	*/
909	public String getRevision() {
910	return RevisionUtils.extract("$Revision: 5987 $");
911	}
912	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: