source: branches/MetisMQI/src/main/java/weka/classifiers/mi/MINND.java @ 29

Last change on this file since 29 was 29, checked in by gnappo, 14 years ago

Tagged a version for the demo and added a branch.

File size: 32.6 KB
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * MINND.java
 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.mi;

import weka.classifiers.AbstractClassifier;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.util.Enumeration;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Multiple-Instance Nearest Neighbour with Distribution learner.<br/>
 * <br/>
 * It uses gradient descent to find the weight for each dimension of each exemplar from the starting point of 1.0. In order to avoid overfitting, it uses the mean-square function (i.e. the Euclidean distance) to search for the weights.<br/>
 * It then uses the weights to cleanse the training data. After that it searches for the weights again from the starting points of the weights searched before.<br/>
 * Finally it uses the most updated weights to cleanse the test exemplar and then finds the nearest neighbour of the test exemplar using the partly-weighted Kullback distance. But the variances in the Kullback distance are the ones before cleansing.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * Xin Xu (2001). A nearest distribution approach to multiple-instance learning. Hamilton, NZ.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;misc{Xu2001,
 *    address = {Hamilton, NZ},
 *    author = {Xin Xu},
 *    note = {0657.591B},
 *    school = {University of Waikato},
 *    title = {A nearest distribution approach to multiple-instance learning},
 *    year = {2001}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -K &lt;number of neighbours&gt;
 *  Set the number of nearest neighbours used for prediction
 *  (default 1)</pre>
 *
 * <pre> -S &lt;number of neighbours&gt;
 *  Set the number of nearest neighbours used for cleansing the training data
 *  (default 1)</pre>
 *
 * <pre> -E &lt;number of neighbours&gt;
 *  Set the number of nearest neighbours used for cleansing the testing data
 *  (default 1)</pre>
 *
 <!-- options-end -->
 *
 * @author Xin Xu (xx5@cs.waikato.ac.nz)
 * @version $Revision: 5987 $
 */
public class MINND 
  extends AbstractClassifier
  implements OptionHandler, MultiInstanceCapabilitiesHandler,
             TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -4512599203273864994L;

  /** The number of nearest neighbours used for prediction */
  protected int m_Neighbour = 1;

  /** The mean for each attribute of each exemplar */
  protected double[][] m_Mean = null;

  /** The variance for each attribute of each exemplar */
  protected double[][] m_Variance = null;

  /** The dimension of each exemplar, i.e. (numAttributes-2) */
  protected int m_Dimension = 0;

  /** header info of the data */
  protected Instances m_Attributes;

  /** The class label of each exemplar */
  protected double[] m_Class = null;

  /** The number of class labels in the data */
  protected int m_NumClasses = 0;

  /** The weight of each exemplar */
  protected double[] m_Weights = null;

  /** The very small number representing zero */
  private static double m_ZERO = 1.0e-45;

  /** The learning rate in the gradient descent (-1 means use an adaptive, decaying rate) */
  protected double m_Rate = -1;

  /** The minimum values for numeric attributes. */
  private double[] m_MinArray = null;

  /** The maximum values for numeric attributes. */
  private double[] m_MaxArray = null;

  /** The stopping criterion of the gradient descent */
  private double m_STOP = 1.0e-45;

  /** The weights that alter the dimension of each exemplar */
  private double[][] m_Change = null;

  /** The noise data of each exemplar */
  private double[][] m_NoiseM = null, m_NoiseV = null, m_ValidM = null, 
          m_ValidV = null;

  /** The number of nearest neighbour instances in the selection of noises
    in the training data */
  private int m_Select = 1;

  /** The number of nearest neighbour exemplars in the selection of noises
    in the test data */
  private int m_Choose = 1;

  /** The decay rate of the learning rate */
  private double m_Decay = 0.5;

  /**
   * Returns a string describing this classifier.
   *
   * @return a description of the classifier suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return 
        "Multiple-Instance Nearest Neighbour with Distribution learner.\n\n"
      + "It uses gradient descent to find the weight for each dimension of "
      + "each exemplar from the starting point of 1.0. In order to avoid "
      + "overfitting, it uses the mean-square function (i.e. the Euclidean "
      + "distance) to search for the weights.\n "
      + "It then uses the weights to cleanse the training data. After that "
      + "it searches for the weights again from the starting points of the "
      + "weights searched before.\n "
      + "Finally it uses the most updated weights to cleanse the test exemplar "
      + "and then finds the nearest neighbour of the test exemplar using the "
      + "partly-weighted Kullback distance. But the variances in the Kullback "
      + "distance are the ones before cleansing.\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.MISC);
    result.setValue(Field.AUTHOR, "Xin Xu");
    result.setValue(Field.YEAR, "2001");
    result.setValue(Field.TITLE, "A nearest distribution approach to multiple-instance learning");
    result.setValue(Field.SCHOOL, "University of Waikato");
    result.setValue(Field.ADDRESS, "Hamilton, NZ");
    result.setValue(Field.NOTE, "0657.591B");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return      the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.RELATIONAL_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    // other
    result.enable(Capability.ONLY_MULTIINSTANCE);

    return result;
  }

  /**
   * Returns the capabilities of this multi-instance classifier for the
   * relational data.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  public Capabilities getMultiInstanceCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.disableAllClasses();
    result.enable(Capability.NO_CLASS);

    return result;
  }

  /**
   * As the normal Nearest Neighbour algorithm does, it's lazy and simply
   * records the exemplar information (i.e. mean and variance for each
   * dimension of each exemplar and their classes) when building the model.
   * There is actually no need to store the exemplars themselves.
   *
   * @param exs the training exemplars
   * @throws Exception if the model cannot be built properly
   */
  public void buildClassifier(Instances exs) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(exs);

    // remove instances with missing class
    Instances newData = new Instances(exs);
    newData.deleteWithMissingClass();

    int numegs = newData.numInstances();
    m_Dimension = newData.attribute(1).relation().numAttributes();
    m_Attributes = newData.stringFreeStructure();
    m_Change = new double[numegs][m_Dimension];
    m_NumClasses = exs.numClasses();
    m_Mean = new double[numegs][m_Dimension];
    m_Variance = new double[numegs][m_Dimension];
    m_Class = new double[numegs];
    m_Weights = new double[numegs];
    m_NoiseM = new double[numegs][m_Dimension];
    m_NoiseV = new double[numegs][m_Dimension];
    m_ValidM = new double[numegs][m_Dimension];
    m_ValidV = new double[numegs][m_Dimension];
    m_MinArray = new double[m_Dimension];
    m_MaxArray = new double[m_Dimension];
    for(int v=0; v < m_Dimension; v++)
      m_MinArray[v] = m_MaxArray[v] = Double.NaN;

    for(int w=0; w < numegs; w++){
      updateMinMax(newData.instance(w));
    }

    // Scale exemplars
    Instances data = m_Attributes;

    for(int x=0; x < numegs; x++){
      Instance example = newData.instance(x);
      example = scale(example);
      for (int i=0; i<m_Dimension; i++) {
        m_Mean[x][i] = example.relationalValue(1).meanOrMode(i);
        m_Variance[x][i] = example.relationalValue(1).variance(i);
        if(Utils.eq(m_Variance[x][i],0.0))
          m_Variance[x][i] = m_ZERO;
        m_Change[x][i] = 1.0;
      }

      data.add(example);
      m_Class[x] = example.classValue();
      m_Weights[x] = example.weight();
    }

    for(int z=0; z < numegs; z++)
      findWeights(z, m_Mean);

    // Pre-process and record "true estimated" parameters for distributions
    for(int x=0; x < numegs; x++){
      Instance example = preprocess(data, x);
      if (getDebug())
        System.out.println("???Exemplar "+x+" has been pre-processed:"+
            data.instance(x).relationalValue(1).sumOfWeights()+
            "|"+example.relationalValue(1).sumOfWeights()+
            "; class:"+m_Class[x]);
      if(Utils.gr(example.relationalValue(1).sumOfWeights(), 0)){
        for (int i=0; i<m_Dimension; i++) {
          m_ValidM[x][i] = example.relationalValue(1).meanOrMode(i);
          m_ValidV[x][i] = example.relationalValue(1).variance(i);
          if(Utils.eq(m_ValidV[x][i],0.0))
            m_ValidV[x][i] = m_ZERO;
        }
      }
      else{
        m_ValidM[x] = null;
        m_ValidV[x] = null;
      }
    }

    for(int z=0; z < numegs; z++)
      if(m_ValidM[z] != null)
        findWeights(z, m_ValidM);

  }
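
  /*
   * A minimal training/prediction sketch (not part of the original source;
   * the dataset name is hypothetical). It assumes a standard multi-instance
   * ARFF file whose three attributes are bag-id, relational bag and class,
   * e.g. the musk1 benchmark:
   *
   *   Instances data = new Instances(
   *       new java.io.BufferedReader(new java.io.FileReader("musk1.arff")));
   *   data.setClassIndex(data.numAttributes() - 1);
   *   MINND minnd = new MINND();
   *   minnd.setNumNeighbours(3);     // equivalent to -K 3
   *   minnd.buildClassifier(data);
   *   double pred = minnd.classifyInstance(data.instance(0));
   */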

  /**
   * Pre-processes the given exemplar according to the other exemplars
   * in the given data. It also updates the noise data statistics.
   *
   * @param data the whole exemplars
   * @param pos the position of the given exemplar in data
   * @return the processed exemplar
   * @throws Exception if the returned exemplar is wrong
   */
  public Instance preprocess(Instances data, int pos)
    throws Exception {
    Instance before = data.instance(pos);
    if((int)before.classValue() == 0){
      m_NoiseM[pos] = null;
      m_NoiseV[pos] = null;
      return before;
    }

    Instances after_relationInsts = before.attribute(1).relation().stringFreeStructure();
    Instances noises_relationInsts = before.attribute(1).relation().stringFreeStructure();

    Instances newData = m_Attributes;
    Instance after = new DenseInstance(before.numAttributes());
    Instance noises = new DenseInstance(before.numAttributes());
    after.setDataset(newData);
    noises.setDataset(newData);

    for(int g=0; g < before.relationalValue(1).numInstances(); g++){
      Instance datum = before.relationalValue(1).instance(g);
      double[] dists = new double[data.numInstances()];

      for(int i=0; i < data.numInstances(); i++){
        if(i != pos)
          dists[i] = distance(datum, m_Mean[i], m_Variance[i], i);
        else
          dists[i] = Double.POSITIVE_INFINITY;
      }

      int[] pred = new int[m_NumClasses];
      for(int n=0; n < pred.length; n++)
        pred[n] = 0;

      for(int o=0; o<m_Select; o++){
        int index = Utils.minIndex(dists);
        pred[(int)m_Class[index]]++;
        dists[index] = Double.POSITIVE_INFINITY;
      }

      int clas = Utils.maxIndex(pred);
      if((int)before.classValue() != clas)
        noises_relationInsts.add(datum);
      else
        after_relationInsts.add(datum);
    }

    int relationValue;
    relationValue = noises.attribute(1).addRelation(noises_relationInsts);
    noises.setValue(0, before.value(0));
    noises.setValue(1, relationValue);
    noises.setValue(2, before.classValue());

    relationValue = after.attribute(1).addRelation(after_relationInsts);
    after.setValue(0, before.value(0));
    after.setValue(1, relationValue);
    after.setValue(2, before.classValue());

    if(Utils.gr(noises.relationalValue(1).sumOfWeights(), 0)){
      for (int i=0; i<m_Dimension; i++) {
        m_NoiseM[pos][i] = noises.relationalValue(1).meanOrMode(i);
        m_NoiseV[pos][i] = noises.relationalValue(1).variance(i);
        if(Utils.eq(m_NoiseV[pos][i],0.0))
          m_NoiseV[pos][i] = m_ZERO;
      }
    }
    else{
      m_NoiseM[pos] = null;
      m_NoiseV[pos] = null;
    }

    return after;
  }

  /**
   * Calculates the distance between an instance and an exemplar's
   * (weighted) distribution summary.
   *
   * @param first the instance in question
   * @param mean the mean of each dimension of the exemplar
   * @param var the variance of each dimension of the exemplar
   * @param pos the index of the exemplar, used to look up its dimension weights
   * @return the distance between the instance and the exemplar
   */
  private double distance(Instance first, double[] mean, double[] var, int pos) {

    double diff, distance = 0;

    for(int i = 0; i < m_Dimension; i++) {
      // If attribute is numeric
      if(first.attribute(i).isNumeric()){
        if (!first.isMissing(i)){
          diff = first.value(i) - mean[i];
          if(Utils.gr(var[i], m_ZERO))
            distance += m_Change[pos][i] * var[i] * diff * diff;
          else
            distance += m_Change[pos][i] * diff * diff;
        }
        else{
          if(Utils.gr(var[i], m_ZERO))
            distance += m_Change[pos][i] * var[i];
          else
            distance += m_Change[pos][i] * 1.0;
        }
      }
    }

    return distance;
  }
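
  /*
   * A small worked example of the distance above (illustrative, made-up
   * numbers): for a single numeric dimension with value 0.7, mean 0.4,
   * variance 0.25 and dimension weight m_Change[pos][0] = 1.0, the
   * contribution is 1.0 * 0.25 * (0.7 - 0.4)^2 = 0.0225; a missing value
   * would instead contribute 1.0 * 0.25 = 0.25.
   */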

  /**
   * Updates the minimum and maximum values for all the attributes
   * based on a new exemplar.
   *
   * @param ex the new exemplar
   */
  private void updateMinMax(Instance ex) {
    Instances insts = ex.relationalValue(1);
    for (int j = 0; j < m_Dimension; j++) {
      if (insts.attribute(j).isNumeric()){
        for(int k=0; k < insts.numInstances(); k++){
          Instance ins = insts.instance(k);
          if(!ins.isMissing(j)){
            if (Double.isNaN(m_MinArray[j])) {
              m_MinArray[j] = ins.value(j);
              m_MaxArray[j] = ins.value(j);
            } else {
              if (ins.value(j) < m_MinArray[j])
                m_MinArray[j] = ins.value(j);
              else if (ins.value(j) > m_MaxArray[j])
                m_MaxArray[j] = ins.value(j);
            }
          }
        }
      }
    }
  }

  /**
   * Scales the given exemplar so that the returned exemplar
   * has values in [0, 1] for each dimension.
   *
   * @param before the given exemplar
   * @return the resultant exemplar after scaling
   * @throws Exception if the given exemplar cannot be scaled properly
   */
  private Instance scale(Instance before) throws Exception {

    Instances afterInsts = before.relationalValue(1).stringFreeStructure();
    Instance after = new DenseInstance(before.numAttributes());
    after.setDataset(m_Attributes);

    for(int i=0; i < before.relationalValue(1).numInstances(); i++){
      Instance datum = before.relationalValue(1).instance(i);
      Instance inst = (Instance)datum.copy();

      for(int j=0; j < m_Dimension; j++){
        if(before.relationalValue(1).attribute(j).isNumeric())
          inst.setValue(j, (datum.value(j) - m_MinArray[j])/(m_MaxArray[j] - m_MinArray[j]));
      }
      afterInsts.add(inst);
    }

    int attValue = after.attribute(1).addRelation(afterInsts);
    after.setValue(0, before.value(0));
    after.setValue(1, attValue);
    after.setValue(2, before.value(2));

    return after;
  }
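
  /*
   * For example (illustrative numbers only): with m_MinArray[j] = 2.0 and
   * m_MaxArray[j] = 12.0 recorded from the training data, a raw value of
   * 7.0 is scaled to (7.0 - 2.0) / (12.0 - 2.0) = 0.5.
   */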

  /**
   * Uses gradient descent to distort the MU parameter for
   * the exemplar. The exemplar can be in the specified row in the
   * given matrix, which has numExemplar rows and numDimension columns,
   * or not in the matrix.
   *
   * @param row the given row index
   * @param mean the matrix of exemplar means
   */
  public void findWeights(int row, double[][] mean){

    double[] neww = new double[m_Dimension];
    double[] oldw = new double[m_Dimension];
    System.arraycopy(m_Change[row], 0, neww, 0, m_Dimension);
    double newresult = target(neww, mean, row, m_Class);
    double result = Double.POSITIVE_INFINITY;
    double rate = 0.05;
    if(m_Rate != -1)
      rate = m_Rate;

search:
    while(Utils.gr((result-newresult), m_STOP)){ // Full step
      oldw = neww;
      neww = new double[m_Dimension];

      double[] delta = delta(oldw, mean, row, m_Class);

      for(int i=0; i < m_Dimension; i++)
        if(Utils.gr(m_Variance[row][i], 0.0))
          neww[i] = oldw[i] + rate * delta[i];

      result = newresult;
      newresult = target(neww, mean, row, m_Class);

      while(Utils.gr(newresult, result)){ // Search back
        if(m_Rate == -1){
          rate *= m_Decay; // Decay the learning rate and retry from the old weights
          for(int i=0; i < m_Dimension; i++)
            if(Utils.gr(m_Variance[row][i], 0.0))
              neww[i] = oldw[i] + rate * delta[i];
          newresult = target(neww, mean, row, m_Class);
        }
        else{
          for(int i=0; i < m_Dimension; i++)
            neww[i] = oldw[i];
          break search;
        }
      }
    }

    m_Change[row] = neww;
  }
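
  /*
   * The backtracking scheme above, sketched on a one-dimensional toy
   * objective f(w) = (w - 2)^2 (not part of the original source): take a
   * full step w' = w + rate * delta; if f(w') > f(w), repeatedly multiply
   * the rate by m_Decay (0.5) and retry from the old w until the objective
   * no longer increases. E.g. from w = 0 (f = 4) with delta = 4 and
   * rate = 1.5, the full step gives w' = 6 and f(6) = 16 > 4, so the rate
   * decays to 0.75 and w' = 3 gives f(3) = 1 < 4, which is accepted.
   */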

  /**
   * Delta of x in one step of gradient descent:
   * delta(Wij) = 1/2 * sum[k=1..N, k!=i]((sqrt(P)*(Yi-Yk)/D - 1) * (MUij - MUkj)^2)
   * where D = sqrt(sum[j=1..P](Kkj*(MUij - MUkj)^2)),
   * N is the number of exemplars and P is the number of dimensions.
   *
   * @param x the weights of the exemplar in question
   * @param X the matrix of exemplar means
   * @param rowpos row index of x in X
   * @param Y the observed class labels
   * @return the delta for all dimensions
   */
  private double[] delta(double[] x, double[][] X, int rowpos, double[] Y){
    double y = Y[rowpos];

    double[] delta = new double[m_Dimension];
    for(int h=0; h < m_Dimension; h++)
      delta[h] = 0.0;

    for(int i=0; i < X.length; i++){
      if((i != rowpos) && (X[i] != null)){
        double var = (y==Y[i]) ? 0.0 : Math.sqrt((double)m_Dimension - 1);
        double distance = 0;
        for(int j=0; j < m_Dimension; j++)
          if(Utils.gr(m_Variance[rowpos][j], 0.0))
            distance += x[j]*(X[rowpos][j]-X[i][j]) * (X[rowpos][j]-X[i][j]);
        distance = Math.sqrt(distance);
        if(distance != 0)
          for(int k=0; k < m_Dimension; k++)
            if(m_Variance[rowpos][k] > 0.0)
              delta[k] += (var/distance - 1.0) * 0.5 *
                (X[rowpos][k]-X[i][k]) *
                (X[rowpos][k]-X[i][k]);
      }
    }

    return delta;
  }

  /**
   * Computes the target function to minimize in gradient descent.
   * The formula is:<br/>
   * 1/2*sum[i=1..p](f(X, Xi)-var(Y, Yi))^2 <p/>
   * where p is the number of exemplars and Y is the class label.
   * In the case of X=MU, f() is the Euclidean distance between two
   * exemplars together with the related weights, and var() is
   * sqrt(numDimension)*(Y-Yi) where Y-Yi is either 0 (when Y==Yi)
   * or 1 (Y!=Yi).
   *
   * @param x the weights of the exemplar in question
   * @param X the matrix of exemplar means
   * @param rowpos row index of x in X
   * @param Y the observed class labels
   * @return the result of the target function
   */
  public double target(double[] x, double[][] X, int rowpos, double[] Y){
    double y = Y[rowpos], result = 0;

    for(int i=0; i < X.length; i++){
      if((i != rowpos) && (X[i] != null)){
        double var = (y==Y[i]) ? 0.0 : Math.sqrt((double)m_Dimension - 1);
        double f = 0;
        for(int j=0; j < m_Dimension; j++)
          if(Utils.gr(m_Variance[rowpos][j], 0.0)){
            f += x[j]*(X[rowpos][j]-X[i][j]) * (X[rowpos][j]-X[i][j]);
          }
        f = Math.sqrt(f);
        if(Double.isInfinite(f))
          System.exit(1); // bail out if the weighted distance overflows
        result += 0.5 * (f - var) * (f - var);
      }
    }

    return result;
  }
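
  /*
   * A worked example of the target function (illustrative numbers only):
   * with two exemplars of different classes, weights x = {1, 1} and means
   * {0, 0} and {3, 4}, the weighted distance is f = sqrt(1*3^2 + 1*4^2) = 5.
   * With P = 2 dimensions, var = sqrt(2 - 1) = 1, so this pair contributes
   * 0.5 * (5 - 1)^2 = 8 to the objective; gradient descent then adapts x
   * to pull f towards var.
   */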

  /**
   * Uses the Kullback Leibler distance to find the nearest neighbours of
   * the given exemplar.
   * It also uses the K-Nearest Neighbour algorithm to classify the
   * test exemplar.
   *
   * @param ex the given test exemplar
   * @return the classification
   * @throws Exception if the exemplar could not be classified
   * successfully
   */
  public double classifyInstance(Instance ex) throws Exception {

    ex = scale(ex);

    double[] var = new double[m_Dimension];
    for (int i=0; i<m_Dimension; i++)
      var[i] = ex.relationalValue(1).variance(i);

    // The Kullback distance to all exemplars
    double[] kullback = new double[m_Class.length];

    // The first K nearest neighbours' predictions
    double[] predict = new double[m_NumClasses];
    for(int h=0; h < predict.length; h++)
      predict[h] = 0;
    ex = cleanse(ex);

    if(ex.relationalValue(1).numInstances() == 0){
      if (getDebug())
        System.out.println("???Whole exemplar falls into ambiguous area!");
      return 1.0;                          // Bias towards positive class
    }

    double[] mean = new double[m_Dimension];
    for (int i=0; i<m_Dimension; i++)
      mean[i] = ex.relationalValue(1).meanOrMode(i);

    // Avoid zero sigma
    for(int h=0; h < var.length; h++){
      if(Utils.eq(var[h],0.0))
        var[h] = m_ZERO;
    }

    for(int i=0; i < m_Class.length; i++){
      if(m_ValidM[i] != null)
        kullback[i] = kullback(mean, m_ValidM[i], var, m_Variance[i], i);
      else
        kullback[i] = Double.POSITIVE_INFINITY;
    }

    for(int j=0; j < m_Neighbour; j++){
      int pos = Utils.minIndex(kullback);
      predict[(int)m_Class[pos]] += m_Weights[pos];
      kullback[pos] = Double.POSITIVE_INFINITY;
    }

    if (getDebug())
      System.out.println("???There are still some unambiguous instances in this exemplar! Predicted as: "+Utils.maxIndex(predict));
    return (double)Utils.maxIndex(predict);
  }

  /**
   * Cleanses the given exemplar according to the valid and noise data
   * statistics.
   *
   * @param before the given exemplar
   * @return the processed exemplar
   * @throws Exception if the returned exemplar is wrong
   */
  public Instance cleanse(Instance before) throws Exception {

    Instances insts = before.relationalValue(1).stringFreeStructure();
    Instance after = new DenseInstance(before.numAttributes());
    after.setDataset(m_Attributes);

    for(int g=0; g < before.relationalValue(1).numInstances(); g++){
      Instance datum = before.relationalValue(1).instance(g);
      double[] minNoiDists = new double[m_Choose];
      double[] minValDists = new double[m_Choose];
      int noiseCount = 0, validCount = 0;
      double[] nDist = new double[m_Mean.length];
      double[] vDist = new double[m_Mean.length];

      for(int h=0; h < m_Mean.length; h++){
        if(m_ValidM[h] == null)
          vDist[h] = Double.POSITIVE_INFINITY;
        else
          vDist[h] = distance(datum, m_ValidM[h], m_ValidV[h], h);

        if(m_NoiseM[h] == null)
          nDist[h] = Double.POSITIVE_INFINITY;
        else
          nDist[h] = distance(datum, m_NoiseM[h], m_NoiseV[h], h);
      }

      for(int k=0; k < m_Choose; k++){
        int pos = Utils.minIndex(vDist);
        minValDists[k] = vDist[pos];
        vDist[pos] = Double.POSITIVE_INFINITY;
        pos = Utils.minIndex(nDist);
        minNoiDists[k] = nDist[pos];
        nDist[pos] = Double.POSITIVE_INFINITY;
      }

      int x = 0, y = 0;
      while((x+y) < m_Choose){
        if(minValDists[x] <= minNoiDists[y]){
          validCount++;
          x++;
        }
        else{
          noiseCount++;
          y++;
        }
      }
      if(x >= y)
        insts.add(datum);

    }

    after.setValue(0, before.value(0));
    after.setValue(1, after.attribute(1).addRelation(insts));
    after.setValue(2, before.value(2));

    return after;
  }
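
  /*
   * A worked example of the vote above (illustrative numbers only): with
   * m_Choose = 3, minValDists = {0.2, 0.5, 0.9} and
   * minNoiDists = {0.3, 0.4, 0.8}, the merge compares 0.2 vs 0.3 (valid),
   * 0.5 vs 0.3 (noise) and 0.5 vs 0.4 (noise), giving x = 1 and y = 2;
   * since x < y, the instance is treated as noise and dropped.
   */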

  /**
   * This function calculates the Kullback Leibler distance between
   * two normal distributions. This distance is always positive.
   * Kullback Leibler distance = integral{f(X)ln(f(X)/g(X))}
   * Note that X is a vector. Since we assume the dimensions are independent,
   * f(X) (and likewise g(X)) is actually the product of the normal density
   * functions of each dimension. Also note that it should be log2
   * instead of ln in the formula, but we use ln simply for computational
   * convenience.
   *
   * The result is as follows, supposing there are P dimensions, and f(X)
   * is the first distribution and g(X) is the second:
   * Kullback = sum[1..P](ln(SIGMA2/SIGMA1)) +
   *            sum[1..P](SIGMA1^2 / (2*(SIGMA2^2))) +
   *            sum[1..P]((MU1-MU2)^2 / (2*(SIGMA2^2))) -
   *            P/2
   *
   * @param mu1 mu of the first normal distribution
   * @param mu2 mu of the second normal distribution
   * @param var1 variance (SIGMA^2) of the first normal distribution
   * @param var2 variance (SIGMA^2) of the second normal distribution
   * @param pos the index of the exemplar, used to look up its dimension weights
   * @return the Kullback distance of the two distributions
   */
  public double kullback(double[] mu1, double[] mu2,
      double[] var1, double[] var2, int pos){
    int p = mu1.length;
    double result = 0;

    for(int y=0; y < p; y++){
      if((Utils.gr(var1[y], 0)) && (Utils.gr(var2[y], 0))){
        result +=
          ((Math.log(Math.sqrt(var2[y]/var1[y]))) +
           (var1[y] / (2.0*var2[y])) +
           (m_Change[pos][y] * (mu1[y]-mu2[y])*(mu1[y]-mu2[y]) / (2.0*var2[y])) -
           0.5);
      }
    }

    return result;
  }
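
  /*
   * A one-dimensional worked example (illustrative numbers only), taking
   * the dimension weight m_Change[pos][0] = 1.0: for mu1 = 0, mu2 = 1,
   * var1 = 1 and var2 = 2, the term is
   * ln(sqrt(2/1)) + 1/(2*2) + 1*(0-1)^2/(2*2) - 0.5
   * = 0.3466 + 0.25 + 0.25 - 0.5 ≈ 0.35.
   */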

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector result = new Vector();

    result.addElement(new Option(
          "\tSet the number of nearest neighbours used for prediction\n"
          + "\t(default 1)",
          "K", 1, "-K <number of neighbours>"));

    result.addElement(new Option(
          "\tSet the number of nearest neighbours used for cleansing the training data\n"
          + "\t(default 1)",
          "S", 1, "-S <number of neighbours>"));

    result.addElement(new Option(
          "\tSet the number of nearest neighbours used for cleansing the testing data\n"
          + "\t(default 1)",
          "E", 1, "-E <number of neighbours>"));

    return result.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -K &lt;number of neighbours&gt;
   *  Set the number of nearest neighbours used for prediction
   *  (default 1)</pre>
   *
   * <pre> -S &lt;number of neighbours&gt;
   *  Set the number of nearest neighbours used for cleansing the training data
   *  (default 1)</pre>
   *
   * <pre> -E &lt;number of neighbours&gt;
   *  Set the number of nearest neighbours used for cleansing the testing data
   *  (default 1)</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    setDebug(Utils.getFlag('D', options));

    String numNeighbourString = Utils.getOption('K', options);
    if (numNeighbourString.length() != 0)
      setNumNeighbours(Integer.parseInt(numNeighbourString));
    else
      setNumNeighbours(1);

    numNeighbourString = Utils.getOption('S', options);
    if (numNeighbourString.length() != 0)
      setNumTrainingNoises(Integer.parseInt(numNeighbourString));
    else
      setNumTrainingNoises(1);

    numNeighbourString = Utils.getOption('E', options);
    if (numNeighbourString.length() != 0)
      setNumTestingNoises(Integer.parseInt(numNeighbourString));
    else
      setNumTestingNoises(1);
  }
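
  /*
   * For example (a hypothetical option string), the following is equivalent
   * to calling the three setters individually:
   *
   *   MINND minnd = new MINND();
   *   minnd.setOptions(Utils.splitOptions("-K 3 -S 2 -E 2"));
   */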

  /**
   * Gets the current settings of the Classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    Vector result;

    result = new Vector();

    if (getDebug())
      result.add("-D");

    result.add("-K");
    result.add("" + getNumNeighbours());

    result.add("-S");
    result.add("" + getNumTrainingNoises());

    result.add("-E");
    result.add("" + getNumTestingNoises());

    return (String[]) result.toArray(new String[result.size()]);
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numNeighboursTipText() {
    return "The number of nearest neighbours used to estimate the class prediction of test bags.";
  }

  /**
   * Sets the number of nearest neighbours used to estimate
   * the class prediction of test bags.
   *
   * @param numNeighbour the number of neighbours
   */
  public void setNumNeighbours(int numNeighbour){
    m_Neighbour = numNeighbour;
  }

  /**
   * Returns the number of nearest neighbours used to estimate
   * the class prediction of test bags.
   *
   * @return the number of neighbours
   */
  public int getNumNeighbours(){
    return m_Neighbour;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numTrainingNoisesTipText() {
    return "The number of nearest neighbour instances in the selection of noises in the training data.";
  }

  /**
   * Sets the number of nearest neighbour instances in the
   * selection of noises in the training data.
   *
   * @param numTraining the number of noises in the training data
   */
  public void setNumTrainingNoises(int numTraining){
    m_Select = numTraining;
  }

  /**
   * Returns the number of nearest neighbour instances in the
   * selection of noises in the training data.
   *
   * @return the number of noises in the training data
   */
  public int getNumTrainingNoises(){
    return m_Select;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numTestingNoisesTipText() {
    return "The number of nearest neighbour instances in the selection of noises in the test data.";
  }

  /**
   * Returns the number of nearest neighbour instances in the
   * selection of noises in the test data.
   *
   * @return the number of noises in the test data
   */
  public int getNumTestingNoises(){
    return m_Choose;
  }

  /**
   * Sets the number of nearest neighbour exemplars in the
   * selection of noises in the test data.
   *
   * @param numTesting the number of noises in the test data
   */
  public void setNumTestingNoises(int numTesting){
    m_Choose = numTesting;
  }

  /**
   * Returns the revision string.
   *
   * @return            the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5987 $");
  }

  /**
   * Main method for testing.
   *
   * @param args the options for the classifier
   */
  public static void main(String[] args) {
    runClassifier(new MINND(), args);
  }
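
  /*
   * Typical command-line usage via Weka's standard evaluation harness
   * (the dataset path is hypothetical):
   *
   *   java weka.classifiers.mi.MINND -t musk1.arff -K 3 -S 2 -E 2
   */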
}