Context Navigation

KMeansInpiredMethod.java

Last change on this file was 29, checked in by gnappo, 15 years ago
Taggata versione per la demo e aggiunto branch.
File size: 14.2 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* KMeansInpiredMethod.java
19	* Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
20	*/
21
22	package weka.core.neighboursearch.kdtrees;
23
24	import weka.core.Instance;
25	import weka.core.Instances;
26	import weka.core.RevisionUtils;
27	import weka.core.TechnicalInformation;
28	import weka.core.TechnicalInformationHandler;
29	import weka.core.TechnicalInformation.Field;
30	import weka.core.TechnicalInformation.Type;
31
32	/**
33	<!-- globalinfo-start -->
34	* The class that splits a node into two such that the overall sum of squared distances of points to their centres on both sides of the (axis-parallel) splitting plane is minimum.<br/>
35	* <br/>
36	* For more information see also:<br/>
37	* <br/>
38	* Ashraf Masood Kibriya (2007). Fast Algorithms for Nearest Neighbour Search. Hamilton, New Zealand.
39	* <p/>
40	<!-- globalinfo-end -->
41	*
42	<!-- technical-bibtex-start -->
43	* BibTeX:
44	* <pre>
45	* @mastersthesis{Kibriya2007,
46	* address = {Hamilton, New Zealand},
47	* author = {Ashraf Masood Kibriya},
48	* school = {Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato},
49	* title = {Fast Algorithms for Nearest Neighbour Search},
50	* year = {2007}
51	* }
52	* </pre>
53	* <p/>
54	<!-- technical-bibtex-end -->
55	*
56	<!-- options-start -->
57	<!-- options-end -->
58	*
59	* @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
60	* @version $Revision: 5953 $
61	*/
62	public class KMeansInpiredMethod
63	extends KDTreeNodeSplitter
64	implements TechnicalInformationHandler {
65
66	/** for serialization. */
67	private static final long serialVersionUID = -866783749124714304L;
68
69	/**
70	* Returns a string describing this nearest neighbour search algorithm.
71	*
72	* @return a description of the algorithm for displaying in the
73	* explorer/experimenter gui
74	*/
75	public String globalInfo() {
76	return
77	"The class that splits a node into two such that the overall sum "
78	+ "of squared distances of points to their centres on both sides "
79	+ "of the (axis-parallel) splitting plane is minimum.\n\n"
80	+ "For more information see also:\n\n"
81	+ getTechnicalInformation().toString();
82	}
83
84	/**
85	* Returns an instance of a TechnicalInformation object, containing detailed
86	* information about the technical background of this class, e.g., paper
87	* reference or book this class is based on.
88	*
89	* @return the technical information about this class
90	*/
91	public TechnicalInformation getTechnicalInformation() {
92	TechnicalInformation result;
93
94	result = new TechnicalInformation(Type.MASTERSTHESIS);
95	result.setValue(Field.AUTHOR, "Ashraf Masood Kibriya");
96	result.setValue(Field.TITLE, "Fast Algorithms for Nearest Neighbour Search");
97	result.setValue(Field.YEAR, "2007");
98	result.setValue(Field.SCHOOL, "Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato");
99	result.setValue(Field.ADDRESS, "Hamilton, New Zealand");
100
101	return result;
102	}
103
104	/**
105	* Splits a node into two such that the overall sum of squared distances
106	* of points to their centres on both sides of the (axis-parallel)
107	* splitting plane is minimum. The two nodes created after the whole
108	* splitting are correctly initialised. And, node.left and node.right
109	* are set appropriately.
110	* @param node The node to split.
111	* @param numNodesCreated The number of nodes that so far have been
112	* created for the tree, so that the newly created nodes are
113	* assigned correct/meaningful node numbers/ids.
114	* @param nodeRanges The attributes' range for the points inside
115	* the node that is to be split.
116	* @param universe The attributes' range for the whole
117	* point-space.
118	* @throws Exception If there is some problem in splitting the
119	* given node.
120	*/
121	public void splitNode(KDTreeNode node, int numNodesCreated,
122	double[][] nodeRanges, double[][] universe) throws Exception {
123
124	correctlyInitialized();
125
126	int splitDim = -1;
127	double splitVal = Double.NEGATIVE_INFINITY;
128
129	double leftAttSum[] = new double[m_Instances.numAttributes()],
130	rightAttSum[] = new double[m_Instances.numAttributes()],
131	leftAttSqSum[] = new double[m_Instances.numAttributes()],
132	rightAttSqSum[] = new double[m_Instances.numAttributes()],
133	rightSqMean, leftSqMean, leftSqSum, rightSqSum,
134	minSum = Double.POSITIVE_INFINITY, val;
135
136	for (int dim = 0; dim < m_Instances.numAttributes(); dim++) {
137	// m_MaxRelativeWidth in KDTree ensure there'll be atleast one dim with
138	// width > 0.0
139	if (node.m_NodeRanges[dim][WIDTH] == 0.0
140	\|\| dim == m_Instances.classIndex())
141	continue;
142
143	quickSort(m_Instances, m_InstList, dim, node.m_Start, node.m_End);
144
145	for (int i = node.m_Start; i <= node.m_End; i++) {
146	for (int j = 0; j < m_Instances.numAttributes(); j++) {
147	if (j == m_Instances.classIndex())
148	continue;
149	val = m_Instances.instance(m_InstList[i]).value(j);
150	if (m_NormalizeNodeWidth) {
151	if (Double.isNaN(universe[j][MIN])
152	\|\| universe[j][MIN] == universe[j][MAX])
153	val = 0.0;
154	else
155	val = ((val - universe[j][MIN]) / universe[j][WIDTH]); // normalizing
156	// value
157	}
158	if (i == node.m_Start) {
159	leftAttSum[j] = rightAttSum[j] = leftAttSqSum[j] = rightAttSqSum[j] = 0.0;
160	}
161	rightAttSum[j] += val;
162	rightAttSqSum[j] += val * val;
163	}
164	}
165
166	for (int i = node.m_Start; i <= node.m_End - 1; i++) {
167	Instance inst = m_Instances.instance(m_InstList[i]);
168	leftSqSum = rightSqSum = 0.0;
169	for (int j = 0; j < m_Instances.numAttributes(); j++) {
170	if (j == m_Instances.classIndex())
171	continue;
172	val = inst.value(j);
173
174	if (m_NormalizeNodeWidth) {
175	if (Double.isNaN(universe[j][MIN])
176	\|\| universe[j][MIN] == universe[j][MAX])
177	val = 0.0;
178	else
179	val = ((val - universe[j][MIN]) / universe[j][WIDTH]); // normalizing
180	// value
181	}
182
183	leftAttSum[j] += val;
184	rightAttSum[j] -= val;
185	leftAttSqSum[j] += val * val;
186	rightAttSqSum[j] -= val * val;
187	leftSqMean = leftAttSum[j] / (i - node.m_Start + 1);
188	leftSqMean *= leftSqMean;
189	rightSqMean = rightAttSum[j] / (node.m_End - i);
190	rightSqMean *= rightSqMean;
191
192	leftSqSum += leftAttSqSum[j] - (i - node.m_Start + 1) * leftSqMean;
193	rightSqSum += rightAttSqSum[j] - (node.m_End - i) * rightSqMean;
194	}
195
196	if (minSum > (leftSqSum + rightSqSum)) {
197	minSum = leftSqSum + rightSqSum;
198
199	if (i < node.m_End)
200	splitVal = (m_Instances.instance(m_InstList[i]).value(dim) + m_Instances
201	.instance(m_InstList[i + 1]).value(dim)) / 2;
202	else
203	splitVal = m_Instances.instance(m_InstList[i]).value(dim);
204
205	splitDim = dim;
206	}
207	}// end for instance i
208	}// end for attribute dim
209
210	int rightStart = rearrangePoints(m_InstList, node.m_Start, node.m_End,
211	splitDim, splitVal);
212
213	if (rightStart == node.m_Start \|\| rightStart > node.m_End) {
214	System.out.println("node.m_Start: " + node.m_Start + " node.m_End: "
215	+ node.m_End + " splitDim: " + splitDim + " splitVal: " + splitVal
216	+ " node.min: " + node.m_NodeRanges[splitDim][MIN] + " node.max: "
217	+ node.m_NodeRanges[splitDim][MAX] + " node.numInstances: "
218	+ node.numInstances());
219
220	if (rightStart == node.m_Start)
221	throw new Exception("Left child is empty in node " + node.m_NodeNumber
222	+ ". Not possible with "
223	+ "KMeanInspiredMethod splitting method. Please " + "check code.");
224	else
225	throw new Exception("Right child is empty in node " + node.m_NodeNumber
226	+ ". Not possible with "
227	+ "KMeansInspiredMethod splitting method. Please " + "check code.");
228	}
229
230	node.m_SplitDim = splitDim;
231	node.m_SplitValue = splitVal;
232	node.m_Left = new KDTreeNode(numNodesCreated + 1, node.m_Start,
233	rightStart - 1, m_EuclideanDistance.initializeRanges(m_InstList,
234	node.m_Start, rightStart - 1));
235	node.m_Right = new KDTreeNode(numNodesCreated + 2, rightStart, node.m_End,
236	m_EuclideanDistance
237	.initializeRanges(m_InstList, rightStart, node.m_End));
238	}
239
240	/**
241	* Partitions the instances around a pivot. Used by quicksort and
242	* kthSmallestValue.
243	*
244	* @param insts The instances on which the tree is (or is
245	* to be) built.
246	* @param index The master index array containing indices
247	* of the instances.
248	* @param attidx The attribution/dimension based on which
249	* the instances should be partitioned.
250	* @param l The begining index of the portion of master index
251	* array that should be partitioned.
252	* @param r The end index of the portion of master index array
253	* that should be partitioned.
254	* @return the index of the middle element
255	*/
256	protected static int partition(Instances insts, int[] index, int attidx, int l, int r) {
257
258	double pivot = insts.instance(index[(l + r) / 2]).value(attidx);
259	int help;
260
261	while (l < r) {
262	while ((insts.instance(index[l]).value(attidx) < pivot) && (l < r)) {
263	l++;
264	}
265	while ((insts.instance(index[r]).value(attidx) > pivot) && (l < r)) {
266	r--;
267	}
268	if (l < r) {
269	help = index[l];
270	index[l] = index[r];
271	index[r] = help;
272	l++;
273	r--;
274	}
275	}
276	if ((l == r) && (insts.instance(index[r]).value(attidx) > pivot)) {
277	r--;
278	}
279
280	return r;
281	}
282
283	/**
284	* Sorts the instances according to the given attribute/dimension.
285	* The sorting is done on the master index array and not on the
286	* actual instances object.
287	*
288	* @param insts The instances on which the tree is (or is
289	* to be) built.
290	* @param indices The master index array containing indices
291	* of the instances.
292	* @param attidx The dimension/attribute based on which
293	* the instances should be sorted.
294	* @param left The begining index of the portion of the master
295	* index array that needs to be sorted.
296	* @param right The end index of the portion of the master index
297	* array that needs to be sorted.
298	*/
299	protected static void quickSort(Instances insts, int[] indices, int attidx, int left, int right) {
300
301	if (left < right) {
302	int middle = partition(insts, indices, attidx, left, right);
303	quickSort(insts, indices, attidx, left, middle);
304	quickSort(insts, indices, attidx, middle + 1, right);
305	}
306	}
307
308	/**
309	* Method to validate the sorting done by quickSort().
310	*
311	* @param insts The instances on which the tree is (or is
312	* to be) built.
313	* @param indices The master index array containing indices
314	* of the instances.
315	* @param attidx The dimension/attribute based on which
316	* the instances should be sorted.
317	* @param start The start of the portion in master index
318	* array that needs to be sorted.
319	* @param end The end of the portion in master index
320	* array that needs to be sorted.
321	* @throws Exception If the indices of the instances
322	* are not in sorted order.
323	*/
324	private static void checkSort(Instances insts, int[] indices, int attidx,
325	int start, int end) throws Exception {
326	for(int i=start+1; i<=end; i++) {
327	if( insts.instance(indices[i-1]).value(attidx) >
328	insts.instance(indices[i]).value(attidx) ) {
329	System.out.println("value[i-1]: "+insts.instance(indices[i-1]).value(attidx));
330	System.out.println("value[i]: "+insts.instance(indices[i]).value(attidx));
331	System.out.println("indices[i-1]: "+indices[i-1]);
332	System.out.println("indices[i]: "+indices[i]);
333	System.out.println("i: "+i);
334	if(insts.instance(indices[i-1]).value(attidx) > insts.instance(indices[i]).value(attidx))
335	System.out.println("value[i-1] > value[i]");
336
337	throw new Exception("Indices not sorted correctly.");
338	}//end if
339	}
340	}
341
342	/**
343	* Re-arranges the indices array so that in the portion of the array
344	* belonging to the node to be split, the points <= to the splitVal
345	* are on the left of the portion and those > the splitVal are on the right.
346	*
347	* @param indices The master index array.
348	* @param startidx The begining index of portion of indices that needs
349	* re-arranging.
350	* @param endidx The end index of portion of indices that needs
351	* re-arranging.
352	* @param splitDim The split dimension/attribute.
353	* @param splitVal The split value.
354	* @return The startIdx of the points > the splitVal (the points
355	* belonging to the right child of the node).
356	*/
357	protected int rearrangePoints(int[] indices, final int startidx, final int endidx,
358	final int splitDim, final double splitVal) {
359
360	int tmp, left = startidx - 1;
361	for (int i = startidx; i <= endidx; i++) {
362	if (m_EuclideanDistance.valueIsSmallerEqual(m_Instances
363	.instance(indices[i]), splitDim, splitVal)) {
364	left++;
365	tmp = indices[left];
366	indices[left] = indices[i];
367	indices[i] = tmp;
368	}// end valueIsSmallerEqual
369	}// endfor
370	return left + 1;
371	}
372
373	/**
374	* Returns the revision string.
375	*
376	* @return the revision
377	*/
378	public String getRevision() {
379	return RevisionUtils.extract("$Revision: 5953 $");
380	}
381	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/MetisMQI/src/main/java/weka/core/neighboursearch/kdtrees/KMeansInpiredMethod.java

Download in other formats: