source: src/main/java/weka/clusterers/forOPTICSAndDBScan/Databases/SequentialDatabase.java @ 17

Last change on this file since 17 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 10.9 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Copyright (C) 2004
19 *    & Matthias Schubert (schubert@dbs.ifi.lmu.de)
20 *    & Zhanna Melnikova-Albrecht (melnikov@cip.ifi.lmu.de)
21 *    & Rainer Holzmann (holzmann@cip.ifi.lmu.de)
22 */
23
24package weka.clusterers.forOPTICSAndDBScan.Databases;
25
26import weka.clusterers.forOPTICSAndDBScan.DataObjects.DataObject;
27import weka.clusterers.forOPTICSAndDBScan.Utils.EpsilonRange_ListElement;
28import weka.clusterers.forOPTICSAndDBScan.Utils.PriorityQueue;
29import weka.clusterers.forOPTICSAndDBScan.Utils.PriorityQueueElement;
30import weka.core.Instances;
31import weka.core.RevisionHandler;
32import weka.core.RevisionUtils;
33
34import java.io.Serializable;
35import java.util.ArrayList;
36import java.util.Iterator;
37import java.util.List;
38import java.util.TreeMap;
39
40/**
41 * <p>
42 * SequentialDatabase.java <br/>
43 * Authors: Rainer Holzmann, Zhanna Melnikova-Albrecht, Matthias Schubert <br/>
44 * Date: Aug 20, 2004 <br/>
45 * Time: 1:23:38 PM <br/>
46 * $ Revision 1.4 $ <br/>
47 * </p>
48 *
49 * @author Matthias Schubert (schubert@dbs.ifi.lmu.de)
50 * @author Zhanna Melnikova-Albrecht (melnikov@cip.ifi.lmu.de)
51 * @author Rainer Holzmann (holzmann@cip.ifi.lmu.de)
52 * @version $Revision: 1.4 $
53 */
54public class SequentialDatabase
55    implements Database, Serializable, RevisionHandler {
56
57    /** for serialization */
58    private static final long serialVersionUID = 787245523118665778L;
59
60    /**
61     * Internal, sorted Treemap for storing all the DataObjects
62     */
63    private TreeMap treeMap;
64
65    /**
66     * Holds the original instances delivered from WEKA
67     */
68    private Instances instances;
69
70    /**
71     * Holds the minimum value for each attribute
72     */
73    private double[] attributeMinValues;
74
75    /**
76     * Holds the maximum value for each attribute
77     */
78    private double[] attributeMaxValues;
79
80    // *****************************************************************************************************************
81    // constructors
82    // *****************************************************************************************************************
83
84    /**
85     * Constructs a new sequential database and holds the original instances
86     * @param instances
87     */
88    public SequentialDatabase(Instances instances) {
89        this.instances = instances;
90        treeMap = new TreeMap();
91    }
92
93    // *****************************************************************************************************************
94    // methods
95    // *****************************************************************************************************************
96
97    /**
98     * Select a dataObject from the database
99     * @param key The key that is associated with the dataObject
100     * @return dataObject
101     */
102    public DataObject getDataObject(String key) {
103        return (DataObject) treeMap.get(key);
104    }
105
106    /**
107     * Sets the minimum and maximum values for each attribute in different arrays
108     * by walking through every DataObject of the database
109     */
110    public void setMinMaxValues() {
111        attributeMinValues = new double[getInstances().numAttributes()];
112        attributeMaxValues = new double[getInstances().numAttributes()];
113
114        //Init
115        for (int i = 0; i < getInstances().numAttributes(); i++) {
116            attributeMinValues[i] = attributeMaxValues[i] = Double.NaN;
117        }
118
119        Iterator iterator = dataObjectIterator();
120        while (iterator.hasNext()) {
121            DataObject dataObject = (DataObject) iterator.next();
122            for (int j = 0; j < getInstances().numAttributes(); j++) {
123                if (Double.isNaN(attributeMinValues[j])) {
124                    attributeMinValues[j] = dataObject.getInstance().value(j);
125                    attributeMaxValues[j] = dataObject.getInstance().value(j);
126                } else {
127                    if (dataObject.getInstance().value(j) < attributeMinValues[j])
128                        attributeMinValues[j] = dataObject.getInstance().value(j);
129                    if (dataObject.getInstance().value(j) > attributeMaxValues[j])
130                        attributeMaxValues[j] = dataObject.getInstance().value(j);
131                }
132            }
133        }
134    }
135
136    /**
137     * Returns the array of minimum-values for each attribute
138     * @return attributeMinValues
139     */
140    public double[] getAttributeMinValues() {
141        return attributeMinValues;
142    }
143
144    /**
145     * Returns the array of maximum-values for each attribute
146     * @return attributeMaxValues
147     */
148    public double[] getAttributeMaxValues() {
149        return attributeMaxValues;
150    }
151
152    /**
153     * Performs an epsilon range query for this dataObject
154     * @param epsilon Specifies the range for the query
155     * @param queryDataObject The dataObject that is used as query-object for epsilon range query
156     * @return List with all the DataObjects that are within the specified range
157     */
158    public List epsilonRangeQuery(double epsilon, DataObject queryDataObject) {
159        ArrayList epsilonRange_List = new ArrayList();
160        Iterator iterator = dataObjectIterator();
161        while (iterator.hasNext()) {
162            DataObject dataObject = (DataObject) iterator.next();
163            double distance = queryDataObject.distance(dataObject);
164            if (distance < epsilon) {
165                epsilonRange_List.add(dataObject);
166            }
167        }
168
169        return epsilonRange_List;
170    }
171
172    /**
173     * Emits the k next-neighbours and performs an epsilon-range-query at the parallel.
174     * The returned list contains two elements:
175     * At index=0 --> list with all k next-neighbours;
176     * At index=1 --> list with all dataObjects within epsilon;
177     * @param k number of next neighbours
178     * @param epsilon Specifies the range for the query
179     * @param dataObject the start object
180     * @return list with the k-next neighbours (PriorityQueueElements) and a list
181     *         with candidates from the epsilon-range-query (EpsilonRange_ListElements)
182     */
183    public List k_nextNeighbourQuery(int k, double epsilon, DataObject dataObject) {
184        Iterator iterator = dataObjectIterator();
185
186        List return_List = new ArrayList();
187        List nextNeighbours_List = new ArrayList();
188        List epsilonRange_List = new ArrayList();
189
190        PriorityQueue priorityQueue = new PriorityQueue();
191
192        while (iterator.hasNext()) {
193            DataObject next_dataObject = (DataObject) iterator.next();
194            double dist = dataObject.distance(next_dataObject);
195
196            if (dist <= epsilon) epsilonRange_List.add(new EpsilonRange_ListElement(dist, next_dataObject));
197
198            if (priorityQueue.size() < k) {
199                priorityQueue.add(dist, next_dataObject);
200            } else {
201                if (dist < priorityQueue.getPriority(0)) {
202                    priorityQueue.next(); //removes the highest distance
203                    priorityQueue.add(dist, next_dataObject);
204                }
205            }
206        }
207
208        while (priorityQueue.hasNext()) {
209            nextNeighbours_List.add(0, priorityQueue.next());
210        }
211
212        return_List.add(nextNeighbours_List);
213        return_List.add(epsilonRange_List);
214        return return_List;
215    }
216
217    /**
218     * Calculates the coreDistance for the specified DataObject.
219     * The returned list contains three elements:
220     * At index=0 --> list with all k next-neighbours;
221     * At index=1 --> list with all dataObjects within epsilon;
222     * At index=2 --> coreDistance as Double-value
223     * @param minPoints minPoints-many neighbours within epsilon must be found to have a non-undefined coreDistance
224     * @param epsilon Specifies the range for the query
225     * @param dataObject Calculate coreDistance for this dataObject
226     * @return list with the k-next neighbours (PriorityQueueElements) and a list
227     *         with candidates from the epsilon-range-query (EpsilonRange_ListElements) and
228     *         the double-value for the calculated coreDistance
229     */
230    public List coreDistance(int minPoints, double epsilon, DataObject dataObject) {
231        List list = k_nextNeighbourQuery(minPoints, epsilon, dataObject);
232
233        if (((List) list.get(1)).size() < minPoints) {
234            list.add(new Double(DataObject.UNDEFINED));
235            return list;
236        } else {
237            List nextNeighbours_List = (List) list.get(0);
238            PriorityQueueElement priorityQueueElement =
239                    (PriorityQueueElement) nextNeighbours_List.get(nextNeighbours_List.size() - 1);
240            if (priorityQueueElement.getPriority() <= epsilon) {
241                list.add(new Double(priorityQueueElement.getPriority()));
242                return list;
243            } else {
244                list.add(new Double(DataObject.UNDEFINED));
245                return list;
246            }
247        }
248    }
249
250    /**
251     * Returns the size of the database (the number of dataObjects in the database)
252     * @return size
253     */
254    public int size() {
255        return treeMap.size();
256    }
257
258    /**
259     * Returns an iterator over all the keys
260     * @return iterator
261     */
262    public Iterator keyIterator() {
263        return treeMap.keySet().iterator();
264    }
265
266    /**
267     * Returns an iterator over all the dataObjects in the database
268     * @return iterator
269     */
270    public Iterator dataObjectIterator() {
271        return treeMap.values().iterator();
272    }
273
274    /**
275     * Tests if the database contains the dataObject_Query
276     * @param dataObject_Query The query-object
277     * @return true if the database contains dataObject_Query, else false
278     */
279    public boolean contains(DataObject dataObject_Query) {
280        Iterator iterator = dataObjectIterator();
281        while (iterator.hasNext()) {
282            DataObject dataObject = (DataObject) iterator.next();
283            if (dataObject.equals(dataObject_Query)) return true;
284        }
285        return false;
286    }
287
288    /**
289     * Inserts a new dataObject into the database
290     * @param dataObject
291     */
292    public void insert(DataObject dataObject) {
293        treeMap.put(dataObject.getKey(), dataObject);
294    }
295
296    /**
297     * Returns the original instances delivered from WEKA
298     * @return instances
299     */
300    public Instances getInstances() {
301        return instances;
302    }
303   
304    /**
305     * Returns the revision string.
306     *
307     * @return          the revision
308     */
309    public String getRevision() {
310      return RevisionUtils.extract("$Revision: 1.4 $");
311    }
312}
Note: See TracBrowser for help on using the repository browser.