| 1 | /* |
|---|
| 2 | * This program is free software; you can redistribute it and/or modify |
|---|
| 3 | * it under the terms of the GNU General Public License as published by |
|---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
|---|
| 5 | * (at your option) any later version. |
|---|
| 6 | * |
|---|
| 7 | * This program is distributed in the hope that it will be useful, |
|---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 10 | * GNU General Public License for more details. |
|---|
| 11 | * |
|---|
| 12 | * You should have received a copy of the GNU General Public License |
|---|
| 13 | * along with this program; if not, write to the Free Software |
|---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
|---|
| 15 | */ |
|---|
| 16 | |
|---|
| 17 | /* |
|---|
| 18 | * Copyright (C) 2004 |
|---|
| 19 | * & Matthias Schubert (schubert@dbs.ifi.lmu.de) |
|---|
| 20 | * & Zhanna Melnikova-Albrecht (melnikov@cip.ifi.lmu.de) |
|---|
| 21 | * & Rainer Holzmann (holzmann@cip.ifi.lmu.de) |
|---|
| 22 | */ |
|---|
| 23 | |
|---|
| 24 | package weka.clusterers.forOPTICSAndDBScan.Databases; |
|---|
| 25 | |
|---|
| 26 | import weka.clusterers.forOPTICSAndDBScan.DataObjects.DataObject; |
|---|
| 27 | import weka.clusterers.forOPTICSAndDBScan.Utils.EpsilonRange_ListElement; |
|---|
| 28 | import weka.clusterers.forOPTICSAndDBScan.Utils.PriorityQueue; |
|---|
| 29 | import weka.clusterers.forOPTICSAndDBScan.Utils.PriorityQueueElement; |
|---|
| 30 | import weka.core.Instances; |
|---|
| 31 | import weka.core.RevisionHandler; |
|---|
| 32 | import weka.core.RevisionUtils; |
|---|
| 33 | |
|---|
| 34 | import java.io.Serializable; |
|---|
| 35 | import java.util.ArrayList; |
|---|
| 36 | import java.util.Iterator; |
|---|
| 37 | import java.util.List; |
|---|
| 38 | import java.util.TreeMap; |
|---|
| 39 | |
|---|
| 40 | /** |
|---|
| 41 | * <p> |
|---|
| 42 | * SequentialDatabase.java <br/> |
|---|
| 43 | * Authors: Rainer Holzmann, Zhanna Melnikova-Albrecht, Matthias Schubert <br/> |
|---|
| 44 | * Date: Aug 20, 2004 <br/> |
|---|
| 45 | * Time: 1:23:38 PM <br/> |
|---|
| 46 | * $ Revision 1.4 $ <br/> |
|---|
| 47 | * </p> |
|---|
| 48 | * |
|---|
| 49 | * @author Matthias Schubert (schubert@dbs.ifi.lmu.de) |
|---|
| 50 | * @author Zhanna Melnikova-Albrecht (melnikov@cip.ifi.lmu.de) |
|---|
| 51 | * @author Rainer Holzmann (holzmann@cip.ifi.lmu.de) |
|---|
| 52 | * @version $Revision: 1.4 $ |
|---|
| 53 | */ |
|---|
| 54 | public class SequentialDatabase |
|---|
| 55 | implements Database, Serializable, RevisionHandler { |
|---|
| 56 | |
|---|
| 57 | /** for serialization */ |
|---|
| 58 | private static final long serialVersionUID = 787245523118665778L; |
|---|
| 59 | |
|---|
| 60 | /** |
|---|
| 61 | * Internal, sorted Treemap for storing all the DataObjects |
|---|
| 62 | */ |
|---|
| 63 | private TreeMap treeMap; |
|---|
| 64 | |
|---|
| 65 | /** |
|---|
| 66 | * Holds the original instances delivered from WEKA |
|---|
| 67 | */ |
|---|
| 68 | private Instances instances; |
|---|
| 69 | |
|---|
| 70 | /** |
|---|
| 71 | * Holds the minimum value for each attribute |
|---|
| 72 | */ |
|---|
| 73 | private double[] attributeMinValues; |
|---|
| 74 | |
|---|
| 75 | /** |
|---|
| 76 | * Holds the maximum value for each attribute |
|---|
| 77 | */ |
|---|
| 78 | private double[] attributeMaxValues; |
|---|
| 79 | |
|---|
| 80 | // ***************************************************************************************************************** |
|---|
| 81 | // constructors |
|---|
| 82 | // ***************************************************************************************************************** |
|---|
| 83 | |
|---|
| 84 | /** |
|---|
| 85 | * Constructs a new sequential database and holds the original instances |
|---|
| 86 | * @param instances |
|---|
| 87 | */ |
|---|
| 88 | public SequentialDatabase(Instances instances) { |
|---|
| 89 | this.instances = instances; |
|---|
| 90 | treeMap = new TreeMap(); |
|---|
| 91 | } |
|---|
| 92 | |
|---|
| 93 | // ***************************************************************************************************************** |
|---|
| 94 | // methods |
|---|
| 95 | // ***************************************************************************************************************** |
|---|
| 96 | |
|---|
| 97 | /** |
|---|
| 98 | * Select a dataObject from the database |
|---|
| 99 | * @param key The key that is associated with the dataObject |
|---|
| 100 | * @return dataObject |
|---|
| 101 | */ |
|---|
| 102 | public DataObject getDataObject(String key) { |
|---|
| 103 | return (DataObject) treeMap.get(key); |
|---|
| 104 | } |
|---|
| 105 | |
|---|
| 106 | /** |
|---|
| 107 | * Sets the minimum and maximum values for each attribute in different arrays |
|---|
| 108 | * by walking through every DataObject of the database |
|---|
| 109 | */ |
|---|
| 110 | public void setMinMaxValues() { |
|---|
| 111 | attributeMinValues = new double[getInstances().numAttributes()]; |
|---|
| 112 | attributeMaxValues = new double[getInstances().numAttributes()]; |
|---|
| 113 | |
|---|
| 114 | //Init |
|---|
| 115 | for (int i = 0; i < getInstances().numAttributes(); i++) { |
|---|
| 116 | attributeMinValues[i] = attributeMaxValues[i] = Double.NaN; |
|---|
| 117 | } |
|---|
| 118 | |
|---|
| 119 | Iterator iterator = dataObjectIterator(); |
|---|
| 120 | while (iterator.hasNext()) { |
|---|
| 121 | DataObject dataObject = (DataObject) iterator.next(); |
|---|
| 122 | for (int j = 0; j < getInstances().numAttributes(); j++) { |
|---|
| 123 | if (Double.isNaN(attributeMinValues[j])) { |
|---|
| 124 | attributeMinValues[j] = dataObject.getInstance().value(j); |
|---|
| 125 | attributeMaxValues[j] = dataObject.getInstance().value(j); |
|---|
| 126 | } else { |
|---|
| 127 | if (dataObject.getInstance().value(j) < attributeMinValues[j]) |
|---|
| 128 | attributeMinValues[j] = dataObject.getInstance().value(j); |
|---|
| 129 | if (dataObject.getInstance().value(j) > attributeMaxValues[j]) |
|---|
| 130 | attributeMaxValues[j] = dataObject.getInstance().value(j); |
|---|
| 131 | } |
|---|
| 132 | } |
|---|
| 133 | } |
|---|
| 134 | } |
|---|
| 135 | |
|---|
| 136 | /** |
|---|
| 137 | * Returns the array of minimum-values for each attribute |
|---|
| 138 | * @return attributeMinValues |
|---|
| 139 | */ |
|---|
| 140 | public double[] getAttributeMinValues() { |
|---|
| 141 | return attributeMinValues; |
|---|
| 142 | } |
|---|
| 143 | |
|---|
| 144 | /** |
|---|
| 145 | * Returns the array of maximum-values for each attribute |
|---|
| 146 | * @return attributeMaxValues |
|---|
| 147 | */ |
|---|
| 148 | public double[] getAttributeMaxValues() { |
|---|
| 149 | return attributeMaxValues; |
|---|
| 150 | } |
|---|
| 151 | |
|---|
| 152 | /** |
|---|
| 153 | * Performs an epsilon range query for this dataObject |
|---|
| 154 | * @param epsilon Specifies the range for the query |
|---|
| 155 | * @param queryDataObject The dataObject that is used as query-object for epsilon range query |
|---|
| 156 | * @return List with all the DataObjects that are within the specified range |
|---|
| 157 | */ |
|---|
| 158 | public List epsilonRangeQuery(double epsilon, DataObject queryDataObject) { |
|---|
| 159 | ArrayList epsilonRange_List = new ArrayList(); |
|---|
| 160 | Iterator iterator = dataObjectIterator(); |
|---|
| 161 | while (iterator.hasNext()) { |
|---|
| 162 | DataObject dataObject = (DataObject) iterator.next(); |
|---|
| 163 | double distance = queryDataObject.distance(dataObject); |
|---|
| 164 | if (distance < epsilon) { |
|---|
| 165 | epsilonRange_List.add(dataObject); |
|---|
| 166 | } |
|---|
| 167 | } |
|---|
| 168 | |
|---|
| 169 | return epsilonRange_List; |
|---|
| 170 | } |
|---|
| 171 | |
|---|
| 172 | /** |
|---|
| 173 | * Emits the k next-neighbours and performs an epsilon-range-query at the parallel. |
|---|
| 174 | * The returned list contains two elements: |
|---|
| 175 | * At index=0 --> list with all k next-neighbours; |
|---|
| 176 | * At index=1 --> list with all dataObjects within epsilon; |
|---|
| 177 | * @param k number of next neighbours |
|---|
| 178 | * @param epsilon Specifies the range for the query |
|---|
| 179 | * @param dataObject the start object |
|---|
| 180 | * @return list with the k-next neighbours (PriorityQueueElements) and a list |
|---|
| 181 | * with candidates from the epsilon-range-query (EpsilonRange_ListElements) |
|---|
| 182 | */ |
|---|
| 183 | public List k_nextNeighbourQuery(int k, double epsilon, DataObject dataObject) { |
|---|
| 184 | Iterator iterator = dataObjectIterator(); |
|---|
| 185 | |
|---|
| 186 | List return_List = new ArrayList(); |
|---|
| 187 | List nextNeighbours_List = new ArrayList(); |
|---|
| 188 | List epsilonRange_List = new ArrayList(); |
|---|
| 189 | |
|---|
| 190 | PriorityQueue priorityQueue = new PriorityQueue(); |
|---|
| 191 | |
|---|
| 192 | while (iterator.hasNext()) { |
|---|
| 193 | DataObject next_dataObject = (DataObject) iterator.next(); |
|---|
| 194 | double dist = dataObject.distance(next_dataObject); |
|---|
| 195 | |
|---|
| 196 | if (dist <= epsilon) epsilonRange_List.add(new EpsilonRange_ListElement(dist, next_dataObject)); |
|---|
| 197 | |
|---|
| 198 | if (priorityQueue.size() < k) { |
|---|
| 199 | priorityQueue.add(dist, next_dataObject); |
|---|
| 200 | } else { |
|---|
| 201 | if (dist < priorityQueue.getPriority(0)) { |
|---|
| 202 | priorityQueue.next(); //removes the highest distance |
|---|
| 203 | priorityQueue.add(dist, next_dataObject); |
|---|
| 204 | } |
|---|
| 205 | } |
|---|
| 206 | } |
|---|
| 207 | |
|---|
| 208 | while (priorityQueue.hasNext()) { |
|---|
| 209 | nextNeighbours_List.add(0, priorityQueue.next()); |
|---|
| 210 | } |
|---|
| 211 | |
|---|
| 212 | return_List.add(nextNeighbours_List); |
|---|
| 213 | return_List.add(epsilonRange_List); |
|---|
| 214 | return return_List; |
|---|
| 215 | } |
|---|
| 216 | |
|---|
| 217 | /** |
|---|
| 218 | * Calculates the coreDistance for the specified DataObject. |
|---|
| 219 | * The returned list contains three elements: |
|---|
| 220 | * At index=0 --> list with all k next-neighbours; |
|---|
| 221 | * At index=1 --> list with all dataObjects within epsilon; |
|---|
| 222 | * At index=2 --> coreDistance as Double-value |
|---|
| 223 | * @param minPoints minPoints-many neighbours within epsilon must be found to have a non-undefined coreDistance |
|---|
| 224 | * @param epsilon Specifies the range for the query |
|---|
| 225 | * @param dataObject Calculate coreDistance for this dataObject |
|---|
| 226 | * @return list with the k-next neighbours (PriorityQueueElements) and a list |
|---|
| 227 | * with candidates from the epsilon-range-query (EpsilonRange_ListElements) and |
|---|
| 228 | * the double-value for the calculated coreDistance |
|---|
| 229 | */ |
|---|
| 230 | public List coreDistance(int minPoints, double epsilon, DataObject dataObject) { |
|---|
| 231 | List list = k_nextNeighbourQuery(minPoints, epsilon, dataObject); |
|---|
| 232 | |
|---|
| 233 | if (((List) list.get(1)).size() < minPoints) { |
|---|
| 234 | list.add(new Double(DataObject.UNDEFINED)); |
|---|
| 235 | return list; |
|---|
| 236 | } else { |
|---|
| 237 | List nextNeighbours_List = (List) list.get(0); |
|---|
| 238 | PriorityQueueElement priorityQueueElement = |
|---|
| 239 | (PriorityQueueElement) nextNeighbours_List.get(nextNeighbours_List.size() - 1); |
|---|
| 240 | if (priorityQueueElement.getPriority() <= epsilon) { |
|---|
| 241 | list.add(new Double(priorityQueueElement.getPriority())); |
|---|
| 242 | return list; |
|---|
| 243 | } else { |
|---|
| 244 | list.add(new Double(DataObject.UNDEFINED)); |
|---|
| 245 | return list; |
|---|
| 246 | } |
|---|
| 247 | } |
|---|
| 248 | } |
|---|
| 249 | |
|---|
| 250 | /** |
|---|
| 251 | * Returns the size of the database (the number of dataObjects in the database) |
|---|
| 252 | * @return size |
|---|
| 253 | */ |
|---|
| 254 | public int size() { |
|---|
| 255 | return treeMap.size(); |
|---|
| 256 | } |
|---|
| 257 | |
|---|
| 258 | /** |
|---|
| 259 | * Returns an iterator over all the keys |
|---|
| 260 | * @return iterator |
|---|
| 261 | */ |
|---|
| 262 | public Iterator keyIterator() { |
|---|
| 263 | return treeMap.keySet().iterator(); |
|---|
| 264 | } |
|---|
| 265 | |
|---|
| 266 | /** |
|---|
| 267 | * Returns an iterator over all the dataObjects in the database |
|---|
| 268 | * @return iterator |
|---|
| 269 | */ |
|---|
| 270 | public Iterator dataObjectIterator() { |
|---|
| 271 | return treeMap.values().iterator(); |
|---|
| 272 | } |
|---|
| 273 | |
|---|
| 274 | /** |
|---|
| 275 | * Tests if the database contains the dataObject_Query |
|---|
| 276 | * @param dataObject_Query The query-object |
|---|
| 277 | * @return true if the database contains dataObject_Query, else false |
|---|
| 278 | */ |
|---|
| 279 | public boolean contains(DataObject dataObject_Query) { |
|---|
| 280 | Iterator iterator = dataObjectIterator(); |
|---|
| 281 | while (iterator.hasNext()) { |
|---|
| 282 | DataObject dataObject = (DataObject) iterator.next(); |
|---|
| 283 | if (dataObject.equals(dataObject_Query)) return true; |
|---|
| 284 | } |
|---|
| 285 | return false; |
|---|
| 286 | } |
|---|
| 287 | |
|---|
| 288 | /** |
|---|
| 289 | * Inserts a new dataObject into the database |
|---|
| 290 | * @param dataObject |
|---|
| 291 | */ |
|---|
| 292 | public void insert(DataObject dataObject) { |
|---|
| 293 | treeMap.put(dataObject.getKey(), dataObject); |
|---|
| 294 | } |
|---|
| 295 | |
|---|
| 296 | /** |
|---|
| 297 | * Returns the original instances delivered from WEKA |
|---|
| 298 | * @return instances |
|---|
| 299 | */ |
|---|
| 300 | public Instances getInstances() { |
|---|
| 301 | return instances; |
|---|
| 302 | } |
|---|
| 303 | |
|---|
| 304 | /** |
|---|
| 305 | * Returns the revision string. |
|---|
| 306 | * |
|---|
| 307 | * @return the revision |
|---|
| 308 | */ |
|---|
| 309 | public String getRevision() { |
|---|
| 310 | return RevisionUtils.extract("$Revision: 1.4 $"); |
|---|
| 311 | } |
|---|
| 312 | } |
|---|