1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * Copyright (C) 2004 |
---|
19 | * & Matthias Schubert (schubert@dbs.ifi.lmu.de) |
---|
20 | * & Zhanna Melnikova-Albrecht (melnikov@cip.ifi.lmu.de) |
---|
21 | * & Rainer Holzmann (holzmann@cip.ifi.lmu.de) |
---|
22 | */ |
---|
23 | |
---|
24 | package weka.clusterers; |
---|
25 | |
---|
26 | import weka.clusterers.forOPTICSAndDBScan.DataObjects.DataObject; |
---|
27 | import weka.clusterers.forOPTICSAndDBScan.Databases.Database; |
---|
28 | import weka.clusterers.forOPTICSAndDBScan.OPTICS_GUI.OPTICS_Visualizer; |
---|
29 | import weka.clusterers.forOPTICSAndDBScan.OPTICS_GUI.SERObject; |
---|
30 | import weka.clusterers.forOPTICSAndDBScan.Utils.EpsilonRange_ListElement; |
---|
31 | import weka.clusterers.forOPTICSAndDBScan.Utils.UpdateQueue; |
---|
32 | import weka.clusterers.forOPTICSAndDBScan.Utils.UpdateQueueElement; |
---|
33 | import weka.core.Capabilities; |
---|
34 | import weka.core.FastVector; |
---|
35 | import weka.core.Instance; |
---|
36 | import weka.core.Instances; |
---|
37 | import weka.core.Option; |
---|
38 | import weka.core.OptionHandler; |
---|
39 | import weka.core.RevisionUtils; |
---|
40 | import weka.core.TechnicalInformation; |
---|
41 | import weka.core.TechnicalInformationHandler; |
---|
42 | import weka.core.Utils; |
---|
43 | import weka.core.Capabilities.Capability; |
---|
44 | import weka.core.TechnicalInformation.Field; |
---|
45 | import weka.core.TechnicalInformation.Type; |
---|
46 | import weka.filters.Filter; |
---|
47 | import weka.filters.unsupervised.attribute.ReplaceMissingValues; |
---|
48 | |
---|
49 | import java.io.BufferedWriter; |
---|
50 | import java.io.File; |
---|
51 | import java.io.FileOutputStream; |
---|
52 | import java.io.FileWriter; |
---|
53 | import java.io.ObjectOutputStream; |
---|
54 | import java.lang.reflect.Constructor; |
---|
55 | import java.lang.reflect.InvocationTargetException; |
---|
56 | import java.text.DecimalFormat; |
---|
57 | import java.util.Calendar; |
---|
58 | import java.util.Enumeration; |
---|
59 | import java.util.GregorianCalendar; |
---|
60 | import java.util.Iterator; |
---|
61 | import java.util.List; |
---|
62 | import java.util.Vector; |
---|
63 | |
---|
64 | /** |
---|
65 | <!-- globalinfo-start --> |
---|
66 | * Mihael Ankerst, Markus M. Breunig, Hans-Peter Kriegel, Joerg Sander: OPTICS: Ordering Points To Identify the Clustering Structure. In: ACM SIGMOD International Conference on Management of Data, 49-60, 1999. |
---|
67 | * <p/> |
---|
68 | <!-- globalinfo-end --> |
---|
69 | * |
---|
70 | <!-- technical-bibtex-start --> |
---|
71 | * BibTeX: |
---|
72 | * <pre> |
---|
73 | * @inproceedings{Ankerst1999, |
---|
74 | * author = {Mihael Ankerst and Markus M. Breunig and Hans-Peter Kriegel and Joerg Sander}, |
---|
75 | * booktitle = {ACM SIGMOD International Conference on Management of Data}, |
---|
76 | * pages = {49-60}, |
---|
77 | * publisher = {ACM Press}, |
---|
78 | * title = {OPTICS: Ordering Points To Identify the Clustering Structure}, |
---|
79 | * year = {1999} |
---|
80 | * } |
---|
81 | * </pre> |
---|
82 | * <p/> |
---|
83 | <!-- technical-bibtex-end --> |
---|
84 | * |
---|
85 | <!-- options-start --> |
---|
86 | * Valid options are: <p/> |
---|
87 | * |
---|
88 | * <pre> -E <double> |
---|
89 | * epsilon (default = 0.9)</pre> |
---|
90 | * |
---|
91 | * <pre> -M <int> |
---|
92 | * minPoints (default = 6)</pre> |
---|
93 | * |
---|
94 | * <pre> -I <String> |
---|
95 | * index (database) used for OPTICS (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase)</pre> |
---|
96 | * |
---|
97 | * <pre> -D <String> |
---|
98 | * distance-type (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject)</pre> |
---|
99 | * |
---|
100 | * <pre> -F |
---|
101 | * write results to OPTICS_#TimeStamp#.TXT - File</pre> |
---|
102 | * |
---|
103 | * <pre> -no-gui |
---|
104 | * suppress the display of the GUI after building the clusterer</pre> |
---|
105 | * |
---|
106 | * <pre> -db-output <file> |
---|
107 | * The file to save the generated database to. If a directory |
---|
108 | * is provided, the database doesn't get saved. |
---|
109 | * The generated file can be viewed with the OPTICS Visualizer: |
---|
110 | * java weka.clusterers.forOPTICSAndDBScan.OPTICS_GUI.OPTICS_Visualizer [file.ser] |
---|
111 | * (default: .)</pre> |
---|
112 | * |
---|
113 | <!-- options-end --> |
---|
114 | * |
---|
115 | * @author Matthias Schubert (schubert@dbs.ifi.lmu.de) |
---|
116 | * @author Zhanna Melnikova-Albrecht (melnikov@cip.ifi.lmu.de) |
---|
117 | * @author Rainer Holzmann (holzmann@cip.ifi.lmu.de) |
---|
118 | * @version $Revision: 5488 $ |
---|
119 | */ |
---|
120 | public class OPTICS |
---|
121 | extends AbstractClusterer |
---|
122 | implements OptionHandler, TechnicalInformationHandler { |
---|
123 | |
---|
124 | /** for serialization */ |
---|
125 | static final long serialVersionUID = 274552680222105221L; |
---|
126 | |
---|
127 | /** |
---|
128 | * Specifies the radius for a range-query |
---|
129 | */ |
---|
130 | private double epsilon = 0.9; |
---|
131 | |
---|
132 | /** |
---|
133 | * Specifies the density (the range-query must contain at least minPoints DataObjects) |
---|
134 | */ |
---|
135 | private int minPoints = 6; |
---|
136 | |
---|
137 | /** |
---|
138 | * Replace missing values in training instances |
---|
139 | */ |
---|
140 | private ReplaceMissingValues replaceMissingValues_Filter; |
---|
141 | |
---|
142 | /** |
---|
143 | * Holds the number of clusters generated |
---|
144 | */ |
---|
145 | private int numberOfGeneratedClusters; |
---|
146 | |
---|
147 | /** |
---|
148 | * Holds the distance-type that is used |
---|
149 | * (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject) |
---|
150 | */ |
---|
151 | private String database_distanceType = "weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject"; |
---|
152 | |
---|
153 | /** |
---|
154 | * Holds the type of the used database |
---|
155 | * (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase) |
---|
156 | */ |
---|
157 | private String database_Type = "weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase"; |
---|
158 | |
---|
159 | /** |
---|
160 | * The database that is used for OPTICS |
---|
161 | */ |
---|
162 | private Database database; |
---|
163 | |
---|
164 | /** |
---|
165 | * Holds the time-value (seconds) for the duration of the clustering-process |
---|
166 | */ |
---|
167 | private double elapsedTime; |
---|
168 | |
---|
169 | /** |
---|
170 | * Flag that indicates if the results are written to a file or not |
---|
171 | */ |
---|
172 | private boolean writeOPTICSresults = false; |
---|
173 | |
---|
174 | /** |
---|
175 | * Holds the ClusterOrder (dataObjects with their r_dist and c_dist) for the GUI |
---|
176 | */ |
---|
177 | private FastVector resultVector; |
---|
178 | |
---|
179 | /** whether to display the GUI after building the clusterer or not. */ |
---|
180 | private boolean showGUI = true; |
---|
181 | |
---|
182 | /** the file to save the generated database object to. */ |
---|
183 | private File databaseOutput = new File("."); |
---|
184 | |
---|
185 | // ***************************************************************************************************************** |
---|
186 | // constructors |
---|
187 | // ***************************************************************************************************************** |
---|
188 | |
---|
189 | // ***************************************************************************************************************** |
---|
190 | // methods |
---|
191 | // ***************************************************************************************************************** |
---|
192 | |
---|
193 | /** |
---|
194 | * Returns default capabilities of the clusterer. |
---|
195 | * |
---|
196 | * @return the capabilities of this clusterer |
---|
197 | */ |
---|
198 | public Capabilities getCapabilities() { |
---|
199 | Capabilities result = super.getCapabilities(); |
---|
200 | result.disableAll(); |
---|
201 | result.enable(Capability.NO_CLASS); |
---|
202 | |
---|
203 | // attributes |
---|
204 | result.enable(Capability.NOMINAL_ATTRIBUTES); |
---|
205 | result.enable(Capability.NUMERIC_ATTRIBUTES); |
---|
206 | result.enable(Capability.DATE_ATTRIBUTES); |
---|
207 | result.enable(Capability.MISSING_VALUES); |
---|
208 | |
---|
209 | return result; |
---|
210 | } |
---|
211 | |
---|
212 | /** |
---|
213 | * Generate Clustering via OPTICS |
---|
214 | * @param instances The instances that need to be clustered |
---|
215 | * @throws java.lang.Exception If clustering was not successful |
---|
216 | */ |
---|
217 | public void buildClusterer(Instances instances) throws Exception { |
---|
218 | // can clusterer handle the data? |
---|
219 | getCapabilities().testWithFail(instances); |
---|
220 | |
---|
221 | resultVector = new FastVector(); |
---|
222 | long time_1 = System.currentTimeMillis(); |
---|
223 | |
---|
224 | numberOfGeneratedClusters = 0; |
---|
225 | |
---|
226 | replaceMissingValues_Filter = new ReplaceMissingValues(); |
---|
227 | replaceMissingValues_Filter.setInputFormat(instances); |
---|
228 | Instances filteredInstances = Filter.useFilter(instances, replaceMissingValues_Filter); |
---|
229 | |
---|
230 | database = databaseForName(getDatabase_Type(), filteredInstances); |
---|
231 | for (int i = 0; i < database.getInstances().numInstances(); i++) { |
---|
232 | DataObject dataObject = dataObjectForName(getDatabase_distanceType(), |
---|
233 | database.getInstances().instance(i), |
---|
234 | Integer.toString(i), |
---|
235 | database); |
---|
236 | database.insert(dataObject); |
---|
237 | } |
---|
238 | database.setMinMaxValues(); |
---|
239 | |
---|
240 | UpdateQueue seeds = new UpdateQueue(); |
---|
241 | |
---|
242 | /** OPTICS-Begin */ |
---|
243 | Iterator iterator = database.dataObjectIterator(); |
---|
244 | while (iterator.hasNext()) { |
---|
245 | DataObject dataObject = (DataObject) iterator.next(); |
---|
246 | if (!dataObject.isProcessed()) { |
---|
247 | expandClusterOrder(dataObject, seeds); |
---|
248 | } |
---|
249 | } |
---|
250 | |
---|
251 | long time_2 = System.currentTimeMillis(); |
---|
252 | elapsedTime = (double) (time_2 - time_1) / 1000.0; |
---|
253 | |
---|
254 | if (writeOPTICSresults) { |
---|
255 | String fileName = ""; |
---|
256 | GregorianCalendar gregorianCalendar = new GregorianCalendar(); |
---|
257 | String timeStamp = gregorianCalendar.get(Calendar.DAY_OF_MONTH) + "-" + |
---|
258 | (gregorianCalendar.get(Calendar.MONTH) + 1) + |
---|
259 | "-" + gregorianCalendar.get(Calendar.YEAR) + |
---|
260 | "--" + gregorianCalendar.get(Calendar.HOUR_OF_DAY) + |
---|
261 | "-" + gregorianCalendar.get(Calendar.MINUTE) + |
---|
262 | "-" + gregorianCalendar.get(Calendar.SECOND); |
---|
263 | fileName = "OPTICS_" + timeStamp + ".TXT"; |
---|
264 | |
---|
265 | FileWriter fileWriter = new FileWriter(fileName); |
---|
266 | BufferedWriter bufferedOPTICSWriter = new BufferedWriter(fileWriter); |
---|
267 | for (int i = 0; i < resultVector.size(); i++) { |
---|
268 | bufferedOPTICSWriter.write(format_dataObject((DataObject) resultVector.elementAt(i))); |
---|
269 | } |
---|
270 | bufferedOPTICSWriter.flush(); |
---|
271 | bufferedOPTICSWriter.close(); |
---|
272 | } |
---|
273 | |
---|
274 | // explicit file provided to write the generated database to? |
---|
275 | if (!databaseOutput.isDirectory()) { |
---|
276 | try { |
---|
277 | FileOutputStream fos = new FileOutputStream(databaseOutput); |
---|
278 | ObjectOutputStream oos = new ObjectOutputStream(fos); |
---|
279 | oos.writeObject(getSERObject()); |
---|
280 | oos.flush(); |
---|
281 | oos.close(); |
---|
282 | fos.close(); |
---|
283 | } |
---|
284 | catch (Exception e) { |
---|
285 | System.err.println( |
---|
286 | "Error writing generated database to file '" + getDatabaseOutput() + "': " |
---|
287 | + e); |
---|
288 | e.printStackTrace(); |
---|
289 | } |
---|
290 | } |
---|
291 | |
---|
292 | if (showGUI) |
---|
293 | new OPTICS_Visualizer(getSERObject(), "OPTICS Visualizer - Main Window"); |
---|
294 | } |
---|
295 | |
---|
296 | /** |
---|
297 | * Expands the ClusterOrder for this dataObject |
---|
298 | * @param dataObject Start-DataObject |
---|
299 | * @param seeds SeedList that stores dataObjects with reachability-distances |
---|
300 | */ |
---|
301 | private void expandClusterOrder(DataObject dataObject, UpdateQueue seeds) { |
---|
302 | List list = database.coreDistance(getMinPoints(), getEpsilon(), dataObject); |
---|
303 | List epsilonRange_List = (List) list.get(1); |
---|
304 | dataObject.setReachabilityDistance(DataObject.UNDEFINED); |
---|
305 | dataObject.setCoreDistance(((Double) list.get(2)).doubleValue()); |
---|
306 | dataObject.setProcessed(true); |
---|
307 | |
---|
308 | resultVector.addElement(dataObject); |
---|
309 | |
---|
310 | if (dataObject.getCoreDistance() != DataObject.UNDEFINED) { |
---|
311 | update(seeds, epsilonRange_List, dataObject); |
---|
312 | while (seeds.hasNext()) { |
---|
313 | UpdateQueueElement updateQueueElement = seeds.next(); |
---|
314 | DataObject currentDataObject = (DataObject) updateQueueElement.getObject(); |
---|
315 | currentDataObject.setReachabilityDistance(updateQueueElement.getPriority()); |
---|
316 | List list_1 = database.coreDistance(getMinPoints(), getEpsilon(), currentDataObject); |
---|
317 | List epsilonRange_List_1 = (List) list_1.get(1); |
---|
318 | currentDataObject.setCoreDistance(((Double) list_1.get(2)).doubleValue()); |
---|
319 | currentDataObject.setProcessed(true); |
---|
320 | |
---|
321 | resultVector.addElement(currentDataObject); |
---|
322 | |
---|
323 | if (currentDataObject.getCoreDistance() != DataObject.UNDEFINED) { |
---|
324 | update(seeds, epsilonRange_List_1, currentDataObject); |
---|
325 | } |
---|
326 | } |
---|
327 | } |
---|
328 | } |
---|
329 | |
---|
330 | /** |
---|
331 | * Wraps the dataObject into a String, that contains the dataObject's key, the dataObject itself, |
---|
332 | * the coreDistance and its reachabilityDistance in a formatted manner. |
---|
333 | * @param dataObject The dataObject that is wrapped into a formatted string. |
---|
334 | * @return String Formatted string |
---|
335 | */ |
---|
336 | private String format_dataObject(DataObject dataObject) { |
---|
337 | StringBuffer stringBuffer = new StringBuffer(); |
---|
338 | |
---|
339 | stringBuffer.append("(" + Utils.doubleToString(Double.parseDouble(dataObject.getKey()), |
---|
340 | (Integer.toString(database.size()).length()), 0) + ".) " |
---|
341 | + Utils.padRight(dataObject.toString(), 40) + " --> c_dist: " + |
---|
342 | |
---|
343 | ((dataObject.getCoreDistance() == DataObject.UNDEFINED) ? |
---|
344 | Utils.padRight("UNDEFINED", 12) : |
---|
345 | Utils.padRight(Utils.doubleToString(dataObject.getCoreDistance(), 2, 3), 12)) + |
---|
346 | |
---|
347 | " r_dist: " + |
---|
348 | ((dataObject.getReachabilityDistance() == DataObject.UNDEFINED) ? |
---|
349 | Utils.padRight("UNDEFINED", 12) : |
---|
350 | Utils.doubleToString(dataObject.getReachabilityDistance(), 2, 3)) + "\n"); |
---|
351 | |
---|
352 | return stringBuffer.toString(); |
---|
353 | } |
---|
354 | |
---|
355 | /** |
---|
356 | * Updates reachability-distances in the Seeds-List |
---|
357 | * @param seeds UpdateQueue that holds DataObjects with their corresponding reachability-distances |
---|
358 | * @param epsilonRange_list List of DataObjects that were found in epsilon-range of centralObject |
---|
359 | * @param centralObject |
---|
360 | */ |
---|
361 | private void update(UpdateQueue seeds, List epsilonRange_list, DataObject centralObject) { |
---|
362 | double coreDistance = centralObject.getCoreDistance(); |
---|
363 | double new_r_dist = DataObject.UNDEFINED; |
---|
364 | |
---|
365 | for (int i = 0; i < epsilonRange_list.size(); i++) { |
---|
366 | EpsilonRange_ListElement listElement = (EpsilonRange_ListElement) epsilonRange_list.get(i); |
---|
367 | DataObject neighbourhood_object = listElement.getDataObject(); |
---|
368 | if (!neighbourhood_object.isProcessed()) { |
---|
369 | new_r_dist = Math.max(coreDistance, listElement.getDistance()); |
---|
370 | seeds.add(new_r_dist, neighbourhood_object, neighbourhood_object.getKey()); |
---|
371 | } |
---|
372 | } |
---|
373 | } |
---|
374 | |
---|
375 | /** |
---|
376 | * Classifies a given instance. |
---|
377 | * |
---|
378 | * @param instance The instance to be assigned to a cluster |
---|
379 | * @return int The number of the assigned cluster as an integer |
---|
380 | * @throws java.lang.Exception If instance could not be clustered |
---|
381 | * successfully |
---|
382 | */ |
---|
383 | public int clusterInstance(Instance instance) throws Exception { |
---|
384 | throw new Exception(); |
---|
385 | } |
---|
386 | |
---|
387 | /** |
---|
388 | * Returns the number of clusters. |
---|
389 | * |
---|
390 | * @return int The number of clusters generated for a training dataset. |
---|
391 | * @throws java.lang.Exception If number of clusters could not be returned |
---|
392 | * successfully |
---|
393 | */ |
---|
394 | public int numberOfClusters() throws Exception { |
---|
395 | return numberOfGeneratedClusters; |
---|
396 | } |
---|
397 | |
---|
398 | /** |
---|
399 | * Returns an enumeration of all the available options. |
---|
400 | * |
---|
401 | * @return Enumeration An enumeration of all available options. |
---|
402 | */ |
---|
403 | public Enumeration listOptions() { |
---|
404 | Vector vector = new Vector(); |
---|
405 | |
---|
406 | vector.addElement( |
---|
407 | new Option( |
---|
408 | "\tepsilon (default = 0.9)", |
---|
409 | "E", 1, "-E <double>")); |
---|
410 | |
---|
411 | vector.addElement( |
---|
412 | new Option("\tminPoints (default = 6)", |
---|
413 | "M", 1, "-M <int>")); |
---|
414 | |
---|
415 | vector.addElement( |
---|
416 | new Option( |
---|
417 | "\tindex (database) used for OPTICS (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase)", |
---|
418 | "I", 1, "-I <String>")); |
---|
419 | |
---|
420 | vector.addElement( |
---|
421 | new Option( |
---|
422 | "\tdistance-type (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject)", |
---|
423 | "D", 1, "-D <String>")); |
---|
424 | |
---|
425 | vector.addElement( |
---|
426 | new Option( |
---|
427 | "\twrite results to OPTICS_#TimeStamp#.TXT - File", |
---|
428 | "F", 0, "-F")); |
---|
429 | |
---|
430 | vector.addElement( |
---|
431 | new Option( |
---|
432 | "\tsuppress the display of the GUI after building the clusterer", |
---|
433 | "no-gui", 0, "-no-gui")); |
---|
434 | |
---|
435 | vector.addElement( |
---|
436 | new Option( |
---|
437 | "\tThe file to save the generated database to. If a directory\n" |
---|
438 | + "\tis provided, the database doesn't get saved.\n" |
---|
439 | + "\tThe generated file can be viewed with the OPTICS Visualizer:\n" |
---|
440 | + "\t java " + OPTICS_Visualizer.class.getName() + " [file.ser]\n" |
---|
441 | + "\t(default: .)", |
---|
442 | "db-output", 1, "-db-output <file>")); |
---|
443 | |
---|
444 | return vector.elements(); |
---|
445 | } |
---|
446 | |
---|
447 | /** |
---|
448 | * Sets the OptionHandler's options using the given list. All options |
---|
449 | * will be set (or reset) during this call (i.e. incremental setting |
---|
450 | * of options is not possible). <p/> |
---|
451 | * |
---|
452 | <!-- options-start --> |
---|
453 | * Valid options are: <p/> |
---|
454 | * |
---|
455 | * <pre> -E <double> |
---|
456 | * epsilon (default = 0.9)</pre> |
---|
457 | * |
---|
458 | * <pre> -M <int> |
---|
459 | * minPoints (default = 6)</pre> |
---|
460 | * |
---|
461 | * <pre> -I <String> |
---|
462 | * index (database) used for OPTICS (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase)</pre> |
---|
463 | * |
---|
464 | * <pre> -D <String> |
---|
465 | * distance-type (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject)</pre> |
---|
466 | * |
---|
467 | * <pre> -F |
---|
468 | * write results to OPTICS_#TimeStamp#.TXT - File</pre> |
---|
469 | * |
---|
470 | * <pre> -no-gui |
---|
471 | * suppress the display of the GUI after building the clusterer</pre> |
---|
472 | * |
---|
473 | * <pre> -db-output <file> |
---|
474 | * The file to save the generated database to. If a directory |
---|
475 | * is provided, the database doesn't get saved. |
---|
476 | * The generated file can be viewed with the OPTICS Visualizer: |
---|
477 | * java weka.clusterers.forOPTICSAndDBScan.OPTICS_GUI.OPTICS_Visualizer [file.ser] |
---|
478 | * (default: .)</pre> |
---|
479 | * |
---|
480 | <!-- options-end --> |
---|
481 | * |
---|
482 | * @param options The list of options as an array of strings |
---|
483 | * @throws java.lang.Exception If an option is not supported |
---|
484 | */ |
---|
485 | public void setOptions(String[] options) throws Exception { |
---|
486 | String optionString = Utils.getOption('E', options); |
---|
487 | if (optionString.length() != 0) |
---|
488 | setEpsilon(Double.parseDouble(optionString)); |
---|
489 | else |
---|
490 | setEpsilon(0.9); |
---|
491 | |
---|
492 | optionString = Utils.getOption('M', options); |
---|
493 | if (optionString.length() != 0) |
---|
494 | setMinPoints(Integer.parseInt(optionString)); |
---|
495 | else |
---|
496 | setMinPoints(6); |
---|
497 | |
---|
498 | optionString = Utils.getOption('I', options); |
---|
499 | if (optionString.length() != 0) |
---|
500 | setDatabase_Type(optionString); |
---|
501 | else |
---|
502 | setDatabase_Type(weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase.class.getName()); |
---|
503 | |
---|
504 | optionString = Utils.getOption('D', options); |
---|
505 | if (optionString.length() != 0) |
---|
506 | setDatabase_distanceType(optionString); |
---|
507 | else |
---|
508 | setDatabase_distanceType(weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject.class.getName()); |
---|
509 | |
---|
510 | setWriteOPTICSresults(Utils.getFlag('F', options)); |
---|
511 | |
---|
512 | setShowGUI(!Utils.getFlag("no-gui", options)); |
---|
513 | |
---|
514 | optionString = Utils.getOption("db-output", options); |
---|
515 | if (optionString.length() != 0) |
---|
516 | setDatabaseOutput(new File(optionString)); |
---|
517 | else |
---|
518 | setDatabaseOutput(new File(".")); |
---|
519 | } |
---|
520 | |
---|
521 | /** |
---|
522 | * Gets the current option settings for the OptionHandler. |
---|
523 | * |
---|
524 | * @return String[] The list of current option settings as an array of strings |
---|
525 | */ |
---|
526 | public String[] getOptions() { |
---|
527 | Vector<String> result; |
---|
528 | |
---|
529 | result = new Vector<String>(); |
---|
530 | |
---|
531 | result.add("-E"); |
---|
532 | result.add("" + getEpsilon()); |
---|
533 | |
---|
534 | result.add("-M"); |
---|
535 | result.add("" + getMinPoints()); |
---|
536 | |
---|
537 | result.add("-I"); |
---|
538 | result.add("" + getDatabase_Type()); |
---|
539 | |
---|
540 | result.add("-D"); |
---|
541 | result.add("" + getDatabase_distanceType()); |
---|
542 | |
---|
543 | if (getWriteOPTICSresults()) |
---|
544 | result.add("-F"); |
---|
545 | |
---|
546 | if (!getShowGUI()) |
---|
547 | result.add("-no-gui"); |
---|
548 | |
---|
549 | result.add("-db-output"); |
---|
550 | result.add("" + getDatabaseOutput()); |
---|
551 | |
---|
552 | return result.toArray(new String[result.size()]); |
---|
553 | } |
---|
554 | |
---|
555 | /** |
---|
556 | * Returns a new Class-Instance of the specified database |
---|
557 | * @param database_Type String of the specified database |
---|
558 | * @param instances Instances that were delivered from WEKA |
---|
559 | * @return Database New constructed Database |
---|
560 | */ |
---|
561 | public Database databaseForName(String database_Type, Instances instances) { |
---|
562 | Object o = null; |
---|
563 | |
---|
564 | Constructor co = null; |
---|
565 | try { |
---|
566 | co = (Class.forName(database_Type)).getConstructor(new Class[]{Instances.class}); |
---|
567 | o = co.newInstance(new Object[]{instances}); |
---|
568 | } catch (NoSuchMethodException e) { |
---|
569 | e.printStackTrace(); |
---|
570 | } catch (SecurityException e) { |
---|
571 | e.printStackTrace(); |
---|
572 | } catch (ClassNotFoundException e) { |
---|
573 | e.printStackTrace(); |
---|
574 | } catch (InstantiationException e) { |
---|
575 | e.printStackTrace(); |
---|
576 | } catch (IllegalAccessException e) { |
---|
577 | e.printStackTrace(); |
---|
578 | } catch (InvocationTargetException e) { |
---|
579 | e.printStackTrace(); |
---|
580 | } |
---|
581 | |
---|
582 | return (Database) o; |
---|
583 | } |
---|
584 | |
---|
585 | /** |
---|
586 | * Returns a new Class-Instance of the specified database |
---|
587 | * @param database_distanceType String of the specified distance-type |
---|
588 | * @param instance The original instance that needs to hold by this DataObject |
---|
589 | * @param key Key for this DataObject |
---|
590 | * @param database Link to the database |
---|
591 | * @return DataObject New constructed DataObject |
---|
592 | */ |
---|
593 | public DataObject dataObjectForName(String database_distanceType, Instance instance, String key, Database database) { |
---|
594 | Object o = null; |
---|
595 | |
---|
596 | Constructor co = null; |
---|
597 | try { |
---|
598 | co = (Class.forName(database_distanceType)). |
---|
599 | getConstructor(new Class[]{Instance.class, String.class, Database.class}); |
---|
600 | o = co.newInstance(new Object[]{instance, key, database}); |
---|
601 | } catch (NoSuchMethodException e) { |
---|
602 | e.printStackTrace(); |
---|
603 | } catch (SecurityException e) { |
---|
604 | e.printStackTrace(); |
---|
605 | } catch (ClassNotFoundException e) { |
---|
606 | e.printStackTrace(); |
---|
607 | } catch (InstantiationException e) { |
---|
608 | e.printStackTrace(); |
---|
609 | } catch (IllegalAccessException e) { |
---|
610 | e.printStackTrace(); |
---|
611 | } catch (InvocationTargetException e) { |
---|
612 | e.printStackTrace(); |
---|
613 | } |
---|
614 | |
---|
615 | return (DataObject) o; |
---|
616 | } |
---|
617 | |
---|
618 | /** |
---|
619 | * Sets a new value for minPoints |
---|
620 | * @param minPoints MinPoints |
---|
621 | */ |
---|
622 | public void setMinPoints(int minPoints) { |
---|
623 | this.minPoints = minPoints; |
---|
624 | } |
---|
625 | |
---|
626 | /** |
---|
627 | * Sets a new value for epsilon |
---|
628 | * @param epsilon Epsilon |
---|
629 | */ |
---|
630 | public void setEpsilon(double epsilon) { |
---|
631 | this.epsilon = epsilon; |
---|
632 | } |
---|
633 | |
---|
634 | /** |
---|
635 | * Returns the value of epsilon |
---|
636 | * @return double Epsilon |
---|
637 | */ |
---|
638 | public double getEpsilon() { |
---|
639 | return epsilon; |
---|
640 | } |
---|
641 | |
---|
642 | /** |
---|
643 | * Returns the value of minPoints |
---|
644 | * @return int MinPoints |
---|
645 | */ |
---|
646 | public int getMinPoints() { |
---|
647 | return minPoints; |
---|
648 | } |
---|
649 | |
---|
650 | /** |
---|
651 | * Returns the distance-type |
---|
652 | * @return String Distance-type |
---|
653 | */ |
---|
654 | public String getDatabase_distanceType() { |
---|
655 | return database_distanceType; |
---|
656 | } |
---|
657 | |
---|
658 | /** |
---|
659 | * Returns the type of the used index (database) |
---|
660 | * @return String Index-type |
---|
661 | */ |
---|
662 | public String getDatabase_Type() { |
---|
663 | return database_Type; |
---|
664 | } |
---|
665 | |
---|
666 | /** |
---|
667 | * Sets a new distance-type |
---|
668 | * @param database_distanceType The new distance-type |
---|
669 | */ |
---|
670 | public void setDatabase_distanceType(String database_distanceType) { |
---|
671 | this.database_distanceType = database_distanceType; |
---|
672 | } |
---|
673 | |
---|
674 | /** |
---|
675 | * Sets a new database-type |
---|
676 | * @param database_Type The new database-type |
---|
677 | */ |
---|
678 | public void setDatabase_Type(String database_Type) { |
---|
679 | this.database_Type = database_Type; |
---|
680 | } |
---|
681 | |
---|
682 | /** |
---|
683 | * Returns the flag for writing actions |
---|
684 | * @return writeOPTICSresults (flag) |
---|
685 | */ |
---|
686 | public boolean getWriteOPTICSresults() { |
---|
687 | return writeOPTICSresults; |
---|
688 | } |
---|
689 | |
---|
690 | /** |
---|
691 | * Sets the flag for writing actions |
---|
692 | * @param writeOPTICSresults Results are written to a file if the flag is set |
---|
693 | */ |
---|
694 | public void setWriteOPTICSresults(boolean writeOPTICSresults) { |
---|
695 | this.writeOPTICSresults = writeOPTICSresults; |
---|
696 | } |
---|
697 | |
---|
698 | /** |
---|
699 | * Returns the flag for showing the OPTICS visualizer GUI. |
---|
700 | * |
---|
701 | * @return true if the GUI is displayed |
---|
702 | */ |
---|
703 | public boolean getShowGUI() { |
---|
704 | return showGUI; |
---|
705 | } |
---|
706 | |
---|
707 | /** |
---|
708 | * Sets the flag for displaying the GUI. |
---|
709 | * |
---|
710 | * @param value if true, then the OPTICS visualizer GUI will be |
---|
711 | * displayed after building the clusterer |
---|
712 | */ |
---|
713 | public void setShowGUI(boolean value) { |
---|
714 | showGUI = value; |
---|
715 | } |
---|
716 | |
---|
717 | /** |
---|
718 | * Returns the file to save the database to - if directory, database is not |
---|
719 | * saved. |
---|
720 | * |
---|
721 | * @return the file to save the database to a directory if saving |
---|
722 | * is ignored |
---|
723 | */ |
---|
724 | public File getDatabaseOutput() { |
---|
725 | return databaseOutput; |
---|
726 | } |
---|
727 | |
---|
728 | /** |
---|
729 | * Sets the the file to save the generated database to. If a directory |
---|
730 | * is provided, the datbase doesn't get saved. |
---|
731 | * |
---|
732 | * @param value the file to save the database to or a directory if |
---|
733 | * saving is to be ignored |
---|
734 | */ |
---|
735 | public void setDatabaseOutput(File value) { |
---|
736 | databaseOutput = value; |
---|
737 | } |
---|
738 | |
---|
739 | /** |
---|
740 | * Returns the resultVector |
---|
741 | * @return resultVector |
---|
742 | */ |
---|
743 | public FastVector getResultVector() { |
---|
744 | return resultVector; |
---|
745 | } |
---|
746 | |
---|
747 | /** |
---|
748 | * Returns the tip text for this property |
---|
749 | * @return tip text for this property suitable for |
---|
750 | * displaying in the explorer/experimenter gui |
---|
751 | */ |
---|
752 | public String epsilonTipText() { |
---|
753 | return "radius of the epsilon-range-queries"; |
---|
754 | } |
---|
755 | |
---|
756 | /** |
---|
757 | * Returns the tip text for this property |
---|
758 | * @return tip text for this property suitable for |
---|
759 | * displaying in the explorer/experimenter gui |
---|
760 | */ |
---|
761 | public String minPointsTipText() { |
---|
762 | return "minimun number of DataObjects required in an epsilon-range-query"; |
---|
763 | } |
---|
764 | |
---|
765 | /** |
---|
766 | * Returns the tip text for this property |
---|
767 | * @return tip text for this property suitable for |
---|
768 | * displaying in the explorer/experimenter gui |
---|
769 | */ |
---|
770 | public String database_TypeTipText() { |
---|
771 | return "used database"; |
---|
772 | } |
---|
773 | |
---|
774 | /** |
---|
775 | * Returns the tip text for this property |
---|
776 | * @return tip text for this property suitable for |
---|
777 | * displaying in the explorer/experimenter gui |
---|
778 | */ |
---|
779 | public String database_distanceTypeTipText() { |
---|
780 | return "used distance-type"; |
---|
781 | } |
---|
782 | |
---|
783 | /** |
---|
784 | * Returns the tip text for this property |
---|
785 | * @return tip text for this property suitable for |
---|
786 | * displaying in the explorer/experimenter gui |
---|
787 | */ |
---|
788 | public String writeOPTICSresultsTipText() { |
---|
789 | return "if the -F option is set, the results are written to OPTICS_#TimeStamp#.TXT"; |
---|
790 | } |
---|
791 | |
---|
792 | /** |
---|
793 | * Returns the tip text for this property. |
---|
794 | * |
---|
795 | * @return tip text for this property suitable for |
---|
796 | * displaying in the explorer/experimenter gui |
---|
797 | */ |
---|
798 | public String showGUITipText() { |
---|
799 | return "Defines whether the OPTICS Visualizer is displayed after the clusterer has been built or not."; |
---|
800 | } |
---|
801 | |
---|
802 | /** |
---|
803 | * Returns the tip text for this property. |
---|
804 | * |
---|
805 | * @return tip text for this property suitable for |
---|
806 | * displaying in the explorer/experimenter gui |
---|
807 | */ |
---|
808 | public String databaseOutputTipText() { |
---|
809 | return |
---|
810 | "The optional output file for the generated database object - can " |
---|
811 | + "be viewed with the OPTICS Visualizer.\n" |
---|
812 | + "java " + OPTICS_Visualizer.class.getName() + " [file.ser]"; |
---|
813 | } |
---|
814 | |
---|
815 | /** |
---|
816 | * Returns a string describing this DataMining-Algorithm |
---|
817 | * @return String Information for the gui-explorer |
---|
818 | */ |
---|
819 | public String globalInfo() { |
---|
820 | return getTechnicalInformation().toString(); |
---|
821 | } |
---|
822 | |
---|
823 | /** |
---|
824 | * Returns an instance of a TechnicalInformation object, containing |
---|
825 | * detailed information about the technical background of this class, |
---|
826 | * e.g., paper reference or book this class is based on. |
---|
827 | * |
---|
828 | * @return the technical information about this class |
---|
829 | */ |
---|
830 | public TechnicalInformation getTechnicalInformation() { |
---|
831 | TechnicalInformation result; |
---|
832 | |
---|
833 | result = new TechnicalInformation(Type.INPROCEEDINGS); |
---|
834 | result.setValue(Field.AUTHOR, "Mihael Ankerst and Markus M. Breunig and Hans-Peter Kriegel and Joerg Sander"); |
---|
835 | result.setValue(Field.TITLE, "OPTICS: Ordering Points To Identify the Clustering Structure"); |
---|
836 | result.setValue(Field.BOOKTITLE, "ACM SIGMOD International Conference on Management of Data"); |
---|
837 | result.setValue(Field.YEAR, "1999"); |
---|
838 | result.setValue(Field.PAGES, "49-60"); |
---|
839 | result.setValue(Field.PUBLISHER, "ACM Press"); |
---|
840 | |
---|
841 | return result; |
---|
842 | } |
---|
843 | |
---|
844 | /** |
---|
845 | * Returns the internal database |
---|
846 | * |
---|
847 | * @return the internal database |
---|
848 | */ |
---|
849 | public SERObject getSERObject() { |
---|
850 | SERObject serObject = new SERObject(resultVector, |
---|
851 | database.size(), |
---|
852 | database.getInstances().numAttributes(), |
---|
853 | getEpsilon(), |
---|
854 | getMinPoints(), |
---|
855 | writeOPTICSresults, |
---|
856 | getDatabase_Type(), |
---|
857 | getDatabase_distanceType(), |
---|
858 | numberOfGeneratedClusters, |
---|
859 | Utils.doubleToString(elapsedTime, 3, 3)); |
---|
860 | return serObject; |
---|
861 | } |
---|
862 | |
---|
863 | /** |
---|
864 | * Returns a description of the clusterer |
---|
865 | * |
---|
866 | * @return the clusterer as string |
---|
867 | */ |
---|
868 | public String toString() { |
---|
869 | StringBuffer stringBuffer = new StringBuffer(); |
---|
870 | stringBuffer.append("OPTICS clustering results\n" + |
---|
871 | "============================================================================================\n\n"); |
---|
872 | stringBuffer.append("Clustered DataObjects: " + database.size() + "\n"); |
---|
873 | stringBuffer.append("Number of attributes: " + database.getInstances().numAttributes() + "\n"); |
---|
874 | stringBuffer.append("Epsilon: " + getEpsilon() + "; minPoints: " + getMinPoints() + "\n"); |
---|
875 | stringBuffer.append("Write results to file: " + (writeOPTICSresults ? "yes" : "no") + "\n"); |
---|
876 | stringBuffer.append("Index: " + getDatabase_Type() + "\n"); |
---|
877 | stringBuffer.append("Distance-type: " + getDatabase_distanceType() + "\n"); |
---|
878 | stringBuffer.append("Number of generated clusters: " + numberOfGeneratedClusters + "\n"); |
---|
879 | DecimalFormat decimalFormat = new DecimalFormat(".##"); |
---|
880 | stringBuffer.append("Elapsed time: " + decimalFormat.format(elapsedTime) + "\n\n"); |
---|
881 | |
---|
882 | for (int i = 0; i < resultVector.size(); i++) { |
---|
883 | stringBuffer.append(format_dataObject((DataObject) resultVector.elementAt(i))); |
---|
884 | } |
---|
885 | return stringBuffer.toString() + "\n"; |
---|
886 | } |
---|
887 | |
---|
888 | /** |
---|
889 | * Returns the revision string. |
---|
890 | * |
---|
891 | * @return the revision |
---|
892 | */ |
---|
893 | public String getRevision() { |
---|
894 | return RevisionUtils.extract("$Revision: 5488 $"); |
---|
895 | } |
---|
896 | |
---|
897 | /** |
---|
898 | * Main Method for testing OPTICS |
---|
899 | * @param args Valid parameters are: 'E' epsilon (default = 0.9); 'M' minPoints (default = 6); |
---|
900 | * 'I' index-type (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase); |
---|
901 | * 'D' distance-type (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject); |
---|
902 | * 'F' write results to OPTICS_#TimeStamp#.TXT - File |
---|
903 | */ |
---|
904 | public static void main(String[] args) { |
---|
905 | runClusterer(new OPTICS(), args); |
---|
906 | } |
---|
907 | } |
---|