source: src/main/java/weka/clusterers/OPTICS.java @ 8

Last change on this file since 8 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 31.6 KB
RevLine 
[4]1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Copyright (C) 2004
19 *    & Matthias Schubert (schubert@dbs.ifi.lmu.de)
20 *    & Zhanna Melnikova-Albrecht (melnikov@cip.ifi.lmu.de)
21 *    & Rainer Holzmann (holzmann@cip.ifi.lmu.de)
22 */
23
24package weka.clusterers;
25
26import weka.clusterers.forOPTICSAndDBScan.DataObjects.DataObject;
27import weka.clusterers.forOPTICSAndDBScan.Databases.Database;
28import weka.clusterers.forOPTICSAndDBScan.OPTICS_GUI.OPTICS_Visualizer;
29import weka.clusterers.forOPTICSAndDBScan.OPTICS_GUI.SERObject;
30import weka.clusterers.forOPTICSAndDBScan.Utils.EpsilonRange_ListElement;
31import weka.clusterers.forOPTICSAndDBScan.Utils.UpdateQueue;
32import weka.clusterers.forOPTICSAndDBScan.Utils.UpdateQueueElement;
33import weka.core.Capabilities;
34import weka.core.FastVector;
35import weka.core.Instance;
36import weka.core.Instances;
37import weka.core.Option;
38import weka.core.OptionHandler;
39import weka.core.RevisionUtils;
40import weka.core.TechnicalInformation;
41import weka.core.TechnicalInformationHandler;
42import weka.core.Utils;
43import weka.core.Capabilities.Capability;
44import weka.core.TechnicalInformation.Field;
45import weka.core.TechnicalInformation.Type;
46import weka.filters.Filter;
47import weka.filters.unsupervised.attribute.ReplaceMissingValues;
48
49import java.io.BufferedWriter;
50import java.io.File;
51import java.io.FileOutputStream;
52import java.io.FileWriter;
53import java.io.ObjectOutputStream;
54import java.lang.reflect.Constructor;
55import java.lang.reflect.InvocationTargetException;
56import java.text.DecimalFormat;
57import java.util.Calendar;
58import java.util.Enumeration;
59import java.util.GregorianCalendar;
60import java.util.Iterator;
61import java.util.List;
62import java.util.Vector;
63
64/**
65 <!-- globalinfo-start -->
66 * Mihael Ankerst, Markus M. Breunig, Hans-Peter Kriegel, Joerg Sander: OPTICS: Ordering Points To Identify the Clustering Structure. In: ACM SIGMOD International Conference on Management of Data, 49-60, 1999.
67 * <p/>
68 <!-- globalinfo-end -->
69 *
70 <!-- technical-bibtex-start -->
71 * BibTeX:
72 * <pre>
73 * &#64;inproceedings{Ankerst1999,
74 *    author = {Mihael Ankerst and Markus M. Breunig and Hans-Peter Kriegel and Joerg Sander},
75 *    booktitle = {ACM SIGMOD International Conference on Management of Data},
76 *    pages = {49-60},
77 *    publisher = {ACM Press},
78 *    title = {OPTICS: Ordering Points To Identify the Clustering Structure},
79 *    year = {1999}
80 * }
81 * </pre>
82 * <p/>
83 <!-- technical-bibtex-end -->
84 *
85 <!-- options-start -->
86 * Valid options are: <p/>
87 *
88 * <pre> -E &lt;double&gt;
89 *  epsilon (default = 0.9)</pre>
90 *
91 * <pre> -M &lt;int&gt;
92 *  minPoints (default = 6)</pre>
93 *
94 * <pre> -I &lt;String&gt;
95 *  index (database) used for OPTICS (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase)</pre>
96 *
97 * <pre> -D &lt;String&gt;
98 *  distance-type (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject)</pre>
99 *
100 * <pre> -F
101 *  write results to OPTICS_#TimeStamp#.TXT - File</pre>
102 *
103 * <pre> -no-gui
104 *  suppress the display of the GUI after building the clusterer</pre>
105 *
106 * <pre> -db-output &lt;file&gt;
107 *  The file to save the generated database to. If a directory
108 *  is provided, the database doesn't get saved.
109 *  The generated file can be viewed with the OPTICS Visualizer:
110 *    java weka.clusterers.forOPTICSAndDBScan.OPTICS_GUI.OPTICS_Visualizer [file.ser]
111 *  (default: .)</pre>
112 *
113 <!-- options-end -->
114 *
115 * @author Matthias Schubert (schubert@dbs.ifi.lmu.de)
116 * @author Zhanna Melnikova-Albrecht (melnikov@cip.ifi.lmu.de)
117 * @author Rainer Holzmann (holzmann@cip.ifi.lmu.de)
118 * @version $Revision: 5488 $
119 */
120public class OPTICS 
121    extends AbstractClusterer
122    implements OptionHandler, TechnicalInformationHandler {
123
124    /** for serialization */
125    static final long serialVersionUID = 274552680222105221L;
126 
127    /**
128     * Specifies the radius for a range-query
129     */
130    private double epsilon = 0.9;
131
132    /**
133     * Specifies the density (the range-query must contain at least minPoints DataObjects)
134     */
135    private int minPoints = 6;
136
137    /**
138     * Replace missing values in training instances
139     */
140    private ReplaceMissingValues replaceMissingValues_Filter;
141
142    /**
143     * Holds the number of clusters generated
144     */
145    private int numberOfGeneratedClusters;
146
147    /**
148     * Holds the distance-type that is used
149     * (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject)
150     */
151    private String database_distanceType = "weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject";
152
153    /**
154     * Holds the type of the used database
155     * (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase)
156     */
157    private String database_Type = "weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase";
158
159    /**
160     * The database that is used for OPTICS
161     */
162    private Database database;
163
164    /**
165     * Holds the time-value (seconds) for the duration of the clustering-process
166     */
167    private double elapsedTime;
168
169    /**
170     * Flag that indicates if the results are written to a file or not
171     */
172    private boolean writeOPTICSresults = false;
173
174    /**
175     * Holds the ClusterOrder (dataObjects with their r_dist and c_dist) for the GUI
176     */
177    private FastVector resultVector;
178
179    /** whether to display the GUI after building the clusterer or not. */
180    private boolean showGUI = true;
181   
182    /** the file to save the generated database object to. */
183    private File databaseOutput = new File(".");
184   
185    // *****************************************************************************************************************
186    // constructors
187    // *****************************************************************************************************************
188
189    // *****************************************************************************************************************
190    // methods
191    // *****************************************************************************************************************
192
193    /**
194     * Returns default capabilities of the clusterer.
195     *
196     * @return      the capabilities of this clusterer
197     */
198    public Capabilities getCapabilities() {
199      Capabilities result = super.getCapabilities();
200      result.disableAll();
201      result.enable(Capability.NO_CLASS);
202
203      // attributes
204      result.enable(Capability.NOMINAL_ATTRIBUTES);
205      result.enable(Capability.NUMERIC_ATTRIBUTES);
206      result.enable(Capability.DATE_ATTRIBUTES);
207      result.enable(Capability.MISSING_VALUES);
208
209      return result;
210    }
211
212    /**
213     * Generate Clustering via OPTICS
214     * @param instances The instances that need to be clustered
215     * @throws java.lang.Exception If clustering was not successful
216     */
217    public void buildClusterer(Instances instances) throws Exception {
218        // can clusterer handle the data?
219        getCapabilities().testWithFail(instances);
220
221        resultVector = new FastVector();
222        long time_1 = System.currentTimeMillis();
223
224        numberOfGeneratedClusters = 0;
225
226        replaceMissingValues_Filter = new ReplaceMissingValues();
227        replaceMissingValues_Filter.setInputFormat(instances);
228        Instances filteredInstances = Filter.useFilter(instances, replaceMissingValues_Filter);
229
230        database = databaseForName(getDatabase_Type(), filteredInstances);
231        for (int i = 0; i < database.getInstances().numInstances(); i++) {
232            DataObject dataObject = dataObjectForName(getDatabase_distanceType(),
233                    database.getInstances().instance(i),
234                    Integer.toString(i),
235                    database);
236            database.insert(dataObject);
237        }
238        database.setMinMaxValues();
239
240        UpdateQueue seeds = new UpdateQueue();
241
242        /** OPTICS-Begin */
243        Iterator iterator = database.dataObjectIterator();
244        while (iterator.hasNext()) {
245            DataObject dataObject = (DataObject) iterator.next();
246            if (!dataObject.isProcessed()) {
247                expandClusterOrder(dataObject, seeds);
248            }
249        }
250
251        long time_2 = System.currentTimeMillis();
252        elapsedTime = (double) (time_2 - time_1) / 1000.0;
253
254        if (writeOPTICSresults) {
255            String fileName = "";
256            GregorianCalendar gregorianCalendar = new GregorianCalendar();
257            String timeStamp = gregorianCalendar.get(Calendar.DAY_OF_MONTH) + "-" +
258                    (gregorianCalendar.get(Calendar.MONTH) + 1) +
259                    "-" + gregorianCalendar.get(Calendar.YEAR) +
260                    "--" + gregorianCalendar.get(Calendar.HOUR_OF_DAY) +
261                    "-" + gregorianCalendar.get(Calendar.MINUTE) +
262                    "-" + gregorianCalendar.get(Calendar.SECOND);
263            fileName = "OPTICS_" + timeStamp + ".TXT";
264
265            FileWriter fileWriter = new FileWriter(fileName);
266            BufferedWriter bufferedOPTICSWriter = new BufferedWriter(fileWriter);
267            for (int i = 0; i < resultVector.size(); i++) {
268                bufferedOPTICSWriter.write(format_dataObject((DataObject) resultVector.elementAt(i)));
269            }
270            bufferedOPTICSWriter.flush();
271            bufferedOPTICSWriter.close();
272        }
273
274        // explicit file provided to write the generated database to?
275        if (!databaseOutput.isDirectory()) {
276          try {
277            FileOutputStream fos = new FileOutputStream(databaseOutput);
278            ObjectOutputStream oos = new ObjectOutputStream(fos);
279            oos.writeObject(getSERObject());
280            oos.flush();
281            oos.close();
282            fos.close();
283          }
284          catch (Exception e) {
285            System.err.println(
286                "Error writing generated database to file '" + getDatabaseOutput() + "': " 
287                + e);
288            e.printStackTrace();
289          }
290        }
291       
292        if (showGUI)
293          new OPTICS_Visualizer(getSERObject(), "OPTICS Visualizer - Main Window");
294    }
295
296    /**
297     * Expands the ClusterOrder for this dataObject
298     * @param dataObject Start-DataObject
299     * @param seeds SeedList that stores dataObjects with reachability-distances
300     */
301    private void expandClusterOrder(DataObject dataObject, UpdateQueue seeds) {
302        List list = database.coreDistance(getMinPoints(), getEpsilon(), dataObject);
303        List epsilonRange_List = (List) list.get(1);
304        dataObject.setReachabilityDistance(DataObject.UNDEFINED);
305        dataObject.setCoreDistance(((Double) list.get(2)).doubleValue());
306        dataObject.setProcessed(true);
307
308        resultVector.addElement(dataObject);
309
310        if (dataObject.getCoreDistance() != DataObject.UNDEFINED) {
311            update(seeds, epsilonRange_List, dataObject);
312            while (seeds.hasNext()) {
313                UpdateQueueElement updateQueueElement = seeds.next();
314                DataObject currentDataObject = (DataObject) updateQueueElement.getObject();
315                currentDataObject.setReachabilityDistance(updateQueueElement.getPriority());
316                List list_1 = database.coreDistance(getMinPoints(), getEpsilon(), currentDataObject);
317                List epsilonRange_List_1 = (List) list_1.get(1);
318                currentDataObject.setCoreDistance(((Double) list_1.get(2)).doubleValue());
319                currentDataObject.setProcessed(true);
320
321                resultVector.addElement(currentDataObject);
322
323                if (currentDataObject.getCoreDistance() != DataObject.UNDEFINED) {
324                    update(seeds, epsilonRange_List_1, currentDataObject);
325                }
326            }
327        }
328    }
329
330    /**
331     * Wraps the dataObject into a String, that contains the dataObject's key, the dataObject itself,
332     * the coreDistance and its reachabilityDistance in a formatted manner.
333     * @param dataObject The dataObject that is wrapped into a formatted string.
334     * @return String Formatted string
335     */
336    private String format_dataObject(DataObject dataObject) {
337        StringBuffer stringBuffer = new StringBuffer();
338
339        stringBuffer.append("(" + Utils.doubleToString(Double.parseDouble(dataObject.getKey()),
340                (Integer.toString(database.size()).length()), 0) + ".) "
341                + Utils.padRight(dataObject.toString(), 40) + "  -->  c_dist: " +
342
343                ((dataObject.getCoreDistance() == DataObject.UNDEFINED) ?
344                Utils.padRight("UNDEFINED", 12) :
345                Utils.padRight(Utils.doubleToString(dataObject.getCoreDistance(), 2, 3), 12)) +
346
347                " r_dist: " +
348                ((dataObject.getReachabilityDistance() == DataObject.UNDEFINED) ?
349                Utils.padRight("UNDEFINED", 12) :
350                Utils.doubleToString(dataObject.getReachabilityDistance(), 2, 3)) + "\n");
351
352        return stringBuffer.toString();
353    }
354
355    /**
356     * Updates reachability-distances in the Seeds-List
357     * @param seeds UpdateQueue that holds DataObjects with their corresponding reachability-distances
358     * @param epsilonRange_list List of DataObjects that were found in epsilon-range of centralObject
359     * @param centralObject
360     */
361    private void update(UpdateQueue seeds, List epsilonRange_list, DataObject centralObject) {
362        double coreDistance = centralObject.getCoreDistance();
363        double new_r_dist = DataObject.UNDEFINED;
364
365        for (int i = 0; i < epsilonRange_list.size(); i++) {
366            EpsilonRange_ListElement listElement = (EpsilonRange_ListElement) epsilonRange_list.get(i);
367            DataObject neighbourhood_object = listElement.getDataObject();
368            if (!neighbourhood_object.isProcessed()) {
369                new_r_dist = Math.max(coreDistance, listElement.getDistance());
370                seeds.add(new_r_dist, neighbourhood_object, neighbourhood_object.getKey());
371            }
372        }
373    }
374
375    /**
376     * Classifies a given instance.
377     *
378     * @param instance The instance to be assigned to a cluster
379     * @return int The number of the assigned cluster as an integer
380     * @throws java.lang.Exception If instance could not be clustered
381     * successfully
382     */
383    public int clusterInstance(Instance instance) throws Exception {
384        throw new Exception();
385    }
386
387    /**
388     * Returns the number of clusters.
389     *
390     * @return int The number of clusters generated for a training dataset.
391     * @throws java.lang.Exception If number of clusters could not be returned
392     * successfully
393     */
394    public int numberOfClusters() throws Exception {
395        return numberOfGeneratedClusters;
396    }
397
398    /**
399     * Returns an enumeration of all the available options.
400     *
401     * @return Enumeration An enumeration of all available options.
402     */
403    public Enumeration listOptions() {
404        Vector vector = new Vector();
405
406        vector.addElement(
407            new Option(
408                "\tepsilon (default = 0.9)",
409                "E", 1, "-E <double>"));
410       
411        vector.addElement(
412            new Option("\tminPoints (default = 6)",
413                "M", 1, "-M <int>"));
414       
415        vector.addElement(
416            new Option(
417                "\tindex (database) used for OPTICS (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase)",
418                "I", 1, "-I <String>"));
419       
420        vector.addElement(
421            new Option(
422                "\tdistance-type (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject)",
423                "D", 1, "-D <String>"));
424       
425        vector.addElement(
426            new Option(
427                "\twrite results to OPTICS_#TimeStamp#.TXT - File",
428                "F", 0, "-F"));
429       
430        vector.addElement(
431            new Option(
432                "\tsuppress the display of the GUI after building the clusterer",
433                "no-gui", 0, "-no-gui"));
434       
435        vector.addElement(
436            new Option(
437                "\tThe file to save the generated database to. If a directory\n"
438                + "\tis provided, the database doesn't get saved.\n"
439                + "\tThe generated file can be viewed with the OPTICS Visualizer:\n"
440                + "\t  java " + OPTICS_Visualizer.class.getName() + " [file.ser]\n"
441                + "\t(default: .)",
442                "db-output", 1, "-db-output <file>"));
443       
444        return vector.elements();
445    }
446
447    /**
448     * Sets the OptionHandler's options using the given list. All options
449     * will be set (or reset) during this call (i.e. incremental setting
450     * of options is not possible). <p/>
451     *
452     <!-- options-start -->
453     * Valid options are: <p/>
454     *
455     * <pre> -E &lt;double&gt;
456     *  epsilon (default = 0.9)</pre>
457     *
458     * <pre> -M &lt;int&gt;
459     *  minPoints (default = 6)</pre>
460     *
461     * <pre> -I &lt;String&gt;
462     *  index (database) used for OPTICS (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase)</pre>
463     *
464     * <pre> -D &lt;String&gt;
465     *  distance-type (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject)</pre>
466     *
467     * <pre> -F
468     *  write results to OPTICS_#TimeStamp#.TXT - File</pre>
469     *
470     * <pre> -no-gui
471     *  suppress the display of the GUI after building the clusterer</pre>
472     *
473     * <pre> -db-output &lt;file&gt;
474     *  The file to save the generated database to. If a directory
475     *  is provided, the database doesn't get saved.
476     *  The generated file can be viewed with the OPTICS Visualizer:
477     *    java weka.clusterers.forOPTICSAndDBScan.OPTICS_GUI.OPTICS_Visualizer [file.ser]
478     *  (default: .)</pre>
479     *
480     <!-- options-end -->
481     *
482     * @param options The list of options as an array of strings
483     * @throws java.lang.Exception If an option is not supported
484     */
485    public void setOptions(String[] options) throws Exception {
486        String optionString = Utils.getOption('E', options);
487        if (optionString.length() != 0)
488            setEpsilon(Double.parseDouble(optionString));
489        else
490            setEpsilon(0.9);
491
492        optionString = Utils.getOption('M', options);
493        if (optionString.length() != 0)
494            setMinPoints(Integer.parseInt(optionString));
495        else
496            setMinPoints(6);
497
498        optionString = Utils.getOption('I', options);
499        if (optionString.length() != 0)
500            setDatabase_Type(optionString);
501        else
502            setDatabase_Type(weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase.class.getName());
503
504        optionString = Utils.getOption('D', options);
505        if (optionString.length() != 0)
506            setDatabase_distanceType(optionString);
507        else
508            setDatabase_distanceType(weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject.class.getName());
509
510        setWriteOPTICSresults(Utils.getFlag('F', options));
511
512        setShowGUI(!Utils.getFlag("no-gui", options));
513
514        optionString = Utils.getOption("db-output", options);
515        if (optionString.length() != 0)
516            setDatabaseOutput(new File(optionString));
517        else
518            setDatabaseOutput(new File("."));
519    }
520
521    /**
522     * Gets the current option settings for the OptionHandler.
523     *
524     * @return String[] The list of current option settings as an array of strings
525     */
526    public String[] getOptions() {
527        Vector<String>  result;
528       
529        result = new Vector<String>();
530       
531        result.add("-E");
532        result.add("" + getEpsilon());
533       
534        result.add("-M");
535        result.add("" + getMinPoints());
536       
537        result.add("-I");
538        result.add("" + getDatabase_Type());
539       
540        result.add("-D");
541        result.add("" + getDatabase_distanceType());
542
543        if (getWriteOPTICSresults())
544          result.add("-F");
545
546        if (!getShowGUI())
547          result.add("-no-gui");
548       
549        result.add("-db-output");
550        result.add("" + getDatabaseOutput());
551       
552        return result.toArray(new String[result.size()]);
553    }
554
555    /**
556     * Returns a new Class-Instance of the specified database
557     * @param database_Type String of the specified database
558     * @param instances Instances that were delivered from WEKA
559     * @return Database New constructed Database
560     */
561    public Database databaseForName(String database_Type, Instances instances) {
562        Object o = null;
563
564        Constructor co = null;
565        try {
566            co = (Class.forName(database_Type)).getConstructor(new Class[]{Instances.class});
567            o = co.newInstance(new Object[]{instances});
568        } catch (NoSuchMethodException e) {
569            e.printStackTrace();
570        } catch (SecurityException e) {
571            e.printStackTrace();
572        } catch (ClassNotFoundException e) {
573            e.printStackTrace();
574        } catch (InstantiationException e) {
575            e.printStackTrace();
576        } catch (IllegalAccessException e) {
577            e.printStackTrace();
578        } catch (InvocationTargetException e) {
579            e.printStackTrace();
580        }
581
582        return (Database) o;
583    }
584
585    /**
586     * Returns a new Class-Instance of the specified database
587     * @param database_distanceType String of the specified distance-type
588     * @param instance The original instance that needs to hold by this DataObject
589     * @param key Key for this DataObject
590     * @param database Link to the database
591     * @return DataObject New constructed DataObject
592     */
593    public DataObject dataObjectForName(String database_distanceType, Instance instance, String key, Database database) {
594        Object o = null;
595
596        Constructor co = null;
597        try {
598            co = (Class.forName(database_distanceType)).
599                    getConstructor(new Class[]{Instance.class, String.class, Database.class});
600            o = co.newInstance(new Object[]{instance, key, database});
601        } catch (NoSuchMethodException e) {
602            e.printStackTrace();
603        } catch (SecurityException e) {
604            e.printStackTrace();
605        } catch (ClassNotFoundException e) {
606            e.printStackTrace();
607        } catch (InstantiationException e) {
608            e.printStackTrace();
609        } catch (IllegalAccessException e) {
610            e.printStackTrace();
611        } catch (InvocationTargetException e) {
612            e.printStackTrace();
613        }
614
615        return (DataObject) o;
616    }
617
618    /**
619     * Sets a new value for minPoints
620     * @param minPoints MinPoints
621     */
622    public void setMinPoints(int minPoints) {
623        this.minPoints = minPoints;
624    }
625
626    /**
627     * Sets a new value for epsilon
628     * @param epsilon Epsilon
629     */
630    public void setEpsilon(double epsilon) {
631        this.epsilon = epsilon;
632    }
633
634    /**
635     * Returns the value of epsilon
636     * @return double Epsilon
637     */
638    public double getEpsilon() {
639        return epsilon;
640    }
641
642    /**
643     * Returns the value of minPoints
644     * @return int MinPoints
645     */
646    public int getMinPoints() {
647        return minPoints;
648    }
649
650    /**
651     * Returns the distance-type
652     * @return String Distance-type
653     */
654    public String getDatabase_distanceType() {
655        return database_distanceType;
656    }
657
658    /**
659     * Returns the type of the used index (database)
660     * @return String Index-type
661     */
662    public String getDatabase_Type() {
663        return database_Type;
664    }
665
666    /**
667     * Sets a new distance-type
668     * @param database_distanceType The new distance-type
669     */
670    public void setDatabase_distanceType(String database_distanceType) {
671        this.database_distanceType = database_distanceType;
672    }
673
674    /**
675     * Sets a new database-type
676     * @param database_Type The new database-type
677     */
678    public void setDatabase_Type(String database_Type) {
679        this.database_Type = database_Type;
680    }
681
682    /**
683     * Returns the flag for writing actions
684     * @return writeOPTICSresults (flag)
685     */
686    public boolean getWriteOPTICSresults() {
687        return writeOPTICSresults;
688    }
689
690    /**
691     * Sets the flag for writing actions
692     * @param writeOPTICSresults Results are written to a file if the flag is set
693     */
694    public void setWriteOPTICSresults(boolean writeOPTICSresults) {
695        this.writeOPTICSresults = writeOPTICSresults;
696    }
697
698    /**
699     * Returns the flag for showing the OPTICS visualizer GUI.
700     *
701     * @return          true if the GUI is displayed
702     */
703    public boolean getShowGUI() {
704        return showGUI;
705    }
706
707    /**
708     * Sets the flag for displaying the GUI.
709     *
710     * @param value     if true, then the OPTICS visualizer GUI will be
711     *                  displayed after building the clusterer
712     */
713    public void setShowGUI(boolean value) {
714        showGUI = value;
715    }
716
717    /**
718     * Returns the file to save the database to - if directory, database is not
719     * saved.
720     *
721     * @return          the file to save the database to a directory if saving
722     *                  is ignored
723     */
724    public File getDatabaseOutput() {
725        return databaseOutput;
726    }
727
728    /**
729     * Sets the the file to save the generated database to. If a directory
730     * is provided, the datbase doesn't get saved.
731     *
732     * @param value     the file to save the database to or a directory if
733     *                  saving is to be ignored
734     */
735    public void setDatabaseOutput(File value) {
736        databaseOutput = value;
737    }
738
739    /**
740     * Returns the resultVector
741     * @return resultVector
742     */
743    public FastVector getResultVector() {
744        return resultVector;
745    }
746
747    /**
748     * Returns the tip text for this property
749     * @return tip text for this property suitable for
750     * displaying in the explorer/experimenter gui
751     */
752    public String epsilonTipText() {
753        return "radius of the epsilon-range-queries";
754    }
755
756    /**
757     * Returns the tip text for this property
758     * @return tip text for this property suitable for
759     * displaying in the explorer/experimenter gui
760     */
761    public String minPointsTipText() {
762        return "minimun number of DataObjects required in an epsilon-range-query";
763    }
764
765    /**
766     * Returns the tip text for this property
767     * @return tip text for this property suitable for
768     * displaying in the explorer/experimenter gui
769     */
770    public String database_TypeTipText() {
771        return "used database";
772    }
773
774    /**
775     * Returns the tip text for this property
776     * @return tip text for this property suitable for
777     * displaying in the explorer/experimenter gui
778     */
779    public String database_distanceTypeTipText() {
780        return "used distance-type";
781    }
782
783    /**
784     * Returns the tip text for this property
785     * @return tip text for this property suitable for
786     * displaying in the explorer/experimenter gui
787     */
788    public String writeOPTICSresultsTipText() {
789        return "if the -F option is set, the results are written to OPTICS_#TimeStamp#.TXT";
790    }
791
792    /**
793     * Returns the tip text for this property.
794     *
795     * @return          tip text for this property suitable for
796     *                  displaying in the explorer/experimenter gui
797     */
798    public String showGUITipText() {
799        return "Defines whether the OPTICS Visualizer is displayed after the clusterer has been built or not.";
800    }
801
802    /**
803     * Returns the tip text for this property.
804     *
805     * @return          tip text for this property suitable for
806     *                  displaying in the explorer/experimenter gui
807     */
808    public String databaseOutputTipText() {
809        return 
810            "The optional output file for the generated database object - can "
811          + "be viewed with the OPTICS Visualizer.\n"
812          + "java " + OPTICS_Visualizer.class.getName() + " [file.ser]";
813    }
814
815    /**
816     * Returns a string describing this DataMining-Algorithm
817     * @return String Information for the gui-explorer
818     */
819    public String globalInfo() {
820        return getTechnicalInformation().toString();
821    }
822
823    /**
824     * Returns an instance of a TechnicalInformation object, containing
825     * detailed information about the technical background of this class,
826     * e.g., paper reference or book this class is based on.
827     *
828     * @return the technical information about this class
829     */
830    public TechnicalInformation getTechnicalInformation() {
831      TechnicalInformation      result;
832     
833      result = new TechnicalInformation(Type.INPROCEEDINGS);
834      result.setValue(Field.AUTHOR, "Mihael Ankerst and Markus M. Breunig and Hans-Peter Kriegel and Joerg Sander");
835      result.setValue(Field.TITLE, "OPTICS: Ordering Points To Identify the Clustering Structure");
836      result.setValue(Field.BOOKTITLE, "ACM SIGMOD International Conference on Management of Data");
837      result.setValue(Field.YEAR, "1999");
838      result.setValue(Field.PAGES, "49-60");
839      result.setValue(Field.PUBLISHER, "ACM Press");
840     
841      return result;
842    }
843
844    /**
845     * Returns the internal database
846     *
847     * @return the internal database
848     */
849    public SERObject getSERObject() {
850        SERObject serObject = new SERObject(resultVector,
851                database.size(),
852                database.getInstances().numAttributes(),
853                getEpsilon(),
854                getMinPoints(),
855                writeOPTICSresults,
856                getDatabase_Type(),
857                getDatabase_distanceType(),
858                numberOfGeneratedClusters,
859                Utils.doubleToString(elapsedTime, 3, 3));
860        return serObject;
861    }
862
863    /**
864     * Returns a description of the clusterer
865     *
866     * @return the clusterer as string
867     */
868    public String toString() {
869        StringBuffer stringBuffer = new StringBuffer();
870        stringBuffer.append("OPTICS clustering results\n" +
871                "============================================================================================\n\n");
872        stringBuffer.append("Clustered DataObjects: " + database.size() + "\n");
873        stringBuffer.append("Number of attributes: " + database.getInstances().numAttributes() + "\n");
874        stringBuffer.append("Epsilon: " + getEpsilon() + "; minPoints: " + getMinPoints() + "\n");
875        stringBuffer.append("Write results to file: " + (writeOPTICSresults ? "yes" : "no") + "\n");
876        stringBuffer.append("Index: " + getDatabase_Type() + "\n");
877        stringBuffer.append("Distance-type: " + getDatabase_distanceType() + "\n");
878        stringBuffer.append("Number of generated clusters: " + numberOfGeneratedClusters + "\n");
879        DecimalFormat decimalFormat = new DecimalFormat(".##");
880        stringBuffer.append("Elapsed time: " + decimalFormat.format(elapsedTime) + "\n\n");
881
882        for (int i = 0; i < resultVector.size(); i++) {
883            stringBuffer.append(format_dataObject((DataObject) resultVector.elementAt(i)));
884        }
885        return stringBuffer.toString() + "\n";
886    }
887   
888    /**
889     * Returns the revision string.
890     *
891     * @return          the revision
892     */
893    public String getRevision() {
894      return RevisionUtils.extract("$Revision: 5488 $");
895    }
896
897    /**
898     * Main Method for testing OPTICS
899     * @param args Valid parameters are: 'E' epsilon (default = 0.9); 'M' minPoints (default = 6);
900     *                                   'I' index-type (default = weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase);
901     *                                   'D' distance-type (default = weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclidianDataObject);
902     *                                   'F' write results to OPTICS_#TimeStamp#.TXT - File
903     */
904    public static void main(String[] args) {
905        runClusterer(new OPTICS(), args);
906    }
907}
Note: See TracBrowser for help on using the repository browser.