[4] | 1 | /* |
---|
| 2 | * This program is free software; you can redistribute it and/or modify |
---|
| 3 | * it under the terms of the GNU General Public License as published by |
---|
| 4 | * the Free Software Foundation; either version 2 of the License, or |
---|
| 5 | * (at your option) any later version. |
---|
| 6 | * |
---|
| 7 | * This program is distributed in the hope that it will be useful, |
---|
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
| 10 | * GNU General Public License for more details. |
---|
| 11 | * |
---|
| 12 | * You should have received a copy of the GNU General Public License |
---|
| 13 | * along with this program; if not, write to the Free Software |
---|
| 14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
| 15 | */ |
---|
| 16 | |
---|
| 17 | /* |
---|
| 18 | * Copyright (C) 2008 |
---|
| 19 | * & Alexander Smirnov (austellus@gmail.com) |
---|
| 20 | */ |
---|
| 21 | package weka.clusterers; |
---|
| 22 | |
---|
| 23 | import java.io.Serializable; |
---|
| 24 | import weka.core.Capabilities; |
---|
| 25 | import weka.core.Instance; |
---|
| 26 | import weka.core.Instances; |
---|
| 27 | import weka.core.RevisionUtils; |
---|
| 28 | import weka.core.SparseInstance; |
---|
| 29 | import weka.core.Option; |
---|
| 30 | import weka.core.OptionHandler; |
---|
| 31 | import weka.core.TechnicalInformation; |
---|
| 32 | import weka.core.TechnicalInformationHandler; |
---|
| 33 | import weka.core.Utils; |
---|
| 34 | import weka.core.Capabilities.Capability; |
---|
| 35 | import weka.core.TechnicalInformation.Field; |
---|
| 36 | import weka.core.TechnicalInformation.Type; |
---|
| 37 | import weka.filters.Filter; |
---|
| 38 | import weka.filters.unsupervised.attribute.ReplaceMissingValues; |
---|
| 39 | import java.lang.reflect.Constructor; |
---|
| 40 | import java.lang.reflect.InvocationTargetException; |
---|
| 41 | import java.text.DecimalFormat; |
---|
| 42 | import java.util.Enumeration; |
---|
| 43 | import java.util.Iterator; |
---|
| 44 | import java.util.List; |
---|
| 45 | import java.util.Vector; |
---|
| 46 | import java.util.HashMap; |
---|
| 47 | import java.util.ArrayList; |
---|
| 48 | |
---|
| 49 | /** |
---|
| 50 | <!-- globalinfo-start --> |
---|
| 51 | * Yiling Yang, Xudong Guan, Jinyuan You: CLOPE: a fast and effective clustering algorithm for transactional data. In: Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining, 682-687, 2002. |
---|
| 52 | * <p/> |
---|
| 53 | <!-- globalinfo-end --> |
---|
| 54 | * |
---|
| 55 | <!-- technical-bibtex-start --> |
---|
| 56 | * BibTeX: |
---|
| 57 | * <pre> |
---|
| 58 | * @inproceedings{Yang2002, |
---|
| 59 | * author = {Yiling Yang and Xudong Guan and Jinyuan You}, |
---|
| 60 | * booktitle = {Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining}, |
---|
| 61 | * pages = {682-687}, |
---|
| 62 | * publisher = {ACM New York, NY, USA}, |
---|
| 63 | * title = {CLOPE: a fast and effective clustering algorithm for transactional data}, |
---|
| 64 | * year = {2002} |
---|
| 65 | * } |
---|
| 66 | * </pre> |
---|
| 67 | * <p/> |
---|
| 68 | <!-- technical-bibtex-end --> |
---|
| 69 | * |
---|
| 70 | <!-- options-start --> |
---|
| 71 | * Valid options are: <p/> |
---|
| 72 | * |
---|
| 73 | * <pre> -R <num> |
---|
| 74 | * Repulsion |
---|
| 75 | * (default 2.6)</pre> |
---|
| 76 | * |
---|
| 77 | <!-- options-end --> |
---|
| 78 | * |
---|
| 79 | * @author Alexander Smirnov (austellus@gmail.com) |
---|
| 80 | * @version $Revision: 5488 $ |
---|
| 81 | */ |
---|
| 82 | public class CLOPE |
---|
| 83 | extends AbstractClusterer |
---|
| 84 | implements OptionHandler, TechnicalInformationHandler { |
---|
| 85 | |
---|
| 86 | /** for serialization */ |
---|
| 87 | static final long serialVersionUID = -567567567567588L; |
---|
| 88 | |
---|
| 89 | /** |
---|
| 90 | * Inner class for cluster of CLOPE. |
---|
| 91 | * |
---|
| 92 | * @see Serializable |
---|
| 93 | */ |
---|
| 94 | private class CLOPECluster implements Serializable { |
---|
| 95 | |
---|
| 96 | /** |
---|
| 97 | * Number of transactions |
---|
| 98 | */ |
---|
| 99 | public int N = 0; //number of transactions |
---|
| 100 | |
---|
| 101 | /** |
---|
| 102 | * Number of distinct items (or width) |
---|
| 103 | */ |
---|
| 104 | public int W = 0; |
---|
| 105 | |
---|
| 106 | /** |
---|
| 107 | * Size of cluster |
---|
| 108 | */ |
---|
| 109 | public int S = 0; |
---|
| 110 | |
---|
| 111 | /** |
---|
| 112 | * Hash of <item, occurrence> pairs |
---|
| 113 | */ |
---|
| 114 | public HashMap occ = new HashMap(); |
---|
| 115 | |
---|
| 116 | /** |
---|
| 117 | * Add item to cluster |
---|
| 118 | */ |
---|
| 119 | public void AddItem(String Item) { |
---|
| 120 | int count; |
---|
| 121 | if (!this.occ.containsKey(Item)) { |
---|
| 122 | this.occ.put(Item, 1); |
---|
| 123 | } else { |
---|
| 124 | count = (Integer) this.occ.get(Item); |
---|
| 125 | count++; |
---|
| 126 | this.occ.remove(Item); |
---|
| 127 | this.occ.put(Item, count); |
---|
| 128 | } |
---|
| 129 | this.S++; |
---|
| 130 | } |
---|
| 131 | |
---|
| 132 | public void AddItem(Integer Item) { |
---|
| 133 | int count; |
---|
| 134 | if (!this.occ.containsKey(Item)) { |
---|
| 135 | this.occ.put(Item, 1); |
---|
| 136 | } else { |
---|
| 137 | count = (Integer) this.occ.get(Item); |
---|
| 138 | count++; |
---|
| 139 | this.occ.remove(Item); |
---|
| 140 | this.occ.put(Item, count); |
---|
| 141 | } |
---|
| 142 | this.S++; |
---|
| 143 | } |
---|
| 144 | |
---|
| 145 | /** |
---|
| 146 | * Delete item from cluster |
---|
| 147 | */ |
---|
| 148 | public void DeleteItem(String Item) { |
---|
| 149 | int count; |
---|
| 150 | |
---|
| 151 | count = (Integer) this.occ.get(Item); |
---|
| 152 | |
---|
| 153 | if (count == 1) { |
---|
| 154 | this.occ.remove(Item); |
---|
| 155 | |
---|
| 156 | } else { |
---|
| 157 | count--; |
---|
| 158 | this.occ.remove(Item); |
---|
| 159 | this.occ.put(Item, count); |
---|
| 160 | } |
---|
| 161 | this.S--; |
---|
| 162 | } |
---|
| 163 | |
---|
| 164 | public void DeleteItem(Integer Item) { |
---|
| 165 | int count; |
---|
| 166 | |
---|
| 167 | count = (Integer) this.occ.get(Item); |
---|
| 168 | |
---|
| 169 | if (count == 1) { |
---|
| 170 | this.occ.remove(Item); |
---|
| 171 | |
---|
| 172 | } else { |
---|
| 173 | count--; |
---|
| 174 | this.occ.remove(Item); |
---|
| 175 | this.occ.put(Item, count); |
---|
| 176 | } |
---|
| 177 | this.S--; |
---|
| 178 | } |
---|
| 179 | |
---|
| 180 | /** |
---|
| 181 | * Calculate Delta |
---|
| 182 | */ |
---|
| 183 | public double DeltaAdd(Instance inst, double r) { |
---|
| 184 | //System.out.println("DeltaAdd"); |
---|
| 185 | int S_new; |
---|
| 186 | int W_new; |
---|
| 187 | double profit; |
---|
| 188 | double profit_new; |
---|
| 189 | double deltaprofit; |
---|
| 190 | S_new = 0; |
---|
| 191 | W_new = occ.size(); |
---|
| 192 | |
---|
| 193 | if (inst instanceof SparseInstance) { |
---|
| 194 | //System.out.println("DeltaAddSparceInstance"); |
---|
| 195 | for (int i = 0; i < inst.numValues(); i++) { |
---|
| 196 | S_new++; |
---|
| 197 | |
---|
| 198 | if ((Integer) this.occ.get(inst.index(i)) == null) { |
---|
| 199 | W_new++; |
---|
| 200 | } |
---|
| 201 | } |
---|
| 202 | } else { |
---|
| 203 | for (int i = 0; i < inst.numAttributes(); i++) { |
---|
| 204 | if (!inst.isMissing(i)) { |
---|
| 205 | S_new++; |
---|
| 206 | if ((Integer) this.occ.get(i + inst.toString(i)) == null) { |
---|
| 207 | W_new++; |
---|
| 208 | } |
---|
| 209 | } |
---|
| 210 | } |
---|
| 211 | } |
---|
| 212 | S_new += S; |
---|
| 213 | |
---|
| 214 | |
---|
| 215 | if (N == 0) { |
---|
| 216 | deltaprofit = S_new / Math.pow(W_new, r); |
---|
| 217 | } else { |
---|
| 218 | profit = S * N / Math.pow(W, r); |
---|
| 219 | profit_new = S_new * (N + 1) / Math.pow(W_new, r); |
---|
| 220 | deltaprofit = profit_new - profit; |
---|
| 221 | } |
---|
| 222 | return deltaprofit; |
---|
| 223 | } |
---|
| 224 | |
---|
| 225 | /** |
---|
| 226 | * Add instance to cluster |
---|
| 227 | */ |
---|
| 228 | public void AddInstance(Instance inst) { |
---|
| 229 | if (inst instanceof SparseInstance) { |
---|
| 230 | // System.out.println("AddSparceInstance"); |
---|
| 231 | for (int i = 0; i < inst.numValues(); i++) { |
---|
| 232 | AddItem(inst.index(i)); |
---|
| 233 | // for(int i=0;i<inst.numAttributes();int++){ |
---|
| 234 | // AddItem(inst.index(i)+inst.value(i)); |
---|
| 235 | } |
---|
| 236 | } else { |
---|
| 237 | for (int i = 0; i < inst.numAttributes(); i++) { |
---|
| 238 | |
---|
| 239 | if (!inst.isMissing(i)) { |
---|
| 240 | |
---|
| 241 | AddItem(i + inst.toString(i)); |
---|
| 242 | } |
---|
| 243 | } |
---|
| 244 | } |
---|
| 245 | this.W = this.occ.size(); |
---|
| 246 | this.N++; |
---|
| 247 | } |
---|
| 248 | |
---|
| 249 | /** |
---|
| 250 | * Delete instance from cluster |
---|
| 251 | */ |
---|
| 252 | public void DeleteInstance(Instance inst) { |
---|
| 253 | if (inst instanceof SparseInstance) { |
---|
| 254 | // System.out.println("DeleteSparceInstance"); |
---|
| 255 | for (int i = 0; i < inst.numValues(); i++) { |
---|
| 256 | DeleteItem(inst.index(i)); |
---|
| 257 | } |
---|
| 258 | } else { |
---|
| 259 | for (int i = 0; i <= inst.numAttributes() - 1; i++) { |
---|
| 260 | |
---|
| 261 | if (!inst.isMissing(i)) { |
---|
| 262 | DeleteItem(i + inst.toString(i)); |
---|
| 263 | } |
---|
| 264 | } |
---|
| 265 | } |
---|
| 266 | this.W = this.occ.size(); |
---|
| 267 | this.N--; |
---|
| 268 | } |
---|
| 269 | } |
---|
| 270 | /** |
---|
| 271 | * Array of clusters |
---|
| 272 | */ |
---|
| 273 | public ArrayList<CLOPECluster> clusters = new ArrayList<CLOPECluster>(); |
---|
| 274 | |
---|
| 275 | /** |
---|
| 276 | * Specifies the repulsion default |
---|
| 277 | */ |
---|
| 278 | protected double m_RepulsionDefault = 2.6; |
---|
| 279 | |
---|
| 280 | /** |
---|
| 281 | * Specifies the repulsion |
---|
| 282 | */ |
---|
| 283 | protected double m_Repulsion = m_RepulsionDefault; |
---|
| 284 | |
---|
| 285 | /** |
---|
| 286 | * Number of clusters |
---|
| 287 | */ |
---|
| 288 | protected int m_numberOfClusters = -1; |
---|
| 289 | |
---|
| 290 | /** |
---|
| 291 | * Counter for the processed instances |
---|
| 292 | */ |
---|
| 293 | protected int m_processed_InstanceID; |
---|
| 294 | |
---|
| 295 | /** |
---|
| 296 | * Number of instances |
---|
| 297 | */ |
---|
| 298 | protected int m_numberOfInstances; |
---|
| 299 | |
---|
| 300 | /** |
---|
| 301 | * |
---|
| 302 | */ |
---|
| 303 | protected ArrayList<Integer> m_clusterAssignments = new ArrayList(); |
---|
| 304 | |
---|
| 305 | /** |
---|
| 306 | * whether the number of clusters was already determined |
---|
| 307 | */ |
---|
| 308 | protected boolean m_numberOfClustersDetermined = false; |
---|
| 309 | |
---|
| 310 | public int numberOfClusters() { |
---|
| 311 | determineNumberOfClusters(); |
---|
| 312 | return m_numberOfClusters; |
---|
| 313 | } |
---|
| 314 | |
---|
| 315 | protected void determineNumberOfClusters() { |
---|
| 316 | |
---|
| 317 | m_numberOfClusters = clusters.size(); |
---|
| 318 | |
---|
| 319 | m_numberOfClustersDetermined = true; |
---|
| 320 | } |
---|
| 321 | |
---|
| 322 | public Enumeration listOptions() { |
---|
| 323 | Vector result = new Vector(); |
---|
| 324 | result.addElement(new Option( |
---|
| 325 | "\tRepulsion\n" + "\t(default " + m_RepulsionDefault + ")", |
---|
| 326 | "R", 1, "-R <num>")); |
---|
| 327 | return result.elements(); |
---|
| 328 | } |
---|
| 329 | |
---|
| 330 | /** |
---|
| 331 | * Parses a given list of options. <p/> |
---|
| 332 | * |
---|
| 333 | <!-- options-start --> |
---|
| 334 | * Valid options are: <p/> |
---|
| 335 | * |
---|
| 336 | * <pre> -R <num> |
---|
| 337 | * Repulsion |
---|
| 338 | * (default 2.6)</pre> |
---|
| 339 | * |
---|
| 340 | <!-- options-end --> |
---|
| 341 | * |
---|
| 342 | * @param options the list of options as an array of strings |
---|
| 343 | * @throws Exception if an option is not supported |
---|
| 344 | */ |
---|
| 345 | public void setOptions(String[] options) throws Exception { |
---|
| 346 | String tmpStr; |
---|
| 347 | |
---|
| 348 | tmpStr = Utils.getOption('R', options); |
---|
| 349 | if (tmpStr.length() != 0) { |
---|
| 350 | setRepulsion(Double.parseDouble(tmpStr)); |
---|
| 351 | } else { |
---|
| 352 | setRepulsion(m_RepulsionDefault); |
---|
| 353 | } |
---|
| 354 | } |
---|
| 355 | |
---|
| 356 | /** |
---|
| 357 | * Gets the current settings of CLOPE |
---|
| 358 | * |
---|
| 359 | * @return an array of strings suitable for passing to setOptions() |
---|
| 360 | */ |
---|
| 361 | public String[] getOptions() { |
---|
| 362 | Vector result; |
---|
| 363 | |
---|
| 364 | result = new Vector(); |
---|
| 365 | |
---|
| 366 | result.add("-R"); |
---|
| 367 | result.add("" + getRepulsion()); |
---|
| 368 | |
---|
| 369 | return (String[]) result.toArray(new String[result.size()]); |
---|
| 370 | } |
---|
| 371 | |
---|
| 372 | /** |
---|
| 373 | * Returns the tip text for this property |
---|
| 374 | * @return tip text for this property suitable for |
---|
| 375 | * displaying in the explorer/experimenter gui |
---|
| 376 | */ |
---|
| 377 | public String repulsionTipText() { |
---|
| 378 | return "Repulsion to be used."; |
---|
| 379 | } |
---|
| 380 | |
---|
| 381 | /** |
---|
| 382 | * set the repulsion |
---|
| 383 | * |
---|
| 384 | * @param value the repulsion |
---|
| 385 | * @throws Exception if number of clusters is negative |
---|
| 386 | */ |
---|
| 387 | public void setRepulsion(double value) { |
---|
| 388 | m_Repulsion = value; |
---|
| 389 | } |
---|
| 390 | |
---|
| 391 | /** |
---|
| 392 | * gets the repulsion |
---|
| 393 | * |
---|
| 394 | * @return the repulsion |
---|
| 395 | */ |
---|
| 396 | public double getRepulsion() { |
---|
| 397 | return m_Repulsion; |
---|
| 398 | } |
---|
| 399 | |
---|
| 400 | /** |
---|
| 401 | * Returns default capabilities of the clusterer. |
---|
| 402 | * |
---|
| 403 | * @return the capabilities of this clusterer |
---|
| 404 | */ |
---|
| 405 | public Capabilities getCapabilities() { |
---|
| 406 | Capabilities result = super.getCapabilities(); |
---|
| 407 | result.disableAll(); |
---|
| 408 | result.enable(Capability.NO_CLASS); |
---|
| 409 | |
---|
| 410 | // attributes |
---|
| 411 | result.enable(Capability.NOMINAL_ATTRIBUTES); |
---|
| 412 | // result.enable(Capability.NUMERIC_ATTRIBUTES); |
---|
| 413 | result.enable(Capability.MISSING_VALUES); |
---|
| 414 | |
---|
| 415 | return result; |
---|
| 416 | } |
---|
| 417 | |
---|
| 418 | /** |
---|
| 419 | * Generate Clustering via CLOPE |
---|
| 420 | * @param data The instances that need to be clustered |
---|
| 421 | * @throws java.lang.Exception If clustering was not successful |
---|
| 422 | */ |
---|
| 423 | public void buildClusterer(Instances data) throws Exception { |
---|
| 424 | clusters.clear(); |
---|
| 425 | m_processed_InstanceID = 0; |
---|
| 426 | m_clusterAssignments.clear(); |
---|
| 427 | m_numberOfInstances = data.numInstances(); |
---|
| 428 | boolean moved; |
---|
| 429 | //Phase 1 |
---|
| 430 | for (int i = 0; i < data.numInstances(); i++) { |
---|
| 431 | int clusterid = AddInstanceToBestCluster(data.instance(i)); |
---|
| 432 | m_clusterAssignments.add(clusterid); |
---|
| 433 | |
---|
| 434 | } |
---|
| 435 | //Phase 2 |
---|
| 436 | do { |
---|
| 437 | moved = false; |
---|
| 438 | for (int i = 0; i < data.numInstances(); i++) { |
---|
| 439 | m_processed_InstanceID = i; |
---|
| 440 | int clusterid = MoveInstanceToBestCluster(data.instance(i)); |
---|
| 441 | if (clusterid != m_clusterAssignments.get(i)) { |
---|
| 442 | moved = true; |
---|
| 443 | m_clusterAssignments.set(i, clusterid); |
---|
| 444 | } |
---|
| 445 | } |
---|
| 446 | } while (!moved); |
---|
| 447 | m_processed_InstanceID = 0; |
---|
| 448 | } |
---|
| 449 | |
---|
| 450 | /** |
---|
| 451 | * the default constructor |
---|
| 452 | */ |
---|
| 453 | public CLOPE() { |
---|
| 454 | super(); |
---|
| 455 | } |
---|
| 456 | |
---|
| 457 | /** |
---|
| 458 | * Add instance to best cluster |
---|
| 459 | */ |
---|
| 460 | public int AddInstanceToBestCluster(Instance inst) { |
---|
| 461 | |
---|
| 462 | double delta; |
---|
| 463 | double deltamax; |
---|
| 464 | int clustermax = -1; |
---|
| 465 | if (clusters.size() > 0) { |
---|
| 466 | int tempS = 0; |
---|
| 467 | int tempW = 0; |
---|
| 468 | if (inst instanceof SparseInstance) { |
---|
| 469 | for (int i = 0; i < inst.numValues(); i++) { |
---|
| 470 | tempS++; |
---|
| 471 | tempW++; |
---|
| 472 | } |
---|
| 473 | } else { |
---|
| 474 | for (int i = 0; i < inst.numAttributes(); i++) { |
---|
| 475 | if (!inst.isMissing(i)) { |
---|
| 476 | tempS++; |
---|
| 477 | tempW++; |
---|
| 478 | } |
---|
| 479 | } |
---|
| 480 | } |
---|
| 481 | |
---|
| 482 | deltamax = tempS / Math.pow(tempW, m_Repulsion); |
---|
| 483 | |
---|
| 484 | for (int i = 0; i < clusters.size(); i++) { |
---|
| 485 | CLOPECluster tempcluster = clusters.get(i); |
---|
| 486 | delta = tempcluster.DeltaAdd(inst, m_Repulsion); |
---|
| 487 | // System.out.println("delta " + delta); |
---|
| 488 | if (delta > deltamax) { |
---|
| 489 | deltamax = delta; |
---|
| 490 | clustermax = i; |
---|
| 491 | } |
---|
| 492 | } |
---|
| 493 | } else { |
---|
| 494 | CLOPECluster newcluster = new CLOPECluster(); |
---|
| 495 | clusters.add(newcluster); |
---|
| 496 | newcluster.AddInstance(inst); |
---|
| 497 | return clusters.size() - 1; |
---|
| 498 | } |
---|
| 499 | |
---|
| 500 | if (clustermax == -1) { |
---|
| 501 | CLOPECluster newcluster = new CLOPECluster(); |
---|
| 502 | clusters.add(newcluster); |
---|
| 503 | newcluster.AddInstance(inst); |
---|
| 504 | return clusters.size() - 1; |
---|
| 505 | } |
---|
| 506 | clusters.get(clustermax).AddInstance(inst); |
---|
| 507 | return clustermax; |
---|
| 508 | } |
---|
| 509 | |
---|
| 510 | /** |
---|
| 511 | * Move instance to best cluster |
---|
| 512 | */ |
---|
| 513 | public int MoveInstanceToBestCluster(Instance inst) { |
---|
| 514 | |
---|
| 515 | clusters.get(m_clusterAssignments.get(m_processed_InstanceID)).DeleteInstance(inst); |
---|
| 516 | m_clusterAssignments.set(m_processed_InstanceID, -1); |
---|
| 517 | double delta; |
---|
| 518 | double deltamax; |
---|
| 519 | int clustermax = -1; |
---|
| 520 | int tempS = 0; |
---|
| 521 | int tempW = 0; |
---|
| 522 | |
---|
| 523 | if (inst instanceof SparseInstance) { |
---|
| 524 | for (int i = 0; i < inst.numValues(); i++) { |
---|
| 525 | tempS++; |
---|
| 526 | tempW++; |
---|
| 527 | } |
---|
| 528 | } else { |
---|
| 529 | for (int i = 0; i < inst.numAttributes(); i++) { |
---|
| 530 | if (!inst.isMissing(i)) { |
---|
| 531 | tempS++; |
---|
| 532 | tempW++; |
---|
| 533 | } |
---|
| 534 | } |
---|
| 535 | } |
---|
| 536 | |
---|
| 537 | deltamax = tempS / Math.pow(tempW, m_Repulsion); |
---|
| 538 | for (int i = 0; i < clusters.size(); i++) { |
---|
| 539 | CLOPECluster tempcluster = clusters.get(i); |
---|
| 540 | delta = tempcluster.DeltaAdd(inst, m_Repulsion); |
---|
| 541 | // System.out.println("delta " + delta); |
---|
| 542 | if (delta > deltamax) { |
---|
| 543 | deltamax = delta; |
---|
| 544 | clustermax = i; |
---|
| 545 | } |
---|
| 546 | } |
---|
| 547 | if (clustermax == -1) { |
---|
| 548 | CLOPECluster newcluster = new CLOPECluster(); |
---|
| 549 | clusters.add(newcluster); |
---|
| 550 | newcluster.AddInstance(inst); |
---|
| 551 | return clusters.size() - 1; |
---|
| 552 | } |
---|
| 553 | clusters.get(clustermax).AddInstance(inst); |
---|
| 554 | return clustermax; |
---|
| 555 | } |
---|
| 556 | |
---|
| 557 | /** |
---|
| 558 | * Classifies a given instance. |
---|
| 559 | * |
---|
| 560 | * @param instance The instance to be assigned to a cluster |
---|
| 561 | * @return int The number of the assigned cluster as an integer |
---|
| 562 | * @throws java.lang.Exception If instance could not be clustered |
---|
| 563 | * successfully |
---|
| 564 | */ |
---|
| 565 | public int clusterInstance(Instance instance) throws Exception { |
---|
| 566 | if (m_processed_InstanceID >= m_numberOfInstances) { |
---|
| 567 | m_processed_InstanceID = 0; |
---|
| 568 | } |
---|
| 569 | int i = m_clusterAssignments.get(m_processed_InstanceID); |
---|
| 570 | m_processed_InstanceID++; |
---|
| 571 | return i; |
---|
| 572 | } |
---|
| 573 | |
---|
| 574 | /** |
---|
| 575 | * return a string describing this clusterer |
---|
| 576 | * |
---|
| 577 | * @return a description of the clusterer as a string |
---|
| 578 | */ |
---|
| 579 | public String toString() { |
---|
| 580 | StringBuffer stringBuffer = new StringBuffer(); |
---|
| 581 | stringBuffer.append("CLOPE clustering results\n" + |
---|
| 582 | "========================================================================================\n\n"); |
---|
| 583 | stringBuffer.append("Clustered instances: " + m_clusterAssignments.size() + "\n"); |
---|
| 584 | return stringBuffer.toString() + "\n"; |
---|
| 585 | } |
---|
| 586 | |
---|
| 587 | /** |
---|
| 588 | * Returns a string describing this DataMining-Algorithm |
---|
| 589 | * @return String Information for the gui-explorer |
---|
| 590 | */ |
---|
| 591 | public String globalInfo() { |
---|
| 592 | return getTechnicalInformation().toString(); |
---|
| 593 | } |
---|
| 594 | |
---|
| 595 | /** |
---|
| 596 | * Returns an instance of a TechnicalInformation object, containing |
---|
| 597 | * detailed information about the technical background of this class, |
---|
| 598 | * e.g., paper reference or book this class is based on. |
---|
| 599 | * |
---|
| 600 | * @return the technical information about this class |
---|
| 601 | */ |
---|
| 602 | public TechnicalInformation getTechnicalInformation() { |
---|
| 603 | TechnicalInformation result; |
---|
| 604 | |
---|
| 605 | result = new TechnicalInformation(Type.INPROCEEDINGS); |
---|
| 606 | result.setValue(Field.AUTHOR, "Yiling Yang and Xudong Guan and Jinyuan You"); |
---|
| 607 | result.setValue(Field.TITLE, "CLOPE: a fast and effective clustering algorithm for transactional data"); |
---|
| 608 | result.setValue(Field.BOOKTITLE, "Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining"); |
---|
| 609 | result.setValue(Field.YEAR, "2002"); |
---|
| 610 | result.setValue(Field.PAGES, "682-687"); |
---|
| 611 | result.setValue(Field.PUBLISHER, "ACM New York, NY, USA"); |
---|
| 612 | |
---|
| 613 | return result; |
---|
| 614 | } |
---|
| 615 | |
---|
| 616 | /** |
---|
| 617 | * Returns the revision string. |
---|
| 618 | * |
---|
| 619 | * @return the revision |
---|
| 620 | */ |
---|
| 621 | public String getRevision() { |
---|
| 622 | return RevisionUtils.extract("$Revision: 5488 $"); |
---|
| 623 | } |
---|
| 624 | |
---|
| 625 | /** |
---|
| 626 | * Main method for testing this class. |
---|
| 627 | * |
---|
| 628 | * @param argv should contain the following arguments: <p> |
---|
| 629 | * -t training file [-R repulsion] |
---|
| 630 | */ |
---|
| 631 | public static void main(String[] argv) { |
---|
| 632 | runClusterer(new CLOPE(), argv); |
---|
| 633 | } |
---|
| 634 | } |
---|
| 635 | |
---|