Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

MultilayerPerceptron.java @ 29

Last change on this file since 29 was 29, checked in by gnappo, 14 years ago
Taggata versione per la demo e aggiunto branch.
File size: 80.3 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* MultilayerPerceptron.java
19	* Copyright (C) 2000 University of Waikato, Hamilton, New Zealand
20	*/
21
22	package weka.classifiers.functions;
23
24	import weka.classifiers.Classifier;
25	import weka.classifiers.AbstractClassifier;
26	import weka.classifiers.functions.neural.LinearUnit;
27	import weka.classifiers.functions.neural.NeuralConnection;
28	import weka.classifiers.functions.neural.NeuralNode;
29	import weka.classifiers.functions.neural.SigmoidUnit;
30	import weka.core.Capabilities;
31	import weka.core.FastVector;
32	import weka.core.Instance;
33	import weka.core.Instances;
34	import weka.core.Option;
35	import weka.core.OptionHandler;
36	import weka.core.Randomizable;
37	import weka.core.RevisionHandler;
38	import weka.core.RevisionUtils;
39	import weka.core.Utils;
40	import weka.core.WeightedInstancesHandler;
41	import weka.core.Capabilities.Capability;
42	import weka.filters.Filter;
43	import weka.filters.unsupervised.attribute.NominalToBinary;
44
45	import java.awt.BorderLayout;
46	import java.awt.Color;
47	import java.awt.Component;
48	import java.awt.Dimension;
49	import java.awt.FontMetrics;
50	import java.awt.Graphics;
51	import java.awt.event.ActionEvent;
52	import java.awt.event.ActionListener;
53	import java.awt.event.MouseAdapter;
54	import java.awt.event.MouseEvent;
55	import java.awt.event.WindowAdapter;
56	import java.awt.event.WindowEvent;
57	import java.util.Enumeration;
58	import java.util.Random;
59	import java.util.StringTokenizer;
60	import java.util.Vector;
61
62	import javax.swing.BorderFactory;
63	import javax.swing.Box;
64	import javax.swing.BoxLayout;
65	import javax.swing.JButton;
66	import javax.swing.JFrame;
67	import javax.swing.JLabel;
68	import javax.swing.JOptionPane;
69	import javax.swing.JPanel;
70	import javax.swing.JScrollPane;
71	import javax.swing.JTextField;
72
73	/**
74	<!-- globalinfo-start -->
75	* A Classifier that uses backpropagation to classify instances.<br/>
76	* This network can be built by hand, created by an algorithm or both. The network can also be monitored and modified during training time. The nodes in this network are all sigmoid (except for when the class is numeric in which case the output nodes become unthresholded linear units).
77	* <p/>
78	<!-- globalinfo-end -->
79	*
80	<!-- options-start -->
81	* Valid options are: <p/>
82	*
83	* <pre> -L <learning rate>
84	* Learning Rate for the backpropagation algorithm.
85	* (Value should be between 0 - 1, Default = 0.3).</pre>
86	*
87	* <pre> -M <momentum>
88	* Momentum Rate for the backpropagation algorithm.
89	* (Value should be between 0 - 1, Default = 0.2).</pre>
90	*
91	* <pre> -N <number of epochs>
92	* Number of epochs to train through.
93	* (Default = 500).</pre>
94	*
95	* <pre> -V <percentage size of validation set>
96	* Percentage size of validation set to use to terminate
97	* training (if this is non zero it can pre-empt num of epochs).
98	* (Value should be between 0 - 100, Default = 0).</pre>
99	*
100	* <pre> -S <seed>
101	* The value used to seed the random number generator
102	* (Value should be >= 0 and a long, Default = 0).</pre>
103	*
104	* <pre> -E <threshold for number of consecutive errors>
105	* The consecutive number of errors allowed for validation
106	* testing before the network terminates.
107	* (Value should be > 0, Default = 20).</pre>
108	*
109	* <pre> -G
110	* GUI will be opened.
111	* (Use this to bring up a GUI).</pre>
112	*
113	* <pre> -A
114	* Autocreation of the network connections will NOT be done.
115	* (This will be ignored if -G is NOT set)</pre>
116	*
117	* <pre> -B
118	* A NominalToBinary filter will NOT automatically be used.
119	* (Set this to not use a NominalToBinary filter).</pre>
120	*
121	* <pre> -H <comma separated numbers for nodes on each layer>
122	* The hidden layers to be created for the network.
123	* (Value should be a list of comma separated Natural
124	* numbers or the letters 'a' = (attribs + classes) / 2,
125	* 'i' = attribs, 'o' = classes, 't' = attribs + classes)
126	* for wildcard values, Default = a).</pre>
127	*
128	* <pre> -C
129	* Normalizing a numeric class will NOT be done.
130	* (Set this to not normalize the class if it's numeric).</pre>
131	*
132	* <pre> -I
133	* Normalizing the attributes will NOT be done.
134	* (Set this to not normalize the attributes).</pre>
135	*
136	* <pre> -R
137	* Resetting the network will NOT be allowed.
138	* (Set this to not allow the network to reset).</pre>
139	*
140	* <pre> -D
141	* Learning rate decay will occur.
142	* (Set this to cause the learning rate to decay).</pre>
143	*
144	<!-- options-end -->
145	*
146	* @author Malcolm Ware (mfw4@cs.waikato.ac.nz)
147	* @version $Revision: 5928 $
148	*/
149	public class MultilayerPerceptron
150	extends AbstractClassifier
151	implements OptionHandler, WeightedInstancesHandler, Randomizable {
152
153	/** for serialization */
154	static final long serialVersionUID = 572250905027665169L;
155
156	/**
157	* Main method for testing this class.
158	*
159	* @param argv should contain command line options (see setOptions)
160	*/
161	public static void main(String [] argv) {
162	runClassifier(new MultilayerPerceptron(), argv);
163	}
164
165
166	/**
167	* This inner class is used to connect the nodes in the network up to
168	* the data that they are classifying, Note that objects of this class are
169	* only suitable to go on the attribute side or class side of the network
170	* and not both.
171	*/
172	protected class NeuralEnd
173	extends NeuralConnection {
174
175	/** for serialization */
176	static final long serialVersionUID = 7305185603191183338L;
177
178	/**
179	* the value that represents the instance value this node represents.
180	* For an input it is the attribute number, for an output, if nominal
181	* it is the class value.
182	*/
183	private int m_link;
184
185	/** True if node is an input, False if it's an output. */
186	private boolean m_input;
187
188	/**
189	* Constructor
190	*/
191	public NeuralEnd(String id) {
192	super(id);
193
194	m_link = 0;
195	m_input = true;
196
197	}
198
199	/**
200	* Call this function to determine if the point at x,y is on the unit.
201	* @param g The graphics context for font size info.
202	* @param x The x coord.
203	* @param y The y coord.
204	* @param w The width of the display.
205	* @param h The height of the display.
206	* @return True if the point is on the unit, false otherwise.
207	*/
208	public boolean onUnit(Graphics g, int x, int y, int w, int h) {
209
210	FontMetrics fm = g.getFontMetrics();
211	int l = (int)(m_x * w) - fm.stringWidth(m_id) / 2;
212	int t = (int)(m_y * h) - fm.getHeight() / 2;
213	if (x < l \|\| x > l + fm.stringWidth(m_id) + 4
214	\|\| y < t \|\| y > t + fm.getHeight() + fm.getDescent() + 4) {
215	return false;
216	}
217	return true;
218
219	}
220
221
222	/**
223	* This will draw the node id to the graphics context.
224	* @param g The graphics context.
225	* @param w The width of the drawing area.
226	* @param h The height of the drawing area.
227	*/
228	public void drawNode(Graphics g, int w, int h) {
229
230	if ((m_type & PURE_INPUT) == PURE_INPUT) {
231	g.setColor(Color.green);
232	}
233	else {
234	g.setColor(Color.orange);
235	}
236
237	FontMetrics fm = g.getFontMetrics();
238	int l = (int)(m_x * w) - fm.stringWidth(m_id) / 2;
239	int t = (int)(m_y * h) - fm.getHeight() / 2;
240	g.fill3DRect(l, t, fm.stringWidth(m_id) + 4
241	, fm.getHeight() + fm.getDescent() + 4
242	, true);
243	g.setColor(Color.black);
244
245	g.drawString(m_id, l + 2, t + fm.getHeight() + 2);
246
247	}
248
249
250	/**
251	* Call this function to draw the node highlighted.
252	* @param g The graphics context.
253	* @param w The width of the drawing area.
254	* @param h The height of the drawing area.
255	*/
256	public void drawHighlight(Graphics g, int w, int h) {
257
258	g.setColor(Color.black);
259	FontMetrics fm = g.getFontMetrics();
260	int l = (int)(m_x * w) - fm.stringWidth(m_id) / 2;
261	int t = (int)(m_y * h) - fm.getHeight() / 2;
262	g.fillRect(l - 2, t - 2, fm.stringWidth(m_id) + 8
263	, fm.getHeight() + fm.getDescent() + 8);
264	drawNode(g, w, h);
265	}
266
267	/**
268	* Call this to get the output value of this unit.
269	* @param calculate True if the value should be calculated if it hasn't
270	* been already.
271	* @return The output value, or NaN, if the value has not been calculated.
272	*/
273	public double outputValue(boolean calculate) {
274
275	if (Double.isNaN(m_unitValue) && calculate) {
276	if (m_input) {
277	if (m_currentInstance.isMissing(m_link)) {
278	m_unitValue = 0;
279	}
280	else {
281
282	m_unitValue = m_currentInstance.value(m_link);
283	}
284	}
285	else {
286	//node is an output.
287	m_unitValue = 0;
288	for (int noa = 0; noa < m_numInputs; noa++) {
289	m_unitValue += m_inputList[noa].outputValue(true);
290
291	}
292	if (m_numeric && m_normalizeClass) {
293	//then scale the value;
294	//this scales linearly from between -1 and 1
295	m_unitValue = m_unitValue *
296	m_attributeRanges[m_instances.classIndex()] +
297	m_attributeBases[m_instances.classIndex()];
298	}
299	}
300	}
301	return m_unitValue;
302
303
304	}
305
306	/**
307	* Call this to get the error value of this unit, which in this case is
308	* the difference between the predicted class, and the actual class.
309	* @param calculate True if the value should be calculated if it hasn't
310	* been already.
311	* @return The error value, or NaN, if the value has not been calculated.
312	*/
313	public double errorValue(boolean calculate) {
314
315	if (!Double.isNaN(m_unitValue) && Double.isNaN(m_unitError)
316	&& calculate) {
317
318	if (m_input) {
319	m_unitError = 0;
320	for (int noa = 0; noa < m_numOutputs; noa++) {
321	m_unitError += m_outputList[noa].errorValue(true);
322	}
323	}
324	else {
325	if (m_currentInstance.classIsMissing()) {
326	m_unitError = .1;
327	}
328	else if (m_instances.classAttribute().isNominal()) {
329	if (m_currentInstance.classValue() == m_link) {
330	m_unitError = 1 - m_unitValue;
331	}
332	else {
333	m_unitError = 0 - m_unitValue;
334	}
335	}
336	else if (m_numeric) {
337
338	if (m_normalizeClass) {
339	if (m_attributeRanges[m_instances.classIndex()] == 0) {
340	m_unitError = 0;
341	}
342	else {
343	m_unitError = (m_currentInstance.classValue() - m_unitValue ) /
344	m_attributeRanges[m_instances.classIndex()];
345	//m_numericRange;
346
347	}
348	}
349	else {
350	m_unitError = m_currentInstance.classValue() - m_unitValue;
351	}
352	}
353	}
354	}
355	return m_unitError;
356	}
357
358
359	/**
360	* Call this to reset the value and error for this unit, ready for the next
361	* run. This will also call the reset function of all units that are
362	* connected as inputs to this one.
363	* This is also the time that the update for the listeners will be
364	* performed.
365	*/
366	public void reset() {
367
368	if (!Double.isNaN(m_unitValue) \|\| !Double.isNaN(m_unitError)) {
369	m_unitValue = Double.NaN;
370	m_unitError = Double.NaN;
371	m_weightsUpdated = false;
372	for (int noa = 0; noa < m_numInputs; noa++) {
373	m_inputList[noa].reset();
374	}
375	}
376	}
377
378	/**
379	* Call this to have the connection save the current
380	* weights.
381	*/
382	public void saveWeights() {
383	for (int i = 0; i < m_numInputs; i++) {
384	m_inputList[i].saveWeights();
385	}
386	}
387
388	/**
389	* Call this to have the connection restore from the saved
390	* weights.
391	*/
392	public void restoreWeights() {
393	for (int i = 0; i < m_numInputs; i++) {
394	m_inputList[i].restoreWeights();
395	}
396	}
397
398
399	/**
400	* Call this function to set What this end unit represents.
401	* @param input True if this unit is used for entering an attribute,
402	* False if it's used for determining a class value.
403	* @param val The attribute number or class type that this unit represents.
404	* (for nominal attributes).
405	*/
406	public void setLink(boolean input, int val) throws Exception {
407	m_input = input;
408
409	if (input) {
410	m_type = PURE_INPUT;
411	}
412	else {
413	m_type = PURE_OUTPUT;
414	}
415	if (val < 0 \|\| (input && val > m_instances.numAttributes())
416	\|\| (!input && m_instances.classAttribute().isNominal()
417	&& val > m_instances.classAttribute().numValues())) {
418	m_link = 0;
419	}
420	else {
421	m_link = val;
422	}
423	}
424
425	/**
426	* @return link for this node.
427	*/
428	public int getLink() {
429	return m_link;
430	}
431
432	/**
433	* Returns the revision string.
434	*
435	* @return the revision
436	*/
437	public String getRevision() {
438	return RevisionUtils.extract("$Revision: 5928 $");
439	}
440	}
441
442
443
444	/** Inner class used to draw the nodes onto.(uses the node lists!!)
445	* This will also handle the user input. */
446	private class NodePanel
447	extends JPanel
448	implements RevisionHandler {
449
450	/** for serialization */
451	static final long serialVersionUID = -3067621833388149984L;
452
453	/**
454	* The constructor.
455	*/
456	public NodePanel() {
457
458
459	addMouseListener(new MouseAdapter() {
460
461	public void mousePressed(MouseEvent e) {
462
463	if (!m_stopped) {
464	return;
465	}
466	if ((e.getModifiers() & MouseEvent.BUTTON1_MASK) == MouseEvent.BUTTON1_MASK &&
467	!e.isAltDown()) {
468	Graphics g = NodePanel.this.getGraphics();
469	int x = e.getX();
470	int y = e.getY();
471	int w = NodePanel.this.getWidth();
472	int h = NodePanel.this.getHeight();
473	FastVector tmp = new FastVector(4);
474	for (int noa = 0; noa < m_numAttributes; noa++) {
475	if (m_inputs[noa].onUnit(g, x, y, w, h)) {
476	tmp.addElement(m_inputs[noa]);
477	selection(tmp,
478	(e.getModifiers() & MouseEvent.CTRL_MASK) == MouseEvent.CTRL_MASK
479	, true);
480	return;
481	}
482	}
483	for (int noa = 0; noa < m_numClasses; noa++) {
484	if (m_outputs[noa].onUnit(g, x, y, w, h)) {
485	tmp.addElement(m_outputs[noa]);
486	selection(tmp,
487	(e.getModifiers() & MouseEvent.CTRL_MASK) == MouseEvent.CTRL_MASK
488	, true);
489	return;
490	}
491	}
492	for (int noa = 0; noa < m_neuralNodes.length; noa++) {
493	if (m_neuralNodes[noa].onUnit(g, x, y, w, h)) {
494	tmp.addElement(m_neuralNodes[noa]);
495	selection(tmp,
496	(e.getModifiers() & MouseEvent.CTRL_MASK) == MouseEvent.CTRL_MASK
497	, true);
498	return;
499	}
500
501	}
502	NeuralNode temp = new NeuralNode(String.valueOf(m_nextId),
503	m_random, m_sigmoidUnit);
504	m_nextId++;
505	temp.setX((double)e.getX() / w);
506	temp.setY((double)e.getY() / h);
507	tmp.addElement(temp);
508	addNode(temp);
509	selection(tmp, (e.getModifiers() & MouseEvent.CTRL_MASK) == MouseEvent.CTRL_MASK
510	, true);
511	}
512	else {
513	//then right click
514	Graphics g = NodePanel.this.getGraphics();
515	int x = e.getX();
516	int y = e.getY();
517	int w = NodePanel.this.getWidth();
518	int h = NodePanel.this.getHeight();
519	FastVector tmp = new FastVector(4);
520	for (int noa = 0; noa < m_numAttributes; noa++) {
521	if (m_inputs[noa].onUnit(g, x, y, w, h)) {
522	tmp.addElement(m_inputs[noa]);
523	selection(tmp,
524	(e.getModifiers() & MouseEvent.CTRL_MASK) == MouseEvent.CTRL_MASK
525	, false);
526	return;
527	}
528
529
530	}
531	for (int noa = 0; noa < m_numClasses; noa++) {
532	if (m_outputs[noa].onUnit(g, x, y, w, h)) {
533	tmp.addElement(m_outputs[noa]);
534	selection(tmp,
535	(e.getModifiers() & MouseEvent.CTRL_MASK) == MouseEvent.CTRL_MASK
536	, false);
537	return;
538	}
539	}
540	for (int noa = 0; noa < m_neuralNodes.length; noa++) {
541	if (m_neuralNodes[noa].onUnit(g, x, y, w, h)) {
542	tmp.addElement(m_neuralNodes[noa]);
543	selection(tmp,
544	(e.getModifiers() & MouseEvent.CTRL_MASK) == MouseEvent.CTRL_MASK
545	, false);
546	return;
547	}
548	}
549	selection(null, (e.getModifiers() & MouseEvent.CTRL_MASK) == MouseEvent.CTRL_MASK
550	, false);
551	}
552	}
553	});
554	}
555
556
557	/**
558	* This function gets called when the user has clicked something
559	* It will amend the current selection or connect the current selection
560	* to the new selection.
561	* Or if nothing was selected and the right button was used it will
562	* delete the node.
563	* @param v The units that were selected.
564	* @param ctrl True if ctrl was held down.
565	* @param left True if it was the left mouse button.
566	*/
567	private void selection(FastVector v, boolean ctrl, boolean left) {
568
569	if (v == null) {
570	//then unselect all.
571	m_selected.removeAllElements();
572	repaint();
573	return;
574	}
575
576
577	//then exclusive or the new selection with the current one.
578	if ((ctrl \|\| m_selected.size() == 0) && left) {
579	boolean removed = false;
580	for (int noa = 0; noa < v.size(); noa++) {
581	removed = false;
582	for (int nob = 0; nob < m_selected.size(); nob++) {
583	if (v.elementAt(noa) == m_selected.elementAt(nob)) {
584	//then remove that element
585	m_selected.removeElementAt(nob);
586	removed = true;
587	break;
588	}
589	}
590	if (!removed) {
591	m_selected.addElement(v.elementAt(noa));
592	}
593	}
594	repaint();
595	return;
596	}
597
598
599	if (left) {
600	//then connect the current selection to the new one.
601	for (int noa = 0; noa < m_selected.size(); noa++) {
602	for (int nob = 0; nob < v.size(); nob++) {
603	NeuralConnection
604	.connect((NeuralConnection)m_selected.elementAt(noa)
605	, (NeuralConnection)v.elementAt(nob));
606	}
607	}
608	}
609	else if (m_selected.size() > 0) {
610	//then disconnect the current selection from the new one.
611
612	for (int noa = 0; noa < m_selected.size(); noa++) {
613	for (int nob = 0; nob < v.size(); nob++) {
614	NeuralConnection
615	.disconnect((NeuralConnection)m_selected.elementAt(noa)
616	, (NeuralConnection)v.elementAt(nob));
617
618	NeuralConnection
619	.disconnect((NeuralConnection)v.elementAt(nob)
620	, (NeuralConnection)m_selected.elementAt(noa));
621
622	}
623	}
624	}
625	else {
626	//then remove the selected node. (it was right clicked while
627	//no other units were selected
628	for (int noa = 0; noa < v.size(); noa++) {
629	((NeuralConnection)v.elementAt(noa)).removeAllInputs();
630	((NeuralConnection)v.elementAt(noa)).removeAllOutputs();
631	removeNode((NeuralConnection)v.elementAt(noa));
632	}
633	}
634	repaint();
635	}
636
637	/**
638	* This will paint the nodes ontot the panel.
639	* @param g The graphics context.
640	*/
641	public void paintComponent(Graphics g) {
642
643	super.paintComponent(g);
644	int x = getWidth();
645	int y = getHeight();
646	if (25 * m_numAttributes > 25 * m_numClasses &&
647	25 * m_numAttributes > y) {
648	setSize(x, 25 * m_numAttributes);
649	}
650	else if (25 * m_numClasses > y) {
651	setSize(x, 25 * m_numClasses);
652	}
653	else {
654	setSize(x, y);
655	}
656
657	y = getHeight();
658	for (int noa = 0; noa < m_numAttributes; noa++) {
659	m_inputs[noa].drawInputLines(g, x, y);
660	}
661	for (int noa = 0; noa < m_numClasses; noa++) {
662	m_outputs[noa].drawInputLines(g, x, y);
663	m_outputs[noa].drawOutputLines(g, x, y);
664	}
665	for (int noa = 0; noa < m_neuralNodes.length; noa++) {
666	m_neuralNodes[noa].drawInputLines(g, x, y);
667	}
668	for (int noa = 0; noa < m_numAttributes; noa++) {
669	m_inputs[noa].drawNode(g, x, y);
670	}
671	for (int noa = 0; noa < m_numClasses; noa++) {
672	m_outputs[noa].drawNode(g, x, y);
673	}
674	for (int noa = 0; noa < m_neuralNodes.length; noa++) {
675	m_neuralNodes[noa].drawNode(g, x, y);
676	}
677
678	for (int noa = 0; noa < m_selected.size(); noa++) {
679	((NeuralConnection)m_selected.elementAt(noa)).drawHighlight(g, x, y);
680	}
681	}
682
683	/**
684	* Returns the revision string.
685	*
686	* @return the revision
687	*/
688	public String getRevision() {
689	return RevisionUtils.extract("$Revision: 5928 $");
690	}
691	}
692
693	/**
694	* This provides the basic controls for working with the neuralnetwork
695	* @author Malcolm Ware (mfw4@cs.waikato.ac.nz)
696	* @version $Revision: 5928 $
697	*/
698	class ControlPanel
699	extends JPanel
700	implements RevisionHandler {
701
702	/** for serialization */
703	static final long serialVersionUID = 7393543302294142271L;
704
705	/** The start stop button. */
706	public JButton m_startStop;
707
708	/** The button to accept the network (even if it hasn't done all epochs. */
709	public JButton m_acceptButton;
710
711	/** A label to state the number of epochs processed so far. */
712	public JPanel m_epochsLabel;
713
714	/** A label to state the total number of epochs to be processed. */
715	public JLabel m_totalEpochsLabel;
716
717	/** A text field to allow the changing of the total number of epochs. */
718	public JTextField m_changeEpochs;
719
720	/** A label to state the learning rate. */
721	public JLabel m_learningLabel;
722
723	/** A label to state the momentum. */
724	public JLabel m_momentumLabel;
725
726	/** A text field to allow the changing of the learning rate. */
727	public JTextField m_changeLearning;
728
729	/** A text field to allow the changing of the momentum. */
730	public JTextField m_changeMomentum;
731
732	/** A label to state roughly the accuracy of the network.(because the
733	accuracy is calculated per epoch, but the network is changing
734	throughout each epoch train).
735	*/
736	public JPanel m_errorLabel;
737
738	/** The constructor. */
739	public ControlPanel() {
740	setBorder(BorderFactory.createTitledBorder("Controls"));
741
742	m_totalEpochsLabel = new JLabel("Num Of Epochs ");
743	m_epochsLabel = new JPanel(){
744	/** for serialization */
745	private static final long serialVersionUID = 2562773937093221399L;
746
747	public void paintComponent(Graphics g) {
748	super.paintComponent(g);
749	g.setColor(m_controlPanel.m_totalEpochsLabel.getForeground());
750	g.drawString("Epoch " + m_epoch, 0, 10);
751	}
752	};
753	m_epochsLabel.setFont(m_totalEpochsLabel.getFont());
754
755	m_changeEpochs = new JTextField();
756	m_changeEpochs.setText("" + m_numEpochs);
757	m_errorLabel = new JPanel(){
758	/** for serialization */
759	private static final long serialVersionUID = 4390239056336679189L;
760
761	public void paintComponent(Graphics g) {
762	super.paintComponent(g);
763	g.setColor(m_controlPanel.m_totalEpochsLabel.getForeground());
764	if (m_valSize == 0) {
765	g.drawString("Error per Epoch = " +
766	Utils.doubleToString(m_error, 7), 0, 10);
767	}
768	else {
769	g.drawString("Validation Error per Epoch = "
770	+ Utils.doubleToString(m_error, 7), 0, 10);
771	}
772	}
773	};
774	m_errorLabel.setFont(m_epochsLabel.getFont());
775
776	m_learningLabel = new JLabel("Learning Rate = ");
777	m_momentumLabel = new JLabel("Momentum = ");
778	m_changeLearning = new JTextField();
779	m_changeMomentum = new JTextField();
780	m_changeLearning.setText("" + m_learningRate);
781	m_changeMomentum.setText("" + m_momentum);
782	setLayout(new BorderLayout(15, 10));
783
784	m_stopIt = true;
785	m_accepted = false;
786	m_startStop = new JButton("Start");
787	m_startStop.setActionCommand("Start");
788
789	m_acceptButton = new JButton("Accept");
790	m_acceptButton.setActionCommand("Accept");
791
792	JPanel buttons = new JPanel();
793	buttons.setLayout(new BoxLayout(buttons, BoxLayout.Y_AXIS));
794	buttons.add(m_startStop);
795	buttons.add(m_acceptButton);
796	add(buttons, BorderLayout.WEST);
797	JPanel data = new JPanel();
798	data.setLayout(new BoxLayout(data, BoxLayout.Y_AXIS));
799
800	Box ab = new Box(BoxLayout.X_AXIS);
801	ab.add(m_epochsLabel);
802	data.add(ab);
803
804	ab = new Box(BoxLayout.X_AXIS);
805	Component b = Box.createGlue();
806	ab.add(m_totalEpochsLabel);
807	ab.add(m_changeEpochs);
808	m_changeEpochs.setMaximumSize(new Dimension(200, 20));
809	ab.add(b);
810	data.add(ab);
811
812	ab = new Box(BoxLayout.X_AXIS);
813	ab.add(m_errorLabel);
814	data.add(ab);
815
816	add(data, BorderLayout.CENTER);
817
818	data = new JPanel();
819	data.setLayout(new BoxLayout(data, BoxLayout.Y_AXIS));
820	ab = new Box(BoxLayout.X_AXIS);
821	b = Box.createGlue();
822	ab.add(m_learningLabel);
823	ab.add(m_changeLearning);
824	m_changeLearning.setMaximumSize(new Dimension(200, 20));
825	ab.add(b);
826	data.add(ab);
827
828	ab = new Box(BoxLayout.X_AXIS);
829	b = Box.createGlue();
830	ab.add(m_momentumLabel);
831	ab.add(m_changeMomentum);
832	m_changeMomentum.setMaximumSize(new Dimension(200, 20));
833	ab.add(b);
834	data.add(ab);
835
836	add(data, BorderLayout.EAST);
837
838	m_startStop.addActionListener(new ActionListener() {
839	public void actionPerformed(ActionEvent e) {
840	if (e.getActionCommand().equals("Start")) {
841	m_stopIt = false;
842	m_startStop.setText("Stop");
843	m_startStop.setActionCommand("Stop");
844	int n = Integer.valueOf(m_changeEpochs.getText()).intValue();
845
846	m_numEpochs = n;
847	m_changeEpochs.setText("" + m_numEpochs);
848
849	double m=Double.valueOf(m_changeLearning.getText()).
850	doubleValue();
851	setLearningRate(m);
852	m_changeLearning.setText("" + m_learningRate);
853
854	m = Double.valueOf(m_changeMomentum.getText()).doubleValue();
855	setMomentum(m);
856	m_changeMomentum.setText("" + m_momentum);
857
858	blocker(false);
859	}
860	else if (e.getActionCommand().equals("Stop")) {
861	m_stopIt = true;
862	m_startStop.setText("Start");
863	m_startStop.setActionCommand("Start");
864	}
865	}
866	});
867
868	m_acceptButton.addActionListener(new ActionListener() {
869	public void actionPerformed(ActionEvent e) {
870	m_accepted = true;
871	blocker(false);
872	}
873	});
874
875	m_changeEpochs.addActionListener(new ActionListener() {
876	public void actionPerformed(ActionEvent e) {
877	int n = Integer.valueOf(m_changeEpochs.getText()).intValue();
878	if (n > 0) {
879	m_numEpochs = n;
880	blocker(false);
881	}
882	}
883	});
884	}
885
886	/**
887	* Returns the revision string.
888	*
889	* @return the revision
890	*/
891	public String getRevision() {
892	return RevisionUtils.extract("$Revision: 5928 $");
893	}
894	}
895
896
/** A ZeroR model in case no model can be built from the data. */
private Classifier m_ZeroR;

/** The training instances. */
private Instances m_instances;

/** The current instance running through the network. */
private Instance m_currentInstance;

/** A flag to say that it's a numeric class. */
private boolean m_numeric;

/** The ranges for all the attributes (indexed by attribute index). */
private double[] m_attributeRanges;

/** The base values for all the attributes (indexed by attribute index). */
private double[] m_attributeBases;

/** The output units.(only feeds the errors, does no calcs) */
private NeuralEnd[] m_outputs;

/** The input units.(only feeds the inputs does no calcs) */
private NeuralEnd[] m_inputs;

/** All the nodes that actually comprise the logical neural net. */
private NeuralConnection[] m_neuralNodes;

/** The number of classes. */
private int m_numClasses = 0;

/** The number of attributes. */
private int m_numAttributes = 0; //note the number doesn't include the class.

/** The panel the nodes are displayed on. */
private NodePanel m_nodePanel;

/** The control panel. */
private ControlPanel m_controlPanel;

/** The next id number available for default naming. */
private int m_nextId;

/** A Vector list of the units currently selected. */
private FastVector m_selected;

/** A Vector list of the graphers. */
private FastVector m_graphers;

/** The number of epochs to train through. */
private int m_numEpochs;

/** A flag to state if the network should be running, or stopped. */
private boolean m_stopIt;

/** A flag to state that the network has in fact stopped. */
private boolean m_stopped;

/** A flag to state that the network should be accepted the way it is. */
private boolean m_accepted;
/** The window for the network. */
private JFrame m_win;

/** A flag to tell the build classifier to automatically build a neural net.
 */
private boolean m_autoBuild;

/** A flag to state that the gui for the network should be brought up.
    To allow interaction while training. */
private boolean m_gui;

/** An int to say how big the validation set should be. */
private int m_valSize;

/** The number to use to quit on validation testing. */
private int m_driftThreshold;

/** The number used to seed the random number generator. */
private int m_randomSeed;

/** The actual random number generator. */
private Random m_random;

/** A flag to state that a nominal to binary filter should be used. */
private boolean m_useNomToBin;

/** The actual filter. */
private NominalToBinary m_nominalToBinaryFilter;

/** The string that defines the hidden layers. */
private String m_hiddenLayers;

/** This flag states that the user wants the input values normalized. */
private boolean m_normalizeAttributes;

/** This flag states that the user wants the learning rate to decay. */
private boolean m_decay;

/** This is the learning rate for the network. */
private double m_learningRate;

/** This is the momentum for the network. */
private double m_momentum;

/** Shows the number of the epoch that the network just finished. */
private int m_epoch;

/** Shows the error of the epoch that the network just finished. */
private double m_error;

/** This flag states that the user wants the network to restart if it
 * is found to be generating infinity or NaN for the error value. This
 * would restart the network with the current options except that the
 * learning rate would be smaller than before, (perhaps half of its current
 * value). This option will not be available if the gui is chosen (if the
 * gui is open the user can fix the network themselves, it is an
 * architectural minefield for the network to be reset with the gui open). */
private boolean m_reset;

/** This flag states that the user wants the class to be normalized while
 * processing in the network is done. (the final answer will be in the
 * original range regardless). This option will only be used when the class
 * is numeric. */
private boolean m_normalizeClass;

/**
 * This is a sigmoid unit.
 */
private SigmoidUnit m_sigmoidUnit;

/**
 * This is a linear unit.
 */
private LinearUnit m_linearUnit;
1030
/**
 * The constructor. Leaves the classifier with no data, no GUI components,
 * an empty network and every option at its default value.
 */
public MultilayerPerceptron() {
  // nothing is attached yet: no data, no random generator, no panels
  m_instances = null;
  m_currentInstance = null;
  m_random = null;
  m_nodePanel = null;
  m_controlPanel = null;

  // progress counters
  m_epoch = 0;
  m_error = 0;

  // an empty network
  m_inputs = new NeuralEnd[0];
  m_outputs = new NeuralEnd[0];
  m_neuralNodes = new NeuralConnection[0];
  m_numAttributes = 0;
  m_numClasses = 0;
  m_nextId = 0;
  m_numeric = false;

  // GUI bookkeeping
  m_selected = new FastVector(4);
  m_graphers = new FastVector(2);

  // training state: stopped and not yet accepted
  m_stopIt = true;
  m_stopped = true;
  m_accepted = false;

  // helper objects
  m_nominalToBinaryFilter = new NominalToBinary();
  m_sigmoidUnit = new SigmoidUnit();
  m_linearUnit = new LinearUnit();

  //setting all the options to their defaults. To completely change these
  //defaults they will also need to be changed down the bottom in the
  //setoptions function (the text info in the accompanying functions should
  //also be changed to reflect the new defaults
  m_learningRate = .3;
  m_momentum = .2;
  m_numEpochs = 500;
  m_valSize = 0;
  m_randomSeed = 0;
  m_driftThreshold = 20;
  m_hiddenLayers = "a";
  m_gui = false;
  m_autoBuild = true;
  m_useNomToBin = true;
  m_normalizeAttributes = true;
  m_normalizeClass = true;
  m_reset = true;
  m_decay = false;
}
1078
1079	/**
1080	* @param d True if the learning rate should decay.
1081	*/
1082	public void setDecay(boolean d) {
1083	m_decay = d;
1084	}
1085
1086	/**
1087	* @return the flag for having the learning rate decay.
1088	*/
1089	public boolean getDecay() {
1090	return m_decay;
1091	}
1092
1093	/**
1094	* This sets the network up to be able to reset itself with the current
1095	* settings and the learning rate at half of what it is currently. This
1096	* will only happen if the network creates NaN or infinite errors. Also this
1097	* will continue to happen until the network is trained properly. The
1098	* learning rate will also get set back to it's original value at the end of
1099	* this. This can only be set to true if the GUI is not brought up.
1100	* @param r True if the network should restart with it's current options
1101	* and set the learning rate to half what it currently is.
1102	*/
1103	public void setReset(boolean r) {
1104	if (m_gui) {
1105	r = false;
1106	}
1107	m_reset = r;
1108
1109	}
1110
1111	/**
1112	* @return The flag for reseting the network.
1113	*/
1114	public boolean getReset() {
1115	return m_reset;
1116	}
1117
1118	/**
1119	* @param c True if the class should be normalized (the class will only ever
1120	* be normalized if it is numeric). (Normalization puts the range between
1121	* -1 - 1).
1122	*/
1123	public void setNormalizeNumericClass(boolean c) {
1124	m_normalizeClass = c;
1125	}
1126
1127	/**
1128	* @return The flag for normalizing a numeric class.
1129	*/
1130	public boolean getNormalizeNumericClass() {
1131	return m_normalizeClass;
1132	}
1133
1134	/**
1135	* @param a True if the attributes should be normalized (even nominal
1136	* attributes will get normalized here) (range goes between -1 - 1).
1137	*/
1138	public void setNormalizeAttributes(boolean a) {
1139	m_normalizeAttributes = a;
1140	}
1141
1142	/**
1143	* @return The flag for normalizing attributes.
1144	*/
1145	public boolean getNormalizeAttributes() {
1146	return m_normalizeAttributes;
1147	}
1148
1149	/**
1150	* @param f True if a nominalToBinary filter should be used on the
1151	* data.
1152	*/
1153	public void setNominalToBinaryFilter(boolean f) {
1154	m_useNomToBin = f;
1155	}
1156
1157	/**
1158	* @return The flag for nominal to binary filter use.
1159	*/
1160	public boolean getNominalToBinaryFilter() {
1161	return m_useNomToBin;
1162	}
1163
1164	/**
1165	* This seeds the random number generator, that is used when a random
1166	* number is needed for the network.
1167	* @param l The seed.
1168	*/
1169	public void setSeed(int l) {
1170	if (l >= 0) {
1171	m_randomSeed = l;
1172	}
1173	}
1174
1175	/**
1176	* @return The seed for the random number generator.
1177	*/
1178	public int getSeed() {
1179	return m_randomSeed;
1180	}
1181
1182	/**
1183	* This sets the threshold to use for when validation testing is being done.
1184	* It works by ending testing once the error on the validation set has
1185	* consecutively increased a certain number of times.
1186	* @param t The threshold to use for this.
1187	*/
1188	public void setValidationThreshold(int t) {
1189	if (t > 0) {
1190	m_driftThreshold = t;
1191	}
1192	}
1193
1194	/**
1195	* @return The threshold used for validation testing.
1196	*/
1197	public int getValidationThreshold() {
1198	return m_driftThreshold;
1199	}
1200
1201	/**
1202	* The learning rate can be set using this command.
1203	* NOTE That this is a static variable so it affect all networks that are
1204	* running.
1205	* Must be greater than 0 and no more than 1.
1206	* @param l The New learning rate.
1207	*/
1208	public void setLearningRate(double l) {
1209	if (l > 0 && l <= 1) {
1210	m_learningRate = l;
1211
1212	if (m_controlPanel != null) {
1213	m_controlPanel.m_changeLearning.setText("" + l);
1214	}
1215	}
1216	}
1217
1218	/**
1219	* @return The learning rate for the nodes.
1220	*/
1221	public double getLearningRate() {
1222	return m_learningRate;
1223	}
1224
1225	/**
1226	* The momentum can be set using this command.
1227	* THE same conditions apply to this as to the learning rate.
1228	* @param m The new Momentum.
1229	*/
1230	public void setMomentum(double m) {
1231	if (m >= 0 && m <= 1) {
1232	m_momentum = m;
1233
1234	if (m_controlPanel != null) {
1235	m_controlPanel.m_changeMomentum.setText("" + m);
1236	}
1237	}
1238	}
1239
1240	/**
1241	* @return The momentum for the nodes.
1242	*/
1243	public double getMomentum() {
1244	return m_momentum;
1245	}
1246
1247	/**
1248	* This will set whether the network is automatically built
1249	* or if it is left up to the user. (there is nothing to stop a user
1250	* from altering an autobuilt network however).
1251	* @param a True if the network should be auto built.
1252	*/
1253	public void setAutoBuild(boolean a) {
1254	if (!m_gui) {
1255	a = true;
1256	}
1257	m_autoBuild = a;
1258	}
1259
1260	/**
1261	* @return The auto build state.
1262	*/
1263	public boolean getAutoBuild() {
1264	return m_autoBuild;
1265	}
1266
1267
1268	/**
1269	* This will set what the hidden layers are made up of when auto build is
1270	* enabled. Note to have no hidden units, just put a single 0, Any more
1271	* 0's will indicate that the string is badly formed and make it unaccepted.
1272	* Negative numbers, and floats will do the same. There are also some
1273	* wildcards. These are 'a' = (number of attributes + number of classes) / 2,
1274	* 'i' = number of attributes, 'o' = number of classes, and 't' = number of
1275	* attributes + number of classes.
1276	* @param h A string with a comma seperated list of numbers. Each number is
1277	* the number of nodes to be on a hidden layer.
1278	*/
1279	public void setHiddenLayers(String h) {
1280	String tmp = "";
1281	StringTokenizer tok = new StringTokenizer(h, ",");
1282	if (tok.countTokens() == 0) {
1283	return;
1284	}
1285	double dval;
1286	int val;
1287	String c;
1288	boolean first = true;
1289	while (tok.hasMoreTokens()) {
1290	c = tok.nextToken().trim();
1291
1292	if (c.equals("a") \|\| c.equals("i") \|\| c.equals("o") \|\|
1293	c.equals("t")) {
1294	tmp += c;
1295	}
1296	else {
1297	dval = Double.valueOf(c).doubleValue();
1298	val = (int)dval;
1299
1300	if ((val == dval && (val != 0 \|\| (tok.countTokens() == 0 && first)) &&
1301	val >= 0)) {
1302	tmp += val;
1303	}
1304	else {
1305	return;
1306	}
1307	}
1308
1309	first = false;
1310	if (tok.hasMoreTokens()) {
1311	tmp += ", ";
1312	}
1313	}
1314	m_hiddenLayers = tmp;
1315	}
1316
1317	/**
1318	* @return A string representing the hidden layers, each number is the number
1319	* of nodes on a hidden layer.
1320	*/
1321	public String getHiddenLayers() {
1322	return m_hiddenLayers;
1323	}
1324
1325	/**
1326	* This will set whether A GUI is brought up to allow interaction by the user
1327	* with the neural network during training.
1328	* @param a True if gui should be created.
1329	*/
1330	public void setGUI(boolean a) {
1331	m_gui = a;
1332	if (!a) {
1333	setAutoBuild(true);
1334
1335	}
1336	else {
1337	setReset(false);
1338	}
1339	}
1340
1341	/**
1342	* @return The true if should show gui.
1343	*/
1344	public boolean getGUI() {
1345	return m_gui;
1346	}
1347
1348	/**
1349	* This will set the size of the validation set.
1350	* @param a The size of the validation set, as a percentage of the whole.
1351	*/
1352	public void setValidationSetSize(int a) {
1353	if (a < 0 \|\| a > 99) {
1354	return;
1355	}
1356	m_valSize = a;
1357	}
1358
1359	/**
1360	* @return The percentage size of the validation set.
1361	*/
1362	public int getValidationSetSize() {
1363	return m_valSize;
1364	}
1365
1366
1367
1368
1369	/**
1370	* Set the number of training epochs to perform.
1371	* Must be greater than 0.
1372	* @param n The number of epochs to train through.
1373	*/
1374	public void setTrainingTime(int n) {
1375	if (n > 0) {
1376	m_numEpochs = n;
1377	}
1378	}
1379
1380	/**
1381	* @return The number of epochs to train through.
1382	*/
1383	public int getTrainingTime() {
1384	return m_numEpochs;
1385	}
1386
1387	/**
1388	* Call this function to place a node into the network list.
1389	* @param n The node to place in the list.
1390	*/
1391	private void addNode(NeuralConnection n) {
1392
1393	NeuralConnection[] temp1 = new NeuralConnection[m_neuralNodes.length + 1];
1394	for (int noa = 0; noa < m_neuralNodes.length; noa++) {
1395	temp1[noa] = m_neuralNodes[noa];
1396	}
1397
1398	temp1[temp1.length-1] = n;
1399	m_neuralNodes = temp1;
1400	}
1401
1402	/**
1403	* Call this function to remove the passed node from the list.
1404	* This will only remove the node if it is in the neuralnodes list.
1405	* @param n The neuralConnection to remove.
1406	* @return True if removed false if not (because it wasn't there).
1407	*/
1408	private boolean removeNode(NeuralConnection n) {
1409	NeuralConnection[] temp1 = new NeuralConnection[m_neuralNodes.length - 1];
1410	int skip = 0;
1411	for (int noa = 0; noa < m_neuralNodes.length; noa++) {
1412	if (n == m_neuralNodes[noa]) {
1413	skip++;
1414	}
1415	else if (!((noa - skip) >= temp1.length)) {
1416	temp1[noa - skip] = m_neuralNodes[noa];
1417	}
1418	else {
1419	return false;
1420	}
1421	}
1422	m_neuralNodes = temp1;
1423	return true;
1424	}
1425
1426	/**
1427	* This function sets what the m_numeric flag to represent the passed class
1428	* it also performs the normalization of the attributes if applicable
1429	* and sets up the info to normalize the class. (note that regardless of
1430	* the options it will fill an array with the range and base, set to
1431	* normalize all attributes and the class to be between -1 and 1)
1432	* @param inst the instances.
1433	* @return The modified instances. This needs to be done. If the attributes
1434	* are normalized then deep copies will be made of all the instances which
1435	* will need to be passed back out.
1436	*/
1437	private Instances setClassType(Instances inst) throws Exception {
1438	if (inst != null) {
1439	// x bounds
1440	double min=Double.POSITIVE_INFINITY;
1441	double max=Double.NEGATIVE_INFINITY;
1442	double value;
1443	m_attributeRanges = new double[inst.numAttributes()];
1444	m_attributeBases = new double[inst.numAttributes()];
1445	for (int noa = 0; noa < inst.numAttributes(); noa++) {
1446	min = Double.POSITIVE_INFINITY;
1447	max = Double.NEGATIVE_INFINITY;
1448	for (int i=0; i < inst.numInstances();i++) {
1449	if (!inst.instance(i).isMissing(noa)) {
1450	value = inst.instance(i).value(noa);
1451	if (value < min) {
1452	min = value;
1453	}
1454	if (value > max) {
1455	max = value;
1456	}
1457	}
1458	}
1459
1460	m_attributeRanges[noa] = (max - min) / 2;
1461	m_attributeBases[noa] = (max + min) / 2;
1462	if (noa != inst.classIndex() && m_normalizeAttributes) {
1463	for (int i = 0; i < inst.numInstances(); i++) {
1464	if (m_attributeRanges[noa] != 0) {
1465	inst.instance(i).setValue(noa, (inst.instance(i).value(noa)
1466	- m_attributeBases[noa]) /
1467	m_attributeRanges[noa]);
1468	}
1469	else {
1470	inst.instance(i).setValue(noa, inst.instance(i).value(noa) -
1471	m_attributeBases[noa]);
1472	}
1473	}
1474	}
1475	}
1476	if (inst.classAttribute().isNumeric()) {
1477	m_numeric = true;
1478	}
1479	else {
1480	m_numeric = false;
1481	}
1482	}
1483	return inst;
1484	}
1485
1486	/**
1487	* A function used to stop the code that called buildclassifier
1488	* from continuing on before the user has finished the decision tree.
1489	* @param tf True to stop the thread, False to release the thread that is
1490	* waiting there (if one).
1491	*/
1492	public synchronized void blocker(boolean tf) {
1493	if (tf) {
1494	try {
1495	wait();
1496	} catch(InterruptedException e) {
1497	}
1498	}
1499	else {
1500	notifyAll();
1501	}
1502	}
1503
1504	/**
1505	* Call this function to update the control panel for the gui.
1506	*/
1507	private void updateDisplay() {
1508
1509	if (m_gui) {
1510	m_controlPanel.m_errorLabel.repaint();
1511	m_controlPanel.m_epochsLabel.repaint();
1512	}
1513	}
1514
1515
1516	/**
1517	* this will reset all the nodes in the network.
1518	*/
1519	private void resetNetwork() {
1520	for (int noc = 0; noc < m_numClasses; noc++) {
1521	m_outputs[noc].reset();
1522	}
1523	}
1524
1525	/**
1526	* This will cause the output values of all the nodes to be calculated.
1527	* Note that the m_currentInstance is used to calculate these values.
1528	*/
1529	private void calculateOutputs() {
1530	for (int noc = 0; noc < m_numClasses; noc++) {
1531	//get the values.
1532	m_outputs[noc].outputValue(true);
1533	}
1534	}
1535
1536	/**
1537	* This will cause the error values to be calculated for all nodes.
1538	* Note that the m_currentInstance is used to calculate these values.
1539	* Also the output values should have been calculated first.
1540	* @return The squared error.
1541	*/
1542	private double calculateErrors() throws Exception {
1543	double ret = 0, temp = 0;
1544	for (int noc = 0; noc < m_numAttributes; noc++) {
1545	//get the errors.
1546	m_inputs[noc].errorValue(true);
1547
1548	}
1549	for (int noc = 0; noc < m_numClasses; noc++) {
1550	temp = m_outputs[noc].errorValue(false);
1551	ret += temp * temp;
1552	}
1553	return ret;
1554
1555	}
1556
1557	/**
1558	* This will cause the weight values to be updated based on the learning
1559	* rate, momentum and the errors that have been calculated for each node.
1560	* @param l The learning rate to update with.
1561	* @param m The momentum to update with.
1562	*/
1563	private void updateNetworkWeights(double l, double m) {
1564	for (int noc = 0; noc < m_numClasses; noc++) {
1565	//update weights
1566	m_outputs[noc].updateWeights(l, m);
1567	}
1568
1569	}
1570
1571	/**
1572	* This creates the required input units.
1573	*/
1574	private void setupInputs() throws Exception {
1575	m_inputs = new NeuralEnd[m_numAttributes];
1576	int now = 0;
1577	for (int noa = 0; noa < m_numAttributes+1; noa++) {
1578	if (m_instances.classIndex() != noa) {
1579	m_inputs[noa - now] = new NeuralEnd(m_instances.attribute(noa).name());
1580
1581	m_inputs[noa - now].setX(.1);
1582	m_inputs[noa - now].setY((noa - now + 1.0) / (m_numAttributes + 1));
1583	m_inputs[noa - now].setLink(true, noa);
1584	}
1585	else {
1586	now = 1;
1587	}
1588	}
1589
1590	}
1591
1592	/**
1593	* This creates the required output units.
1594	*/
1595	private void setupOutputs() throws Exception {
1596
1597	m_outputs = new NeuralEnd[m_numClasses];
1598	for (int noa = 0; noa < m_numClasses; noa++) {
1599	if (m_numeric) {
1600	m_outputs[noa] = new NeuralEnd(m_instances.classAttribute().name());
1601	}
1602	else {
1603	m_outputs[noa]= new NeuralEnd(m_instances.classAttribute().value(noa));
1604	}
1605
1606	m_outputs[noa].setX(.9);
1607	m_outputs[noa].setY((noa + 1.0) / (m_numClasses + 1));
1608	m_outputs[noa].setLink(false, noa);
1609	NeuralNode temp = new NeuralNode(String.valueOf(m_nextId), m_random,
1610	m_sigmoidUnit);
1611	m_nextId++;
1612	temp.setX(.75);
1613	temp.setY((noa + 1.0) / (m_numClasses + 1));
1614	addNode(temp);
1615	NeuralConnection.connect(temp, m_outputs[noa]);
1616	}
1617
1618	}
1619
1620	/**
1621	* Call this function to automatically generate the hidden units
1622	*/
1623	private void setupHiddenLayer()
1624	{
1625	StringTokenizer tok = new StringTokenizer(m_hiddenLayers, ",");
1626	int val = 0; //num of nodes in a layer
1627	int prev = 0; //used to remember the previous layer
1628	int num = tok.countTokens(); //number of layers
1629	String c;
1630	for (int noa = 0; noa < num; noa++) {
1631	//note that I am using the Double to get the value rather than the
1632	//Integer class, because for some reason the Double implementation can
1633	//handle leading white space and the integer version can't!?!
1634	c = tok.nextToken().trim();
1635	if (c.equals("a")) {
1636	val = (m_numAttributes + m_numClasses) / 2;
1637	}
1638	else if (c.equals("i")) {
1639	val = m_numAttributes;
1640	}
1641	else if (c.equals("o")) {
1642	val = m_numClasses;
1643	}
1644	else if (c.equals("t")) {
1645	val = m_numAttributes + m_numClasses;
1646	}
1647	else {
1648	val = Double.valueOf(c).intValue();
1649	}
1650	for (int nob = 0; nob < val; nob++) {
1651	NeuralNode temp = new NeuralNode(String.valueOf(m_nextId), m_random,
1652	m_sigmoidUnit);
1653	m_nextId++;
1654	temp.setX(.5 / (num) * noa + .25);
1655	temp.setY((nob + 1.0) / (val + 1));
1656	addNode(temp);
1657	if (noa > 0) {
1658	//then do connections
1659	for (int noc = m_neuralNodes.length - nob - 1 - prev;
1660	noc < m_neuralNodes.length - nob - 1; noc++) {
1661	NeuralConnection.connect(m_neuralNodes[noc], temp);
1662	}
1663	}
1664	}
1665	prev = val;
1666	}
1667	tok = new StringTokenizer(m_hiddenLayers, ",");
1668	c = tok.nextToken();
1669	if (c.equals("a")) {
1670	val = (m_numAttributes + m_numClasses) / 2;
1671	}
1672	else if (c.equals("i")) {
1673	val = m_numAttributes;
1674	}
1675	else if (c.equals("o")) {
1676	val = m_numClasses;
1677	}
1678	else if (c.equals("t")) {
1679	val = m_numAttributes + m_numClasses;
1680	}
1681	else {
1682	val = Double.valueOf(c).intValue();
1683	}
1684
1685	if (val == 0) {
1686	for (int noa = 0; noa < m_numAttributes; noa++) {
1687	for (int nob = 0; nob < m_numClasses; nob++) {
1688	NeuralConnection.connect(m_inputs[noa], m_neuralNodes[nob]);
1689	}
1690	}
1691	}
1692	else {
1693	for (int noa = 0; noa < m_numAttributes; noa++) {
1694	for (int nob = m_numClasses; nob < m_numClasses + val; nob++) {
1695	NeuralConnection.connect(m_inputs[noa], m_neuralNodes[nob]);
1696	}
1697	}
1698	for (int noa = m_neuralNodes.length - prev; noa < m_neuralNodes.length;
1699	noa++) {
1700	for (int nob = 0; nob < m_numClasses; nob++) {
1701	NeuralConnection.connect(m_neuralNodes[noa], m_neuralNodes[nob]);
1702	}
1703	}
1704	}
1705
1706	}
1707
1708	/**
1709	* This will go through all the nodes and check if they are connected
1710	* to a pure output unit. If so they will be set to be linear units.
1711	* If not they will be set to be sigmoid units.
1712	*/
1713	private void setEndsToLinear() {
1714	for (int noa = 0; noa < m_neuralNodes.length; noa++) {
1715	if ((m_neuralNodes[noa].getType() & NeuralConnection.OUTPUT) ==
1716	NeuralConnection.OUTPUT) {
1717	((NeuralNode)m_neuralNodes[noa]).setMethod(m_linearUnit);
1718	}
1719	else {
1720	((NeuralNode)m_neuralNodes[noa]).setMethod(m_sigmoidUnit);
1721	}
1722	}
1723	}
1724
1725	/**
1726	* Returns default capabilities of the classifier.
1727	*
1728	* @return the capabilities of this classifier
1729	*/
1730	public Capabilities getCapabilities() {
1731	Capabilities result = super.getCapabilities();
1732	result.disableAll();
1733
1734	// attributes
1735	result.enable(Capability.NOMINAL_ATTRIBUTES);
1736	result.enable(Capability.NUMERIC_ATTRIBUTES);
1737	result.enable(Capability.DATE_ATTRIBUTES);
1738	result.enable(Capability.MISSING_VALUES);
1739
1740	// class
1741	result.enable(Capability.NOMINAL_CLASS);
1742	result.enable(Capability.NUMERIC_CLASS);
1743	result.enable(Capability.DATE_CLASS);
1744	result.enable(Capability.MISSING_CLASS_VALUES);
1745
1746	return result;
1747	}
1748
1749	/**
1750	* Call this function to build and train a neural network for the training
1751	* data provided.
1752	* @param i The training data.
1753	* @throws Exception if can't build classification properly.
1754	*/
1755	public void buildClassifier(Instances i) throws Exception {
1756
1757	// can classifier handle the data?
1758	getCapabilities().testWithFail(i);
1759
1760	// remove instances with missing class
1761	i = new Instances(i);
1762	i.deleteWithMissingClass();
1763
1764	// only class? -> build ZeroR model
1765	if (i.numAttributes() == 1) {
1766	System.err.println(
1767	"Cannot build model (only class attribute present in data!), "
1768	+ "using ZeroR model instead!");
1769	m_ZeroR = new weka.classifiers.rules.ZeroR();
1770	m_ZeroR.buildClassifier(i);
1771	return;
1772	}
1773	else {
1774	m_ZeroR = null;
1775	}
1776
1777	m_epoch = 0;
1778	m_error = 0;
1779	m_instances = null;
1780	m_currentInstance = null;
1781	m_controlPanel = null;
1782	m_nodePanel = null;
1783
1784
1785	m_outputs = new NeuralEnd[0];
1786	m_inputs = new NeuralEnd[0];
1787	m_numAttributes = 0;
1788	m_numClasses = 0;
1789	m_neuralNodes = new NeuralConnection[0];
1790
1791	m_selected = new FastVector(4);
1792	m_graphers = new FastVector(2);
1793	m_nextId = 0;
1794	m_stopIt = true;
1795	m_stopped = true;
1796	m_accepted = false;
1797	m_instances = new Instances(i);
1798	m_random = new Random(m_randomSeed);
1799	m_instances.randomize(m_random);
1800
1801	if (m_useNomToBin) {
1802	m_nominalToBinaryFilter = new NominalToBinary();
1803	m_nominalToBinaryFilter.setInputFormat(m_instances);
1804	m_instances = Filter.useFilter(m_instances,
1805	m_nominalToBinaryFilter);
1806	}
1807	m_numAttributes = m_instances.numAttributes() - 1;
1808	m_numClasses = m_instances.numClasses();
1809
1810
1811	setClassType(m_instances);
1812
1813
1814
1815	//this sets up the validation set.
1816	Instances valSet = null;
1817	//numinval is needed later
1818	int numInVal = (int)(m_valSize / 100.0 * m_instances.numInstances());
1819	if (m_valSize > 0) {
1820	if (numInVal == 0) {
1821	numInVal = 1;
1822	}
1823	valSet = new Instances(m_instances, 0, numInVal);
1824	}
1825	///////////
1826
1827	setupInputs();
1828
1829	setupOutputs();
1830	if (m_autoBuild) {
1831	setupHiddenLayer();
1832	}
1833
1834	/////////////////////////////
1835	//this sets up the gui for usage
1836	if (m_gui) {
1837	m_win = new JFrame();
1838
1839	m_win.addWindowListener(new WindowAdapter() {
1840	public void windowClosing(WindowEvent e) {
1841	boolean k = m_stopIt;
1842	m_stopIt = true;
1843	int well =JOptionPane.showConfirmDialog(m_win,
1844	"Are You Sure...\n"
1845	+ "Click Yes To Accept"
1846	+ " The Neural Network"
1847	+ "\n Click No To Return",
1848	"Accept Neural Network",
1849	JOptionPane.YES_NO_OPTION);
1850
1851	if (well == 0) {
1852	m_win.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
1853	m_accepted = true;
1854	blocker(false);
1855	}
1856	else {
1857	m_win.setDefaultCloseOperation(JFrame.DO_NOTHING_ON_CLOSE);
1858	}
1859	m_stopIt = k;
1860	}
1861	});
1862
1863	m_win.getContentPane().setLayout(new BorderLayout());
1864	m_win.setTitle("Neural Network");
1865	m_nodePanel = new NodePanel();
1866	// without the following two lines, the NodePanel.paintComponents(Graphics)
1867	// method will go berserk if the network doesn't fit completely: it will
1868	// get called on a constant basis, using 100% of the CPU
1869	// see the following forum thread:
1870	// http://forum.java.sun.com/thread.jspa?threadID=580929&messageID=2945011
1871	m_nodePanel.setPreferredSize(new Dimension(640, 480));
1872	m_nodePanel.revalidate();
1873
1874	JScrollPane sp = new JScrollPane(m_nodePanel,
1875	JScrollPane.VERTICAL_SCROLLBAR_ALWAYS,
1876	JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
1877	m_controlPanel = new ControlPanel();
1878
1879	m_win.getContentPane().add(sp, BorderLayout.CENTER);
1880	m_win.getContentPane().add(m_controlPanel, BorderLayout.SOUTH);
1881	m_win.setSize(640, 480);
1882	m_win.setVisible(true);
1883	}
1884
1885	//This sets up the initial state of the gui
1886	if (m_gui) {
1887	blocker(true);
1888	m_controlPanel.m_changeEpochs.setEnabled(false);
1889	m_controlPanel.m_changeLearning.setEnabled(false);
1890	m_controlPanel.m_changeMomentum.setEnabled(false);
1891	}
1892
1893	//For silly situations in which the network gets accepted before training
1894	//commenses
1895	if (m_numeric) {
1896	setEndsToLinear();
1897	}
1898	if (m_accepted) {
1899	m_win.dispose();
1900	m_controlPanel = null;
1901	m_nodePanel = null;
1902	m_instances = new Instances(m_instances, 0);
1903	return;
1904	}
1905
1906	//connections done.
1907	double right = 0;
1908	double driftOff = 0;
1909	double lastRight = Double.POSITIVE_INFINITY;
1910	double bestError = Double.POSITIVE_INFINITY;
1911	double tempRate;
1912	double totalWeight = 0;
1913	double totalValWeight = 0;
1914	double origRate = m_learningRate; //only used for when reset
1915
1916	//ensure that at least 1 instance is trained through.
1917	if (numInVal == m_instances.numInstances()) {
1918	numInVal--;
1919	}
1920	if (numInVal < 0) {
1921	numInVal = 0;
1922	}
1923	for (int noa = numInVal; noa < m_instances.numInstances(); noa++) {
1924	if (!m_instances.instance(noa).classIsMissing()) {
1925	totalWeight += m_instances.instance(noa).weight();
1926	}
1927	}
1928	if (m_valSize != 0) {
1929	for (int noa = 0; noa < valSet.numInstances(); noa++) {
1930	if (!valSet.instance(noa).classIsMissing()) {
1931	totalValWeight += valSet.instance(noa).weight();
1932	}
1933	}
1934	}
1935	m_stopped = false;
1936
1937
1938	for (int noa = 1; noa < m_numEpochs + 1; noa++) {
1939	right = 0;
1940	for (int nob = numInVal; nob < m_instances.numInstances(); nob++) {
1941	m_currentInstance = m_instances.instance(nob);
1942
1943	if (!m_currentInstance.classIsMissing()) {
1944
1945	//this is where the network updating (and training occurs, for the
1946	//training set
1947	resetNetwork();
1948	calculateOutputs();
1949	tempRate = m_learningRate * m_currentInstance.weight();
1950	if (m_decay) {
1951	tempRate /= noa;
1952	}
1953
1954	right += (calculateErrors() / m_instances.numClasses()) *
1955	m_currentInstance.weight();
1956	updateNetworkWeights(tempRate, m_momentum);
1957
1958	}
1959
1960	}
1961	right /= totalWeight;
1962	if (Double.isInfinite(right) \|\| Double.isNaN(right)) {
1963	if (!m_reset) {
1964	m_instances = null;
1965	throw new Exception("Network cannot train. Try restarting with a" +
1966	" smaller learning rate.");
1967	}
1968	else {
1969	//reset the network if possible
1970	if (m_learningRate <= Utils.SMALL)
1971	throw new IllegalStateException(
1972	"Learning rate got too small (" + m_learningRate
1973	+ " <= " + Utils.SMALL + ")!");
1974	m_learningRate /= 2;
1975	buildClassifier(i);
1976	m_learningRate = origRate;
1977	m_instances = new Instances(m_instances, 0);
1978	return;
1979	}
1980	}
1981
1982	////////////////////////do validation testing if applicable
1983	if (m_valSize != 0) {
1984	right = 0;
1985	for (int nob = 0; nob < valSet.numInstances(); nob++) {
1986	m_currentInstance = valSet.instance(nob);
1987	if (!m_currentInstance.classIsMissing()) {
1988	//this is where the network updating occurs, for the validation set
1989	resetNetwork();
1990	calculateOutputs();
1991	right += (calculateErrors() / valSet.numClasses())
1992	* m_currentInstance.weight();
1993	//note 'right' could be calculated here just using
1994	//the calculate output values. This would be faster.
1995	//be less modular
1996	}
1997
1998	}
1999
2000	if (right < lastRight) {
2001	if (right < bestError) {
2002	bestError = right;
2003	// save the network weights at this point
2004	for (int noc = 0; noc < m_numClasses; noc++) {
2005	m_outputs[noc].saveWeights();
2006	}
2007	driftOff = 0;
2008	}
2009	}
2010	else {
2011	driftOff++;
2012	}
2013	lastRight = right;
2014	if (driftOff > m_driftThreshold \|\| noa + 1 >= m_numEpochs) {
2015	for (int noc = 0; noc < m_numClasses; noc++) {
2016	m_outputs[noc].restoreWeights();
2017	}
2018	m_accepted = true;
2019	}
2020	right /= totalValWeight;
2021	}
2022	m_epoch = noa;
2023	m_error = right;
2024	//shows what the neuralnet is upto if a gui exists.
2025	updateDisplay();
2026	//This junction controls what state the gui is in at the end of each
2027	//epoch, Such as if it is paused, if it is resumable etc...
2028	if (m_gui) {
2029	while ((m_stopIt \|\| (m_epoch >= m_numEpochs && m_valSize == 0)) &&
2030	!m_accepted) {
2031	m_stopIt = true;
2032	m_stopped = true;
2033	if (m_epoch >= m_numEpochs && m_valSize == 0) {
2034
2035	m_controlPanel.m_startStop.setEnabled(false);
2036	}
2037	else {
2038	m_controlPanel.m_startStop.setEnabled(true);
2039	}
2040	m_controlPanel.m_startStop.setText("Start");
2041	m_controlPanel.m_startStop.setActionCommand("Start");
2042	m_controlPanel.m_changeEpochs.setEnabled(true);
2043	m_controlPanel.m_changeLearning.setEnabled(true);
2044	m_controlPanel.m_changeMomentum.setEnabled(true);
2045
2046	blocker(true);
2047	if (m_numeric) {
2048	setEndsToLinear();
2049	}
2050	}
2051	m_controlPanel.m_changeEpochs.setEnabled(false);
2052	m_controlPanel.m_changeLearning.setEnabled(false);
2053	m_controlPanel.m_changeMomentum.setEnabled(false);
2054
2055	m_stopped = false;
2056	//if the network has been accepted stop the training loop
2057	if (m_accepted) {
2058	m_win.dispose();
2059	m_controlPanel = null;
2060	m_nodePanel = null;
2061	m_instances = new Instances(m_instances, 0);
2062	return;
2063	}
2064	}
2065	if (m_accepted) {
2066	m_instances = new Instances(m_instances, 0);
2067	return;
2068	}
2069	}
2070	if (m_gui) {
2071	m_win.dispose();
2072	m_controlPanel = null;
2073	m_nodePanel = null;
2074	}
2075	m_instances = new Instances(m_instances, 0);
2076	}
2077
2078	/**
2079	* Call this function to predict the class of an instance once a
2080	* classification model has been built with the buildClassifier call.
2081	* @param i The instance to classify.
2082	* @return A double array filled with the probabilities of each class type.
2083	* @throws Exception if can't classify instance.
2084	*/
2085	public double[] distributionForInstance(Instance i) throws Exception {
2086
2087	// default model?
2088	if (m_ZeroR != null) {
2089	return m_ZeroR.distributionForInstance(i);
2090	}
2091
2092	if (m_useNomToBin) {
2093	m_nominalToBinaryFilter.input(i);
2094	m_currentInstance = m_nominalToBinaryFilter.output();
2095	}
2096	else {
2097	m_currentInstance = i;
2098	}
2099
2100	if (m_normalizeAttributes) {
2101	for (int noa = 0; noa < m_instances.numAttributes(); noa++) {
2102	if (noa != m_instances.classIndex()) {
2103	if (m_attributeRanges[noa] != 0) {
2104	m_currentInstance.setValue(noa, (m_currentInstance.value(noa) -
2105	m_attributeBases[noa]) /
2106	m_attributeRanges[noa]);
2107	}
2108	else {
2109	m_currentInstance.setValue(noa, m_currentInstance.value(noa) -
2110	m_attributeBases[noa]);
2111	}
2112	}
2113	}
2114	}
2115	resetNetwork();
2116
2117	//since all the output values are needed.
2118	//They are calculated manually here and the values collected.
2119	double[] theArray = new double[m_numClasses];
2120	for (int noa = 0; noa < m_numClasses; noa++) {
2121	theArray[noa] = m_outputs[noa].outputValue(true);
2122	}
2123	if (m_instances.classAttribute().isNumeric()) {
2124	return theArray;
2125	}
2126
2127	//now normalize the array
2128	double count = 0;
2129	for (int noa = 0; noa < m_numClasses; noa++) {
2130	count += theArray[noa];
2131	}
2132	if (count <= 0) {
2133	return null;
2134	}
2135	for (int noa = 0; noa < m_numClasses; noa++) {
2136	theArray[noa] /= count;
2137	}
2138	return theArray;
2139	}
2140
2141
2142
2143	/**
2144	* Returns an enumeration describing the available options.
2145	*
2146	* @return an enumeration of all the available options.
2147	*/
2148	public Enumeration listOptions() {
2149
2150	Vector newVector = new Vector(14);
2151
2152	newVector.addElement(new Option(
2153	"\tLearning Rate for the backpropagation algorithm.\n"
2154	+"\t(Value should be between 0 - 1, Default = 0.3).",
2155	"L", 1, "-L <learning rate>"));
2156	newVector.addElement(new Option(
2157	"\tMomentum Rate for the backpropagation algorithm.\n"
2158	+"\t(Value should be between 0 - 1, Default = 0.2).",
2159	"M", 1, "-M <momentum>"));
2160	newVector.addElement(new Option(
2161	"\tNumber of epochs to train through.\n"
2162	+"\t(Default = 500).",
2163	"N", 1,"-N <number of epochs>"));
2164	newVector.addElement(new Option(
2165	"\tPercentage size of validation set to use to terminate\n"
2166	+ "\ttraining (if this is non zero it can pre-empt num of epochs.\n"
2167	+"\t(Value should be between 0 - 100, Default = 0).",
2168	"V", 1, "-V <percentage size of validation set>"));
2169	newVector.addElement(new Option(
2170	"\tThe value used to seed the random number generator\n"
2171	+ "\t(Value should be >= 0 and and a long, Default = 0).",
2172	"S", 1, "-S <seed>"));
2173	newVector.addElement(new Option(
2174	"\tThe consequetive number of errors allowed for validation\n"
2175	+ "\ttesting before the netwrok terminates.\n"
2176	+ "\t(Value should be > 0, Default = 20).",
2177	"E", 1, "-E <threshold for number of consequetive errors>"));
2178	newVector.addElement(new Option(
2179	"\tGUI will be opened.\n"
2180	+"\t(Use this to bring up a GUI).",
2181	"G", 0,"-G"));
2182	newVector.addElement(new Option(
2183	"\tAutocreation of the network connections will NOT be done.\n"
2184	+"\t(This will be ignored if -G is NOT set)",
2185	"A", 0,"-A"));
2186	newVector.addElement(new Option(
2187	"\tA NominalToBinary filter will NOT automatically be used.\n"
2188	+"\t(Set this to not use a NominalToBinary filter).",
2189	"B", 0,"-B"));
2190	newVector.addElement(new Option(
2191	"\tThe hidden layers to be created for the network.\n"
2192	+ "\t(Value should be a list of comma separated Natural \n"
2193	+ "\tnumbers or the letters 'a' = (attribs + classes) / 2, \n"
2194	+ "\t'i' = attribs, 'o' = classes, 't' = attribs .+ classes)\n"
2195	+ "\tfor wildcard values, Default = a).",
2196	"H", 1, "-H <comma seperated numbers for nodes on each layer>"));
2197	newVector.addElement(new Option(
2198	"\tNormalizing a numeric class will NOT be done.\n"
2199	+"\t(Set this to not normalize the class if it's numeric).",
2200	"C", 0,"-C"));
2201	newVector.addElement(new Option(
2202	"\tNormalizing the attributes will NOT be done.\n"
2203	+"\t(Set this to not normalize the attributes).",
2204	"I", 0,"-I"));
2205	newVector.addElement(new Option(
2206	"\tReseting the network will NOT be allowed.\n"
2207	+"\t(Set this to not allow the network to reset).",
2208	"R", 0,"-R"));
2209	newVector.addElement(new Option(
2210	"\tLearning rate decay will occur.\n"
2211	+"\t(Set this to cause the learning rate to decay).",
2212	"D", 0,"-D"));
2213
2214
2215	return newVector.elements();
2216	}
2217
2218	/**
2219	* Parses a given list of options. <p/>
2220	*
2221	<!-- options-start -->
2222	* Valid options are: <p/>
2223	*
2224	* <pre> -L <learning rate>
2225	* Learning Rate for the backpropagation algorithm.
2226	* (Value should be between 0 - 1, Default = 0.3).</pre>
2227	*
2228	* <pre> -M <momentum>
2229	* Momentum Rate for the backpropagation algorithm.
2230	* (Value should be between 0 - 1, Default = 0.2).</pre>
2231	*
2232	* <pre> -N <number of epochs>
2233	* Number of epochs to train through.
2234	* (Default = 500).</pre>
2235	*
2236	* <pre> -V <percentage size of validation set>
2237	* Percentage size of validation set to use to terminate
2238	* training (if this is non zero it can pre-empt num of epochs.
2239	* (Value should be between 0 - 100, Default = 0).</pre>
2240	*
2241	* <pre> -S <seed>
2242	* The value used to seed the random number generator
2243	* (Value should be >= 0 and and a long, Default = 0).</pre>
2244	*
2245	* <pre> -E <threshold for number of consequetive errors>
2246	* The consequetive number of errors allowed for validation
2247	* testing before the netwrok terminates.
2248	* (Value should be > 0, Default = 20).</pre>
2249	*
2250	* <pre> -G
2251	* GUI will be opened.
2252	* (Use this to bring up a GUI).</pre>
2253	*
2254	* <pre> -A
2255	* Autocreation of the network connections will NOT be done.
2256	* (This will be ignored if -G is NOT set)</pre>
2257	*
2258	* <pre> -B
2259	* A NominalToBinary filter will NOT automatically be used.
2260	* (Set this to not use a NominalToBinary filter).</pre>
2261	*
2262	* <pre> -H <comma seperated numbers for nodes on each layer>
2263	* The hidden layers to be created for the network.
2264	* (Value should be a list of comma separated Natural
2265	* numbers or the letters 'a' = (attribs + classes) / 2,
2266	* 'i' = attribs, 'o' = classes, 't' = attribs .+ classes)
2267	* for wildcard values, Default = a).</pre>
2268	*
2269	* <pre> -C
2270	* Normalizing a numeric class will NOT be done.
2271	* (Set this to not normalize the class if it's numeric).</pre>
2272	*
2273	* <pre> -I
2274	* Normalizing the attributes will NOT be done.
2275	* (Set this to not normalize the attributes).</pre>
2276	*
2277	* <pre> -R
2278	* Reseting the network will NOT be allowed.
2279	* (Set this to not allow the network to reset).</pre>
2280	*
2281	* <pre> -D
2282	* Learning rate decay will occur.
2283	* (Set this to cause the learning rate to decay).</pre>
2284	*
2285	<!-- options-end -->
2286	*
2287	* @param options the list of options as an array of strings
2288	* @throws Exception if an option is not supported
2289	*/
2290	public void setOptions(String[] options) throws Exception {
2291	//the defaults can be found here!!!!
2292	String learningString = Utils.getOption('L', options);
2293	if (learningString.length() != 0) {
2294	setLearningRate((new Double(learningString)).doubleValue());
2295	} else {
2296	setLearningRate(0.3);
2297	}
2298	String momentumString = Utils.getOption('M', options);
2299	if (momentumString.length() != 0) {
2300	setMomentum((new Double(momentumString)).doubleValue());
2301	} else {
2302	setMomentum(0.2);
2303	}
2304	String epochsString = Utils.getOption('N', options);
2305	if (epochsString.length() != 0) {
2306	setTrainingTime(Integer.parseInt(epochsString));
2307	} else {
2308	setTrainingTime(500);
2309	}
2310	String valSizeString = Utils.getOption('V', options);
2311	if (valSizeString.length() != 0) {
2312	setValidationSetSize(Integer.parseInt(valSizeString));
2313	} else {
2314	setValidationSetSize(0);
2315	}
2316	String seedString = Utils.getOption('S', options);
2317	if (seedString.length() != 0) {
2318	setSeed(Integer.parseInt(seedString));
2319	} else {
2320	setSeed(0);
2321	}
2322	String thresholdString = Utils.getOption('E', options);
2323	if (thresholdString.length() != 0) {
2324	setValidationThreshold(Integer.parseInt(thresholdString));
2325	} else {
2326	setValidationThreshold(20);
2327	}
2328	String hiddenLayers = Utils.getOption('H', options);
2329	if (hiddenLayers.length() != 0) {
2330	setHiddenLayers(hiddenLayers);
2331	} else {
2332	setHiddenLayers("a");
2333	}
2334	if (Utils.getFlag('G', options)) {
2335	setGUI(true);
2336	} else {
2337	setGUI(false);
2338	} //small note. since the gui is the only option that can change the other
2339	//options this should be set first to allow the other options to set
2340	//properly
2341	if (Utils.getFlag('A', options)) {
2342	setAutoBuild(false);
2343	} else {
2344	setAutoBuild(true);
2345	}
2346	if (Utils.getFlag('B', options)) {
2347	setNominalToBinaryFilter(false);
2348	} else {
2349	setNominalToBinaryFilter(true);
2350	}
2351	if (Utils.getFlag('C', options)) {
2352	setNormalizeNumericClass(false);
2353	} else {
2354	setNormalizeNumericClass(true);
2355	}
2356	if (Utils.getFlag('I', options)) {
2357	setNormalizeAttributes(false);
2358	} else {
2359	setNormalizeAttributes(true);
2360	}
2361	if (Utils.getFlag('R', options)) {
2362	setReset(false);
2363	} else {
2364	setReset(true);
2365	}
2366	if (Utils.getFlag('D', options)) {
2367	setDecay(true);
2368	} else {
2369	setDecay(false);
2370	}
2371
2372	Utils.checkForRemainingOptions(options);
2373	}
2374
2375	/**
2376	* Gets the current settings of NeuralNet.
2377	*
2378	* @return an array of strings suitable for passing to setOptions()
2379	*/
2380	public String [] getOptions() {
2381
2382	String [] options = new String [21];
2383	int current = 0;
2384	options[current++] = "-L"; options[current++] = "" + getLearningRate();
2385	options[current++] = "-M"; options[current++] = "" + getMomentum();
2386	options[current++] = "-N"; options[current++] = "" + getTrainingTime();
2387	options[current++] = "-V"; options[current++] = "" +getValidationSetSize();
2388	options[current++] = "-S"; options[current++] = "" + getSeed();
2389	options[current++] = "-E"; options[current++] =""+getValidationThreshold();
2390	options[current++] = "-H"; options[current++] = getHiddenLayers();
2391	if (getGUI()) {
2392	options[current++] = "-G";
2393	}
2394	if (!getAutoBuild()) {
2395	options[current++] = "-A";
2396	}
2397	if (!getNominalToBinaryFilter()) {
2398	options[current++] = "-B";
2399	}
2400	if (!getNormalizeNumericClass()) {
2401	options[current++] = "-C";
2402	}
2403	if (!getNormalizeAttributes()) {
2404	options[current++] = "-I";
2405	}
2406	if (!getReset()) {
2407	options[current++] = "-R";
2408	}
2409	if (getDecay()) {
2410	options[current++] = "-D";
2411	}
2412
2413
2414	while (current < options.length) {
2415	options[current++] = "";
2416	}
2417	return options;
2418	}
2419
2420	/**
2421	* @return string describing the model.
2422	*/
2423	public String toString() {
2424	// only ZeroR model?
2425	if (m_ZeroR != null) {
2426	StringBuffer buf = new StringBuffer();
2427	buf.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n");
2428	buf.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n");
2429	buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
2430	buf.append(m_ZeroR.toString());
2431	return buf.toString();
2432	}
2433
2434	StringBuffer model = new StringBuffer(m_neuralNodes.length * 100);
2435	//just a rough size guess
2436	NeuralNode con;
2437	double[] weights;
2438	NeuralConnection[] inputs;
2439	for (int noa = 0; noa < m_neuralNodes.length; noa++) {
2440	con = (NeuralNode) m_neuralNodes[noa]; //this would need a change
2441	//for items other than nodes!!!
2442	weights = con.getWeights();
2443	inputs = con.getInputs();
2444	if (con.getMethod() instanceof SigmoidUnit) {
2445	model.append("Sigmoid ");
2446	}
2447	else if (con.getMethod() instanceof LinearUnit) {
2448	model.append("Linear ");
2449	}
2450	model.append("Node " + con.getId() + "\n Inputs Weights\n");
2451	model.append(" Threshold " + weights[0] + "\n");
2452	for (int nob = 1; nob < con.getNumInputs() + 1; nob++) {
2453	if ((inputs[nob - 1].getType() & NeuralConnection.PURE_INPUT)
2454	== NeuralConnection.PURE_INPUT) {
2455	model.append(" Attrib " +
2456	m_instances.attribute(((NeuralEnd)inputs[nob-1]).
2457	getLink()).name()
2458	+ " " + weights[nob] + "\n");
2459	}
2460	else {
2461	model.append(" Node " + inputs[nob-1].getId() + " " +
2462	weights[nob] + "\n");
2463	}
2464	}
2465	}
2466	//now put in the ends
2467	for (int noa = 0; noa < m_outputs.length; noa++) {
2468	inputs = m_outputs[noa].getInputs();
2469	model.append("Class " +
2470	m_instances.classAttribute().
2471	value(m_outputs[noa].getLink()) +
2472	"\n Input\n");
2473	for (int nob = 0; nob < m_outputs[noa].getNumInputs(); nob++) {
2474	if ((inputs[nob].getType() & NeuralConnection.PURE_INPUT)
2475	== NeuralConnection.PURE_INPUT) {
2476	model.append(" Attrib " +
2477	m_instances.attribute(((NeuralEnd)inputs[nob]).
2478	getLink()).name() + "\n");
2479	}
2480	else {
2481	model.append(" Node " + inputs[nob].getId() + "\n");
2482	}
2483	}
2484	}
2485	return model.toString();
2486	}
2487
2488	/**
2489	* This will return a string describing the classifier.
2490	* @return The string.
2491	*/
2492	public String globalInfo() {
2493	return
2494	"A Classifier that uses backpropagation to classify instances.\n"
2495	+ "This network can be built by hand, created by an algorithm or both. "
2496	+ "The network can also be monitored and modified during training time. "
2497	+ "The nodes in this network are all sigmoid (except for when the class "
2498	+ "is numeric in which case the the output nodes become unthresholded "
2499	+ "linear units).";
2500	}
2501
2502	/**
2503	* @return a string to describe the learning rate option.
2504	*/
2505	public String learningRateTipText() {
2506	return "The amount the" +
2507	" weights are updated.";
2508	}
2509
2510	/**
2511	* @return a string to describe the momentum option.
2512	*/
2513	public String momentumTipText() {
2514	return "Momentum applied to the weights during updating.";
2515	}
2516
2517	/**
2518	* @return a string to describe the AutoBuild option.
2519	*/
2520	public String autoBuildTipText() {
2521	return "Adds and connects up hidden layers in the network.";
2522	}
2523
2524	/**
2525	* @return a string to describe the random seed option.
2526	*/
2527	public String seedTipText() {
2528	return "Seed used to initialise the random number generator." +
2529	"Random numbers are used for setting the initial weights of the" +
2530	" connections betweem nodes, and also for shuffling the training data.";
2531	}
2532
2533	/**
2534	* @return a string to describe the validation threshold option.
2535	*/
2536	public String validationThresholdTipText() {
2537	return "Used to terminate validation testing." +
2538	"The value here dictates how many times in a row the validation set" +
2539	" error can get worse before training is terminated.";
2540	}
2541
2542	/**
2543	* @return a string to describe the GUI option.
2544	*/
2545	public String GUITipText() {
2546	return "Brings up a gui interface." +
2547	" This will allow the pausing and altering of the nueral network" +
2548	" during training.\n\n" +
2549	"* To add a node left click (this node will be automatically selected," +
2550	" ensure no other nodes were selected).\n" +
2551	"* To select a node left click on it either while no other node is" +
2552	" selected or while holding down the control key (this toggles that" +
2553	" node as being selected and not selected.\n" +
2554	"* To connect a node, first have the start node(s) selected, then click"+
2555	" either the end node or on an empty space (this will create a new node"+
2556	" that is connected with the selected nodes). The selection status of" +
2557	" nodes will stay the same after the connection. (Note these are" +
2558	" directed connections, also a connection between two nodes will not" +
2559	" be established more than once and certain connections that are" +
2560	" deemed to be invalid will not be made).\n" +
2561	"* To remove a connection select one of the connected node(s) in the" +
2562	" connection and then right click the other node (it does not matter" +
2563	" whether the node is the start or end the connection will be removed" +
2564	").\n" +
2565	"* To remove a node right click it while no other nodes (including it)" +
2566	" are selected. (This will also remove all connections to it)\n." +
2567	"* To deselect a node either left click it while holding down control," +
2568	" or right click on empty space.\n" +
2569	"* The raw inputs are provided from the labels on the left.\n" +
2570	"* The red nodes are hidden layers.\n" +
2571	"* The orange nodes are the output nodes.\n" +
2572	"* The labels on the right show the class the output node represents." +
2573	" Note that with a numeric class the output node will automatically be" +
2574	" made into an unthresholded linear unit.\n\n" +
2575	"Alterations to the neural network can only be done while the network" +
2576	" is not running, This also applies to the learning rate and other" +
2577	" fields on the control panel.\n\n" +
2578	"* You can accept the network as being finished at any time.\n" +
2579	"* The network is automatically paused at the beginning.\n" +
2580	"* There is a running indication of what epoch the network is up to" +
2581	" and what the (rough) error for that epoch was (or for" +
2582	" the validation if that is being used). Note that this error value" +
2583	" is based on a network that changes as the value is computed." +
2584	" (also depending on whether" +
2585	" the class is normalized will effect the error reported for numeric" +
2586	" classes.\n" +
2587	"* Once the network is done it will pause again and either wait to be" +
2588	" accepted or trained more.\n\n" +
2589	"Note that if the gui is not set the network will not require any" +
2590	" interaction.\n";
2591	}
2592
2593	/**
2594	* @return a string to describe the validation size option.
2595	*/
2596	public String validationSetSizeTipText() {
2597	return "The percentage size of the validation set." +
2598	"(The training will continue until it is observed that" +
2599	" the error on the validation set has been consistently getting" +
2600	" worse, or if the training time is reached).\n" +
2601	"If This is set to zero no validation set will be used and instead" +
2602	" the network will train for the specified number of epochs.";
2603	}
2604
2605	/**
2606	* @return a string to describe the learning rate option.
2607	*/
2608	public String trainingTimeTipText() {
2609	return "The number of epochs to train through." +
2610	" If the validation set is non-zero then it can terminate the network" +
2611	" early";
2612	}
2613
2614
2615	/**
2616	* @return a string to describe the nominal to binary option.
2617	*/
2618	public String nominalToBinaryFilterTipText() {
2619	return "This will preprocess the instances with the filter." +
2620	" This could help improve performance if there are nominal attributes" +
2621	" in the data.";
2622	}
2623
2624	/**
2625	* @return a string to describe the hidden layers in the network.
2626	*/
2627	public String hiddenLayersTipText() {
2628	return "This defines the hidden layers of the neural network." +
2629	" This is a list of positive whole numbers. 1 for each hidden layer." +
2630	" Comma seperated. To have no hidden layers put a single 0 here." +
2631	" This will only be used if autobuild is set. There are also wildcard" +
2632	" values 'a' = (attribs + classes) / 2, 'i' = attribs, 'o' = classes" +
2633	" , 't' = attribs + classes.";
2634	}
2635	/**
2636	* @return a string to describe the nominal to binary option.
2637	*/
2638	public String normalizeNumericClassTipText() {
2639	return "This will normalize the class if it's numeric." +
2640	" This could help improve performance of the network, It normalizes" +
2641	" the class to be between -1 and 1. Note that this is only internally" +
2642	", the output will be scaled back to the original range.";
2643	}
2644	/**
2645	* @return a string to describe the nominal to binary option.
2646	*/
2647	public String normalizeAttributesTipText() {
2648	return "This will normalize the attributes." +
2649	" This could help improve performance of the network." +
2650	" This is not reliant on the class being numeric. This will also" +
2651	" normalize nominal attributes as well (after they have been run" +
2652	" through the nominal to binary filter if that is in use) so that the" +
2653	" nominal values are between -1 and 1";
2654	}
2655	/**
2656	* @return a string to describe the Reset option.
2657	*/
2658	public String resetTipText() {
2659	return "This will allow the network to reset with a lower learning rate." +
2660	" If the network diverges from the answer this will automatically" +
2661	" reset the network with a lower learning rate and begin training" +
2662	" again. This option is only available if the gui is not set. Note" +
2663	" that if the network diverges but isn't allowed to reset it will" +
2664	" fail the training process and return an error message.";
2665	}
2666
2667	/**
2668	* @return a string to describe the Decay option.
2669	*/
2670	public String decayTipText() {
2671	return "This will cause the learning rate to decrease." +
2672	" This will divide the starting learning rate by the epoch number, to" +
2673	" determine what the current learning rate should be. This may help" +
2674	" to stop the network from diverging from the target output, as well" +
2675	" as improve general performance. Note that the decaying learning" +
2676	" rate will not be shown in the gui, only the original learning rate" +
2677	". If the learning rate is changed in the gui, this is treated as the" +
2678	" starting learning rate.";
2679	}
2680
2681	/**
2682	* Returns the revision string.
2683	*
2684	* @return the revision
2685	*/
2686	public String getRevision() {
2687	return RevisionUtils.extract("$Revision: 5928 $");
2688	}
2689	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/MetisMQI/src/main/java/weka/classifiers/functions/MultilayerPerceptron.java @ 29

Download in other formats: