1 | /* |
---|
2 | * This program is free software; you can redistribute it and/or modify |
---|
3 | * it under the terms of the GNU General Public License as published by |
---|
4 | * the Free Software Foundation; either version 2 of the License, or |
---|
5 | * (at your option) any later version. |
---|
6 | * |
---|
7 | * This program is distributed in the hope that it will be useful, |
---|
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
10 | * GNU General Public License for more details. |
---|
11 | * |
---|
12 | * You should have received a copy of the GNU General Public License |
---|
13 | * along with this program; if not, write to the Free Software |
---|
14 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * Optimization.java |
---|
19 | * Copyright (C) 2003 University of Waikato, Hamilton, New Zealand |
---|
20 | * |
---|
21 | */ |
---|
22 | |
---|
23 | package weka.core; |
---|
24 | |
---|
25 | import weka.core.TechnicalInformation.Field; |
---|
26 | import weka.core.TechnicalInformation.Type; |
---|
27 | |
---|
28 | /** |
---|
29 | * Implementation of Active-sets method with BFGS update to solve optimization |
---|
30 | * problem with only bounds constraints in multi-dimensions. In this |
---|
31 | * implementation we consider both the lower and higher bound constraints. <p/> |
---|
32 | * |
---|
33 | * Here is the sketch of our searching strategy, and the detailed description |
---|
34 | * of the algorithm can be found in the Appendix of Xin Xu's MSc thesis:<p/> |
---|
35 | * Initialize everything, incl. initial value, direction, etc.<p/> |
---|
36 | * LOOP (main algorithm):<br/> |
---|
37 | * |
---|
38 | * 1. Perform the line search using the directions for free variables<br/> |
---|
39 | * 1.1 Check all the bounds that are not "active" (i.e. binding variables) |
---|
40 | * and compute the feasible step length to the bound for each of them<br/> |
---|
41 | * 1.2 Pick up the least feasible step length, say \alpha, and set it as |
---|
42 | * the upper bound of the current step length, i.e. |
---|
43 | * 0<\lambda<=\alpha<br/> |
---|
44 | * 1.3 Search for any possible step length<=\alpha that can result the |
---|
45 | * "sufficient function decrease" (\alpha condition) AND "positive |
---|
46 | * definite inverse Hessian" (\beta condition), if possible, using |
---|
47 | * SAFEGUARDED polynomial interpolation. This step length is "safe" and |
---|
48 | * thus is used to compute the next value of the free variables .<br/> |
---|
49 | * 1.4 Fix the variable(s) that are newly bound to its constraint(s).<p/> |
---|
50 | * |
---|
51 | * 2. Check whether there is convergence of all variables or their gradients. |
---|
52 | * If there is, check the possibilities to release any current bindings of |
---|
53 | * the fixed variables to their bounds based on the "reliable" second-order |
---|
54 | * Lagarange multipliers if available. If it's available and negative for |
---|
55 | * one variable, then release it. If not available, use first-order |
---|
56 | * Lagarange multiplier to test release. If there is any released |
---|
57 | * variables, STOP the loop. Otherwise update the inverse of Hessian matrix |
---|
58 | * and gradient for the newly released variables and CONTINUE LOOP.<p/> |
---|
59 | * |
---|
60 | * 3. Use BFGS formula to update the inverse of Hessian matrix. Note the |
---|
61 | * already-fixed variables must have zeros in the corresponding entries |
---|
62 | * in the inverse Hessian.<p/> |
---|
63 | * |
---|
64 | * 4. Compute the new (newton) search direction d=H^{-1}*g, where H^{-1} is the |
---|
65 | * inverse Hessian and g is the Jacobian. Note that again, the already- |
---|
66 | * fixed variables will have zero direction.<p/> |
---|
67 | * |
---|
68 | * ENDLOOP<p/> |
---|
69 | * |
---|
70 | * A typical usage of this class is to create your own subclass of this class |
---|
71 | * and provide the objective function and gradients as follows:<p/> |
---|
72 | * <pre> |
---|
73 | * class MyOpt extends Optimization { |
---|
74 | * // Provide the objective function |
---|
75 | * protected double objectiveFunction(double[] x) { |
---|
76 | * // How to calculate your objective function... |
---|
77 | * // ... |
---|
78 | * } |
---|
79 | * |
---|
80 | * // Provide the first derivatives |
---|
81 | * protected double[] evaluateGradient(double[] x) { |
---|
82 | * // How to calculate the gradient of the objective function... |
---|
83 | * // ... |
---|
84 | * } |
---|
85 | * |
---|
86 | * // If possible, provide the index^{th} row of the Hessian matrix |
---|
87 | * protected double[] evaluateHessian(double[] x, int index) { |
---|
88 | * // How to calculate the index^th variable's second derivative |
---|
89 | * // ... |
---|
90 | * } |
---|
91 | * } |
---|
92 | * </pre> |
---|
93 | * |
---|
94 | * When it's the time to use it, in some routine(s) of other class... |
---|
95 | * <pre> |
---|
96 | * MyOpt opt = new MyOpt(); |
---|
97 | * |
---|
98 | * // Set up initial variable values and bound constraints |
---|
99 | * double[] x = new double[numVariables]; |
---|
100 | * // Lower and upper bounds: 1st row is lower bounds, 2nd is upper |
---|
101 | * double[] constraints = new double[2][numVariables]; |
---|
102 | * ... |
---|
103 | * |
---|
104 | * // Find the minimum, 200 iterations as default |
---|
105 | * x = opt.findArgmin(x, constraints); |
---|
106 | * while(x == null){ // 200 iterations are not enough |
---|
107 | * x = opt.getVarbValues(); // Try another 200 iterations |
---|
108 | * x = opt.findArgmin(x, constraints); |
---|
109 | * } |
---|
110 | * |
---|
111 | * // The minimal function value |
---|
112 | * double minFunction = opt.getMinFunction(); |
---|
113 | * ... |
---|
114 | * </pre> |
---|
115 | * |
---|
116 | * It is recommended that Hessian values be provided so that the second-order |
---|
117 | * Lagrangian multiplier estimate can be calcluated. However, if it is not |
---|
118 | * provided, there is no need to override the <code>evaluateHessian()</code> |
---|
119 | * function.<p/> |
---|
120 | * |
---|
121 | * REFERENCES (see also the <code>getTechnicalInformation()</code> method):<br/> |
---|
122 | * The whole model algorithm is adapted from Chapter 5 and other related |
---|
123 | * chapters in Gill, Murray and Wright(1981) "Practical Optimization", Academic |
---|
124 | * Press. and Gill and Murray(1976) "Minimization Subject to Bounds on the |
---|
125 | * Variables", NPL Report NAC72, while Chong and Zak(1996) "An Introduction to |
---|
126 | * Optimization", John Wiley & Sons, Inc. provides us a brief but helpful |
---|
127 | * introduction to the method. <p/> |
---|
128 | * |
---|
129 | * Dennis and Schnabel(1983) "Numerical Methods for Unconstrained Optimization |
---|
130 | * and Nonlinear Equations", Prentice-Hall Inc. and Press et al.(1992) "Numeric |
---|
131 | * Recipe in C", Second Edition, Cambridge University Press. are consulted for |
---|
132 | * the polynomial interpolation used in the line search implementation. <p/> |
---|
133 | * |
---|
134 | * The Hessian modification in BFGS update uses Cholesky factorization and two |
---|
135 | * rank-one modifications:<br/> |
---|
136 | * Bk+1 = Bk + (Gk*Gk')/(Gk'Dk) + (dGk*(dGk)'))/[alpha*(dGk)'*Dk]. <br/> |
---|
137 | * where Gk is the gradient vector, Dk is the direction vector and alpha is the |
---|
138 | * step rate. <br/> |
---|
139 | * This method is due to Gill, Golub, Murray and Saunders(1974) ``Methods for |
---|
140 | * Modifying Matrix Factorizations'', Mathematics of Computation, Vol.28, |
---|
141 | * No.126, pp 505-535. <p/> |
---|
142 | * |
---|
143 | * @author Xin Xu (xx5@cs.waikato.ac.nz) |
---|
144 | * @version $Revision: 5953 $ |
---|
145 | * @see #getTechnicalInformation() |
---|
146 | */ |
---|
147 | public abstract class Optimization |
---|
148 | implements TechnicalInformationHandler, RevisionHandler { |
---|
149 | |
---|
150 | protected double m_ALF = 1.0e-4; |
---|
151 | |
---|
152 | protected double m_BETA = 0.9; |
---|
153 | |
---|
154 | protected double m_TOLX = 1.0e-6; |
---|
155 | |
---|
156 | protected double m_STPMX = 100.0; |
---|
157 | |
---|
158 | protected int m_MAXITS = 200; |
---|
159 | |
---|
160 | protected static boolean m_Debug = false; |
---|
161 | |
---|
162 | /** function value */ |
---|
163 | protected double m_f; |
---|
164 | |
---|
165 | /** G'*p */ |
---|
166 | private double m_Slope; |
---|
167 | |
---|
168 | /** Test if zero step in lnsrch */ |
---|
169 | private boolean m_IsZeroStep = false; |
---|
170 | |
---|
171 | /** Used when iteration overflow occurs */ |
---|
172 | private double[] m_X; |
---|
173 | |
---|
174 | /** Compute machine precision */ |
---|
175 | protected static double m_Epsilon, m_Zero; |
---|
176 | static { |
---|
177 | m_Epsilon=1.0; |
---|
178 | while(1.0+m_Epsilon > 1.0){ |
---|
179 | m_Epsilon /= 2.0; |
---|
180 | } |
---|
181 | m_Epsilon *= 2.0; |
---|
182 | m_Zero = Math.sqrt(m_Epsilon); |
---|
183 | if (m_Debug) |
---|
184 | System.err.print("Machine precision is "+m_Epsilon+ |
---|
185 | " and zero set to "+m_Zero); |
---|
186 | } |
---|
187 | |
---|
188 | /** |
---|
189 | * Returns an instance of a TechnicalInformation object, containing |
---|
190 | * detailed information about the technical background of this class, |
---|
191 | * e.g., paper reference or book this class is based on. |
---|
192 | * |
---|
193 | * @return the technical information about this class |
---|
194 | */ |
---|
195 | public TechnicalInformation getTechnicalInformation() { |
---|
196 | TechnicalInformation result; |
---|
197 | TechnicalInformation additional; |
---|
198 | |
---|
199 | result = new TechnicalInformation(Type.MASTERSTHESIS); |
---|
200 | result.setValue(Field.AUTHOR, "Xin Xu"); |
---|
201 | result.setValue(Field.YEAR, "2003"); |
---|
202 | result.setValue(Field.TITLE, "Statistical learning in multiple instance problem"); |
---|
203 | result.setValue(Field.SCHOOL, "University of Waikato"); |
---|
204 | result.setValue(Field.ADDRESS, "Hamilton, NZ"); |
---|
205 | result.setValue(Field.NOTE, "0657.594"); |
---|
206 | |
---|
207 | additional = result.add(Type.BOOK); |
---|
208 | additional.setValue(Field.AUTHOR, "P. E. Gill and W. Murray and M. H. Wright"); |
---|
209 | additional.setValue(Field.YEAR, "1981"); |
---|
210 | additional.setValue(Field.TITLE, "Practical Optimization"); |
---|
211 | additional.setValue(Field.PUBLISHER, "Academic Press"); |
---|
212 | additional.setValue(Field.ADDRESS, "London and New York"); |
---|
213 | |
---|
214 | additional = result.add(Type.TECHREPORT); |
---|
215 | additional.setValue(Field.AUTHOR, "P. E. Gill and W. Murray"); |
---|
216 | additional.setValue(Field.YEAR, "1976"); |
---|
217 | additional.setValue(Field.TITLE, "Minimization subject to bounds on the variables"); |
---|
218 | additional.setValue(Field.INSTITUTION, "National Physical Laboratory"); |
---|
219 | additional.setValue(Field.NUMBER, "NAC 72"); |
---|
220 | |
---|
221 | additional = result.add(Type.BOOK); |
---|
222 | additional.setValue(Field.AUTHOR, "E. K. P. Chong and S. H. Zak"); |
---|
223 | additional.setValue(Field.YEAR, "1996"); |
---|
224 | additional.setValue(Field.TITLE, "An Introduction to Optimization"); |
---|
225 | additional.setValue(Field.PUBLISHER, "John Wiley and Sons"); |
---|
226 | additional.setValue(Field.ADDRESS, "New York"); |
---|
227 | |
---|
228 | additional = result.add(Type.BOOK); |
---|
229 | additional.setValue(Field.AUTHOR, "J. E. Dennis and R. B. Schnabel"); |
---|
230 | additional.setValue(Field.YEAR, "1983"); |
---|
231 | additional.setValue(Field.TITLE, "Numerical Methods for Unconstrained Optimization and Nonlinear Equations"); |
---|
232 | additional.setValue(Field.PUBLISHER, "Prentice-Hall"); |
---|
233 | |
---|
234 | additional = result.add(Type.BOOK); |
---|
235 | additional.setValue(Field.AUTHOR, "W. H. Press and B. P. Flannery and S. A. Teukolsky and W. T. Vetterling"); |
---|
236 | additional.setValue(Field.YEAR, "1992"); |
---|
237 | additional.setValue(Field.TITLE, "Numerical Recipes in C"); |
---|
238 | additional.setValue(Field.PUBLISHER, "Cambridge University Press"); |
---|
239 | additional.setValue(Field.EDITION, "Second"); |
---|
240 | |
---|
241 | additional = result.add(Type.ARTICLE); |
---|
242 | additional.setValue(Field.AUTHOR, "P. E. Gill and G. H. Golub and W. Murray and M. A. Saunders"); |
---|
243 | additional.setValue(Field.YEAR, "1974"); |
---|
244 | additional.setValue(Field.TITLE, "Methods for modifying matrix factorizations"); |
---|
245 | additional.setValue(Field.JOURNAL, "Mathematics of Computation"); |
---|
246 | additional.setValue(Field.VOLUME, "28"); |
---|
247 | additional.setValue(Field.NUMBER, "126"); |
---|
248 | additional.setValue(Field.PAGES, "505-535"); |
---|
249 | |
---|
250 | return result; |
---|
251 | } |
---|
252 | |
---|
253 | /** |
---|
254 | * Subclass should implement this procedure to evaluate objective |
---|
255 | * function to be minimized |
---|
256 | * |
---|
257 | * @param x the variable values |
---|
258 | * @return the objective function value |
---|
259 | * @throws Exception if something goes wrong |
---|
260 | */ |
---|
261 | protected abstract double objectiveFunction(double[] x) throws Exception; |
---|
262 | |
---|
263 | /** |
---|
264 | * Subclass should implement this procedure to evaluate gradient |
---|
265 | * of the objective function |
---|
266 | * |
---|
267 | * @param x the variable values |
---|
268 | * @return the gradient vector |
---|
269 | * @throws Exception if something goes wrong |
---|
270 | */ |
---|
271 | protected abstract double[] evaluateGradient(double[] x) throws Exception; |
---|
272 | |
---|
273 | /** |
---|
274 | * Subclass is recommended to override this procedure to evaluate second-order |
---|
275 | * gradient of the objective function. If it's not provided, it returns |
---|
276 | * null. |
---|
277 | * |
---|
278 | * @param x the variables |
---|
279 | * @param index the row index in the Hessian matrix |
---|
280 | * @return one row (the row #index) of the Hessian matrix, null as default |
---|
281 | * @throws Exception if something goes wrong |
---|
282 | */ |
---|
283 | protected double[] evaluateHessian(double[] x, int index) throws Exception{ |
---|
284 | return null; |
---|
285 | } |
---|
286 | |
---|
287 | /** |
---|
288 | * Get the minimal function value |
---|
289 | * |
---|
290 | * @return minimal function value found |
---|
291 | */ |
---|
292 | public double getMinFunction() { |
---|
293 | return m_f; |
---|
294 | } |
---|
295 | |
---|
296 | /** |
---|
297 | * Set the maximal number of iterations in searching (Default 200) |
---|
298 | * |
---|
299 | * @param it the maximal number of iterations |
---|
300 | */ |
---|
301 | public void setMaxIteration(int it) { |
---|
302 | m_MAXITS=it; |
---|
303 | } |
---|
304 | |
---|
305 | /** |
---|
306 | * Set whether in debug mode |
---|
307 | * |
---|
308 | * @param db use debug or not |
---|
309 | */ |
---|
310 | public void setDebug(boolean db) { |
---|
311 | m_Debug = db; |
---|
312 | } |
---|
313 | |
---|
314 | /** |
---|
315 | * Get the variable values. Only needed when iterations exceeds |
---|
316 | * the max threshold. |
---|
317 | * |
---|
318 | * @return the current variable values |
---|
319 | */ |
---|
320 | public double[] getVarbValues() { |
---|
321 | return m_X; |
---|
322 | } |
---|
323 | |
---|
324 | /** |
---|
325 | * Find a new point x in the direction p from a point xold at which the |
---|
326 | * value of the function has decreased sufficiently, the positive |
---|
327 | * definiteness of B matrix (approximation of the inverse of the Hessian) |
---|
328 | * is preserved and no bound constraints are violated. Details see "Numerical |
---|
329 | * Methods for Unconstrained Optimization and Nonlinear Equations". |
---|
330 | * "Numeric Recipes in C" was also consulted. |
---|
331 | * |
---|
332 | * @param xold old x value |
---|
333 | * @param gradient gradient at that point |
---|
334 | * @param direct direction vector |
---|
335 | * @param stpmax maximum step length |
---|
336 | * @param isFixed indicating whether a variable has been fixed |
---|
337 | * @param nwsBounds non-working set bounds. Means these variables are free and |
---|
338 | * subject to the bound constraints in this step |
---|
339 | * @param wsBdsIndx index of variables that has working-set bounds. Means |
---|
340 | * these variables are already fixed and no longer subject to |
---|
341 | * the constraints |
---|
342 | * @return new value along direction p from xold, null if no step was taken |
---|
343 | * @throws Exception if an error occurs |
---|
344 | */ |
---|
345 | public double[] lnsrch(double[] xold, double[] gradient, |
---|
346 | double[] direct, double stpmax, |
---|
347 | boolean[] isFixed, double[][] nwsBounds, |
---|
348 | DynamicIntArray wsBdsIndx) |
---|
349 | throws Exception { |
---|
350 | |
---|
351 | int i, k,len=xold.length, |
---|
352 | fixedOne=-1; // idx of variable to be fixed |
---|
353 | double alam, alamin; // lambda to be found, and its lower bound |
---|
354 | |
---|
355 | // For convergence and bound test |
---|
356 | double temp,test,alpha=Double.POSITIVE_INFINITY,fold=m_f,sum; |
---|
357 | |
---|
358 | // For cubic interpolation |
---|
359 | double a,alam2=0,b,disc=0,maxalam=1.0,rhs1,rhs2,tmplam; |
---|
360 | |
---|
361 | double[] x = new double[len]; // New variable values |
---|
362 | |
---|
363 | // Scale the step |
---|
364 | for (sum=0.0,i=0;i<len;i++){ |
---|
365 | if(!isFixed[i]) // For fixed variables, direction = 0 |
---|
366 | sum += direct[i]*direct[i]; |
---|
367 | } |
---|
368 | sum = Math.sqrt(sum); |
---|
369 | |
---|
370 | if (m_Debug) |
---|
371 | System.err.println("fold: "+Utils.doubleToString(fold,10,7)+"\n"+ |
---|
372 | "sum: "+Utils.doubleToString(sum,10,7)+"\n"+ |
---|
373 | "stpmax: "+Utils.doubleToString(stpmax,10,7)); |
---|
374 | if (sum > stpmax){ |
---|
375 | for (i=0;i<len;i++) |
---|
376 | if(!isFixed[i]) |
---|
377 | direct[i] *= stpmax/sum; |
---|
378 | } |
---|
379 | else |
---|
380 | maxalam = stpmax/sum; |
---|
381 | |
---|
382 | // Compute initial rate of decrease, g'*d |
---|
383 | m_Slope=0.0; |
---|
384 | for (i=0;i<len;i++){ |
---|
385 | x[i] = xold[i]; |
---|
386 | if(!isFixed[i]) |
---|
387 | m_Slope += gradient[i]*direct[i]; |
---|
388 | } |
---|
389 | |
---|
390 | if (m_Debug) |
---|
391 | System.err.print("slope: " + Utils.doubleToString(m_Slope,10,7)+ "\n"); |
---|
392 | |
---|
393 | // Slope too small |
---|
394 | if(Math.abs(m_Slope)<=m_Zero){ |
---|
395 | if (m_Debug) |
---|
396 | System.err.println("Gradient and direction orthogonal -- "+ |
---|
397 | "Min. found with current fixed variables"+ |
---|
398 | " (or all variables fixed). Try to release"+ |
---|
399 | " some variables now."); |
---|
400 | return x; |
---|
401 | } |
---|
402 | |
---|
403 | // Err: slope > 0 |
---|
404 | if(m_Slope > m_Zero){ |
---|
405 | if(m_Debug) |
---|
406 | for(int h=0; h<x.length; h++) |
---|
407 | System.err.println(h+": isFixed="+isFixed[h]+", x="+ |
---|
408 | x[h]+", grad="+gradient[h]+", direct="+ |
---|
409 | direct[h]); |
---|
410 | throw new Exception("g'*p positive! -- Try to debug from here: line 327."); |
---|
411 | } |
---|
412 | |
---|
413 | // Compute LAMBDAmin and upper bound of lambda--alpha |
---|
414 | test=0.0; |
---|
415 | for(i=0;i<len;i++){ |
---|
416 | if(!isFixed[i]){// No need for fixed variables |
---|
417 | temp=Math.abs(direct[i])/Math.max(Math.abs(x[i]),1.0); |
---|
418 | if (temp > test) test=temp; |
---|
419 | } |
---|
420 | } |
---|
421 | |
---|
422 | if(test>m_Zero) // Not converge |
---|
423 | alamin = m_TOLX/test; |
---|
424 | else{ |
---|
425 | if (m_Debug) |
---|
426 | System.err.println("Zero directions for all free variables -- "+ |
---|
427 | "Min. found with current fixed variables"+ |
---|
428 | " (or all variables fixed). Try to release"+ |
---|
429 | " some variables now."); |
---|
430 | return x; |
---|
431 | } |
---|
432 | |
---|
433 | // Check whether any non-working-set bounds are "binding" |
---|
434 | for(i=0;i<len;i++){ |
---|
435 | if(!isFixed[i]){// No need for fixed variables |
---|
436 | double alpi; |
---|
437 | if((direct[i]<-m_Epsilon) && !Double.isNaN(nwsBounds[0][i])){//Not feasible |
---|
438 | alpi = (nwsBounds[0][i]-xold[i])/direct[i]; |
---|
439 | if(alpi <= m_Zero){ // Zero |
---|
440 | if (m_Debug) |
---|
441 | System.err.println("Fix variable "+i+ |
---|
442 | " to lower bound "+ nwsBounds[0][i]+ |
---|
443 | " from value "+ xold[i]); |
---|
444 | x[i] = nwsBounds[0][i]; |
---|
445 | isFixed[i]=true; // Fix this variable |
---|
446 | alpha = 0.0; |
---|
447 | nwsBounds[0][i]=Double.NaN; //Add cons. to working set |
---|
448 | wsBdsIndx.addElement(i); |
---|
449 | } |
---|
450 | else if(alpha > alpi){ // Fix one variable in one iteration |
---|
451 | alpha = alpi; |
---|
452 | fixedOne = i; |
---|
453 | } |
---|
454 | } |
---|
455 | else if((direct[i]>m_Epsilon) && !Double.isNaN(nwsBounds[1][i])){//Not feasible |
---|
456 | alpi = (nwsBounds[1][i]-xold[i])/direct[i]; |
---|
457 | if(alpi <= m_Zero){ // Zero |
---|
458 | if (m_Debug) |
---|
459 | System.err.println("Fix variable "+i+ |
---|
460 | " to upper bound "+ nwsBounds[1][i]+ |
---|
461 | " from value "+ xold[i]); |
---|
462 | x[i] = nwsBounds[1][i]; |
---|
463 | isFixed[i]=true; // Fix this variable |
---|
464 | alpha = 0.0; |
---|
465 | nwsBounds[1][i]=Double.NaN; //Add cons. to working set |
---|
466 | wsBdsIndx.addElement(i); |
---|
467 | } |
---|
468 | else if(alpha > alpi){ |
---|
469 | alpha = alpi; |
---|
470 | fixedOne = i; |
---|
471 | } |
---|
472 | } |
---|
473 | } |
---|
474 | } |
---|
475 | |
---|
476 | if (m_Debug){ |
---|
477 | System.err.println("alamin: " + Utils.doubleToString(alamin,10,7)); |
---|
478 | System.err.println("alpha: " + Utils.doubleToString(alpha,10,7)); |
---|
479 | } |
---|
480 | |
---|
481 | if(alpha <= m_Zero){ // Zero |
---|
482 | m_IsZeroStep = true; |
---|
483 | if (m_Debug) |
---|
484 | System.err.println("Alpha too small, try again"); |
---|
485 | return x; |
---|
486 | } |
---|
487 | |
---|
488 | alam = alpha; // Always try full feasible newton step |
---|
489 | if(alam > 1.0) |
---|
490 | alam = 1.0; |
---|
491 | |
---|
492 | // Iteration of one newton step, if necessary, backtracking is done |
---|
493 | double initF=fold, // Initial function value |
---|
494 | hi=alam, lo=alam, newSlope=0, fhi=m_f, flo=m_f;// Variables used for beta condition |
---|
495 | double[] newGrad; // Gradient on the new variable values |
---|
496 | |
---|
497 | kloop: |
---|
498 | for (k=0;;k++) { |
---|
499 | if(m_Debug) |
---|
500 | System.err.println("\nLine search iteration: " + k); |
---|
501 | |
---|
502 | for (i=0;i<len;i++){ |
---|
503 | if(!isFixed[i]){ |
---|
504 | x[i] = xold[i]+alam*direct[i]; // Compute xnew |
---|
505 | if(!Double.isNaN(nwsBounds[0][i]) && (x[i]<nwsBounds[0][i])){ |
---|
506 | x[i] = nwsBounds[0][i]; //Rounding error |
---|
507 | } |
---|
508 | else if(!Double.isNaN(nwsBounds[1][i]) && (x[i]>nwsBounds[1][i])){ |
---|
509 | x[i] = nwsBounds[1][i]; //Rounding error |
---|
510 | } |
---|
511 | } |
---|
512 | } |
---|
513 | |
---|
514 | m_f = objectiveFunction(x); // Compute fnew |
---|
515 | if(Double.isNaN(m_f)) |
---|
516 | throw new Exception("Objective function value is NaN!"); |
---|
517 | |
---|
518 | while(Double.isInfinite(m_f)){ // Avoid infinity |
---|
519 | if(m_Debug) |
---|
520 | System.err.println("Too large m_f. Shrink step by half."); |
---|
521 | alam *= 0.5; // Shrink by half |
---|
522 | if(alam <= m_Epsilon){ |
---|
523 | if(m_Debug) |
---|
524 | System.err.println("Wrong starting points, change them!"); |
---|
525 | return x; |
---|
526 | } |
---|
527 | |
---|
528 | for (i=0;i<len;i++) |
---|
529 | if(!isFixed[i]) |
---|
530 | x[i] = xold[i]+alam*direct[i]; |
---|
531 | |
---|
532 | m_f = objectiveFunction(x); |
---|
533 | if(Double.isNaN(m_f)) |
---|
534 | throw new Exception("Objective function value is NaN!"); |
---|
535 | |
---|
536 | initF = Double.POSITIVE_INFINITY; |
---|
537 | } |
---|
538 | |
---|
539 | if(m_Debug) { |
---|
540 | System.err.println("obj. function: " + |
---|
541 | Utils.doubleToString(m_f, 10, 7)); |
---|
542 | System.err.println("threshold: " + |
---|
543 | Utils.doubleToString(fold+m_ALF*alam*m_Slope,10,7)); |
---|
544 | } |
---|
545 | |
---|
546 | if(m_f<=fold+m_ALF*alam*m_Slope){// Alpha condition: sufficient function decrease |
---|
547 | if(m_Debug) |
---|
548 | System.err.println("Sufficient function decrease (alpha condition): "); |
---|
549 | newGrad = evaluateGradient(x); |
---|
550 | for(newSlope=0.0,i=0; i<len; i++) |
---|
551 | if(!isFixed[i]) |
---|
552 | newSlope += newGrad[i]*direct[i]; |
---|
553 | |
---|
554 | if(newSlope >= m_BETA*m_Slope){ // Beta condition: ensure pos. defnty. |
---|
555 | if(m_Debug) |
---|
556 | System.err.println("Increasing derivatives (beta condition): "); |
---|
557 | |
---|
558 | if((fixedOne!=-1) && (alam>=alpha)){ // Has bounds and over |
---|
559 | if(direct[fixedOne] > 0){ |
---|
560 | x[fixedOne] = nwsBounds[1][fixedOne]; // Avoid rounding error |
---|
561 | nwsBounds[1][fixedOne]=Double.NaN; //Add cons. to working set |
---|
562 | } |
---|
563 | else{ |
---|
564 | x[fixedOne] = nwsBounds[0][fixedOne]; // Avoid rounding error |
---|
565 | nwsBounds[0][fixedOne]=Double.NaN; //Add cons. to working set |
---|
566 | } |
---|
567 | |
---|
568 | if(m_Debug) |
---|
569 | System.err.println("Fix variable " |
---|
570 | +fixedOne+" to bound "+ x[fixedOne]+ |
---|
571 | " from value "+ xold[fixedOne]); |
---|
572 | isFixed[fixedOne]=true; // Fix the variable |
---|
573 | wsBdsIndx.addElement(fixedOne); |
---|
574 | } |
---|
575 | return x; |
---|
576 | } |
---|
577 | else if(k==0){ // First time: increase alam |
---|
578 | // Search for the smallest value not complying with alpha condition |
---|
579 | double upper = Math.min(alpha,maxalam); |
---|
580 | if(m_Debug) |
---|
581 | System.err.println("Alpha condition holds, increase alpha... "); |
---|
582 | while(!((alam>=upper) || (m_f>fold+m_ALF*alam*m_Slope))){ |
---|
583 | lo = alam; |
---|
584 | flo = m_f; |
---|
585 | alam *= 2.0; |
---|
586 | if(alam>=upper) // Avoid rounding errors |
---|
587 | alam=upper; |
---|
588 | |
---|
589 | for (i=0;i<len;i++) |
---|
590 | if(!isFixed[i]) |
---|
591 | x[i] = xold[i]+alam*direct[i]; |
---|
592 | m_f = objectiveFunction(x); |
---|
593 | if(Double.isNaN(m_f)) |
---|
594 | throw new Exception("Objective function value is NaN!"); |
---|
595 | |
---|
596 | newGrad = evaluateGradient(x); |
---|
597 | for(newSlope=0.0,i=0; i<len; i++) |
---|
598 | if(!isFixed[i]) |
---|
599 | newSlope += newGrad[i]*direct[i]; |
---|
600 | |
---|
601 | if(newSlope >= m_BETA*m_Slope){ |
---|
602 | if (m_Debug) |
---|
603 | System.err.println("Increasing derivatives (beta condition): \n"+ |
---|
604 | "newSlope = "+Utils.doubleToString(newSlope,10,7)); |
---|
605 | |
---|
606 | if((fixedOne!=-1) && (alam>=alpha)){ // Has bounds and over |
---|
607 | if(direct[fixedOne] > 0){ |
---|
608 | x[fixedOne] = nwsBounds[1][fixedOne]; // Avoid rounding error |
---|
609 | nwsBounds[1][fixedOne]=Double.NaN; //Add cons. to working set |
---|
610 | } |
---|
611 | else{ |
---|
612 | x[fixedOne] = nwsBounds[0][fixedOne]; // Avoid rounding error |
---|
613 | nwsBounds[0][fixedOne]=Double.NaN; //Add cons. to working set |
---|
614 | } |
---|
615 | |
---|
616 | if(m_Debug) |
---|
617 | System.err.println("Fix variable " |
---|
618 | +fixedOne+" to bound "+ x[fixedOne]+ |
---|
619 | " from value "+ xold[fixedOne]); |
---|
620 | isFixed[fixedOne]=true; // Fix the variable |
---|
621 | wsBdsIndx.addElement(fixedOne); |
---|
622 | } |
---|
623 | return x; |
---|
624 | } |
---|
625 | } |
---|
626 | hi = alam; |
---|
627 | fhi = m_f; |
---|
628 | break kloop; |
---|
629 | } |
---|
630 | else{ |
---|
631 | if(m_Debug) |
---|
632 | System.err.println("Alpha condition holds."); |
---|
633 | hi = alam2; lo = alam; flo = m_f; |
---|
634 | break kloop; |
---|
635 | } |
---|
636 | } |
---|
637 | else if (alam < alamin) { // No feasible lambda found |
---|
638 | if(initF<fold){ |
---|
639 | alam = Math.min(1.0,alpha); |
---|
640 | for (i=0;i<len;i++) |
---|
641 | if(!isFixed[i]) |
---|
642 | x[i] = xold[i]+alam*direct[i]; //Still take Alpha |
---|
643 | |
---|
644 | if (m_Debug) |
---|
645 | System.err.println("No feasible lambda: still take"+ |
---|
646 | " alpha="+alam); |
---|
647 | |
---|
648 | if((fixedOne!=-1) && (alam>=alpha)){ // Has bounds and over |
---|
649 | if(direct[fixedOne] > 0){ |
---|
650 | x[fixedOne] = nwsBounds[1][fixedOne]; // Avoid rounding error |
---|
651 | nwsBounds[1][fixedOne]=Double.NaN; //Add cons. to working set |
---|
652 | } |
---|
653 | else{ |
---|
654 | x[fixedOne] = nwsBounds[0][fixedOne]; // Avoid rounding error |
---|
655 | nwsBounds[0][fixedOne]=Double.NaN; //Add cons. to working set |
---|
656 | } |
---|
657 | |
---|
658 | if(m_Debug) |
---|
659 | System.err.println("Fix variable " |
---|
660 | +fixedOne+" to bound "+ x[fixedOne]+ |
---|
661 | " from value "+ xold[fixedOne]); |
---|
662 | isFixed[fixedOne]=true; // Fix the variable |
---|
663 | wsBdsIndx.addElement(fixedOne); |
---|
664 | } |
---|
665 | } |
---|
666 | else{ // Convergence on delta(x) |
---|
667 | for(i=0;i<len;i++) |
---|
668 | x[i]=xold[i]; |
---|
669 | m_f=fold; |
---|
670 | if (m_Debug) |
---|
671 | System.err.println("Cannot find feasible lambda"); |
---|
672 | } |
---|
673 | |
---|
674 | return x; |
---|
675 | } |
---|
676 | else { // Backtracking by polynomial interpolation |
---|
677 | if(k==0){ // First time backtrack: quadratic interpolation |
---|
678 | if(!Double.isInfinite(initF)) |
---|
679 | initF = m_f; |
---|
680 | // lambda = -g'(0)/(2*g''(0)) |
---|
681 | tmplam = -0.5*alam*m_Slope/((m_f-fold)/alam-m_Slope); |
---|
682 | } |
---|
683 | else { // Subsequent backtrack: cubic interpolation |
---|
684 | rhs1 = m_f-fold-alam*m_Slope; |
---|
685 | rhs2 = fhi-fold-alam2*m_Slope; |
---|
686 | a=(rhs1/(alam*alam)-rhs2/(alam2*alam2))/(alam-alam2); |
---|
687 | b=(-alam2*rhs1/(alam*alam)+alam*rhs2/(alam2*alam2))/(alam-alam2); |
---|
688 | if (a == 0.0) tmplam = -m_Slope/(2.0*b); |
---|
689 | else { |
---|
690 | disc=b*b-3.0*a*m_Slope; |
---|
691 | if (disc < 0.0) disc = 0.0; |
---|
692 | double numerator = -b+Math.sqrt(disc); |
---|
693 | if(numerator >= Double.MAX_VALUE){ |
---|
694 | numerator = Double.MAX_VALUE; |
---|
695 | if (m_Debug) |
---|
696 | System.err.print("-b+sqrt(disc) too large! Set it to MAX_VALUE."); |
---|
697 | } |
---|
698 | tmplam=numerator/(3.0*a); |
---|
699 | } |
---|
700 | if (m_Debug) |
---|
701 | System.err.print("Cubic interpolation: \n" + |
---|
702 | "a: " + Utils.doubleToString(a,10,7)+ "\n" + |
---|
703 | "b: " + Utils.doubleToString(b,10,7)+ "\n" + |
---|
704 | "disc: " + Utils.doubleToString(disc,10,7)+ "\n" + |
---|
705 | "tmplam: " + tmplam + "\n" + |
---|
706 | "alam: " + Utils.doubleToString(alam,10,7)+ "\n"); |
---|
707 | if (tmplam>0.5*alam) |
---|
708 | tmplam=0.5*alam; // lambda <= 0.5*lambda_old |
---|
709 | } |
---|
710 | } |
---|
711 | alam2=alam; |
---|
712 | fhi=m_f; |
---|
713 | alam=Math.max(tmplam,0.1*alam); // lambda >= 0.1*lambda_old |
---|
714 | |
---|
715 | if(alam>alpha){ |
---|
716 | throw new Exception("Sth. wrong in lnsrch:"+ |
---|
717 | "Lambda infeasible!(lambda="+alam+ |
---|
718 | ", alpha="+alpha+", upper="+tmplam+ |
---|
719 | "|"+(-alpha*m_Slope/(2.0*((m_f-fold)/alpha-m_Slope)))+ |
---|
720 | ", m_f="+m_f+", fold="+fold+ |
---|
721 | ", slope="+m_Slope); |
---|
722 | } |
---|
723 | } // Endfor(k=0;;k++) |
---|
724 | |
---|
725 | // Quadratic interpolation between lamda values between lo and hi. |
---|
726 | // If cannot find a value satisfying beta condition, use lo. |
---|
727 | double ldiff = hi-lo, lincr; |
---|
728 | if(m_Debug) |
---|
729 | System.err.println("Last stage of searching for beta condition (alam between " |
---|
730 | +Utils.doubleToString(lo,10,7)+" and " |
---|
731 | +Utils.doubleToString(hi,10,7)+")...\n"+ |
---|
732 | "Quadratic Interpolation(QI):\n"+ |
---|
733 | "Last newSlope = "+Utils.doubleToString(newSlope, 10, 7)); |
---|
734 | |
---|
735 | while((newSlope<m_BETA*m_Slope) && (ldiff>=alamin)){ |
---|
736 | lincr = -0.5*newSlope*ldiff*ldiff/(fhi-flo-newSlope*ldiff); |
---|
737 | |
---|
738 | if(m_Debug) |
---|
739 | System.err.println("fhi = "+fhi+"\n"+ |
---|
740 | "flo = "+flo+"\n"+ |
---|
741 | "ldiff = "+ldiff+"\n"+ |
---|
742 | "lincr (using QI) = "+lincr+"\n"); |
---|
743 | |
---|
744 | if(lincr<0.2*ldiff) lincr=0.2*ldiff; |
---|
745 | alam = lo+lincr; |
---|
746 | if(alam >= hi){ // We cannot go beyond the bounds, so the best we can try is hi |
---|
747 | alam=hi; |
---|
748 | lincr=ldiff; |
---|
749 | } |
---|
750 | for (i=0;i<len;i++) |
---|
751 | if(!isFixed[i]) |
---|
752 | x[i] = xold[i]+alam*direct[i]; |
---|
753 | m_f = objectiveFunction(x); |
---|
754 | if(Double.isNaN(m_f)) |
---|
755 | throw new Exception("Objective function value is NaN!"); |
---|
756 | |
---|
757 | if(m_f>fold+m_ALF*alam*m_Slope){ |
---|
758 | // Alpha condition fails, shrink lambda_upper |
---|
759 | ldiff = lincr; |
---|
760 | fhi = m_f; |
---|
761 | } |
---|
762 | else{ // Alpha condition holds |
---|
763 | newGrad = evaluateGradient(x); |
---|
764 | for(newSlope=0.0,i=0; i<len; i++) |
---|
765 | if(!isFixed[i]) |
---|
766 | newSlope += newGrad[i]*direct[i]; |
---|
767 | |
---|
768 | if(newSlope < m_BETA*m_Slope){ |
---|
769 | // Beta condition fails, shrink lambda_lower |
---|
770 | lo = alam; |
---|
771 | ldiff -= lincr; |
---|
772 | flo = m_f; |
---|
773 | } |
---|
774 | } |
---|
775 | } |
---|
776 | |
---|
777 | if(newSlope < m_BETA*m_Slope){ // Cannot satisfy beta condition, take lo |
---|
778 | if(m_Debug) |
---|
779 | System.err.println("Beta condition cannot be satisfied, take alpha condition"); |
---|
780 | alam=lo; |
---|
781 | for (i=0;i<len;i++) |
---|
782 | if(!isFixed[i]) |
---|
783 | x[i] = xold[i]+alam*direct[i]; |
---|
784 | m_f = flo; |
---|
785 | } |
---|
786 | else if(m_Debug) |
---|
787 | System.err.println("Both alpha and beta conditions are satisfied. alam=" |
---|
788 | +Utils.doubleToString(alam,10,7)); |
---|
789 | |
---|
790 | if((fixedOne!=-1) && (alam>=alpha)){ // Has bounds and over |
---|
791 | if(direct[fixedOne] > 0){ |
---|
792 | x[fixedOne] = nwsBounds[1][fixedOne]; // Avoid rounding error |
---|
793 | nwsBounds[1][fixedOne]=Double.NaN; //Add cons. to working set |
---|
794 | } |
---|
795 | else{ |
---|
796 | x[fixedOne] = nwsBounds[0][fixedOne]; // Avoid rounding error |
---|
797 | nwsBounds[0][fixedOne]=Double.NaN; //Add cons. to working set |
---|
798 | } |
---|
799 | |
---|
800 | if(m_Debug) |
---|
801 | System.err.println("Fix variable " |
---|
802 | +fixedOne+" to bound "+ x[fixedOne]+ |
---|
803 | " from value "+ xold[fixedOne]); |
---|
804 | isFixed[fixedOne]=true; // Fix the variable |
---|
805 | wsBdsIndx.addElement(fixedOne); |
---|
806 | } |
---|
807 | |
---|
808 | return x; |
---|
809 | } |
---|
810 | |
---|
811 | /** |
---|
812 | * Main algorithm. Descriptions see "Practical Optimization" |
---|
813 | * |
---|
814 | * @param initX initial point of x, assuming no value's on the bound! |
---|
815 | * @param constraints the bound constraints of each variable |
---|
816 | * constraints[0] is the lower bounds and |
---|
817 | * constraints[1] is the upper bounds |
---|
818 | * @return the solution of x, null if number of iterations not enough |
---|
819 | * @throws Exception if an error occurs |
---|
820 | */ |
---|
821 | public double[] findArgmin(double[] initX, double[][] constraints) |
---|
822 | throws Exception{ |
---|
823 | int l = initX.length; |
---|
824 | |
---|
825 | // Initially all variables are free, all bounds are constraints of |
---|
826 | // non-working-set constraints |
---|
827 | boolean[] isFixed = new boolean[l]; |
---|
828 | double[][] nwsBounds = new double[2][l]; |
---|
829 | // Record indice of fixed variables, simply for efficiency |
---|
830 | DynamicIntArray wsBdsIndx = new DynamicIntArray(constraints.length); |
---|
831 | // Vectors used to record the variable indices to be freed |
---|
832 | DynamicIntArray toFree=null, oldToFree=null; |
---|
833 | |
---|
834 | // Initial value of obj. function, gradient and inverse of the Hessian |
---|
835 | m_f = objectiveFunction(initX); |
---|
836 | if(Double.isNaN(m_f)) |
---|
837 | throw new Exception("Objective function value is NaN!"); |
---|
838 | |
---|
839 | double sum=0; |
---|
840 | double[] grad=evaluateGradient(initX), oldGrad, oldX, |
---|
841 | deltaGrad=new double[l], deltaX=new double[l], |
---|
842 | direct = new double[l], x = new double[l]; |
---|
843 | Matrix L = new Matrix(l, l); // Lower triangle of Cholesky factor |
---|
844 | double[] D = new double[l]; // Diagonal of Cholesky factor |
---|
845 | for(int i=0; i<l; i++){ |
---|
846 | L.setRow(i, new double[l]); |
---|
847 | L.setElement(i,i,1.0); |
---|
848 | D[i] = 1.0; |
---|
849 | direct[i] = -grad[i]; |
---|
850 | sum += grad[i]*grad[i]; |
---|
851 | x[i] = initX[i]; |
---|
852 | nwsBounds[0][i] = constraints[0][i]; |
---|
853 | nwsBounds[1][i] = constraints[1][i]; |
---|
854 | isFixed[i] = false; |
---|
855 | } |
---|
856 | double stpmax = m_STPMX*Math.max(Math.sqrt(sum), l); |
---|
857 | |
---|
858 | iterates: |
---|
859 | for(int step=0; step < m_MAXITS; step++){ |
---|
860 | if (m_Debug) |
---|
861 | System.err.println("\nIteration # " + step + ":"); |
---|
862 | |
---|
863 | // Try at most one feasible newton step, i.e. 0<lamda<=alpha |
---|
864 | oldX = x; |
---|
865 | oldGrad = grad; |
---|
866 | |
---|
867 | // Also update grad |
---|
868 | if (m_Debug) |
---|
869 | System.err.println("Line search ... "); |
---|
870 | m_IsZeroStep = false; |
---|
871 | x=lnsrch(x, grad, direct, stpmax, |
---|
872 | isFixed, nwsBounds, wsBdsIndx); |
---|
873 | if (m_Debug) |
---|
874 | System.err.println("Line search finished."); |
---|
875 | |
---|
876 | if(m_IsZeroStep){ // Zero step, simply delete rows/cols of D and L |
---|
877 | for(int f=0; f<wsBdsIndx.size(); f++){ |
---|
878 | int idx=wsBdsIndx.elementAt(f); |
---|
879 | L.setRow(idx, new double[l]); |
---|
880 | L.setColumn(idx, new double[l]); |
---|
881 | D[idx] = 0.0; |
---|
882 | } |
---|
883 | grad = evaluateGradient(x); |
---|
884 | step--; |
---|
885 | } |
---|
886 | else{ |
---|
887 | // Check converge on x |
---|
888 | boolean finish = false; |
---|
889 | double test=0.0; |
---|
890 | for(int h=0; h<l; h++){ |
---|
891 | deltaX[h] = x[h]-oldX[h]; |
---|
892 | double tmp=Math.abs(deltaX[h])/ |
---|
893 | Math.max(Math.abs(x[h]), 1.0); |
---|
894 | if(tmp > test) test = tmp; |
---|
895 | } |
---|
896 | if(test < m_Zero){ |
---|
897 | if (m_Debug) |
---|
898 | System.err.println("\nDeltaX converge: "+test); |
---|
899 | finish = true; |
---|
900 | } |
---|
901 | |
---|
902 | // Check zero gradient |
---|
903 | grad = evaluateGradient(x); |
---|
904 | test=0.0; |
---|
905 | double denom=0.0, dxSq=0.0, dgSq=0.0, newlyBounded=0.0; |
---|
906 | for(int g=0; g<l; g++){ |
---|
907 | if(!isFixed[g]){ |
---|
908 | deltaGrad[g] = grad[g] - oldGrad[g]; |
---|
909 | // Calculate the denominators |
---|
910 | denom += deltaX[g]*deltaGrad[g]; |
---|
911 | dxSq += deltaX[g]*deltaX[g]; |
---|
912 | dgSq += deltaGrad[g]*deltaGrad[g]; |
---|
913 | } |
---|
914 | else // Only newly bounded variables will be non-zero |
---|
915 | newlyBounded += deltaX[g]*(grad[g]-oldGrad[g]); |
---|
916 | |
---|
917 | // Note: CANNOT use projected gradient for testing |
---|
918 | // convergence because of newly bounded variables |
---|
919 | double tmp = Math.abs(grad[g])* |
---|
920 | Math.max(Math.abs(direct[g]),1.0)/ |
---|
921 | Math.max(Math.abs(m_f),1.0); |
---|
922 | if(tmp > test) test = tmp; |
---|
923 | } |
---|
924 | |
---|
925 | if(test < m_Zero){ |
---|
926 | if (m_Debug) |
---|
927 | System.err.println("Gradient converge: "+test); |
---|
928 | finish = true; |
---|
929 | } |
---|
930 | |
---|
931 | // dg'*dx could be < 0 using inexact lnsrch |
---|
932 | if(m_Debug) |
---|
933 | System.err.println("dg'*dx="+(denom+newlyBounded)); |
---|
934 | // dg'*dx = 0 |
---|
935 | if(Math.abs(denom+newlyBounded) < m_Zero) |
---|
936 | finish = true; |
---|
937 | |
---|
938 | int size = wsBdsIndx.size(); |
---|
939 | boolean isUpdate = true; // Whether to update BFGS formula |
---|
940 | // Converge: check whether release any current constraints |
---|
941 | if(finish){ |
---|
942 | if (m_Debug) |
---|
943 | System.err.println("Test any release possible ..."); |
---|
944 | |
---|
945 | if(toFree != null) |
---|
946 | oldToFree = (DynamicIntArray)toFree.copy(); |
---|
947 | toFree = new DynamicIntArray(wsBdsIndx.size()); |
---|
948 | |
---|
949 | for(int m=size-1; m>=0; m--){ |
---|
950 | int index=wsBdsIndx.elementAt(m); |
---|
951 | double[] hessian = evaluateHessian(x, index); |
---|
952 | double deltaL=0.0; |
---|
953 | if(hessian != null){ |
---|
954 | for(int mm=0; mm<hessian.length; mm++) |
---|
955 | if(!isFixed[mm]) // Free variable |
---|
956 | deltaL += hessian[mm]*direct[mm]; |
---|
957 | } |
---|
958 | |
---|
959 | // First and second order Lagrangian multiplier estimate |
---|
960 | // If user didn't provide Hessian, use first-order only |
---|
961 | double L1, L2; |
---|
962 | if(x[index] >= constraints[1][index]) // Upper bound |
---|
963 | L1 = -grad[index]; |
---|
964 | else if(x[index] <= constraints[0][index])// Lower bound |
---|
965 | L1 = grad[index]; |
---|
966 | else |
---|
967 | throw new Exception("x["+index+"] not fixed on the"+ |
---|
968 | " bounds where it should have been!"); |
---|
969 | |
---|
970 | // L2 = L1 + deltaL |
---|
971 | L2 = L1 + deltaL; |
---|
972 | if (m_Debug) |
---|
973 | System.err.println("Variable "+index+ |
---|
974 | ": Lagrangian="+L1+"|"+L2); |
---|
975 | |
---|
976 | //Check validity of Lagrangian multiplier estimate |
---|
977 | boolean isConverge = |
---|
978 | (2.0*Math.abs(deltaL)) < Math.min(Math.abs(L1), |
---|
979 | Math.abs(L2)); |
---|
980 | if((L1*L2>0.0) && isConverge){ //Same sign and converge: valid |
---|
981 | if(L2 < 0.0){// Negative Lagrangian: feasible |
---|
982 | toFree.addElement(index); |
---|
983 | wsBdsIndx.removeElementAt(m); |
---|
984 | finish=false; // Not optimal, cannot finish |
---|
985 | } |
---|
986 | } |
---|
987 | |
---|
988 | // Although hardly happen, better check it |
---|
989 | // If the first-order Lagrangian multiplier estimate is wrong, |
---|
990 | // avoid zigzagging |
---|
991 | if((hessian==null) && (toFree != null) && toFree.equal(oldToFree)) |
---|
992 | finish = true; |
---|
993 | } |
---|
994 | |
---|
995 | if(finish){// Min. found |
---|
996 | if (m_Debug) |
---|
997 | System.err.println("Minimum found."); |
---|
998 | m_f = objectiveFunction(x); |
---|
999 | if(Double.isNaN(m_f)) |
---|
1000 | throw new Exception("Objective function value is NaN!"); |
---|
1001 | return x; |
---|
1002 | } |
---|
1003 | |
---|
1004 | // Free some variables |
---|
1005 | for(int mmm=0; mmm<toFree.size(); mmm++){ |
---|
1006 | int freeIndx=toFree.elementAt(mmm); |
---|
1007 | isFixed[freeIndx] = false; // Free this variable |
---|
1008 | if(x[freeIndx] <= constraints[0][freeIndx]){// Lower bound |
---|
1009 | nwsBounds[0][freeIndx] = constraints[0][freeIndx]; |
---|
1010 | if (m_Debug) |
---|
1011 | System.err.println("Free variable "+freeIndx+ |
---|
1012 | " from bound "+ |
---|
1013 | nwsBounds[0][freeIndx]); |
---|
1014 | } |
---|
1015 | else{ // Upper bound |
---|
1016 | nwsBounds[1][freeIndx] = constraints[1][freeIndx]; |
---|
1017 | if (m_Debug) |
---|
1018 | System.err.println("Free variable "+freeIndx+ |
---|
1019 | " from bound "+ |
---|
1020 | nwsBounds[1][freeIndx]); |
---|
1021 | } |
---|
1022 | L.setElement(freeIndx, freeIndx, 1.0); |
---|
1023 | D[freeIndx] = 1.0; |
---|
1024 | isUpdate = false; |
---|
1025 | } |
---|
1026 | } |
---|
1027 | |
---|
1028 | if(denom<Math.max(m_Zero*Math.sqrt(dxSq)*Math.sqrt(dgSq), m_Zero)){ |
---|
1029 | if (m_Debug) |
---|
1030 | System.err.println("dg'*dx negative!"); |
---|
1031 | isUpdate = false; // Do not update |
---|
1032 | } |
---|
1033 | // If Hessian will be positive definite, update it |
---|
1034 | if(isUpdate){ |
---|
1035 | |
---|
1036 | // modify once: dg*dg'/(dg'*dx) |
---|
1037 | double coeff = 1.0/denom; // 1/(dg'*dx) |
---|
1038 | updateCholeskyFactor(L,D,deltaGrad,coeff,isFixed); |
---|
1039 | |
---|
1040 | // modify twice: g*g'/(g'*p) |
---|
1041 | coeff = 1.0/m_Slope; // 1/(g'*p) |
---|
1042 | updateCholeskyFactor(L,D,oldGrad,coeff,isFixed); |
---|
1043 | } |
---|
1044 | } |
---|
1045 | |
---|
1046 | // Find new direction |
---|
1047 | Matrix LD = new Matrix(l,l); // L*D |
---|
1048 | double[] b = new double[l]; |
---|
1049 | |
---|
1050 | for(int k=0; k<l; k++){ |
---|
1051 | if(!isFixed[k]) b[k] = -grad[k]; |
---|
1052 | else b[k] = 0.0; |
---|
1053 | |
---|
1054 | for(int j=k; j<l; j++){ // Lower triangle |
---|
1055 | if(!isFixed[j] && !isFixed[k]) |
---|
1056 | LD.setElement(j, k, L.getElement(j,k)*D[k]); |
---|
1057 | } |
---|
1058 | } |
---|
1059 | |
---|
1060 | // Solve (LD)*y = -g, where y=L'*direct |
---|
1061 | double[] LDIR = solveTriangle(LD, b, true, isFixed); |
---|
1062 | LD = null; |
---|
1063 | |
---|
1064 | for(int m=0; m<LDIR.length; m++){ |
---|
1065 | if(Double.isNaN(LDIR[m])) |
---|
1066 | throw new Exception("L*direct["+m+"] is NaN!" |
---|
1067 | +"|-g="+b[m]+"|"+isFixed[m] |
---|
1068 | +"|diag="+D[m]); |
---|
1069 | } |
---|
1070 | |
---|
1071 | // Solve L'*direct = y |
---|
1072 | direct = solveTriangle(L, LDIR, false, isFixed); |
---|
1073 | for(int m=0; m<direct.length; m++){ |
---|
1074 | if(Double.isNaN(direct[m])) |
---|
1075 | throw new Exception("direct is NaN!"); |
---|
1076 | } |
---|
1077 | |
---|
1078 | //System.gc(); |
---|
1079 | } |
---|
1080 | |
---|
1081 | if(m_Debug) |
---|
1082 | System.err.println("Cannot find minimum"+ |
---|
1083 | " -- too many interations!"); |
---|
1084 | m_X = x; |
---|
1085 | return null; |
---|
1086 | } |
---|
1087 | |
---|
1088 | /** |
---|
1089 | * Solve the linear equation of TX=B where T is a triangle matrix |
---|
1090 | * It can be solved using back/forward substitution, with O(N^2) |
---|
1091 | * complexity |
---|
1092 | * @param t the matrix T |
---|
1093 | * @param b the vector B |
---|
1094 | * @param isLower whether T is a lower or higher triangle matrix |
---|
1095 | * @param isZero which row(s) of T are not used when solving the equation. |
---|
1096 | * If it's null or all 'false', then every row is used. |
---|
1097 | * @return the solution of X |
---|
1098 | */ |
---|
1099 | public static double[] solveTriangle(Matrix t, double[] b, |
---|
1100 | boolean isLower, boolean[] isZero){ |
---|
1101 | int n = b.length; |
---|
1102 | double[] result = new double[n]; |
---|
1103 | if(isZero == null) |
---|
1104 | isZero = new boolean[n]; |
---|
1105 | |
---|
1106 | if(isLower){ // lower triangle, forward-substitution |
---|
1107 | int j = 0; |
---|
1108 | while((j<n)&&isZero[j]){result[j]=0.0; j++;} // go to the first row |
---|
1109 | |
---|
1110 | if(j<n){ |
---|
1111 | result[j] = b[j]/t.getElement(j,j); |
---|
1112 | |
---|
1113 | for(; j<n; j++){ |
---|
1114 | if(!isZero[j]){ |
---|
1115 | double numerator=b[j]; |
---|
1116 | for(int k=0; k<j; k++) |
---|
1117 | numerator -= t.getElement(j,k)*result[k]; |
---|
1118 | result[j] = numerator/t.getElement(j,j); |
---|
1119 | } |
---|
1120 | else |
---|
1121 | result[j] = 0.0; |
---|
1122 | } |
---|
1123 | } |
---|
1124 | } |
---|
1125 | else{ // Upper triangle, back-substitution |
---|
1126 | int j=n-1; |
---|
1127 | while((j>=0)&&isZero[j]){result[j]=0.0; j--;} // go to the last row |
---|
1128 | |
---|
1129 | if(j>=0){ |
---|
1130 | result[j] = b[j]/t.getElement(j,j); |
---|
1131 | |
---|
1132 | for(; j>=0; j--){ |
---|
1133 | if(!isZero[j]){ |
---|
1134 | double numerator=b[j]; |
---|
1135 | for(int k=j+1; k<n; k++) |
---|
1136 | numerator -= t.getElement(k,j)*result[k]; |
---|
1137 | result[j] = numerator/t.getElement(j,j); |
---|
1138 | } |
---|
1139 | else |
---|
1140 | result[j] = 0.0; |
---|
1141 | } |
---|
1142 | } |
---|
1143 | } |
---|
1144 | |
---|
1145 | return result; |
---|
1146 | } |
---|
1147 | |
---|
1148 | /** |
---|
1149 | * One rank update of the Cholesky factorization of B matrix in BFGS updates, |
---|
1150 | * i.e. B = LDL', and B_{new} = LDL' + coeff*(vv') where L is a unit lower triangle |
---|
1151 | * matrix and D is a diagonal matrix, and v is a vector.<br/> |
---|
1152 | * When coeff > 0, we use C1 algorithm, and otherwise we use C2 algorithm described |
---|
1153 | * in ``Methods for Modifying Matrix Factorizations'' |
---|
1154 | * |
---|
1155 | * @param L the unit triangle matrix L |
---|
1156 | * @param D the diagonal matrix D |
---|
1157 | * @param v the update vector v |
---|
1158 | * @param coeff the coeffcient of update |
---|
1159 | * @param isFixed which variables are not to be updated |
---|
1160 | */ |
---|
1161 | protected void updateCholeskyFactor(Matrix L, double[] D, |
---|
1162 | double[] v, double coeff, |
---|
1163 | boolean[] isFixed) |
---|
1164 | throws Exception{ |
---|
1165 | double t, p, b; |
---|
1166 | int n = v.length; |
---|
1167 | double[] vp = new double[n]; |
---|
1168 | for (int i=0; i<v.length; i++) |
---|
1169 | if(!isFixed[i]) |
---|
1170 | vp[i]=v[i]; |
---|
1171 | else |
---|
1172 | vp[i]=0.0; |
---|
1173 | |
---|
1174 | if(coeff>0.0){ |
---|
1175 | t = coeff; |
---|
1176 | for(int j=0; j<n; j++){ |
---|
1177 | if(isFixed[j]) continue; |
---|
1178 | |
---|
1179 | p = vp[j]; |
---|
1180 | double d=D[j], dbarj=d+t*p*p; |
---|
1181 | D[j] = dbarj; |
---|
1182 | |
---|
1183 | b = p*t/dbarj; |
---|
1184 | t *= d/dbarj; |
---|
1185 | for(int r=j+1; r<n; r++){ |
---|
1186 | if(!isFixed[r]){ |
---|
1187 | double l=L.getElement(r, j); |
---|
1188 | vp[r] -= p*l; |
---|
1189 | L.setElement(r, j, l+b*vp[r]); |
---|
1190 | } |
---|
1191 | else |
---|
1192 | L.setElement(r, j, 0.0); |
---|
1193 | } |
---|
1194 | } |
---|
1195 | } |
---|
1196 | else{ |
---|
1197 | double[] P = solveTriangle(L, v, true, isFixed); |
---|
1198 | t = 0.0; |
---|
1199 | for(int i=0; i<n; i++) |
---|
1200 | if(!isFixed[i]) |
---|
1201 | t += P[i]*P[i]/D[i]; |
---|
1202 | |
---|
1203 | double sqrt=1.0+coeff*t; |
---|
1204 | sqrt = (sqrt<0.0)? 0.0 : Math.sqrt(sqrt); |
---|
1205 | |
---|
1206 | double alpha=coeff, sigma=coeff/(1.0+sqrt), rho, theta; |
---|
1207 | |
---|
1208 | for(int j=0; j<n; j++){ |
---|
1209 | if(isFixed[j]) continue; |
---|
1210 | |
---|
1211 | double d=D[j]; |
---|
1212 | p = P[j]*P[j]/d; |
---|
1213 | theta = 1.0+sigma*p; |
---|
1214 | t -= p; |
---|
1215 | if(t<0.0) t=0.0; // Rounding error |
---|
1216 | |
---|
1217 | double plus = sigma*sigma*p*t; |
---|
1218 | if((j<n-1) && (plus <= m_Zero)) |
---|
1219 | plus=m_Zero; // Avoid rounding error |
---|
1220 | rho = theta*theta + plus; |
---|
1221 | D[j] = rho*d; |
---|
1222 | |
---|
1223 | if(Double.isNaN(D[j])){ |
---|
1224 | throw new Exception("d["+j+"] NaN! P="+P[j]+",d="+d+ |
---|
1225 | ",t="+t+",p="+p+",sigma="+sigma+ |
---|
1226 | ",sclar="+coeff); |
---|
1227 | } |
---|
1228 | |
---|
1229 | b=alpha*P[j]/(rho*d); |
---|
1230 | alpha /= rho; |
---|
1231 | rho = Math.sqrt(rho); |
---|
1232 | double sigmaOld = sigma; |
---|
1233 | sigma *= (1.0+rho)/(rho*(theta+rho)); |
---|
1234 | if((j<n-1) && |
---|
1235 | (Double.isNaN(sigma) || Double.isInfinite(sigma))) |
---|
1236 | throw new Exception("sigma NaN/Inf! rho="+rho+ |
---|
1237 | ",theta="+theta+",P["+j+"]="+ |
---|
1238 | P[j]+",p="+p+",d="+d+",t="+t+ |
---|
1239 | ",oldsigma="+sigmaOld); |
---|
1240 | |
---|
1241 | for(int r=j+1; r<n; r++){ |
---|
1242 | if(!isFixed[r]){ |
---|
1243 | double l=L.getElement(r, j); |
---|
1244 | vp[r] -= P[j]*l; |
---|
1245 | L.setElement(r, j, l+b*vp[r]); |
---|
1246 | } |
---|
1247 | else |
---|
1248 | L.setElement(r, j, 0.0); |
---|
1249 | } |
---|
1250 | } |
---|
1251 | } |
---|
1252 | } |
---|
1253 | |
---|
1254 | /** |
---|
1255 | * Implements a simple dynamic array for ints. |
---|
1256 | */ |
---|
1257 | private class DynamicIntArray |
---|
1258 | implements RevisionHandler { |
---|
1259 | |
---|
1260 | /** The int array. */ |
---|
1261 | private int[] m_Objects; |
---|
1262 | |
---|
1263 | /** The current size; */ |
---|
1264 | private int m_Size = 0; |
---|
1265 | |
---|
1266 | /** The capacity increment */ |
---|
1267 | private int m_CapacityIncrement = 1; |
---|
1268 | |
---|
1269 | /** The capacity multiplier. */ |
---|
1270 | private int m_CapacityMultiplier = 2; |
---|
1271 | |
---|
1272 | /** |
---|
1273 | * Constructs a vector with the given capacity. |
---|
1274 | * |
---|
1275 | * @param capacity the vector's initial capacity |
---|
1276 | */ |
---|
1277 | public DynamicIntArray(int capacity) { |
---|
1278 | |
---|
1279 | m_Objects = new int[capacity]; |
---|
1280 | } |
---|
1281 | |
---|
1282 | /** |
---|
1283 | * Adds an element to this vector. Increases its |
---|
1284 | * capacity if its not large enough. |
---|
1285 | * |
---|
1286 | * @param element the element to add |
---|
1287 | */ |
---|
1288 | public final void addElement(int element) { |
---|
1289 | |
---|
1290 | if (m_Size == m_Objects.length) { |
---|
1291 | int[] newObjects; |
---|
1292 | newObjects = new int[m_CapacityMultiplier * |
---|
1293 | (m_Objects.length + |
---|
1294 | m_CapacityIncrement)]; |
---|
1295 | System.arraycopy(m_Objects, 0, newObjects, 0, m_Size); |
---|
1296 | m_Objects = newObjects; |
---|
1297 | } |
---|
1298 | m_Objects[m_Size] = element; |
---|
1299 | m_Size++; |
---|
1300 | } |
---|
1301 | |
---|
1302 | /** |
---|
1303 | * Produces a copy of this vector. |
---|
1304 | * |
---|
1305 | * @return the new vector |
---|
1306 | */ |
---|
1307 | public final Object copy() { |
---|
1308 | |
---|
1309 | |
---|
1310 | DynamicIntArray copy = new DynamicIntArray(m_Objects.length); |
---|
1311 | |
---|
1312 | copy.m_Size = m_Size; |
---|
1313 | copy.m_CapacityIncrement = m_CapacityIncrement; |
---|
1314 | copy.m_CapacityMultiplier = m_CapacityMultiplier; |
---|
1315 | System.arraycopy(m_Objects, 0, copy.m_Objects, 0, m_Size); |
---|
1316 | return copy; |
---|
1317 | } |
---|
1318 | |
---|
1319 | /** |
---|
1320 | * Returns the element at the given position. |
---|
1321 | * |
---|
1322 | * @param index the element's index |
---|
1323 | * @return the element with the given index |
---|
1324 | */ |
---|
1325 | public final int elementAt(int index) { |
---|
1326 | |
---|
1327 | return m_Objects[index]; |
---|
1328 | } |
---|
1329 | |
---|
1330 | /** |
---|
1331 | * Check whether the two integer vectors equal to each other |
---|
1332 | * Two integer vectors are equal if all the elements are the |
---|
1333 | * same, regardless of the order of the elements |
---|
1334 | * |
---|
1335 | * @param b another integer vector |
---|
1336 | * @return whether they are equal |
---|
1337 | */ |
---|
1338 | private boolean equal(DynamicIntArray b){ |
---|
1339 | if((b==null) || (size()!=b.size())) |
---|
1340 | return false; |
---|
1341 | |
---|
1342 | int size=size(); |
---|
1343 | |
---|
1344 | // Only values matter, order does not matter |
---|
1345 | int[] sorta=Utils.sort(m_Objects), sortb=Utils.sort(b.m_Objects); |
---|
1346 | for(int j=0; j<size;j++) |
---|
1347 | if(m_Objects[sorta[j]] != b.m_Objects[sortb[j]]) |
---|
1348 | return false; |
---|
1349 | |
---|
1350 | return true; |
---|
1351 | } |
---|
1352 | |
---|
1353 | /** |
---|
1354 | * Deletes an element from this vector. |
---|
1355 | * |
---|
1356 | * @param index the index of the element to be deleted |
---|
1357 | */ |
---|
1358 | public final void removeElementAt(int index) { |
---|
1359 | |
---|
1360 | System.arraycopy(m_Objects, index + 1, m_Objects, index, |
---|
1361 | m_Size - index - 1); |
---|
1362 | m_Size--; |
---|
1363 | } |
---|
1364 | |
---|
1365 | /** |
---|
1366 | * Removes all components from this vector and sets its |
---|
1367 | * size to zero. |
---|
1368 | */ |
---|
1369 | public final void removeAllElements() { |
---|
1370 | |
---|
1371 | m_Objects = new int[m_Objects.length]; |
---|
1372 | m_Size = 0; |
---|
1373 | } |
---|
1374 | |
---|
1375 | /** |
---|
1376 | * Returns the vector's current size. |
---|
1377 | * |
---|
1378 | * @return the vector's current size |
---|
1379 | */ |
---|
1380 | public final int size() { |
---|
1381 | |
---|
1382 | return m_Size; |
---|
1383 | } |
---|
1384 | |
---|
1385 | /** |
---|
1386 | * Returns the revision string. |
---|
1387 | * |
---|
1388 | * @return the revision |
---|
1389 | */ |
---|
1390 | public String getRevision() { |
---|
1391 | return RevisionUtils.extract("$Revision: 5953 $"); |
---|
1392 | } |
---|
1393 | } |
---|
1394 | } |
---|