计算柯西点列,即最速下降方向dB=?gTggTBggd^B=-\frac{g^Tg}{g^TBg}gdB=?gTBggTg?g,如 果最速下降方向超出了信赖域,返回与信赖域的交点。
这时可以求得方向为信赖域半径除以dU的范数?dU信赖域半径除以d^U 的范数* d^U信赖域半径除以dU的范数?dU
即p_boundary = p_u * (trust_radius / p_u_norm)
def cauchy_point(self):"""The Cauchy point is minimal along the direction of steepest descent."""if self._cauchy_point is None:g = self.jacBg = self.hessp(g)//表示hessian矩阵和一阶导数梯度的内积self._cauchy_point = -(np.dot(g, g) / np.dot(g, Bg)) * greturn self._cauchy_point
计算牛顿步,即近似函数m(d)=f+gTd+dTBdm(d)=f+g^Td+d^TBdm(d)=f+gTd+dTBd的全局最优解(不考虑约束(d必须在信赖域半径中)),pB=?B?1gp^B=-B^{-1}gpB=?B?1g,这里没有直接求逆矩阵的方法,而是用了LU分解求解线性方程组的形式
def newton_point(self):"""The Newton point is a global minimum of the approximate function."""if self._newton_point is None:g = self.jacB = self.hesscho_info = scipy.linalg.cho_factor(B)self._newton_point = -scipy.linalg.cho_solve(cho_info, g)return self._newton_point## 标题
当然如果全局最优点在信赖域半径中,则搜所方向为牛顿步,否则
求解ttt
def get_boundaries_intersections(self, z, d, trust_radius):"""Solve the scalar quadratic equation ||z + t d|| == trust_radius.This is like a line-sphere intersection.Return the two values of t, sorted from low to high."""a = np.dot(d, d)b = 2 * np.dot(z, d)c = np.dot(z, z) - trust_radius**2sqrt_discriminant = math.sqrt(b*b - 4*a*c)ta = (-b - sqrt_discriminant) / (2*a)tb = (-b + sqrt_discriminant) / (2*a)return ta, tb
这个方法对Hessian 矩阵的要求:
This algorithm requires function values and first and second derivatives.It also performs a costly Hessian decomposition for most iterations,and the Hessian is required to be positive definite.
class DoglegSubproblem(BaseQuadraticSubproblem):"""Quadratic subproblem solved by the dogleg method"""def solve(self, trust_radius):"""Minimize a function using the dog-leg trust-region algorithm.Parameters----------trust_radius : floatWe are allowed to wander only this far away from the origin.Returns-------p : ndarrayThe proposed step.hits_boundary : boolTrue if the proposed step is on the boundary of the trust region.Notes-----The Hessian is required to be positive definite.References----------.. [1] Jorge Nocedal and Stephen Wright,Numerical Optimization, second edition,Springer-Verlag, 2006, page 73."""# Compute the Newton point.# This is the optimum for the quadratic model function.# If it is inside the trust radius then return this point.p_best = self.newton_point()if scipy.linalg.norm(p_best) < trust_radius:hits_boundary = Falsereturn p_best, hits_boundary# Compute the Cauchy point.# This is the predicted optimum along the direction of steepest descent.p_u = self.cauchy_point()# If the Cauchy point is outside the trust region,# then return the point where the path intersects the boundary.p_u_norm = scipy.linalg.norm(p_u)if p_u_norm >= trust_radius:p_boundary = p_u * (trust_radius / p_u_norm)hits_boundary = Truereturn p_boundary, hits_boundary# Compute the intersection of the trust region boundary# and the line segment connecting the Cauchy and Newton points.# This requires solving a quadratic equation.# ||p_u + t*(p_best - p_u)||**2 == trust_radius**2# Solve this for positive time t using the quadratic formula._, tb = self.get_boundaries_intersections(p_u, p_best - p_u,trust_radius)p_boundary = p_u + tb * (p_best - p_u)hits_boundary = Truereturn p_boundary, hits_boundary
信赖域完整 代码框架
def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,subproblem=None, initial_trust_radius=1.0,max_trust_radius=1000.0, eta=0.15, gtol=1e-4,maxiter=None, disp=False, return_all=False,callback=None, **unknown_options):"""Minimization of scalar function of one or more variables using atrust-region algorithm.Options for the trust-region algorithm are:initial_trust_radius : floatInitial trust radius.max_trust_radius : floatNever propose steps that are longer than this value.eta : floatTrust region related acceptance stringency for proposed steps.gtol : floatGradient norm must be less than `gtol`before successful termination.maxiter : intMaximum number of iterations to perform.disp : boolIf True, print convergence message.This function is called by the `minimize` function.It is not supposed to be called directly."""_check_unknown_options(unknown_options)if jac is None:raise ValueError('Jacobian is currently required for trust-region ''methods')if hess is None and hessp is None:raise ValueError('Either the Hessian or the Hessian-vector product ''is currently required for trust-region methods')if subproblem is None:raise ValueError('A subproblem solving strategy is required for ''trust-region methods')if not (0 <= eta < 0.25):raise Exception('invalid acceptance stringency')if max_trust_radius <= 0:raise Exception('the max trust radius must be positive')if initial_trust_radius <= 0:raise ValueError('the initial trust radius must be positive')if initial_trust_radius >= max_trust_radius:raise ValueError('the initial trust radius must be less than the ''max trust radius')# force the initial guess into a nice formatx0 = np.asarray(x0).flatten()# Wrap the functions, for a couple reasons.# This tracks how many times they have been called# and it automatically passes the args.nfun, fun = wrap_function(fun, args)njac, jac = wrap_function(jac, args)nhess, hess = wrap_function(hess, args)nhessp, hessp = wrap_function(hessp, args)# limit the number of iterationsif maxiter is None:maxiter = len(x0)*200# init the search statuswarnflag = 0# initialize the searchtrust_radius = initial_trust_radiusx = x0if return_all:allvecs = [x]m = subproblem(x, fun, jac, hess, hessp)k = 0# search for the function minwhile True:# Solve the sub-problem.# This gives us the proposed step relative to the current position# and it tells us whether the proposed step# has reached the trust region boundary or not.try:p, hits_boundary = m.solve(trust_radius)except np.linalg.linalg.LinAlgError as e:warnflag = 3break# calculate the predicted value at the proposed pointpredicted_value = m(p)# define the local approximation at the proposed pointx_proposed = x + pm_proposed = subproblem(x_proposed, fun, jac, hess, hessp)# evaluate the ratio defined in equation (4.4)actual_reduction = m.fun - m_proposed.funpredicted_reduction = m.fun - predicted_valueif predicted_reduction <= 0:warnflag = 2breakrho = actual_reduction / predicted_reduction# update the trust radius according to the actual/predicted ratioif rho < 0.25:trust_radius *= 0.25elif rho > 0.75 and hits_boundary:trust_radius = min(2*trust_radius, max_trust_radius)# if the ratio is high enough then accept the proposed stepif rho > eta:x = x_proposedm = m_proposed# append the best guess, call back, increment the iteration countif return_all:allvecs.append(x)if callback is not None:callback(x)k += 1# check if the gradient is small enough to stopif m.jac_mag < gtol:warnflag = 0break# check if we have looked at enough iterationsif k >= maxiter:warnflag = 1break# print some stuff if requestedstatus_messages = (_status_message['success'],_status_message['maxiter'],'A bad approximation caused failure to predict improvement.','A linalg error occurred, such as a non-psd Hessian.',)if disp:if warnflag == 0:print(status_messages[warnflag])else:print('Warning: ' + status_messages[warnflag])print(" Current function value: %f" % m.fun)print(" Iterations: %d" % k)print(" Function evaluations: %d" % nfun[0])print(" Gradient evaluations: %d" % njac[0])print(" Hessian evaluations: %d" % nhess[0])result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,fun=m.fun, jac=m.jac, nfev=nfun[0], njev=njac[0],nhev=nhess[0], nit=k,message=status_messages[warnflag])if hess is not None:result['hess'] = m.hessif return_all:result['allvecs'] = allvecsreturn result
自己改写后的学习版本
Succeess! information is初始点x0: [15, 100]初始函数值: 1562696.000000Iterations: 66optimal x: [ 1. 1.]Current function value: 0.000000Gradient x [[ 802.00000276 -400.00000066][-400.00000066 200. ]]
"""Dog-leg trust-region optimization."""import math
import numpy as np
import scipy.linalg
from scipy.optimize import minimize, rosen, rosen_der,rosen_hessdef cauchy_point(g,B):Bg = np.dot(B,g)cauchy_point = -(np.dot(g, g) / np.dot(g, Bg)) * greturn cauchy_point
# 利用LU分解求解牛顿步
def newton_point(g,B):cho_info = scipy.linalg.cho_factor(B)newton_point = -scipy.linalg.cho_solve(cho_info, g)return newton_pointdef solve(trust_radius,g,B):# print(trust_radius)p_best = newton_point(g,B)# 如果牛顿步在信赖域中,则利用牛顿步更新if scipy.linalg.norm(p_best) < trust_radius:hits_boundary = Falsereturn p_best, hits_boundaryp_u = cauchy_point(g,B)p_u_norm = scipy.linalg.norm(p_u)# 如果最速下降方向(步长求出),在信赖域外,则返回其与信赖域的交点if p_u_norm >= trust_radius:p_boundary = p_u * (trust_radius / p_u_norm)hits_boundary = Truereturn p_boundary, hits_boundary# 否则的话,则是二者的线性组合_, tb = get_boundaries_intersections(p_u, p_best - p_u,trust_radius)p_boundary = p_u + tb * (p_best - p_u)hits_boundary = Truereturn p_boundary, hits_boundarydef get_boundaries_intersections( z, d, trust_radius):a = np.dot(d, d)b = 2 * np.dot(z, d)c = np.dot(z, z) - trust_radius**2sqrt_discriminant = math.sqrt(b*b - 4*a*c)ta = (-b - sqrt_discriminant) / (2*a)tb = (-b + sqrt_discriminant) / (2*a)return ta, tbdef minimize_trust_region( x0,eta=0.15, initial_trust_radius = 1.0,max_trust_radius = 1000.0,gtol = 1e-4):warnflag = 0trust_radius = initial_trust_radiusx = x0k = 0while True:p, hits_boundary = solve(trust_radius,rosen_der(x),rosen_hess(x))# hits_boundary 表示has reached the trust region boundary or not.x_proposed=x+p# calculate the predicted value at the proposed pointpredicted_value =rosen(x)+np.dot(rosen_der(x), p) + 0.5 * np.dot(p, np.dot(rosen_hess(x), p))actual_reduction = rosen(x) - rosen(x_proposed)predicted_reduction = rosen(x) - predicted_value# update the trust radius according to the actual/predicted ratiorho = actual_reduction / predicted_reductionif rho < 0.25:trust_radius *= 0.25elif rho > 0.75 and hits_boundary:trust_radius = min(2*trust_radius, max_trust_radius)# if the ratio is high enough then accept the proposed stepif rho > eta:x = x_proposedk += 1# check if the gradient is small enough to stopif scipy.linalg.norm(rosen_der(x))< gtol:warnflag = 0breakif warnflag == 0:print("Succeess! information is")print(" 初始点x0:" ,x0)print(" 初始函数值: %f" % rosen(x0))print(" Iterations: %d" % k)print(" optimal x: ",x)print(" Current function value: %f" % rosen(x))print(" Gradient x" , rosen_hess(x))if __name__ == '__main__':x0 = [15, 100]minimize_trust_region(x0,0.15)