BFGS求解凸二次规划
f(x)=12xTAx+bTxg=Ax+bf(x)=\frac{1}{2}x^TAx+b^Tx \\ g=Ax+bf(x)=21?xTAx+bTxg=Ax+b
def f(x):return 0.5*np.dot(np.dot(x,A),x)+np.dot(b,x)+c
def g(x):return np.dot(A,x)+b
采用精确线搜索求步长:
alpha_k = -np.dot(pk.T, g(xk))/np.dot(pk.T,np.dot(A,pk))
样正HK 采用如下公式
from scipy.optimize import minimize
from numpy import *
import numpy
import numpy as npdef minimize_bfgs(f, x0, jac=None,gtol=1e-5,disp=False):# 第一步,给出初始点x0, Ho, 计算gogfk = g(x0)k = 0N = len(x0)I = numpy.eye(N, dtype=int)Hk = I# 初始化Hessian 矩阵xk = x0gnorm = numpy.amax(numpy.abs(gfk))while (gnorm > gtol):# 第二步 : 求下降方向pk = -numpy.dot(Hk, gfk)# 第三步: 这里使用了精确线搜索确定步长,并更新xk+1alpha_k = -np.dot(pk.T, g(xk))/np.dot(pk.T,np.dot(A,pk))xkp1 = xk + alpha_k * pk# 第四步: 校正Hksk = xkp1 - xkxk = xkp1gfkp1 = g(xkp1)yk = gfkp1 - gfkgfk = gfkp1k += 1gnorm = numpy.amax(numpy.abs(gfk))if (gnorm <= gtol):breakrhok = 1.0 / (numpy.dot(yk, sk))# A1 = I - np.dot(sk[:,None],yk[:,None].T)* rhokA1= I - sk[:, None] * yk[None, :] * rhokA2 = I - yk[:, None] * sk[None, :] * rhokHk = np.dot(A1, np.dot(Hk, A2)) + (rhok * np.dot(sk[:,None] , sk[:,None].T))# Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + (rhok * sk[:, None] *# sk[None, :])# numpy.newaxis是None# 都是二维np.dot 相当于矩阵乘print(" Current function value xk:[%f %f]"% (xk[0],xk[1]))print(" Current function value: %f" % f(xk))
测试如下
if __name__ == '__main__':x0 = [1, 1]A = np.array([[1, -1], [-1, 2]])b = [-2, 0]g0 = g(x0)c = 0print(minimize_bfgs(f, x0, jac=g,gtol=1e-6, disp= True))res = minimize(f, x0, method='BFGS', jac=g, options={
'gtol': 1e-6, 'disp': True})print(res)
超线性收敛,次于牛顿方法
# sk=np.array([-0.00835751 ,-0.00901759 ,-0.02819486, -0.1701776 ])# yk=np.array([ -5.30212743 ,3.92364059 , 46.13550229 ,-11.46023375])# (fun, x0, args=(), jac = None, gtol = 1e-5, disp = False):# res = minimize(rosen, x0, method='BFGS', jac=rosen_der, options={'gtol': 1e-6, 'disp': True})# res = minimize(f, x0, method='BFGS', jac=g, options={'gtol': 1e-6, 'disp': True})# print(res)# I = numpy.eye(4, dtype=int)# A1 = I - sk[:, None] * yk[None, :]# 不能真接用sk*yk.T,先得将数组变为二维的才行,所以等价于# a=sk[:,None]# b=yk[:,None]# print(a)# print(I-np.dot(a,b.T)==A1)# *用于array 表示对应元素相乘,np.dot()点乘