r"""
gradient-based optimizers on a manifold:
keep the search direction along the (negative) gradient vector
while using a line search scheme to determine the step size s_k,
    x_{k+1} = retr_{x_k}(-s_k grad f(x_k))
"""
from wavefunction_analysis import np
def gradient_descent(func, gradf, retraction, x0,
                     ls_method='armijo', nmax=50, thresh=1e-8,
                     *args, **kwargs):
    print('%s line search method is used in gradient descent' % ls_method)
    ls_method = line_search(ls_method)

    xi = x0
    y0 = func(x0)
    xs, ys = [x0], [y0]

    # pop ymin so it is not forwarded to the line search kernel
    ymin = kwargs.pop('ymin', None)

    for i in range(nmax):
        step_size = ls_method(func, gradf, retraction, xi, *args, **kwargs)

        # -gradf is the search direction
        dx = -step_size * gradf(xi)
        xi = retraction(xi, dx)
        yi = func(xi)

        xs.append(xi)
        ys.append(yi)

        if ymin is not None:
            error = abs(yi - ymin)
        else:
            error = abs(yi - y0)

        print('i:%3d value:%12.8f error: %8.4e' % (i, yi, error), end=' ')
        if error > thresh:
            print('')
        else:
            print('optimization converged!')
            break

        y0 = yi

    return np.array(xs), np.array(ys)
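

# Usage sketch (illustrative, not part of the original module): minimize the
# Euclidean quadratic f(x) = <x, x> with the trivial retraction
# retr_x(v) = x + v and a constant step size of 0.1. All names in the demo
# body are assumptions made for this example.
def _demo_gradient_descent():
    func = lambda x: np.dot(x, x)
    gradf = lambda x: 2. * x
    retraction = lambda x, dx: x + dx  # flat-space "retraction"
    x0 = np.array([1., -2.])
    # a float ls_method dispatches to the constant-step kernel
    return gradient_descent(func, gradf, retraction, x0, ls_method=.1)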
def line_search(method='armijo'):
    """
    method options: float (constant step size), newton, backtracking,
    armijo, steepest
    """
    if isinstance(method, float):  # step size is a given constant
        return ls_steepest(method)

    ls_method = globals().get('ls_' + method)  # get the working function
    if callable(ls_method):
        return ls_method
    else:
        raise NotImplementedError('line search method %s' % method)
def ls_steepest(step=1.):
    # wrap the constant step size in a callable with the line-search signature
    def kernel(*args, **kwargs):
        return step
    return kernel
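
# Dispatch sketch (illustrative): a float yields a constant-step kernel,
# while a string resolves to the matching ls_* function defined below.
#   line_search(.5)        # -> kernel that always returns 0.5
#   line_search('armijo')  # -> ls_backtracking (via the ls_armijo alias)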
def ls_backtracking(func, gradf, retraction, x0,
                    tau=.5, r=1e-4, alpha=1., **kwargs):
    """
    determine the step size alpha by backtracking: start from a large
    trial value and shrink it by tau until the Armijo sufficient-decrease
    condition f(x0) - f(x_alpha) >= r * alpha * <grad, grad> holds
    """
    y0 = func(x0)
    df = gradf(x0)  # the gradient; -df is the descent direction

    norm = np.linalg.norm(df)
    if norm == 0.:  # already at a stationary point
        return 0.
    dot = norm**2  # <grad, grad> = -<grad, descent direction>
    alpha /= norm  # scale the initial trial step by the gradient norm

    while True:
        dx = -alpha * df
        xi = retraction(x0, dx)
        dy = y0 - func(xi)
        if dy >= r * alpha * dot:  # sufficient decrease achieved
            return alpha
        alpha *= tau  # otherwise shrink the step size and retry
ls_armijo = ls_backtracking
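
# Verification sketch (illustrative): the step size returned by
# ls_backtracking satisfies the sufficient-decrease condition
# f(x0) - f(retr_x0(-alpha*g)) >= r * alpha * <g, g>. The quadratic test
# problem below is an assumption made for this example.
def _demo_armijo():
    func = lambda x: np.dot(x, x)
    gradf = lambda x: 2. * x
    retraction = lambda x, dx: x + dx
    x0 = np.array([3., -1.])
    alpha = ls_backtracking(func, gradf, retraction, x0)
    g = gradf(x0)
    lhs = func(x0) - func(retraction(x0, -alpha * g))
    assert lhs >= 1e-4 * alpha * np.dot(g, g)
    return alpha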
def ls_newton_raphson(func, gradf, retraction, x0, **kwargs):
    # local quadratic model along -grad: alpha = <g, g> / <g, H g>
    # (assumes hessf is a callable returning the Hessian matrix at x0)
    hessf = kwargs.get('hessf')
    g = gradf(x0)
    return np.dot(g, g) / np.dot(g, np.dot(hessf(x0), g))
ls_newton = ls_newton_raphson  # name expected by line_search('newton')
"""
conjugate gradient algorithm would change the search direction as well
"""
def conjugate_gradient(func, gradf, retraction, transport, preconditioner, x0,
                       ls_method='armijo', cg_method='fletcher_reeves',
                       nmax=50, thresh=1e-8,
                       *args, **kwargs):
    print('%s line search and %s update are used in conjugate gradient'
          % (ls_method, cg_method))
    ls_method = line_search(ls_method)
    cg_method = choose_direction(cg_method)

    if preconditioner is None:
        def preconditioner(v):  # identity preconditioner: do nothing
            return v

    xi = x0
    y0 = func(x0)
    g0 = gradf(x0)
    xs, ys = [x0], [y0]

    direction = -g0  # initial search direction

    for i in range(nmax):
        # note: the line search determines the step along -grad,
        # which is then applied to the conjugate direction
        step_size = ls_method(func, gradf, retraction, xi, *args, **kwargs)

        dx = step_size * direction
        xi = retraction(xi, dx)
        yi = func(xi)

        xs.append(xi)
        ys.append(yi)

        error = abs(yi - y0)
        print('i:%3d value:%12.8f error: %8.4e' % (i, yi, error), end=' ')
        if error > thresh:
            print('')
        else:
            print('optimization converged!')
            break

        gi = gradf(xi)
        beta = cg_method(transport, preconditioner, dx, g0, gi,
                         *args, **kwargs)
        # mix the new gradient with the transported previous direction
        direction = beta * transport(None, xi, direction) - gi

        y0 = yi
        g0 = gi

    return np.array(xs), np.array(ys)
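

# Usage sketch (illustrative, not part of the original module): in flat space
# the vector transport is the identity, so the demo below minimizes a simple
# quadratic with Fletcher-Reeves updates and a constant step size. All demo
# names are assumptions made for this example.
def _demo_conjugate_gradient():
    func = lambda x: np.dot(x, x)
    gradf = lambda x: 2. * x
    retraction = lambda x, dx: x + dx
    transport = lambda gauge, x, v: v  # trivial transport in flat space
    x0 = np.array([1., 2.])
    return conjugate_gradient(func, gradf, retraction, transport, None, x0,
                              ls_method=.2)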
def choose_direction(method='fletcher_reeves'):
    cg_method = globals().get('cg_' + method)  # get the working function
    if callable(cg_method):
        return cg_method
    else:
        raise NotImplementedError('conjugate gradient %s beta scheme' % method)
def cg_fletcher_reeves(transport, preconditioner, x1, g0, g1, *args, **kwargs):
    # beta = <g1, P g1> / <g0, P g0>
    pg0, pg1 = preconditioner(g0), preconditioner(g1)
    return np.dot(g1, pg1) / np.dot(g0, pg0)
def cg_polak_ribiere(transport, preconditioner, x1, g0, g1, *args, **kwargs):
    # beta = |<P g1, g1 - T(g0)>| / <g0, g0>; the abs keeps beta non-negative
    dg = g1 - transport(None, x1, g0)
    pg1 = preconditioner(g1)
    return abs(np.dot(pg1, dg)) / np.dot(g0, g0)
def newton_2nd(func, gradf, hessf, tangent_solver, geodesic, x0, dt=1.,
               nmax=50, thresh=1e-8, *args, **kwargs):
    xi = x0
    y0 = func(x0)
    xs, ys = [x0], [y0]

    for i in range(nmax):
        # solve the Newton equation in the tangent space for the update v
        v = tangent_solver(xi, gradf, hessf)
        xi = geodesic(xi, v, dt)
        yi = func(xi)

        xs.append(xi)
        ys.append(yi)

        error = abs(yi - y0)
        print('i:%3d value:%12.8f error: %8.4e' % (i, yi, error), end=' ')
        if error > thresh:
            print('')
        else:
            print('optimization converged!')
            break

        y0 = yi

    return np.array(xs), np.array(ys)
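

# Usage sketch (illustrative): in flat space the tangent solver reduces to
# solving H v = -g and the geodesic to the straight line x + dt*v, so Newton's
# method minimizes a quadratic in a single step. Demo names are assumptions.
def _demo_newton_2nd():
    A = np.diag([1., 4.])
    func = lambda x: .5 * np.dot(x, np.dot(A, x))
    gradf = lambda x: np.dot(A, x)
    hessf = lambda x: A
    tangent_solver = lambda x, gradf, hessf: np.linalg.solve(hessf(x), -gradf(x))
    geodesic = lambda x, v, dt: x + dt * v
    x0 = np.array([2., -1.])
    return newton_2nd(func, gradf, hessf, tangent_solver, geodesic, x0)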
#TODO: how to do trust_region?
def trust_region(func, gradf, hessf, tangent_solver, geodesic, x0, dt=1.,
                 nmax=50, thresh=1e-8, *args, **kwargs):
    # placeholder: currently identical to newton_2nd; no trust-region
    # radius update is implemented yet (see the TODO above)
    xi = x0
    y0 = func(x0)
    xs, ys = [x0], [y0]

    for i in range(nmax):
        v = tangent_solver(xi, gradf, hessf)
        xi = geodesic(xi, v, dt)
        yi = func(xi)

        xs.append(xi)
        ys.append(yi)

        error = abs(yi - y0)
        print('i:%3d value:%12.8f error: %8.4e' % (i, yi, error), end=' ')
        if error > thresh:
            print('')
        else:
            print('optimization converged!')
            break

        y0 = yi

    return np.array(xs), np.array(ys)