fixed dependencies
This commit is contained in:
368
vendor/gonum.org/v1/gonum/optimize/cg.go
generated
vendored
Normal file
368
vendor/gonum.org/v1/gonum/optimize/cg.go
generated
vendored
Normal file
@@ -0,0 +1,368 @@
|
||||
// Copyright ©2014 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"gonum.org/v1/gonum/floats"
|
||||
)
|
||||
|
||||
const (
	// iterationRestartFactor is the default used for CG.IterationRestartFactor
	// when the user leaves it at its zero value.
	iterationRestartFactor = 6
	// angleRestartThreshold is the default used for CG.AngleRestartThreshold
	// when the user leaves it at its zero value.
	angleRestartThreshold = -0.9
)
|
||||
|
||||
// Compile-time checks that *CG implements the interfaces required of a
// line-search-based local optimization method.
var (
	_ Method          = (*CG)(nil)
	_ localMethod     = (*CG)(nil)
	_ NextDirectioner = (*CG)(nil)
)
|
||||
|
||||
// CGVariant calculates the scaling parameter, β, used for updating the
// conjugate direction in the nonlinear conjugate gradient (CG) method.
// Implementations may keep internal state between iterations, which is why
// Beta is invoked on every iteration even when CG restarts.
type CGVariant interface {
	// Init is called at the first iteration and provides a way to initialize
	// any internal state.
	Init(loc *Location)
	// Beta returns the value of the scaling parameter that is computed
	// according to the particular variant of the CG method. It receives the
	// current gradient, the previous gradient, and the previous search
	// direction.
	Beta(grad, gradPrev, dirPrev []float64) float64
}
|
||||
|
||||
// Compile-time checks that the provided β formulas satisfy CGVariant.
var (
	_ CGVariant = (*FletcherReeves)(nil)
	_ CGVariant = (*PolakRibierePolyak)(nil)
	_ CGVariant = (*HestenesStiefel)(nil)
	_ CGVariant = (*DaiYuan)(nil)
	_ CGVariant = (*HagerZhang)(nil)
)
|
||||
|
||||
// CG implements the nonlinear conjugate gradient method for solving nonlinear
// unconstrained optimization problems. It is a line search method that
// generates the search directions d_k according to the formula
//
//	d_{k+1} = -∇f_{k+1} + β_k*d_k,   d_0 = -∇f_0.
//
// Variants of the conjugate gradient method differ in the choice of the
// parameter β_k. The conjugate gradient method usually requires fewer function
// evaluations than the gradient descent method and no matrix storage, but
// L-BFGS is usually more efficient.
//
// CG implements a restart strategy that takes the steepest descent direction
// (i.e., d_{k+1} = -∇f_{k+1}) whenever any of the following conditions holds:
//
//   - A certain number of iterations has elapsed without a restart. This number
//     is controllable via IterationRestartFactor and if equal to 0, it is set to
//     a reasonable default based on the problem dimension.
//   - The angle between the gradients at two consecutive iterations ∇f_k and
//     ∇f_{k+1} is too large.
//   - The direction d_{k+1} is not a descent direction.
//   - β_k returned from CGVariant.Beta is equal to zero.
//
// The line search for CG must yield step sizes that satisfy the strong Wolfe
// conditions at every iteration, otherwise the generated search direction
// might fail to be a descent direction. The line search should be more
// stringent compared with those for Newton-like methods, which can be achieved
// by setting the gradient constant in the strong Wolfe conditions to a small
// value.
//
// See also William Hager, Hongchao Zhang, A survey of nonlinear conjugate
// gradient methods. Pacific Journal of Optimization, 2 (2006), pp. 35-58, and
// references therein.
type CG struct {
	// Linesearcher must satisfy the strong Wolfe conditions at every iteration.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// Variant implements the particular CG formula for computing β_k.
	// If Variant is nil, an appropriate default is chosen.
	Variant CGVariant
	// InitialStep estimates the initial line search step size, because the CG
	// method does not generate well-scaled search directions.
	// If InitialStep is nil, an appropriate default is chosen.
	InitialStep StepSizer

	// IterationRestartFactor determines the frequency of restarts based on the
	// problem dimension. The negative gradient direction is taken whenever
	// ceil(IterationRestartFactor*(problem dimension)) iterations have elapsed
	// without a restart. For medium and large-scale problems
	// IterationRestartFactor should be set to 1, low-dimensional problems a
	// larger value should be chosen. Note that if the ceil function returns 1,
	// CG will be identical to gradient descent.
	// If IterationRestartFactor is 0, it will be set to 6.
	// CG will panic if IterationRestartFactor is negative.
	IterationRestartFactor float64
	// AngleRestartThreshold sets the threshold angle for restart. The method
	// is restarted if the cosine of the angle between two consecutive
	// gradients is smaller than or equal to AngleRestartThreshold, that is, if
	//
	//	∇f_k·∇f_{k+1} / (|∇f_k| |∇f_{k+1}|) <= AngleRestartThreshold.
	//
	// A value of AngleRestartThreshold closer to -1 (successive gradients in
	// exact opposite directions) will tend to reduce the number of restarts.
	// If AngleRestartThreshold is 0, it will be set to -0.9.
	// CG will panic if AngleRestartThreshold is not in the interval [-1, 0].
	AngleRestartThreshold float64
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	// ls drives the shared line-search machinery; CG supplies directions
	// through the NextDirectioner interface.
	ls *LinesearchMethod

	// status and err record the termination state reported by Status.
	status Status
	err    error

	// restartAfter is the iteration count at which a periodic restart is
	// forced; iterFromRestart counts iterations since the last restart.
	restartAfter    int
	iterFromRestart int

	// dirPrev, gradPrev and gradPrevNorm hold d_k, ∇f_k and |∇f_k| from the
	// previous iteration for use in the β_k and restart computations.
	dirPrev      []float64
	gradPrev     []float64
	gradPrevNorm float64
}
|
||||
|
||||
// Status returns the current status of the method and the error, if any, that
// caused it to terminate.
func (cg *CG) Status() (Status, error) {
	return cg.status, cg.err
}
|
||||
|
||||
// Uses returns the problem information used by CG. CG is a first-order
// method: it requires the gradient to be available.
func (*CG) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}
|
||||
|
||||
func (cg *CG) Init(dim, tasks int) int {
|
||||
cg.status = NotTerminated
|
||||
cg.err = nil
|
||||
return 1
|
||||
}
|
||||
|
||||
// Run drives the optimization through the shared local-optimizer loop,
// recording the final status and error, and closes operation to signal that
// no further tasks will be issued.
func (cg *CG) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	cg.status, cg.err = localOptimizer{}.run(cg, cg.GradStopThreshold, operation, result, tasks)
	close(operation)
}
|
||||
|
||||
func (cg *CG) initLocal(loc *Location) (Operation, error) {
|
||||
if cg.IterationRestartFactor < 0 {
|
||||
panic("cg: IterationRestartFactor is negative")
|
||||
}
|
||||
if cg.AngleRestartThreshold < -1 || cg.AngleRestartThreshold > 0 {
|
||||
panic("cg: AngleRestartThreshold not in [-1, 0]")
|
||||
}
|
||||
|
||||
if cg.Linesearcher == nil {
|
||||
cg.Linesearcher = &MoreThuente{CurvatureFactor: 0.1}
|
||||
}
|
||||
if cg.Variant == nil {
|
||||
cg.Variant = &HestenesStiefel{}
|
||||
}
|
||||
if cg.InitialStep == nil {
|
||||
cg.InitialStep = &FirstOrderStepSize{}
|
||||
}
|
||||
|
||||
if cg.IterationRestartFactor == 0 {
|
||||
cg.IterationRestartFactor = iterationRestartFactor
|
||||
}
|
||||
if cg.AngleRestartThreshold == 0 {
|
||||
cg.AngleRestartThreshold = angleRestartThreshold
|
||||
}
|
||||
|
||||
if cg.ls == nil {
|
||||
cg.ls = &LinesearchMethod{}
|
||||
}
|
||||
cg.ls.Linesearcher = cg.Linesearcher
|
||||
cg.ls.NextDirectioner = cg
|
||||
|
||||
return cg.ls.Init(loc)
|
||||
}
|
||||
|
||||
// iterateLocal delegates a single optimization iteration to the underlying
// line search method.
func (cg *CG) iterateLocal(loc *Location) (Operation, error) {
	return cg.ls.Iterate(loc)
}
|
||||
|
||||
func (cg *CG) InitDirection(loc *Location, dir []float64) (stepSize float64) {
|
||||
dim := len(loc.X)
|
||||
|
||||
cg.restartAfter = int(math.Ceil(cg.IterationRestartFactor * float64(dim)))
|
||||
cg.iterFromRestart = 0
|
||||
|
||||
// The initial direction is always the negative gradient.
|
||||
copy(dir, loc.Gradient)
|
||||
floats.Scale(-1, dir)
|
||||
|
||||
cg.dirPrev = resize(cg.dirPrev, dim)
|
||||
copy(cg.dirPrev, dir)
|
||||
cg.gradPrev = resize(cg.gradPrev, dim)
|
||||
copy(cg.gradPrev, loc.Gradient)
|
||||
cg.gradPrevNorm = floats.Norm(loc.Gradient, 2)
|
||||
|
||||
cg.Variant.Init(loc)
|
||||
return cg.InitialStep.Init(loc, dir)
|
||||
}
|
||||
|
||||
// NextDirection computes the next search direction dir at loc from the CG
// update d_{k+1} = -∇f_{k+1} + β_k*d_k, falling back to the steepest descent
// direction whenever a restart condition holds, and returns the step size at
// which the line search should start.
func (cg *CG) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// Begin from the steepest descent direction dir = -∇f_{k+1}; the β_k*d_k
	// term is added below only when not restarting.
	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)

	cg.iterFromRestart++
	var restart bool
	if cg.iterFromRestart == cg.restartAfter {
		// Restart because too many iterations have been taken without a restart.
		restart = true
	}

	gDot := floats.Dot(loc.Gradient, cg.gradPrev)
	gNorm := floats.Norm(loc.Gradient, 2)
	if gDot <= cg.AngleRestartThreshold*gNorm*cg.gradPrevNorm {
		// Restart because the angle between the last two gradients is too large.
		restart = true
	}

	// Compute the scaling factor β_k even when restarting, because cg.Variant
	// may be keeping an inner state that needs to be updated at every iteration.
	beta := cg.Variant.Beta(loc.Gradient, cg.gradPrev, cg.dirPrev)
	if beta == 0 {
		// β_k == 0 means that the steepest descent direction will be taken, so
		// indicate that the method is in fact being restarted.
		restart = true
	}
	if !restart {
		// The method is not being restarted, so update the descent direction.
		floats.AddScaled(dir, beta, cg.dirPrev)
		if floats.Dot(loc.Gradient, dir) >= 0 {
			// Restart because the new direction is not a descent direction.
			restart = true
			copy(dir, loc.Gradient)
			floats.Scale(-1, dir)
		}
	}

	// Get the initial line search step size from the StepSizer even if the
	// method was restarted, because StepSizers need to see every iteration.
	stepSize = cg.InitialStep.StepSize(loc, dir)
	if restart {
		// The method was restarted and since the steepest descent direction is
		// not related to the previous direction, discard the estimated step
		// size from cg.InitialStep and use step size of 1 instead.
		stepSize = 1
		// Reset to 0 the counter of iterations taken since the last restart.
		cg.iterFromRestart = 0
	}

	// Save ∇f_{k+1}, d_{k+1} and |∇f_{k+1}| for the next iteration.
	copy(cg.gradPrev, loc.Gradient)
	copy(cg.dirPrev, dir)
	cg.gradPrevNorm = gNorm
	return stepSize
}
|
||||
|
||||
// needs reports that CG requires the gradient but not the Hessian.
func (*CG) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}
|
||||
|
||||
// FletcherReeves implements the Fletcher-Reeves variant of the CG method that
|
||||
// computes the scaling parameter β_k according to the formula
|
||||
//
|
||||
// β_k = |∇f_{k+1}|^2 / |∇f_k|^2.
|
||||
type FletcherReeves struct {
|
||||
prevNorm float64
|
||||
}
|
||||
|
||||
func (fr *FletcherReeves) Init(loc *Location) {
|
||||
fr.prevNorm = floats.Norm(loc.Gradient, 2)
|
||||
}
|
||||
|
||||
func (fr *FletcherReeves) Beta(grad, _, _ []float64) (beta float64) {
|
||||
norm := floats.Norm(grad, 2)
|
||||
beta = (norm / fr.prevNorm) * (norm / fr.prevNorm)
|
||||
fr.prevNorm = norm
|
||||
return beta
|
||||
}
|
||||
|
||||
// PolakRibierePolyak implements the Polak-Ribiere-Polyak variant of the CG
|
||||
// method that computes the scaling parameter β_k according to the formula
|
||||
//
|
||||
// β_k = max(0, ∇f_{k+1}·y_k / |∇f_k|^2),
|
||||
//
|
||||
// where y_k = ∇f_{k+1} - ∇f_k.
|
||||
type PolakRibierePolyak struct {
|
||||
prevNorm float64
|
||||
}
|
||||
|
||||
func (pr *PolakRibierePolyak) Init(loc *Location) {
|
||||
pr.prevNorm = floats.Norm(loc.Gradient, 2)
|
||||
}
|
||||
|
||||
func (pr *PolakRibierePolyak) Beta(grad, gradPrev, _ []float64) (beta float64) {
|
||||
norm := floats.Norm(grad, 2)
|
||||
dot := floats.Dot(grad, gradPrev)
|
||||
beta = (norm*norm - dot) / (pr.prevNorm * pr.prevNorm)
|
||||
pr.prevNorm = norm
|
||||
return math.Max(0, beta)
|
||||
}
|
||||
|
||||
// HestenesStiefel implements the Hestenes-Stiefel variant of the CG method
|
||||
// that computes the scaling parameter β_k according to the formula
|
||||
//
|
||||
// β_k = max(0, ∇f_{k+1}·y_k / d_k·y_k),
|
||||
//
|
||||
// where y_k = ∇f_{k+1} - ∇f_k.
|
||||
type HestenesStiefel struct {
|
||||
y []float64
|
||||
}
|
||||
|
||||
func (hs *HestenesStiefel) Init(loc *Location) {
|
||||
hs.y = resize(hs.y, len(loc.Gradient))
|
||||
}
|
||||
|
||||
func (hs *HestenesStiefel) Beta(grad, gradPrev, dirPrev []float64) (beta float64) {
|
||||
floats.SubTo(hs.y, grad, gradPrev)
|
||||
beta = floats.Dot(grad, hs.y) / floats.Dot(dirPrev, hs.y)
|
||||
return math.Max(0, beta)
|
||||
}
|
||||
|
||||
// DaiYuan implements the Dai-Yuan variant of the CG method that computes the
|
||||
// scaling parameter β_k according to the formula
|
||||
//
|
||||
// β_k = |∇f_{k+1}|^2 / d_k·y_k,
|
||||
//
|
||||
// where y_k = ∇f_{k+1} - ∇f_k.
|
||||
type DaiYuan struct {
|
||||
y []float64
|
||||
}
|
||||
|
||||
func (dy *DaiYuan) Init(loc *Location) {
|
||||
dy.y = resize(dy.y, len(loc.Gradient))
|
||||
}
|
||||
|
||||
func (dy *DaiYuan) Beta(grad, gradPrev, dirPrev []float64) (beta float64) {
|
||||
floats.SubTo(dy.y, grad, gradPrev)
|
||||
norm := floats.Norm(grad, 2)
|
||||
return norm * norm / floats.Dot(dirPrev, dy.y)
|
||||
}
|
||||
|
||||
// HagerZhang implements the Hager-Zhang variant of the CG method that computes the
|
||||
// scaling parameter β_k according to the formula
|
||||
//
|
||||
// β_k = (y_k - 2 d_k |y_k|^2/(d_k·y_k))·∇f_{k+1} / (d_k·y_k),
|
||||
//
|
||||
// where y_k = ∇f_{k+1} - ∇f_k.
|
||||
type HagerZhang struct {
|
||||
y []float64
|
||||
}
|
||||
|
||||
func (hz *HagerZhang) Init(loc *Location) {
|
||||
hz.y = resize(hz.y, len(loc.Gradient))
|
||||
}
|
||||
|
||||
func (hz *HagerZhang) Beta(grad, gradPrev, dirPrev []float64) (beta float64) {
|
||||
floats.SubTo(hz.y, grad, gradPrev)
|
||||
dirDotY := floats.Dot(dirPrev, hz.y)
|
||||
gDotY := floats.Dot(grad, hz.y)
|
||||
gDotDir := floats.Dot(grad, dirPrev)
|
||||
yNorm := floats.Norm(hz.y, 2)
|
||||
return (gDotY - 2*gDotDir*yNorm*yNorm/dirDotY) / dirDotY
|
||||
}
|
||||
Reference in New Issue
Block a user