fixed dependencies
vendor/gonum.org/v1/gonum/optimize/README.md (+6 lines, generated, vendored, new file)
@@ -0,0 +1,6 @@
# Gonum optimize

[go.dev reference](https://pkg.go.dev/gonum.org/v1/gonum/optimize)
[godocs.io reference](https://godocs.io/gonum.org/v1/gonum/optimize)

Package optimize is an optimization package for the Go language.
vendor/gonum.org/v1/gonum/optimize/backtracking.go (+84 lines, generated, vendored, new file)
@@ -0,0 +1,84 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

const (
	defaultBacktrackingContraction = 0.5
	defaultBacktrackingDecrease    = 1e-4
	minimumBacktrackingStepSize    = 1e-20
)

var _ Linesearcher = (*Backtracking)(nil)

// Backtracking is a Linesearcher that uses backtracking to find a point that
// satisfies the Armijo condition with the given decrease factor. If the Armijo
// condition has not been met, the step size is decreased by ContractionFactor.
//
// The Armijo condition only requires the gradient at the beginning of each
// major iteration (not at successive step locations), and so Backtracking may
// be a good linesearch for functions with expensive gradients. Backtracking is
// not appropriate for optimizers that require the Wolfe conditions to be met,
// such as BFGS.
//
// Both DecreaseFactor and ContractionFactor must be between zero and one, and
// Backtracking will panic otherwise. If either DecreaseFactor or
// ContractionFactor is zero, it will be set to a reasonable default.
type Backtracking struct {
	DecreaseFactor    float64 // Constant factor in the sufficient decrease (Armijo) condition.
	ContractionFactor float64 // Step size multiplier at each iteration (step *= ContractionFactor).

	stepSize float64
	initF    float64
	initG    float64

	lastOp Operation
}

func (b *Backtracking) Init(f, g float64, step float64) Operation {
	if step <= 0 {
		panic("backtracking: bad step size")
	}
	if g >= 0 {
		panic("backtracking: initial derivative is non-negative")
	}

	if b.ContractionFactor == 0 {
		b.ContractionFactor = defaultBacktrackingContraction
	}
	if b.DecreaseFactor == 0 {
		b.DecreaseFactor = defaultBacktrackingDecrease
	}
	if b.ContractionFactor <= 0 || b.ContractionFactor >= 1 {
		panic("backtracking: ContractionFactor must be between 0 and 1")
	}
	if b.DecreaseFactor <= 0 || b.DecreaseFactor >= 1 {
		panic("backtracking: DecreaseFactor must be between 0 and 1")
	}

	b.stepSize = step
	b.initF = f
	b.initG = g

	b.lastOp = FuncEvaluation
	return b.lastOp
}

func (b *Backtracking) Iterate(f, _ float64) (Operation, float64, error) {
	if b.lastOp != FuncEvaluation {
		panic("backtracking: Init has not been called")
	}

	if ArmijoConditionMet(f, b.initF, b.initG, b.stepSize, b.DecreaseFactor) {
		b.lastOp = MajorIteration
		return b.lastOp, b.stepSize, nil
	}
	b.stepSize *= b.ContractionFactor
	if b.stepSize < minimumBacktrackingStepSize {
		b.lastOp = NoOperation
		return b.lastOp, b.stepSize, ErrLinesearcherFailure
	}
	b.lastOp = FuncEvaluation
	return b.lastOp, b.stepSize, nil
}
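The Armijo test that this linesearcher delegates to ArmijoConditionMet can be written out standalone. The following is a minimal sketch for illustration only; armijoBacktrack and its parameters are hypothetical names, not part of the vendored package. Here phi(step) evaluates the objective along the fixed search direction, f0 is the value at step 0, and g0 < 0 is the directional derivative there.

	// armijoBacktrack shrinks step until the Armijo sufficient-decrease
	// condition phi(step) <= f0 + decrease*step*g0 holds, mirroring the
	// Iterate loop above (contraction plays the role of ContractionFactor).
	func armijoBacktrack(phi func(float64) float64, f0, g0, step, decrease, contraction float64) (float64, bool) {
		for step >= 1e-20 { // same floor as minimumBacktrackingStepSize
			if phi(step) <= f0+decrease*step*g0 {
				return step, true
			}
			step *= contraction
		}
		return 0, false // analogous to ErrLinesearcherFailure
	}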
vendor/gonum.org/v1/gonum/optimize/bfgs.go (+192 lines, generated, vendored, new file)
@@ -0,0 +1,192 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

var (
	_ Method          = (*BFGS)(nil)
	_ localMethod     = (*BFGS)(nil)
	_ NextDirectioner = (*BFGS)(nil)
)

// BFGS implements the Broyden–Fletcher–Goldfarb–Shanno optimization method. It
// is a quasi-Newton method that performs successive rank-one updates to an
// estimate of the inverse Hessian of the objective function. It exhibits
// super-linear convergence when in proximity to a local minimum. It has memory
// cost that is O(n^2) relative to the input dimension.
type BFGS struct {
	// Linesearcher selects suitable steps along the descent direction.
	// Accepted steps should satisfy the strong Wolfe conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	ls *LinesearchMethod

	status Status
	err    error

	dim  int
	x    mat.VecDense // Location of the last major iteration.
	grad mat.VecDense // Gradient at the last major iteration.
	s    mat.VecDense // Difference between locations in this and the previous iteration.
	y    mat.VecDense // Difference between gradients in this and the previous iteration.
	tmp  mat.VecDense

	invHess *mat.SymDense

	first bool // Indicator of the first iteration.
}

func (b *BFGS) Status() (Status, error) {
	return b.status, b.err
}

func (*BFGS) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

func (b *BFGS) Init(dim, tasks int) int {
	b.status = NotTerminated
	b.err = nil
	return 1
}

func (b *BFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	b.status, b.err = localOptimizer{}.run(b, b.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (b *BFGS) initLocal(loc *Location) (Operation, error) {
	if b.Linesearcher == nil {
		b.Linesearcher = &Bisection{}
	}
	if b.ls == nil {
		b.ls = &LinesearchMethod{}
	}
	b.ls.Linesearcher = b.Linesearcher
	b.ls.NextDirectioner = b

	return b.ls.Init(loc)
}

func (b *BFGS) iterateLocal(loc *Location) (Operation, error) {
	return b.ls.Iterate(loc)
}

func (b *BFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	b.dim = dim
	b.first = true

	x := mat.NewVecDense(dim, loc.X)
	grad := mat.NewVecDense(dim, loc.Gradient)
	b.x.CloneFromVec(x)
	b.grad.CloneFromVec(grad)

	b.y.Reset()
	b.s.Reset()
	b.tmp.Reset()

	if b.invHess == nil || cap(b.invHess.RawSymmetric().Data) < dim*dim {
		b.invHess = mat.NewSymDense(dim, nil)
	} else {
		b.invHess = mat.NewSymDense(dim, b.invHess.RawSymmetric().Data[:dim*dim])
	}
	// The values of the inverse Hessian are initialized in the first call to
	// NextDirection.

	// Initial direction is just negative of the gradient because the Hessian
	// is an identity matrix.
	d := mat.NewVecDense(dim, dir)
	d.ScaleVec(-1, grad)
	return 1 / mat.Norm(d, 2)
}

func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := b.dim
	if len(loc.X) != dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(dir) != dim {
		panic("bfgs: unexpected size mismatch")
	}

	x := mat.NewVecDense(dim, loc.X)
	grad := mat.NewVecDense(dim, loc.Gradient)

	// s = x_{k+1} - x_{k}
	b.s.SubVec(x, &b.x)
	// y = g_{k+1} - g_{k}
	b.y.SubVec(grad, &b.grad)

	sDotY := mat.Dot(&b.s, &b.y)

	if b.first {
		// Rescale the initial Hessian.
		// From: Nocedal, J., Wright, S.: Numerical Optimization (2nd ed).
		// Springer (2006), page 143, eq. 6.20.
		yDotY := mat.Dot(&b.y, &b.y)
		scale := sDotY / yDotY
		for i := 0; i < dim; i++ {
			for j := i; j < dim; j++ {
				if i == j {
					b.invHess.SetSym(i, i, scale)
				} else {
					b.invHess.SetSym(i, j, 0)
				}
			}
		}
		b.first = false
	}

	if math.Abs(sDotY) != 0 {
		// Update the inverse Hessian according to the formula
		//
		//  B_{k+1}^-1 = B_k^-1
		//             + (s_kᵀ y_k + y_kᵀ B_k^-1 y_k) / (s_kᵀ y_k)^2 * (s_k s_kᵀ)
		//             - (B_k^-1 y_k s_kᵀ + s_k y_kᵀ B_k^-1) / (s_kᵀ y_k).
		//
		// Note that y_kᵀ B_k^-1 y_k is a scalar, and that the third term is a
		// rank-two update where B_k^-1 y_k is one vector and s_k is the other.
		yBy := mat.Inner(&b.y, b.invHess, &b.y)
		b.tmp.MulVec(b.invHess, &b.y)
		scale := (1 + yBy/sDotY) / sDotY
		b.invHess.SymRankOne(b.invHess, scale, &b.s)
		b.invHess.RankTwo(b.invHess, -1/sDotY, &b.tmp, &b.s)
	}

	// Update the stored BFGS data.
	b.x.CopyVec(x)
	b.grad.CopyVec(grad)

	// New direction is stored in dir.
	d := mat.NewVecDense(dim, dir)
	d.MulVec(b.invHess, grad)
	d.ScaleVec(-1, d)

	return 1
}

func (*BFGS) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}
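As a usage sketch, assuming the package's Problem, Minimize, and Result API (defined in files of this package that are not part of this diff), BFGS might be driven like this on the Rosenbrock function:

	package main

	import (
		"fmt"
		"log"

		"gonum.org/v1/gonum/optimize"
	)

	func main() {
		// Rosenbrock: f(x, y) = (1-x)^2 + 100(y-x^2)^2, minimum at (1, 1).
		p := optimize.Problem{
			Func: func(x []float64) float64 {
				a, b := 1-x[0], x[1]-x[0]*x[0]
				return a*a + 100*b*b
			},
			Grad: func(grad, x []float64) {
				grad[0] = -2*(1-x[0]) - 400*x[0]*(x[1]-x[0]*x[0])
				grad[1] = 200 * (x[1] - x[0]*x[0])
			},
		}
		// Linesearcher is left nil, so initLocal falls back to Bisection,
		// which targets the strong Wolfe conditions BFGS needs.
		res, err := optimize.Minimize(p, []float64{-1.2, 1}, nil, &optimize.BFGS{})
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(res.X, res.F) // expect x ≈ [1, 1], f ≈ 0
	}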
vendor/gonum.org/v1/gonum/optimize/bisection.go (+146 lines, generated, vendored, new file)
@@ -0,0 +1,146 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import "math"

const defaultBisectionCurvature = 0.9

var _ Linesearcher = (*Bisection)(nil)

// Bisection is a Linesearcher that uses a bisection to find a point that
// satisfies the strong Wolfe conditions with the given curvature factor and
// a decrease factor of zero.
type Bisection struct {
	// CurvatureFactor is the constant factor in the curvature condition.
	// Smaller values result in a more exact line search.
	// A set value must be in the interval (0, 1), otherwise Init will panic.
	// If it is zero, it will be defaulted to 0.9.
	CurvatureFactor float64

	minStep  float64
	maxStep  float64
	currStep float64

	initF float64
	minF  float64
	maxF  float64
	lastF float64

	initGrad float64

	lastOp Operation
}

func (b *Bisection) Init(f, g float64, step float64) Operation {
	if step <= 0 {
		panic("bisection: bad step size")
	}
	if g >= 0 {
		panic("bisection: initial derivative is non-negative")
	}

	if b.CurvatureFactor == 0 {
		b.CurvatureFactor = defaultBisectionCurvature
	}
	if b.CurvatureFactor <= 0 || b.CurvatureFactor >= 1 {
		panic("bisection: CurvatureFactor not between 0 and 1")
	}

	b.minStep = 0
	b.maxStep = math.Inf(1)
	b.currStep = step

	b.initF = f
	b.minF = f
	b.maxF = math.NaN()

	b.initGrad = g

	// Only evaluate the gradient when necessary.
	b.lastOp = FuncEvaluation
	return b.lastOp
}

func (b *Bisection) Iterate(f, g float64) (Operation, float64, error) {
	if b.lastOp != FuncEvaluation && b.lastOp != GradEvaluation {
		panic("bisection: Init has not been called")
	}
	minF := b.initF
	if b.maxF < minF {
		minF = b.maxF
	}
	if b.minF < minF {
		minF = b.minF
	}
	if b.lastOp == FuncEvaluation {
		// See if the function value is good enough to make progress. If it is,
		// evaluate the gradient. If not, set it to the upper bound if the bound
		// has not yet been found, otherwise iterate toward the minimum location.
		if f <= minF {
			b.lastF = f
			b.lastOp = GradEvaluation
			return b.lastOp, b.currStep, nil
		}
		if math.IsInf(b.maxStep, 1) {
			b.maxStep = b.currStep
			b.maxF = f
			return b.nextStep((b.minStep + b.maxStep) / 2)
		}
		if b.minF <= b.maxF {
			b.maxStep = b.currStep
			b.maxF = f
		} else {
			b.minStep = b.currStep
			b.minF = f
		}
		return b.nextStep((b.minStep + b.maxStep) / 2)
	}
	f = b.lastF
	// The function value was lower. Check if this location is sufficient to
	// converge the linesearch, otherwise iterate.
	if StrongWolfeConditionsMet(f, g, minF, b.initGrad, b.currStep, 0, b.CurvatureFactor) {
		b.lastOp = MajorIteration
		return b.lastOp, b.currStep, nil
	}
	if math.IsInf(b.maxStep, 1) {
		// The function value is lower. If the gradient is positive, an upper
		// bound of the minimum has been found. If the gradient is negative,
		// search farther in that direction.
		if g > 0 {
			b.maxStep = b.currStep
			b.maxF = f
			return b.nextStep((b.minStep + b.maxStep) / 2)
		}
		b.minStep = b.currStep
		b.minF = f
		return b.nextStep(b.currStep * 2)
	}
	// The interval has been bounded, and we have found a new lowest value. Use
	// the gradient to decide which direction.
	if g < 0 {
		b.minStep = b.currStep
		b.minF = f
	} else {
		b.maxStep = b.currStep
		b.maxF = f
	}
	return b.nextStep((b.minStep + b.maxStep) / 2)
}

// nextStep checks if the new step is equal to the old step.
// This can happen if min and max are the same, or if the step size is infinity,
// both of which indicate the minimization must stop. If the steps are different,
// it sets the new step size and returns the evaluation type and the step. If the steps
// are the same, it returns an error.
func (b *Bisection) nextStep(step float64) (Operation, float64, error) {
	if b.currStep == step {
		b.lastOp = NoOperation
		return b.lastOp, b.currStep, ErrLinesearcherFailure
	}
	b.currStep = step
	b.lastOp = FuncEvaluation
	return b.lastOp, b.currStep, nil
}
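For reference, the predicate that StrongWolfeConditionsMet encapsulates can be sketched in isolation. This is a hypothetical illustration (strongWolfeMet is not a name from this package); with the zero decrease factor used above, only the curvature part constrains the accepted step:

	import "math"

	// strongWolfeMet reports whether a step satisfies the strong Wolfe
	// conditions: sufficient decrease with constant c1, and the curvature
	// condition |phi'(step)| <= c2*|phi'(0)| with constant c2, where
	// phi(step) = F(x + step*d), f0 = phi(0), and g0 = phi'(0) < 0.
	func strongWolfeMet(f, g, f0, g0, step, c1, c2 float64) bool {
		return f <= f0+c1*step*g0 && math.Abs(g) <= c2*math.Abs(g0)
	}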
vendor/gonum.org/v1/gonum/optimize/cg.go (+368 lines, generated, vendored, new file)
@@ -0,0 +1,368 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/floats"
)

const (
	iterationRestartFactor = 6
	angleRestartThreshold  = -0.9
)

var (
	_ Method          = (*CG)(nil)
	_ localMethod     = (*CG)(nil)
	_ NextDirectioner = (*CG)(nil)
)

// CGVariant calculates the scaling parameter, β, used for updating the
// conjugate direction in the nonlinear conjugate gradient (CG) method.
type CGVariant interface {
	// Init is called at the first iteration and provides a way to initialize
	// any internal state.
	Init(loc *Location)
	// Beta returns the value of the scaling parameter that is computed
	// according to the particular variant of the CG method.
	Beta(grad, gradPrev, dirPrev []float64) float64
}

var (
	_ CGVariant = (*FletcherReeves)(nil)
	_ CGVariant = (*PolakRibierePolyak)(nil)
	_ CGVariant = (*HestenesStiefel)(nil)
	_ CGVariant = (*DaiYuan)(nil)
	_ CGVariant = (*HagerZhang)(nil)
)

// CG implements the nonlinear conjugate gradient method for solving nonlinear
// unconstrained optimization problems. It is a line search method that
// generates the search directions d_k according to the formula
//
//  d_{k+1} = -∇f_{k+1} + β_k*d_k,   d_0 = -∇f_0.
//
// Variants of the conjugate gradient method differ in the choice of the
// parameter β_k. The conjugate gradient method usually requires fewer function
// evaluations than the gradient descent method and no matrix storage, but
// L-BFGS is usually more efficient.
//
// CG implements a restart strategy that takes the steepest descent direction
// (i.e., d_{k+1} = -∇f_{k+1}) whenever any of the following conditions holds:
//
//  - A certain number of iterations has elapsed without a restart. This number
//    is controllable via IterationRestartFactor and if equal to 0, it is set to
//    a reasonable default based on the problem dimension.
//  - The angle between the gradients at two consecutive iterations ∇f_k and
//    ∇f_{k+1} is too large.
//  - The direction d_{k+1} is not a descent direction.
//  - β_k returned from CGVariant.Beta is equal to zero.
//
// The line search for CG must yield step sizes that satisfy the strong Wolfe
// conditions at every iteration, otherwise the generated search direction
// might fail to be a descent direction. The line search should be more
// stringent compared with those for Newton-like methods, which can be achieved
// by setting the gradient constant in the strong Wolfe conditions to a small
// value.
//
// See also William Hager, Hongchao Zhang, A survey of nonlinear conjugate
// gradient methods. Pacific Journal of Optimization, 2 (2006), pp. 35-58, and
// references therein.
type CG struct {
	// Linesearcher must satisfy the strong Wolfe conditions at every iteration.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// Variant implements the particular CG formula for computing β_k.
	// If Variant is nil, an appropriate default is chosen.
	Variant CGVariant
	// InitialStep estimates the initial line search step size, because the CG
	// method does not generate well-scaled search directions.
	// If InitialStep is nil, an appropriate default is chosen.
	InitialStep StepSizer

	// IterationRestartFactor determines the frequency of restarts based on the
	// problem dimension. The negative gradient direction is taken whenever
	// ceil(IterationRestartFactor*(problem dimension)) iterations have elapsed
	// without a restart. For medium and large-scale problems
	// IterationRestartFactor should be set to 1; for low-dimensional problems
	// a larger value should be chosen. Note that if the ceil function returns 1,
	// CG will be identical to gradient descent.
	// If IterationRestartFactor is 0, it will be set to 6.
	// CG will panic if IterationRestartFactor is negative.
	IterationRestartFactor float64
	// AngleRestartThreshold sets the threshold angle for restart. The method
	// is restarted if the cosine of the angle between two consecutive
	// gradients is smaller than or equal to AngleRestartThreshold, that is, if
	//  ∇f_k·∇f_{k+1} / (|∇f_k| |∇f_{k+1}|) <= AngleRestartThreshold.
	// A value of AngleRestartThreshold closer to -1 (successive gradients in
	// exact opposite directions) will tend to reduce the number of restarts.
	// If AngleRestartThreshold is 0, it will be set to -0.9.
	// CG will panic if AngleRestartThreshold is not in the interval [-1, 0].
	AngleRestartThreshold float64
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	ls *LinesearchMethod

	status Status
	err    error

	restartAfter    int
	iterFromRestart int

	dirPrev      []float64
	gradPrev     []float64
	gradPrevNorm float64
}

func (cg *CG) Status() (Status, error) {
	return cg.status, cg.err
}

func (*CG) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

func (cg *CG) Init(dim, tasks int) int {
	cg.status = NotTerminated
	cg.err = nil
	return 1
}

func (cg *CG) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	cg.status, cg.err = localOptimizer{}.run(cg, cg.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (cg *CG) initLocal(loc *Location) (Operation, error) {
	if cg.IterationRestartFactor < 0 {
		panic("cg: IterationRestartFactor is negative")
	}
	if cg.AngleRestartThreshold < -1 || cg.AngleRestartThreshold > 0 {
		panic("cg: AngleRestartThreshold not in [-1, 0]")
	}

	if cg.Linesearcher == nil {
		cg.Linesearcher = &MoreThuente{CurvatureFactor: 0.1}
	}
	if cg.Variant == nil {
		cg.Variant = &HestenesStiefel{}
	}
	if cg.InitialStep == nil {
		cg.InitialStep = &FirstOrderStepSize{}
	}

	if cg.IterationRestartFactor == 0 {
		cg.IterationRestartFactor = iterationRestartFactor
	}
	if cg.AngleRestartThreshold == 0 {
		cg.AngleRestartThreshold = angleRestartThreshold
	}

	if cg.ls == nil {
		cg.ls = &LinesearchMethod{}
	}
	cg.ls.Linesearcher = cg.Linesearcher
	cg.ls.NextDirectioner = cg

	return cg.ls.Init(loc)
}

func (cg *CG) iterateLocal(loc *Location) (Operation, error) {
	return cg.ls.Iterate(loc)
}

func (cg *CG) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)

	cg.restartAfter = int(math.Ceil(cg.IterationRestartFactor * float64(dim)))
	cg.iterFromRestart = 0

	// The initial direction is always the negative gradient.
	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)

	cg.dirPrev = resize(cg.dirPrev, dim)
	copy(cg.dirPrev, dir)
	cg.gradPrev = resize(cg.gradPrev, dim)
	copy(cg.gradPrev, loc.Gradient)
	cg.gradPrevNorm = floats.Norm(loc.Gradient, 2)

	cg.Variant.Init(loc)
	return cg.InitialStep.Init(loc, dir)
}

func (cg *CG) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)

	cg.iterFromRestart++
	var restart bool
	if cg.iterFromRestart == cg.restartAfter {
		// Restart because too many iterations have been taken without a restart.
		restart = true
	}

	gDot := floats.Dot(loc.Gradient, cg.gradPrev)
	gNorm := floats.Norm(loc.Gradient, 2)
	if gDot <= cg.AngleRestartThreshold*gNorm*cg.gradPrevNorm {
		// Restart because the angle between the last two gradients is too large.
		restart = true
	}

	// Compute the scaling factor β_k even when restarting, because cg.Variant
	// may be keeping an inner state that needs to be updated at every iteration.
	beta := cg.Variant.Beta(loc.Gradient, cg.gradPrev, cg.dirPrev)
	if beta == 0 {
		// β_k == 0 means that the steepest descent direction will be taken, so
		// indicate that the method is in fact being restarted.
		restart = true
	}
	if !restart {
		// The method is not being restarted, so update the descent direction.
		floats.AddScaled(dir, beta, cg.dirPrev)
		if floats.Dot(loc.Gradient, dir) >= 0 {
			// Restart because the new direction is not a descent direction.
			restart = true
			copy(dir, loc.Gradient)
			floats.Scale(-1, dir)
		}
	}

	// Get the initial line search step size from the StepSizer even if the
	// method was restarted, because StepSizers need to see every iteration.
	stepSize = cg.InitialStep.StepSize(loc, dir)
	if restart {
		// The method was restarted and since the steepest descent direction is
		// not related to the previous direction, discard the estimated step
		// size from cg.InitialStep and use step size of 1 instead.
		stepSize = 1
		// Reset to 0 the counter of iterations taken since the last restart.
		cg.iterFromRestart = 0
	}

	copy(cg.gradPrev, loc.Gradient)
	copy(cg.dirPrev, dir)
	cg.gradPrevNorm = gNorm
	return stepSize
}

func (*CG) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}

// FletcherReeves implements the Fletcher-Reeves variant of the CG method that
// computes the scaling parameter β_k according to the formula
//
//  β_k = |∇f_{k+1}|^2 / |∇f_k|^2.
type FletcherReeves struct {
	prevNorm float64
}

func (fr *FletcherReeves) Init(loc *Location) {
	fr.prevNorm = floats.Norm(loc.Gradient, 2)
}

func (fr *FletcherReeves) Beta(grad, _, _ []float64) (beta float64) {
	norm := floats.Norm(grad, 2)
	beta = (norm / fr.prevNorm) * (norm / fr.prevNorm)
	fr.prevNorm = norm
	return beta
}

// PolakRibierePolyak implements the Polak-Ribiere-Polyak variant of the CG
// method that computes the scaling parameter β_k according to the formula
//
//  β_k = max(0, ∇f_{k+1}·y_k / |∇f_k|^2),
//
// where y_k = ∇f_{k+1} - ∇f_k.
type PolakRibierePolyak struct {
	prevNorm float64
}

func (pr *PolakRibierePolyak) Init(loc *Location) {
	pr.prevNorm = floats.Norm(loc.Gradient, 2)
}

func (pr *PolakRibierePolyak) Beta(grad, gradPrev, _ []float64) (beta float64) {
	norm := floats.Norm(grad, 2)
	dot := floats.Dot(grad, gradPrev)
	beta = (norm*norm - dot) / (pr.prevNorm * pr.prevNorm)
	pr.prevNorm = norm
	return math.Max(0, beta)
}

// HestenesStiefel implements the Hestenes-Stiefel variant of the CG method
// that computes the scaling parameter β_k according to the formula
//
//  β_k = max(0, ∇f_{k+1}·y_k / d_k·y_k),
//
// where y_k = ∇f_{k+1} - ∇f_k.
type HestenesStiefel struct {
	y []float64
}

func (hs *HestenesStiefel) Init(loc *Location) {
	hs.y = resize(hs.y, len(loc.Gradient))
}

func (hs *HestenesStiefel) Beta(grad, gradPrev, dirPrev []float64) (beta float64) {
	floats.SubTo(hs.y, grad, gradPrev)
	beta = floats.Dot(grad, hs.y) / floats.Dot(dirPrev, hs.y)
	return math.Max(0, beta)
}

// DaiYuan implements the Dai-Yuan variant of the CG method that computes the
// scaling parameter β_k according to the formula
//
//  β_k = |∇f_{k+1}|^2 / d_k·y_k,
//
// where y_k = ∇f_{k+1} - ∇f_k.
type DaiYuan struct {
	y []float64
}

func (dy *DaiYuan) Init(loc *Location) {
	dy.y = resize(dy.y, len(loc.Gradient))
}

func (dy *DaiYuan) Beta(grad, gradPrev, dirPrev []float64) (beta float64) {
	floats.SubTo(dy.y, grad, gradPrev)
	norm := floats.Norm(grad, 2)
	return norm * norm / floats.Dot(dirPrev, dy.y)
}

// HagerZhang implements the Hager-Zhang variant of the CG method that computes the
// scaling parameter β_k according to the formula
//
//  β_k = (y_k - 2 d_k |y_k|^2/(d_k·y_k))·∇f_{k+1} / (d_k·y_k),
//
// where y_k = ∇f_{k+1} - ∇f_k.
type HagerZhang struct {
	y []float64
}

func (hz *HagerZhang) Init(loc *Location) {
	hz.y = resize(hz.y, len(loc.Gradient))
}

func (hz *HagerZhang) Beta(grad, gradPrev, dirPrev []float64) (beta float64) {
	floats.SubTo(hz.y, grad, gradPrev)
	dirDotY := floats.Dot(dirPrev, hz.y)
	gDotY := floats.Dot(grad, hz.y)
	gDotDir := floats.Dot(grad, dirPrev)
	yNorm := floats.Norm(hz.y, 2)
	return (gDotY - 2*gDotDir*yNorm*yNorm/dirDotY) / dirDotY
}
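A usage sketch, continuing with the Problem value p from the BFGS example above and again assuming the package's Minimize entry point: the Variant field swaps in a different β_k formula, here Polak-Ribiere-Polyak in place of the Hestenes-Stiefel default that initLocal would pick.

	method := &optimize.CG{
		Variant:                &optimize.PolakRibierePolyak{},
		IterationRestartFactor: 1, // restart every ceil(dim) iterations, as the docs suggest for larger problems
	}
	res, err := optimize.Minimize(p, []float64{-1.2, 1}, nil, method)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(res.X, res.F)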
vendor/gonum.org/v1/gonum/optimize/cmaes.go (+469 lines, generated, vendored, new file)
@@ -0,0 +1,469 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"
	"sort"

	"golang.org/x/exp/rand"

	"gonum.org/v1/gonum/floats"
	"gonum.org/v1/gonum/mat"
	"gonum.org/v1/gonum/stat/distmv"
)

var _ Method = (*CmaEsChol)(nil)

// TODO(btracey): If we ever implement the traditional CMA-ES algorithm, provide
// the base explanation there, and modify this description to just
// describe the differences.

// CmaEsChol implements the covariance matrix adaptation evolution strategy (CMA-ES)
// based on the Cholesky decomposition. The full algorithm is described in
//
//  Krause, Oswin, Dídac Rodríguez Arbonès, and Christian Igel. "CMA-ES with
//  optimal covariance update and storage complexity." Advances in Neural
//  Information Processing Systems. 2016.
//  https://papers.nips.cc/paper/6457-cma-es-with-optimal-covariance-update-and-storage-complexity.pdf
//
// CMA-ES is a global optimization method that progressively adapts a population
// of samples. CMA-ES combines techniques from local optimization with global
// optimization. Specifically, the CMA-ES algorithm uses an initial multivariate
// normal distribution to generate a population of input locations. The input locations
// with the lowest function values are used to update the parameters of the normal
// distribution, a new set of input locations are generated, and this procedure
// is iterated until convergence. The initial sampling distribution will have
// a mean specified by the initial x location, and a covariance specified by
// the InitCholesky field.
//
// As the normal distribution is progressively updated according to the best samples,
// the mean of the distribution is updated in a gradient-descent-like fashion,
// followed by a shrinking covariance.
// It is recommended that the algorithm be run multiple times (with different
// initial locations) to have a better chance of finding the global minimum.
//
// The CMA-ES-Chol algorithm differs from the standard CMA-ES algorithm in that
// it directly updates the Cholesky decomposition of the normal distribution.
// This changes the runtime from O(dimension^3) to O(dimension^2*population).
// The evolution of the multi-variate normal will be similar to the baseline
// CMA-ES algorithm, but the covariance update equation is not identical.
//
// For more information about the CMA-ES algorithm, see
//
//  https://en.wikipedia.org/wiki/CMA-ES
//  https://arxiv.org/pdf/1604.00772.pdf
type CmaEsChol struct {
	// InitStepSize sets the initial size of the covariance matrix adaptation.
	// If InitStepSize is 0, a default value of 0.5 is used. InitStepSize cannot
	// be negative, or CmaEsChol will panic.
	InitStepSize float64
	// Population sets the population size for the algorithm. If Population is
	// 0, a default value of 4 + math.Floor(3*math.Log(float64(dim))) is used.
	// Population cannot be negative or CmaEsChol will panic.
	Population int
	// InitCholesky specifies the Cholesky decomposition of the covariance
	// matrix for the initial sampling distribution. If InitCholesky is nil,
	// a default value of I is used. If it is non-nil, then
	// InitCholesky.SymmetricDim() must equal the problem dimension.
	InitCholesky *mat.Cholesky
	// StopLogDet sets the threshold for stopping the optimization if the
	// distribution becomes too peaked. The log determinant is a measure of the
	// (log) "volume" of the normal distribution, and when it is too small
	// the samples are almost the same. If the log determinant of the covariance
	// matrix becomes less than StopLogDet, the optimization run is concluded.
	// If StopLogDet is 0, a default value of dim*log(1e-16) is used.
	// If StopLogDet is NaN, the stopping criterion is not used, though
	// this can cause numeric instabilities in the algorithm.
	StopLogDet float64
	// ForgetBest, when true, does not track the best overall function value found,
	// instead returning the new best sample in each iteration. If ForgetBest
	// is false, then the minimum value returned will be the lowest across all
	// iterations, regardless of when that sample was generated.
	ForgetBest bool
	// Src allows a random number generator to be supplied for generating samples.
	// If Src is nil the generator in golang.org/x/exp/rand is used.
	Src rand.Source

	// Fixed algorithm parameters.
	dim                 int
	pop                 int
	weights             []float64
	muEff               float64
	cc, cs, c1, cmu, ds float64
	eChi                float64

	// Function data.
	xs *mat.Dense
	fs []float64

	// Adaptive algorithm parameters.
	invSigma float64 // inverse of the sigma parameter
	pc, ps   []float64
	mean     []float64
	chol     mat.Cholesky

	// Overall best.
	bestX []float64
	bestF float64

	// Synchronization.
	sentIdx     int
	receivedIdx int
	operation   chan<- Task
	updateErr   error
}

var (
	_ Statuser = (*CmaEsChol)(nil)
	_ Method   = (*CmaEsChol)(nil)
)

func (cma *CmaEsChol) methodConverged() Status {
	sd := cma.StopLogDet
	switch {
	case math.IsNaN(sd):
		return NotTerminated
	case sd == 0:
		sd = float64(cma.dim) * -36.8413614879 // ln(1e-16)
	}
	if cma.chol.LogDet() < sd {
		return MethodConverge
	}
	return NotTerminated
}

// Status returns the status of the method.
func (cma *CmaEsChol) Status() (Status, error) {
	if cma.updateErr != nil {
		return Failure, cma.updateErr
	}
	return cma.methodConverged(), nil
}

func (*CmaEsChol) Uses(has Available) (uses Available, err error) {
	return has.function()
}

func (cma *CmaEsChol) Init(dim, tasks int) int {
	if dim <= 0 {
		panic(nonpositiveDimension)
	}
	if tasks < 0 {
		panic(negativeTasks)
	}

	// Set fixed algorithm parameters.
	// Parameter values are from https://arxiv.org/pdf/1604.00772.pdf .
	cma.dim = dim
	cma.pop = cma.Population
	n := float64(dim)
	if cma.pop == 0 {
		cma.pop = 4 + int(3*math.Log(n)) // Note the implicit floor.
	} else if cma.pop < 0 {
		panic("cma-es-chol: negative population size")
	}
	mu := cma.pop / 2
	cma.weights = resize(cma.weights, mu)
	for i := range cma.weights {
		v := math.Log(float64(mu)+0.5) - math.Log(float64(i)+1)
		cma.weights[i] = v
	}
	floats.Scale(1/floats.Sum(cma.weights), cma.weights)
	cma.muEff = 0
	for _, v := range cma.weights {
		cma.muEff += v * v
	}
	cma.muEff = 1 / cma.muEff

	cma.cc = (4 + cma.muEff/n) / (n + 4 + 2*cma.muEff/n)
	cma.cs = (cma.muEff + 2) / (n + cma.muEff + 5)
	cma.c1 = 2 / ((n+1.3)*(n+1.3) + cma.muEff)
	cma.cmu = math.Min(1-cma.c1, 2*(cma.muEff-2+1/cma.muEff)/((n+2)*(n+2)+cma.muEff))
	cma.ds = 1 + 2*math.Max(0, math.Sqrt((cma.muEff-1)/(n+1))-1) + cma.cs
	// E[chi] is taken from https://en.wikipedia.org/wiki/CMA-ES (there
	// listed as E[||N(0,1)||]).
	cma.eChi = math.Sqrt(n) * (1 - 1.0/(4*n) + 1/(21*n*n))

	// Allocate memory for function data.
	cma.xs = mat.NewDense(cma.pop, dim, nil)
	cma.fs = resize(cma.fs, cma.pop)

	// Allocate and initialize adaptive parameters.
	cma.invSigma = 1 / cma.InitStepSize
	if cma.InitStepSize == 0 {
		cma.invSigma = 10.0 / 3
	} else if cma.InitStepSize < 0 {
		panic("cma-es-chol: negative initial step size")
	}
	cma.pc = resize(cma.pc, dim)
	for i := range cma.pc {
		cma.pc[i] = 0
	}
	cma.ps = resize(cma.ps, dim)
	for i := range cma.ps {
		cma.ps[i] = 0
	}
	cma.mean = resize(cma.mean, dim) // mean location initialized at the start of Run

	if cma.InitCholesky != nil {
		if cma.InitCholesky.SymmetricDim() != dim {
			panic("cma-es-chol: incorrect InitCholesky size")
		}
		cma.chol.Clone(cma.InitCholesky)
	} else {
		// Set the initial Cholesky to I.
		b := mat.NewDiagDense(dim, nil)
		for i := 0; i < dim; i++ {
			b.SetDiag(i, 1)
		}
		var chol mat.Cholesky
		ok := chol.Factorize(b)
		if !ok {
			panic("cma-es-chol: bad cholesky. shouldn't happen")
		}
		cma.chol = chol
	}

	cma.bestX = resize(cma.bestX, dim)
	cma.bestF = math.Inf(1)

	cma.sentIdx = 0
	cma.receivedIdx = 0
	cma.operation = nil
	cma.updateErr = nil
	t := min(tasks, cma.pop)
	return t
}

func (cma *CmaEsChol) sendInitTasks(tasks []Task) {
	for i, task := range tasks {
		cma.sendTask(i, task)
	}
	cma.sentIdx = len(tasks)
}

// sendTask generates a sample and sends the task. It does not update the cma index.
func (cma *CmaEsChol) sendTask(idx int, task Task) {
	task.ID = idx
	task.Op = FuncEvaluation
	distmv.NormalRand(cma.xs.RawRowView(idx), cma.mean, &cma.chol, cma.Src)
	copy(task.X, cma.xs.RawRowView(idx))
	cma.operation <- task
}

// bestIdx returns the best index in the functions. Returns -1 if all values
// are NaN.
func (cma *CmaEsChol) bestIdx() int {
	best := -1
	bestVal := math.Inf(1)
	for i, v := range cma.fs {
		if math.IsNaN(v) {
			continue
		}
		// Use equality in case somewhere evaluates to +inf.
		if v <= bestVal {
			best = i
			bestVal = v
		}
	}
	return best
}

// findBestAndUpdateTask finds the best task in the current list, updates the
// new best overall, and then stores the best location into task.
func (cma *CmaEsChol) findBestAndUpdateTask(task Task) Task {
	// Find and update the best location.
	// Don't use floats because there may be NaN values.
	best := cma.bestIdx()
	bestF := math.NaN()
	bestX := cma.xs.RawRowView(0)
	if best != -1 {
		bestF = cma.fs[best]
		bestX = cma.xs.RawRowView(best)
	}
	if cma.ForgetBest {
		task.F = bestF
		copy(task.X, bestX)
	} else {
		if bestF < cma.bestF {
			cma.bestF = bestF
			copy(cma.bestX, bestX)
		}
		task.F = cma.bestF
		copy(task.X, cma.bestX)
	}
	return task
}

func (cma *CmaEsChol) Run(operations chan<- Task, results <-chan Task, tasks []Task) {
	copy(cma.mean, tasks[0].X)
	cma.operation = operations
	// Send the initial tasks. We know there are at most as many tasks as elements
	// of the population.
	cma.sendInitTasks(tasks)

Loop:
	for {
		result := <-results
		switch result.Op {
		default:
			panic("unknown operation")
		case PostIteration:
			break Loop
		case MajorIteration:
			// The last thing we did was update all of the tasks and send the
			// major iteration. Now we can send a group of tasks again.
			cma.sendInitTasks(tasks)
		case FuncEvaluation:
			cma.receivedIdx++
			cma.fs[result.ID] = result.F
			switch {
			case cma.sentIdx < cma.pop:
				// There are still tasks to evaluate. Send the next.
				cma.sendTask(cma.sentIdx, result)
				cma.sentIdx++
			case cma.receivedIdx < cma.pop:
				// All the tasks have been sent, but not all of them have been received.
				// Need to wait until all are back.
				continue Loop
			default:
				// All of the evaluations have been received.
				if cma.receivedIdx != cma.pop {
					panic("bad logic")
				}
				cma.receivedIdx = 0
				cma.sentIdx = 0

				task := cma.findBestAndUpdateTask(result)
				// Update the parameters and send a MajorIteration or a convergence.
				err := cma.update()
				// Kill the existing data.
				for i := range cma.fs {
					cma.fs[i] = math.NaN()
					cma.xs.Set(i, 0, math.NaN())
				}
				switch {
				case err != nil:
					cma.updateErr = err
					task.Op = MethodDone
				case cma.methodConverged() != NotTerminated:
					task.Op = MethodDone
				default:
					task.Op = MajorIteration
					task.ID = -1
				}
				operations <- task
			}
		}
	}

	// Been told to stop. Clean up.
	// Need to see the best of the evaluated tasks so far, so collect the
	// remaining results first, then inspect them.
	for task := range results {
		switch task.Op {
		case MajorIteration:
		case FuncEvaluation:
			cma.fs[task.ID] = task.F
		default:
			panic("unknown operation")
		}
	}
	// Send the new best value if the evaluation is better than any we've
	// found so far. Keep this separate from findBestAndUpdateTask so that
	// we only send an iteration if we find a better location.
	if !cma.ForgetBest {
		best := cma.bestIdx()
		if best != -1 && cma.fs[best] < cma.bestF {
			task := tasks[0]
			task.F = cma.fs[best]
			copy(task.X, cma.xs.RawRowView(best))
			task.Op = MajorIteration
			task.ID = -1
			operations <- task
		}
	}
	close(operations)
}

// update computes the new parameters (mean, cholesky, etc.). Does not update
// any of the synchronization parameters (taskIdx).
func (cma *CmaEsChol) update() error {
	// Sort the function values to find the elite samples.
	ftmp := make([]float64, cma.pop)
	copy(ftmp, cma.fs)
	indexes := make([]int, cma.pop)
	for i := range indexes {
		indexes[i] = i
	}
	sort.Sort(bestSorter{F: ftmp, Idx: indexes})

	meanOld := make([]float64, len(cma.mean))
	copy(meanOld, cma.mean)

	// m_{t+1} = \sum_{i=1}^mu w_i x_i
	for i := range cma.mean {
		cma.mean[i] = 0
	}
	for i, w := range cma.weights {
		idx := indexes[i] // index of the i-th elite sample.
		floats.AddScaled(cma.mean, w, cma.xs.RawRowView(idx))
	}
	meanDiff := make([]float64, len(cma.mean))
	floats.SubTo(meanDiff, cma.mean, meanOld)

	// p_{c,t+1} = (1-c_c) p_{c,t} + \sqrt(c_c*(2-c_c)*mueff) (m_{t+1}-m_t)/sigma_t
	floats.Scale(1-cma.cc, cma.pc)
	scaleC := math.Sqrt(cma.cc*(2-cma.cc)*cma.muEff) * cma.invSigma
	floats.AddScaled(cma.pc, scaleC, meanDiff)

	// p_{sigma, t+1} = (1-c_sigma) p_{sigma,t} + \sqrt(c_s*(2-c_s)*mueff) A_t^-1 (m_{t+1}-m_t)/sigma_t
	floats.Scale(1-cma.cs, cma.ps)
	// First compute A_t^-1 (m_{t+1}-m_t), then add the scaled vector.
	tmp := make([]float64, cma.dim)
	tmpVec := mat.NewVecDense(cma.dim, tmp)
	diffVec := mat.NewVecDense(cma.dim, meanDiff)
	err := tmpVec.SolveVec(cma.chol.RawU().T(), diffVec)
	if err != nil {
		return err
	}
	scaleS := math.Sqrt(cma.cs*(2-cma.cs)*cma.muEff) * cma.invSigma
	floats.AddScaled(cma.ps, scaleS, tmp)

	// Compute the update to A.
	scaleChol := 1 - cma.c1 - cma.cmu
	if scaleChol == 0 {
		scaleChol = math.SmallestNonzeroFloat64 // enough to kill the old data, but still non-zero.
	}
	cma.chol.Scale(scaleChol, &cma.chol)
	cma.chol.SymRankOne(&cma.chol, cma.c1, mat.NewVecDense(cma.dim, cma.pc))
	for i, w := range cma.weights {
		idx := indexes[i]
		floats.SubTo(tmp, cma.xs.RawRowView(idx), meanOld)
		cma.chol.SymRankOne(&cma.chol, cma.cmu*w*cma.invSigma, tmpVec)
	}

	// sigma_{t+1} = sigma_t * exp(c_sigma/d_sigma * (norm(p_{sigma,t+1})/E[chi] - 1))
	normPs := floats.Norm(cma.ps, 2)
	cma.invSigma /= math.Exp(cma.cs / cma.ds * (normPs/cma.eChi - 1))
	return nil
}

type bestSorter struct {
	F   []float64
	Idx []int
}

func (b bestSorter) Len() int {
	return len(b.F)
}
func (b bestSorter) Less(i, j int) bool {
	return b.F[i] < b.F[j]
}
func (b bestSorter) Swap(i, j int) {
	b.F[i], b.F[j] = b.F[j], b.F[i]
	b.Idx[i], b.Idx[j] = b.Idx[j], b.Idx[i]
}
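Because Uses asks only for function values, CmaEsChol can be driven without a gradient. A minimal sketch, assuming the package's Problem and Minimize API from outside this diff; the population size and seed are arbitrary choices for illustration:

	import "golang.org/x/exp/rand"

	p := optimize.Problem{
		Func: func(x []float64) float64 { // sphere function, minimum 0 at the origin
			var sum float64
			for _, v := range x {
				sum += v * v
			}
			return sum
		},
	}
	method := &optimize.CmaEsChol{
		Population: 16,                // 0 would default to 4 + floor(3*ln(dim))
		Src:        rand.NewSource(1), // nil falls back to the x/exp/rand global generator
	}
	res, err := optimize.Minimize(p, []float64{5, 5, 5}, nil, method)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(res.X, res.F) // expect x near the origin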
vendor/gonum.org/v1/gonum/optimize/doc.go (+6 lines, generated, vendored, new file)
@@ -0,0 +1,6 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package optimize implements algorithms for finding the optimum value of functions.
package optimize // import "gonum.org/v1/gonum/optimize"
vendor/gonum.org/v1/gonum/optimize/errors.go (+78 lines, generated, vendored, new file)
@@ -0,0 +1,78 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"errors"
	"fmt"
	"math"
)

var (
	// ErrZeroDimensional signifies an optimization was called with an input of length 0.
	ErrZeroDimensional = errors.New("optimize: zero dimensional input")

	// ErrLinesearcherFailure signifies that a Linesearcher has iterated too
	// many times. This may occur if the gradient tolerance is set too low.
	ErrLinesearcherFailure = errors.New("linesearch: failed to converge")

	// ErrNonDescentDirection signifies that LinesearchMethod has received a
	// search direction from a NextDirectioner in which the function is not
	// decreasing.
	ErrNonDescentDirection = errors.New("linesearch: non-descent search direction")

	// ErrNoProgress signifies that LinesearchMethod cannot make further
	// progress because there is no change in location after Linesearcher step
	// due to floating-point arithmetic.
	ErrNoProgress = errors.New("linesearch: no change in location after Linesearcher step")

	// ErrLinesearcherBound signifies that a Linesearcher reached a step that
	// lies out of allowed bounds.
	ErrLinesearcherBound = errors.New("linesearch: step out of bounds")

	// ErrMissingGrad signifies that a Method requires a Gradient function that
	// is not supplied by Problem.
	ErrMissingGrad = errors.New("optimize: problem does not provide needed Grad function")

	// ErrMissingHess signifies that a Method requires a Hessian function that
	// is not supplied by Problem.
	ErrMissingHess = errors.New("optimize: problem does not provide needed Hess function")
)

// ErrFunc is returned when an initial function value is invalid. The error
// state may be either +Inf or NaN. ErrFunc satisfies the error interface.
type ErrFunc float64

func (err ErrFunc) Error() string {
	switch {
	case math.IsInf(float64(err), 1):
		return "optimize: initial function value is infinite"
	case math.IsNaN(float64(err)):
		return "optimize: initial function value is NaN"
	default:
		panic("optimize: bad ErrFunc")
	}
}

// ErrGrad is returned when an initial gradient is invalid. The error gradient
// may be either ±Inf or NaN. ErrGrad satisfies the error interface.
type ErrGrad struct {
	Grad  float64 // Grad is the invalid gradient value.
	Index int     // Index is the position at which the invalid gradient was found.
}

func (err ErrGrad) Error() string {
	switch {
	case math.IsInf(err.Grad, 0):
		return fmt.Sprintf("optimize: initial gradient is infinite at position %d", err.Index)
	case math.IsNaN(err.Grad):
		return fmt.Sprintf("optimize: initial gradient is NaN at position %d", err.Index)
	default:
		panic("optimize: bad ErrGrad")
	}
}

// List of shared panic strings
const badProblem = "optimize: objective function is undefined"
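Because ErrFunc is a named float64 and ErrGrad a small struct, a caller can recover the offending values with a type switch. A hypothetical sketch (inspectInitError is not part of the package):

	// inspectInitError shows how the error types above carry the invalid
	// values with them.
	func inspectInitError(err error) {
		switch e := err.(type) {
		case optimize.ErrFunc:
			fmt.Printf("bad initial function value: %v\n", float64(e))
		case optimize.ErrGrad:
			fmt.Printf("bad initial gradient %v at index %d\n", e.Grad, e.Index)
		default:
			fmt.Println(err)
		}
	}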
vendor/gonum.org/v1/gonum/optimize/functionconvergence.go (+85 lines, generated, vendored, new file)
@@ -0,0 +1,85 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"
)

// Converger returns the convergence of the optimization based on
// locations found during optimization. Converger must not modify the value of
// the provided Location in any of the methods.
type Converger interface {
	Init(dim int)
	Converged(loc *Location) Status
}

var (
	_ Converger = NeverTerminate{}
	_ Converger = (*FunctionConverge)(nil)
)

// NeverTerminate implements Converger, always reporting NotTerminated.
type NeverTerminate struct{}

func (NeverTerminate) Init(dim int) {}

func (NeverTerminate) Converged(loc *Location) Status {
	return NotTerminated
}

// FunctionConverge tests for insufficient improvement in the optimum value
// over the last iterations. A FunctionConvergence status is returned if
// there is no significant decrease for FunctionConverge.Iterations. A
// decrease is considered significant if
//
//  f < f_best
//
// and
//
//  f_best - f > FunctionConverge.Relative * maxabs(f, f_best) + FunctionConverge.Absolute
//
// If the decrease is significant, then the iteration counter is reset and
// f_best is updated.
//
// If FunctionConverge.Iterations == 0, it has no effect.
type FunctionConverge struct {
	Absolute   float64
	Relative   float64
	Iterations int

	first bool
	best  float64
	iter  int
}

func (fc *FunctionConverge) Init(dim int) {
	fc.first = true
	fc.best = 0
	fc.iter = 0
}

func (fc *FunctionConverge) Converged(l *Location) Status {
	f := l.F
	if fc.first {
		fc.best = f
		fc.first = false
		return NotTerminated
	}
	if fc.Iterations == 0 {
		return NotTerminated
	}
	maxAbs := math.Max(math.Abs(f), math.Abs(fc.best))
	if f < fc.best && fc.best-f > fc.Relative*maxAbs+fc.Absolute {
		fc.best = f
		fc.iter = 0
		return NotTerminated
	}
	fc.iter++
	if fc.iter < fc.Iterations {
		return NotTerminated
	}
	return FunctionConvergence
}
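A Converger is normally supplied through the optimization settings rather than the method. A sketch, assuming the package's Settings type (defined outside this diff) exposes a Converger field, with p and x0 as in the earlier examples:

	settings := &optimize.Settings{
		Converger: &optimize.FunctionConverge{
			Absolute:   1e-10, // decrease below this is insignificant...
			Iterations: 20,    // ...and 20 insignificant major iterations end the run
		},
	}
	res, err := optimize.Minimize(p, x0, settings, &optimize.BFGS{})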
vendor/gonum.org/v1/gonum/optimize/gradientdescent.go (+95 lines, generated, vendored, new file)
@@ -0,0 +1,95 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import "gonum.org/v1/gonum/floats"

var (
	_ Method          = (*GradientDescent)(nil)
	_ localMethod     = (*GradientDescent)(nil)
	_ NextDirectioner = (*GradientDescent)(nil)
)

// GradientDescent implements the steepest descent optimization method that
// performs successive steps along the direction of the negative gradient.
type GradientDescent struct {
	// Linesearcher selects suitable steps along the descent direction.
	// If Linesearcher is nil, a reasonable default will be chosen.
	Linesearcher Linesearcher
	// StepSizer determines the initial step size along each direction.
	// If StepSizer is nil, a reasonable default will be chosen.
	StepSizer StepSizer
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	ls *LinesearchMethod

	status Status
	err    error
}

func (g *GradientDescent) Status() (Status, error) {
	return g.status, g.err
}

func (*GradientDescent) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

func (g *GradientDescent) Init(dim, tasks int) int {
	g.status = NotTerminated
	g.err = nil
	return 1
}

func (g *GradientDescent) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	g.status, g.err = localOptimizer{}.run(g, g.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (g *GradientDescent) initLocal(loc *Location) (Operation, error) {
	if g.Linesearcher == nil {
		g.Linesearcher = &Backtracking{}
	}
	if g.StepSizer == nil {
		g.StepSizer = &QuadraticStepSize{}
	}

	if g.ls == nil {
		g.ls = &LinesearchMethod{}
	}
	g.ls.Linesearcher = g.Linesearcher
	g.ls.NextDirectioner = g

	return g.ls.Init(loc)
}

func (g *GradientDescent) iterateLocal(loc *Location) (Operation, error) {
	return g.ls.Iterate(loc)
}

func (g *GradientDescent) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)
	return g.StepSizer.Init(loc, dir)
}

func (g *GradientDescent) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)
	return g.StepSizer.StepSize(loc, dir)
}

func (*GradientDescent) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}
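Both fields can be set explicitly; nil values fall back to Backtracking and QuadraticStepSize in initLocal. A sketch, under the same Minimize assumption as the earlier examples:

	method := &optimize.GradientDescent{
		Linesearcher:      &optimize.Backtracking{ContractionFactor: 0.8}, // shrink steps more gently than the 0.5 default
		GradStopThreshold: 1e-8,                                           // stop earlier than the 1e-12 default
	}
	res, err := optimize.Minimize(p, []float64{-1.2, 1}, nil, method)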
vendor/gonum.org/v1/gonum/optimize/guessandcheck.go (+92 lines, generated, vendored, new file)
@@ -0,0 +1,92 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/stat/distmv"
)

var _ Method = (*GuessAndCheck)(nil)

// GuessAndCheck is a global optimizer that evaluates the function at random
// locations. Not a good optimizer, but useful for comparison and debugging.
type GuessAndCheck struct {
	Rander distmv.Rander

	bestF float64
	bestX []float64
}

func (*GuessAndCheck) Uses(has Available) (uses Available, err error) {
	return has.function()
}

func (g *GuessAndCheck) Init(dim, tasks int) int {
	if dim <= 0 {
		panic(nonpositiveDimension)
	}
	if tasks < 0 {
		panic(negativeTasks)
	}
	g.bestF = math.Inf(1)
	g.bestX = resize(g.bestX, dim)
	return tasks
}

func (g *GuessAndCheck) sendNewLoc(operation chan<- Task, task Task) {
	g.Rander.Rand(task.X)
	task.Op = FuncEvaluation
	operation <- task
}

func (g *GuessAndCheck) updateMajor(operation chan<- Task, task Task) {
	// Update the best value seen so far, and send a MajorIteration.
	if task.F < g.bestF {
		g.bestF = task.F
		copy(g.bestX, task.X)
	} else {
		task.F = g.bestF
		copy(task.X, g.bestX)
	}
	task.Op = MajorIteration
	operation <- task
}

func (g *GuessAndCheck) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	// Send initial tasks to evaluate.
	for _, task := range tasks {
		g.sendNewLoc(operation, task)
	}

	// Read from the channel until PostIteration is sent.
Loop:
	for {
		task := <-result
		switch task.Op {
		default:
			panic("unknown operation")
		case PostIteration:
			break Loop
		case MajorIteration:
			g.sendNewLoc(operation, task)
		case FuncEvaluation:
			g.updateMajor(operation, task)
		}
	}

	// PostIteration was sent. Update the best new values.
	for task := range result {
		switch task.Op {
		default:
			panic("unknown operation")
		case MajorIteration:
		case FuncEvaluation:
			g.updateMajor(operation, task)
		}
	}
	close(operation)
}
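A hedged usage sketch for GuessAndCheck, not part of the vendored file. It assumes distmv.NewUniform and r1.Interval from the gonum stat/distmv and spatial/r1 packages, and the golang.org/x/exp/rand source type that gonum uses; check those signatures against the vendored versions before relying on this:

package main

import (
	"fmt"

	"golang.org/x/exp/rand"

	"gonum.org/v1/gonum/optimize"
	"gonum.org/v1/gonum/spatial/r1"
	"gonum.org/v1/gonum/stat/distmv"
)

func main() {
	p := optimize.Problem{
		Func: func(x []float64) float64 { return x[0]*x[0] + x[1]*x[1] },
	}
	// Sample candidate points uniformly from [-1, 1]^2.
	bounds := []r1.Interval{{Min: -1, Max: 1}, {Min: -1, Max: 1}}
	u := distmv.NewUniform(bounds, rand.NewSource(1))
	// Without an evaluation budget the random search has no natural end.
	settings := &optimize.Settings{FuncEvaluations: 1000}
	res, err := optimize.Minimize(p, []float64{0.5, 0.5}, settings, &optimize.GuessAndCheck{Rander: u})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(res.Location.X, res.Location.F)
}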
132
vendor/gonum.org/v1/gonum/optimize/interfaces.go
generated
vendored
Normal file
@@ -0,0 +1,132 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

// A localMethod can optimize an objective function.
//
// It uses a reverse-communication interface between the optimization method
// and the caller. Method acts as a client that asks the caller to perform
// needed operations via Operation returned from Init and Iterate methods.
// This makes the optimization algorithm independent of the user-supplied
// data and its representation, and enables automation of common operations
// like checking for (various types of) convergence and maintaining statistics.
//
// A Method can command an Evaluation, a MajorIteration or a NoOperation operation.
//
// An evaluation operation is one or more of the Evaluation operations
// (FuncEvaluation, GradEvaluation, etc.) which can be combined with
// the bitwise or operator. In an evaluation operation, the requested fields of
// Problem will be evaluated at the point specified in Location.X.
// The corresponding fields of Location will be filled with the results that
// can be retrieved upon the next call to Iterate. The Method interface
// requires that entries of Location are not modified aside from the commanded
// evaluations. Thus, the type implementing Method may use multiple Operations
// to set the Location fields at a particular x value.
//
// Instead of an Evaluation, a Method may declare MajorIteration. In
// a MajorIteration, the values in the fields of Location are treated as
// a potential optimizer. The convergence of the optimization routine
// (GradientThreshold, etc.) is checked at this new best point. In
// a MajorIteration, the fields of Location must be valid and consistent.
//
// A Method must not return InitIteration and PostIteration operations. These are
// reserved for clients to be passed to Recorders. A Method must also not
// combine the Evaluation operations with the Iteration operations.
type localMethod interface {
	// initLocal initializes the method based on the initial data in loc, updates it
	// and returns the first operation to be carried out by the caller.
	// The initial location must be valid as specified by needs.
	initLocal(loc *Location) (Operation, error)

	// iterateLocal retrieves data from loc, performs one iteration of the method,
	// updates loc and returns the next operation.
	iterateLocal(loc *Location) (Operation, error)

	needser
}

type needser interface {
	// needs specifies information about the objective function needed by the
	// optimizer beyond just the function value. The information is used
	// internally for initialization and must match evaluation types returned
	// by initLocal and iterateLocal during the optimization process.
	needs() struct {
		Gradient bool
		Hessian  bool
	}
}

// Statuser can report the status and any error. It is intended for methods as
// an additional error reporting mechanism apart from the errors returned from
// Init and Iterate.
type Statuser interface {
	Status() (Status, error)
}

// Linesearcher is a type that can perform a line search. It tries to find an
// (approximate) minimum of the objective function along the search direction
// dir_k starting at the most recent location x_k, i.e., it tries to minimize
// the function
//
//	φ(step) := f(x_k + step * dir_k) where step > 0.
//
// Typically, a Linesearcher will be used in conjunction with LinesearchMethod
// for performing gradient-based optimization through sequential line searches.
type Linesearcher interface {
	// Init initializes the Linesearcher and a new line search. Value and
	// derivative contain φ(0) and φ'(0), respectively, and step contains the
	// first trial step length. It returns an Operation that must be one of
	// FuncEvaluation, GradEvaluation, FuncEvaluation|GradEvaluation. The
	// caller must evaluate φ(step), φ'(step), or both, respectively, and pass
	// the result to Linesearcher in value and derivative arguments to Iterate.
	Init(value, derivative float64, step float64) Operation

	// Iterate takes in the values of φ and φ' evaluated at the previous step
	// and returns the next operation.
	//
	// If op is one of FuncEvaluation, GradEvaluation,
	// FuncEvaluation|GradEvaluation, the caller must evaluate φ(step),
	// φ'(step), or both, respectively, and pass the result to Linesearcher in
	// value and derivative arguments on the next call to Iterate.
	//
	// If op is MajorIteration, a sufficiently accurate minimum of φ has been
	// found at the previous step and the line search has concluded. Init must
	// be called again to initialize a new line search.
	//
	// If err is nil, op must not specify another operation. If err is not nil,
	// the values of op and step are undefined.
	Iterate(value, derivative float64) (op Operation, step float64, err error)
}

// NextDirectioner implements a strategy for computing a new line search
// direction at each major iteration. Typically, a NextDirectioner will be
// used in conjunction with LinesearchMethod for performing gradient-based
// optimization through sequential line searches.
type NextDirectioner interface {
	// InitDirection initializes the NextDirectioner at the given starting location,
	// putting the initial direction in place into dir, and returning the initial
	// step size. InitDirection must not modify Location.
	InitDirection(loc *Location, dir []float64) (step float64)

	// NextDirection updates the search direction and step size. Location is
	// the location seen at the conclusion of the most recent linesearch. The
	// next search direction is put in place into dir, and the next step size
	// is returned. NextDirection must not modify Location.
	NextDirection(loc *Location, dir []float64) (step float64)
}

// StepSizer can set the next step size of the optimization given the last Location.
// The returned step size must be positive.
type StepSizer interface {
	Init(loc *Location, dir []float64) float64
	StepSize(loc *Location, dir []float64) float64
}

// A Recorder can record the progress of the optimization, for example to print
// the progress to standard output or to a log file. A Recorder must not modify
// any data.
type Recorder interface {
	Init() error
	Record(*Location, Operation, *Stats) error
}
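To make the reverse-communication contract above concrete, here is a minimal sketch (not part of the package) of a caller driving a Linesearcher by hand; phi and dphi stand in for φ and φ' along a fixed descent direction:

package main

import (
	"fmt"
	"math"

	"gonum.org/v1/gonum/optimize"
)

// lineSearch drives a Linesearcher until it declares MajorIteration and
// returns the accepted step length.
func lineSearch(ls optimize.Linesearcher, phi, dphi func(float64) float64) float64 {
	step := 1.0
	op := ls.Init(phi(0), dphi(0), step)
	for {
		// Evaluate only what the Linesearcher asked for.
		f, g := math.NaN(), math.NaN()
		if op&optimize.FuncEvaluation != 0 {
			f = phi(step)
		}
		if op&optimize.GradEvaluation != 0 {
			g = dphi(step)
		}
		var err error
		op, step, err = ls.Iterate(f, g)
		if err != nil {
			panic(err)
		}
		if op == optimize.MajorIteration {
			return step
		}
	}
}

func main() {
	// φ(t) = (t-0.3)^2 along the line, so φ'(0) = -0.6 < 0 as required.
	phi := func(t float64) float64 { return (t - 0.3) * (t - 0.3) }
	dphi := func(t float64) float64 { return 2 * (t - 0.3) }
	fmt.Println(lineSearch(&optimize.Backtracking{}, phi, dphi))
}

With the Backtracking linesearcher from this package this prints 0.5, the first contracted step that satisfies the Armijo condition.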
199
vendor/gonum.org/v1/gonum/optimize/lbfgs.go
generated
vendored
Normal file
@@ -0,0 +1,199 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"gonum.org/v1/gonum/floats"
)

var (
	_ Method          = (*LBFGS)(nil)
	_ localMethod     = (*LBFGS)(nil)
	_ NextDirectioner = (*LBFGS)(nil)
)

// LBFGS implements the limited-memory BFGS method for gradient-based
// unconstrained minimization.
//
// It stores a modified version of the inverse Hessian approximation H
// implicitly from the last Store iterations while the normal BFGS method
// stores and manipulates H directly as a dense matrix. Therefore LBFGS is more
// appropriate than BFGS for large problems as the cost of LBFGS scales as
// O(Store * dim) while BFGS scales as O(dim^2). The "forgetful" nature of
// LBFGS may also make it perform better than BFGS for functions with Hessians
// that vary rapidly spatially.
type LBFGS struct {
	// Linesearcher selects suitable steps along the descent direction.
	// Accepted steps should satisfy the strong Wolfe conditions.
	// If Linesearcher is nil, a reasonable default will be chosen.
	Linesearcher Linesearcher
	// Store is the size of the limited-memory storage.
	// If Store is 0, it will be defaulted to 15.
	Store int
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	dim  int       // Dimension of the problem
	x    []float64 // Location at the last major iteration
	grad []float64 // Gradient at the last major iteration

	// History
	oldest int         // Index of the oldest element of the history
	y      [][]float64 // Last Store values of y
	s      [][]float64 // Last Store values of s
	rho    []float64   // Last Store values of rho
	a      []float64   // Cache of Hessian updates
}

func (l *LBFGS) Status() (Status, error) {
	return l.status, l.err
}

func (*LBFGS) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

func (l *LBFGS) Init(dim, tasks int) int {
	l.status = NotTerminated
	l.err = nil
	return 1
}

func (l *LBFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	l.status, l.err = localOptimizer{}.run(l, l.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (l *LBFGS) initLocal(loc *Location) (Operation, error) {
	if l.Linesearcher == nil {
		l.Linesearcher = &Bisection{}
	}
	if l.Store == 0 {
		l.Store = 15
	}

	if l.ls == nil {
		l.ls = &LinesearchMethod{}
	}
	l.ls.Linesearcher = l.Linesearcher
	l.ls.NextDirectioner = l

	return l.ls.Init(loc)
}

func (l *LBFGS) iterateLocal(loc *Location) (Operation, error) {
	return l.ls.Iterate(loc)
}

func (l *LBFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	l.dim = dim
	l.oldest = 0

	l.a = resize(l.a, l.Store)
	l.rho = resize(l.rho, l.Store)
	l.y = l.initHistory(l.y)
	l.s = l.initHistory(l.s)

	l.x = resize(l.x, dim)
	copy(l.x, loc.X)

	l.grad = resize(l.grad, dim)
	copy(l.grad, loc.Gradient)

	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)
	return 1 / floats.Norm(dir, 2)
}

func (l *LBFGS) initHistory(hist [][]float64) [][]float64 {
	c := cap(hist)
	if c < l.Store {
		n := make([][]float64, l.Store-c)
		hist = append(hist[:c], n...)
	}
	hist = hist[:l.Store]
	for i := range hist {
		hist[i] = resize(hist[i], l.dim)
		for j := range hist[i] {
			hist[i][j] = 0
		}
	}
	return hist
}

func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// Uses the two-loop recursion as described in
	// Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). Springer (2006), chapter 7, page 178.

	if len(loc.X) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(dir) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}

	y := l.y[l.oldest]
	floats.SubTo(y, loc.Gradient, l.grad)
	s := l.s[l.oldest]
	floats.SubTo(s, loc.X, l.x)
	sDotY := floats.Dot(s, y)
	l.rho[l.oldest] = 1 / sDotY

	l.oldest = (l.oldest + 1) % l.Store

	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)
	copy(dir, loc.Gradient)

	// Start with the most recent element and go backward.
	for i := 0; i < l.Store; i++ {
		idx := l.oldest - i - 1
		if idx < 0 {
			idx += l.Store
		}
		l.a[idx] = l.rho[idx] * floats.Dot(l.s[idx], dir)
		floats.AddScaled(dir, -l.a[idx], l.y[idx])
	}

	// Scale the initial Hessian.
	gamma := sDotY / floats.Dot(y, y)
	floats.Scale(gamma, dir)

	// Start with the oldest element and go forward.
	for i := 0; i < l.Store; i++ {
		idx := i + l.oldest
		if idx >= l.Store {
			idx -= l.Store
		}
		beta := l.rho[idx] * floats.Dot(l.y[idx], dir)
		floats.AddScaled(dir, l.a[idx]-beta, l.s[idx])
	}

	// dir contains H^{-1} * g, so flip the direction for minimization.
	floats.Scale(-1, dir)

	return 1
}

func (*LBFGS) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}
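A short configuration sketch, with problem and x0 assumed defined as in the earlier examples. Store and Linesearcher are the usual knobs: a larger Store trades memory (the O(Store*dim) cost noted above) for a better implicit Hessian, and MoreThuente (defined later in this commit) satisfies the strong Wolfe conditions the method expects:

// Sketch: configure LBFGS before handing it to Minimize.
method := &optimize.LBFGS{
	Store:        30,
	Linesearcher: &optimize.MoreThuente{},
}
// res, err := optimize.Minimize(problem, x0, nil, method)
_ = method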
218
vendor/gonum.org/v1/gonum/optimize/linesearch.go
generated
vendored
Normal file
@@ -0,0 +1,218 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/floats"
)

// LinesearchMethod represents an abstract optimization method in which a
// function is optimized through successive line search optimizations.
type LinesearchMethod struct {
	// NextDirectioner specifies the search direction of each linesearch.
	NextDirectioner NextDirectioner
	// Linesearcher performs a linesearch along the search direction.
	Linesearcher Linesearcher

	x   []float64 // Starting point for the current iteration.
	dir []float64 // Search direction for the current iteration.

	first     bool      // Indicator of the first iteration.
	nextMajor bool      // Indicates that MajorIteration must be commanded at the next call to Iterate.
	eval      Operation // Indicator of valid fields in Location.

	lastStep float64   // Step taken from x in the previous call to Iterate.
	lastOp   Operation // Operation returned from the previous call to Iterate.
}

func (ls *LinesearchMethod) Init(loc *Location) (Operation, error) {
	if loc.Gradient == nil {
		panic("linesearch: gradient is nil")
	}

	dim := len(loc.X)
	ls.x = resize(ls.x, dim)
	ls.dir = resize(ls.dir, dim)

	ls.first = true
	ls.nextMajor = false

	// Indicate that all fields of loc are valid.
	ls.eval = FuncEvaluation | GradEvaluation
	if loc.Hessian != nil {
		ls.eval |= HessEvaluation
	}

	ls.lastStep = math.NaN()
	ls.lastOp = NoOperation

	return ls.initNextLinesearch(loc)
}

func (ls *LinesearchMethod) Iterate(loc *Location) (Operation, error) {
	switch ls.lastOp {
	case NoOperation:
		// TODO(vladimir-ch): Either Init has not been called, or the caller is
		// trying to resume the optimization run after Iterate previously
		// returned with an error. Decide what is the proper thing to do. See also #125.

	case MajorIteration:
		// The previously updated location did not converge the full
		// optimization. Initialize a new Linesearch.
		return ls.initNextLinesearch(loc)

	default:
		// Update the indicator of valid fields of loc.
		ls.eval |= ls.lastOp

		if ls.nextMajor {
			ls.nextMajor = false

			// Linesearcher previously finished, and the invalid fields of loc
			// have now been validated. Announce MajorIteration.
			ls.lastOp = MajorIteration
			return ls.lastOp, nil
		}
	}

	// Continue the linesearch.

	f := math.NaN()
	if ls.eval&FuncEvaluation != 0 {
		f = loc.F
	}
	projGrad := math.NaN()
	if ls.eval&GradEvaluation != 0 {
		projGrad = floats.Dot(loc.Gradient, ls.dir)
	}
	op, step, err := ls.Linesearcher.Iterate(f, projGrad)
	if err != nil {
		return ls.error(err)
	}

	switch op {
	case MajorIteration:
		// Linesearch has been finished.

		ls.lastOp = complementEval(loc, ls.eval)
		if ls.lastOp == NoOperation {
			// loc is complete, MajorIteration can be declared directly.
			ls.lastOp = MajorIteration
		} else {
			// Declare MajorIteration on the next call to Iterate.
			ls.nextMajor = true
		}

	case FuncEvaluation, GradEvaluation, FuncEvaluation | GradEvaluation:
		if step != ls.lastStep {
			// We are moving to a new location, and not, say, evaluating extra
			// information at the current location.

			// Compute the next evaluation point and store it in loc.X.
			floats.AddScaledTo(loc.X, ls.x, step, ls.dir)
			if floats.Equal(ls.x, loc.X) {
				// Step size has become so small that the next evaluation point is
				// indistinguishable from the starting point for the current
				// iteration due to rounding errors.
				return ls.error(ErrNoProgress)
			}
			ls.lastStep = step
			ls.eval = NoOperation // Indicate all invalid fields of loc.
		}
		ls.lastOp = op

	default:
		panic("linesearch: Linesearcher returned invalid operation")
	}

	return ls.lastOp, nil
}

func (ls *LinesearchMethod) error(err error) (Operation, error) {
	ls.lastOp = NoOperation
	return ls.lastOp, err
}

// initNextLinesearch initializes the next linesearch using the previous
// complete location stored in loc. It fills loc.X and returns an evaluation
// to be performed at loc.X.
func (ls *LinesearchMethod) initNextLinesearch(loc *Location) (Operation, error) {
	copy(ls.x, loc.X)

	var step float64
	if ls.first {
		ls.first = false
		step = ls.NextDirectioner.InitDirection(loc, ls.dir)
	} else {
		step = ls.NextDirectioner.NextDirection(loc, ls.dir)
	}

	projGrad := floats.Dot(loc.Gradient, ls.dir)
	if projGrad >= 0 {
		return ls.error(ErrNonDescentDirection)
	}

	op := ls.Linesearcher.Init(loc.F, projGrad, step)
	switch op {
	case FuncEvaluation, GradEvaluation, FuncEvaluation | GradEvaluation:
	default:
		panic("linesearch: Linesearcher returned invalid operation")
	}

	floats.AddScaledTo(loc.X, ls.x, step, ls.dir)
	if floats.Equal(ls.x, loc.X) {
		// Step size is so small that the next evaluation point is
		// indistinguishable from the starting point for the current iteration
		// due to rounding errors.
		return ls.error(ErrNoProgress)
	}

	ls.lastStep = step
	ls.eval = NoOperation // Invalidate all fields of loc.

	ls.lastOp = op
	return ls.lastOp, nil
}

// ArmijoConditionMet returns true if the Armijo condition (aka sufficient
// decrease) has been met. Under normal conditions, the following should be
// true, though this is not enforced:
//   - initGrad < 0
//   - step > 0
//   - 0 < decrease < 1
func ArmijoConditionMet(currObj, initObj, initGrad, step, decrease float64) bool {
	return currObj <= initObj+decrease*step*initGrad
}

// StrongWolfeConditionsMet returns true if the strong Wolfe conditions have been met.
// The strong Wolfe conditions ensure sufficient decrease in the function
// value, and sufficient decrease in the magnitude of the projected gradient.
// Under normal conditions, the following should be true, though this is not
// enforced:
//   - initGrad < 0
//   - step > 0
//   - 0 <= decrease < curvature < 1
func StrongWolfeConditionsMet(currObj, currGrad, initObj, initGrad, step, decrease, curvature float64) bool {
	if currObj > initObj+decrease*step*initGrad {
		return false
	}
	return math.Abs(currGrad) < curvature*math.Abs(initGrad)
}

// WeakWolfeConditionsMet returns true if the weak Wolfe conditions have been met.
// The weak Wolfe conditions ensure sufficient decrease in the function value,
// and sufficient decrease in the value of the projected gradient. Under normal
// conditions, the following should be true, though this is not enforced:
//   - initGrad < 0
//   - step > 0
//   - 0 <= decrease < curvature < 1
func WeakWolfeConditionsMet(currObj, currGrad, initObj, initGrad, step, decrease, curvature float64) bool {
	if currObj > initObj+decrease*step*initGrad {
		return false
	}
	return currGrad >= curvature*initGrad
}
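A small worked check of the exported condition helpers above (not part of the vendored file). For φ(t) = (t-1)^2 along the line, φ(0) = 1 and φ'(0) = -2; the trial step 0.5 gives φ(0.5) = 0.25 and φ'(0.5) = -1:

package main

import (
	"fmt"

	"gonum.org/v1/gonum/optimize"
)

func main() {
	const (
		initObj, initGrad = 1.0, -2.0
		step              = 0.5
		currObj, currGrad = 0.25, -1.0
	)
	// Armijo: 0.25 <= 1 + 1e-4*0.5*(-2) = 0.9999, so true.
	fmt.Println(optimize.ArmijoConditionMet(currObj, initObj, initGrad, step, 1e-4))
	// Strong Wolfe adds the curvature test: |-1| < 0.9*|-2| = 1.8, so true.
	fmt.Println(optimize.StrongWolfeConditionsMet(currObj, currGrad, initObj, initGrad, step, 1e-4, 0.9))
}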
123
vendor/gonum.org/v1/gonum/optimize/listsearch.go
generated
vendored
Normal file
@@ -0,0 +1,123 @@
// Copyright ©2018 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

var _ Method = (*ListSearch)(nil)

// ListSearch finds the optimum location from a specified list of possible
// optimum locations.
type ListSearch struct {
	// Locs is the list of locations to optimize. Each row of Locs is a location
	// to optimize. The number of columns of Locs must match the dimension
	// passed to Init, and Locs must have at least one row.
	Locs mat.Matrix

	eval    int
	rows    int
	bestF   float64
	bestIdx int
}

func (*ListSearch) Uses(has Available) (uses Available, err error) {
	return has.function()
}

// Init initializes the method for optimization. The input dimension
// must match the number of columns of Locs.
func (l *ListSearch) Init(dim, tasks int) int {
	if dim <= 0 {
		panic(nonpositiveDimension)
	}
	if tasks < 0 {
		panic(negativeTasks)
	}
	r, c := l.Locs.Dims()
	if r == 0 {
		panic("listsearch: list matrix has no rows")
	}
	if c != dim {
		panic("listsearch: supplied dimension does not match list columns")
	}
	l.eval = 0
	l.rows = r
	l.bestF = math.Inf(1)
	l.bestIdx = -1
	return min(r, tasks)
}

func (l *ListSearch) sendNewLoc(operation chan<- Task, task Task) {
	task.Op = FuncEvaluation
	task.ID = l.eval
	mat.Row(task.X, l.eval, l.Locs)
	l.eval++
	operation <- task
}

func (l *ListSearch) updateMajor(operation chan<- Task, task Task) {
	// Update the best value seen so far, and send a MajorIteration.
	if task.F < l.bestF {
		l.bestF = task.F
		l.bestIdx = task.ID
	} else {
		task.F = l.bestF
		mat.Row(task.X, l.bestIdx, l.Locs)
	}
	task.Op = MajorIteration
	operation <- task
}

func (l *ListSearch) Status() (Status, error) {
	if l.eval < l.rows {
		return NotTerminated, nil
	}
	return MethodConverge, nil
}

func (l *ListSearch) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	// Send initial tasks to evaluate.
	for _, task := range tasks {
		l.sendNewLoc(operation, task)
	}
	// Read from the channel until PostIteration is sent or until the list of
	// tasks is exhausted.
Loop:
	for {
		task := <-result
		switch task.Op {
		default:
			panic("unknown operation")
		case PostIteration:
			break Loop
		case MajorIteration:
			if l.eval == l.rows {
				task.Op = MethodDone
				operation <- task
				continue
			}
			l.sendNewLoc(operation, task)
		case FuncEvaluation:
			l.updateMajor(operation, task)
		}
	}

	// PostIteration was sent, or the list has been completed. Read in the
	// final list of tasks.
	for task := range result {
		switch task.Op {
		default:
			panic("unknown operation")
		case MajorIteration:
		case FuncEvaluation:
			l.updateMajor(operation, task)
		}
	}
	close(operation)
}
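A runnable sketch of ListSearch (not part of the vendored file): each candidate is a row of a mat.Dense, and initX only fixes the problem dimension:

package main

import (
	"fmt"

	"gonum.org/v1/gonum/mat"
	"gonum.org/v1/gonum/optimize"
)

func main() {
	p := optimize.Problem{
		Func: func(x []float64) float64 { return x[0]*x[0] + x[1]*x[1] },
	}
	// Three candidate locations, one per row.
	locs := mat.NewDense(3, 2, []float64{
		1, 1,
		0.5, -0.25,
		-2, 3,
	})
	res, err := optimize.Minimize(p, make([]float64, 2), nil, &optimize.ListSearch{Locs: locs})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(res.Location.X, res.Location.F) // Expect the second row, [0.5 -0.25].
}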
146
vendor/gonum.org/v1/gonum/optimize/local.go
generated
vendored
Normal file
@@ -0,0 +1,146 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/floats"
)

// localOptimizer is a helper type for running an optimization using a localMethod.
type localOptimizer struct{}

// run controls the optimization run for a localMethod. The calling method
// must close the operation channel at the conclusion of the optimization. This
// provides a happens-before relationship between the return of status and the
// closure of operation, and thus a call to method.Status (if necessary).
func (l localOptimizer) run(method localMethod, gradThresh float64, operation chan<- Task, result <-chan Task, tasks []Task) (Status, error) {
	// Local methods start with a fully-specified initial location.
	task := tasks[0]
	task = l.initialLocation(operation, result, task, method)
	if task.Op == PostIteration {
		l.finish(operation, result)
		return NotTerminated, nil
	}
	status, err := l.checkStartingLocation(task, gradThresh)
	if err != nil {
		l.finishMethodDone(operation, result, task)
		return status, err
	}

	// Send a major iteration with the starting location.
	task.Op = MajorIteration
	operation <- task
	task = <-result
	if task.Op == PostIteration {
		l.finish(operation, result)
		return NotTerminated, nil
	}
	op, err := method.initLocal(task.Location)
	if err != nil {
		l.finishMethodDone(operation, result, task)
		return Failure, err
	}
	task.Op = op
	operation <- task
Loop:
	for {
		r := <-result
		switch r.Op {
		case PostIteration:
			break Loop
		case MajorIteration:
			// The last operation was a MajorIteration. Check if the gradient
			// is below the threshold.
			if status := l.checkGradientConvergence(r.Gradient, gradThresh); status != NotTerminated {
				l.finishMethodDone(operation, result, task)
				return GradientThreshold, nil
			}
			fallthrough
		default:
			op, err := method.iterateLocal(r.Location)
			if err != nil {
				l.finishMethodDone(operation, result, r)
				return Failure, err
			}
			r.Op = op
			operation <- r
		}
	}
	l.finish(operation, result)
	return NotTerminated, nil
}

// initialOperation returns the Operation needed to fill the initial location
// based on the needs of the method and the values already supplied.
func (localOptimizer) initialOperation(task Task, n needser) Operation {
	var newOp Operation
	op := task.Op
	if op&FuncEvaluation == 0 {
		newOp |= FuncEvaluation
	}
	needs := n.needs()
	if needs.Gradient && op&GradEvaluation == 0 {
		newOp |= GradEvaluation
	}
	if needs.Hessian && op&HessEvaluation == 0 {
		newOp |= HessEvaluation
	}
	return newOp
}

// initialLocation fills the initial location based on the needs of the method.
// The task passed to initialLocation should be the first task sent in Run.
func (l localOptimizer) initialLocation(operation chan<- Task, result <-chan Task, task Task, needs needser) Task {
	task.Op = l.initialOperation(task, needs)
	operation <- task
	return <-result
}

func (l localOptimizer) checkStartingLocation(task Task, gradThresh float64) (Status, error) {
	if math.IsInf(task.F, 1) || math.IsNaN(task.F) {
		return Failure, ErrFunc(task.F)
	}
	for i, v := range task.Gradient {
		if math.IsInf(v, 0) || math.IsNaN(v) {
			return Failure, ErrGrad{Grad: v, Index: i}
		}
	}
	status := l.checkGradientConvergence(task.Gradient, gradThresh)
	return status, nil
}

func (localOptimizer) checkGradientConvergence(gradient []float64, gradThresh float64) Status {
	if gradient == nil || math.IsNaN(gradThresh) {
		return NotTerminated
	}
	if gradThresh == 0 {
		gradThresh = defaultGradientAbsTol
	}
	if norm := floats.Norm(gradient, math.Inf(1)); norm < gradThresh {
		return GradientThreshold
	}
	return NotTerminated
}

// finish completes the channel operations to finish an optimization.
func (localOptimizer) finish(operation chan<- Task, result <-chan Task) {
	// Guarantee that result is closed before operation is closed.
	for range result {
	}
}

// finishMethodDone sends a MethodDone signal on operation, reads the result,
// and completes the channel operations to finish an optimization.
func (l localOptimizer) finishMethodDone(operation chan<- Task, result <-chan Task, task Task) {
	task.Op = MethodDone
	operation <- task
	task = <-result
	if task.Op != PostIteration {
		panic("optimize: task should have returned post iteration")
	}
	l.finish(operation, result)
}
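A standalone sketch (a hypothetical helper, mirroring the unexported checkGradientConvergence above) of the sup-norm test used at every MajorIteration; 1e-12 stands in for the package's defaultGradientAbsTol, whose value is documented on the GradStopThreshold fields earlier in this commit:

package main

import (
	"fmt"
	"math"

	"gonum.org/v1/gonum/floats"
)

// gradientConverged reports convergence when max_i |grad_i| < thresh.
func gradientConverged(grad []float64, thresh float64) bool {
	if grad == nil || math.IsNaN(thresh) {
		return false // NaN threshold disables the test, as in the package.
	}
	if thresh == 0 {
		thresh = 1e-12 // stands in for defaultGradientAbsTol
	}
	return floats.Norm(grad, math.Inf(1)) < thresh
}

func main() {
	fmt.Println(gradientConverged([]float64{1e-13, -5e-14}, 0)) // true
	fmt.Println(gradientConverged([]float64{1e-3, 0}, 0))       // false
}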
602
vendor/gonum.org/v1/gonum/optimize/minimize.go
generated
vendored
Normal file
@@ -0,0 +1,602 @@
|
||||
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package optimize
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"gonum.org/v1/gonum/floats"
|
||||
"gonum.org/v1/gonum/mat"
|
||||
)
|
||||
|
||||
const (
|
||||
nonpositiveDimension string = "optimize: non-positive input dimension"
|
||||
negativeTasks string = "optimize: negative input number of tasks"
|
||||
)
|
||||
|
||||
func min(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// Task is a type to communicate between the Method and the outer
|
||||
// calling script.
|
||||
type Task struct {
|
||||
ID int
|
||||
Op Operation
|
||||
*Location
|
||||
}
|
||||
|
||||
// Location represents a location in the optimization procedure.
|
||||
type Location struct {
|
||||
// X is the function input for the location.
|
||||
X []float64
|
||||
// F is the result of evaluating the function at X.
|
||||
F float64
|
||||
// Gradient holds the first-order partial derivatives
|
||||
// of the function at X.
|
||||
// The length of Gradient must match the length of X
|
||||
// or be zero. If the capacity of Gradient is less
|
||||
// than the length of X, a new slice will be allocated.
|
||||
Gradient []float64
|
||||
// Hessian holds the second-order partial derivatives
|
||||
// of the function at X.
|
||||
// The dimensions of Hessian must match the length of X
|
||||
// or Hessian must be nil or empty. If Hessian is nil
|
||||
// a new mat.SymDense will be allocated, if it is empty
|
||||
// it will be resized to match the length of X.
|
||||
Hessian *mat.SymDense
|
||||
}
|
||||
|
||||
// Method is a type which can search for an optimum of an objective function.
|
||||
type Method interface {
|
||||
// Init initializes the method for optimization. The inputs are
|
||||
// the problem dimension and number of available concurrent tasks.
|
||||
//
|
||||
// Init returns the number of concurrent processes to use, which must be
|
||||
// less than or equal to tasks.
|
||||
Init(dim, tasks int) (concurrent int)
|
||||
// Run runs an optimization. The method sends Tasks on
|
||||
// the operation channel (for performing function evaluations, major
|
||||
// iterations, etc.). The result of the tasks will be returned on Result.
|
||||
// See the documentation for Operation types for the possible operations.
|
||||
//
|
||||
// The caller of Run will signal the termination of the optimization
|
||||
// (i.e. convergence from user settings) by sending a task with a PostIteration
|
||||
// Op field on result. More tasks may still be sent on operation after this
|
||||
// occurs, but only MajorIteration operations will still be conducted
|
||||
// appropriately. Thus, it can not be guaranteed that all Evaluations sent
|
||||
// on operation will be evaluated, however if an Evaluation is started,
|
||||
// the results of that evaluation will be sent on results.
|
||||
//
|
||||
// The Method must read from the result channel until it is closed.
|
||||
// During this, the Method may want to send new MajorIteration(s) on
|
||||
// operation. Method then must close operation, and return from Run.
|
||||
// These steps must establish a "happens-before" relationship between result
|
||||
// being closed (externally) and Run closing operation, for example
|
||||
// by using a range loop to read from result even if no results are expected.
|
||||
//
|
||||
// The last parameter to Run is a slice of tasks with length equal to
|
||||
// the return from Init. Task has an ID field which may be
|
||||
// set and modified by Method, and must not be modified by the caller.
|
||||
// The first element of tasks contains information about the initial location.
|
||||
// The Location.X field is always valid. The Operation field specifies which
|
||||
// other values of Location are known. If Operation == NoOperation, none of
|
||||
// the values should be used, otherwise the Evaluation operations will be
|
||||
// composed to specify the valid fields. Methods are free to use or
|
||||
// ignore these values.
|
||||
//
|
||||
// Successful execution of an Operation may require the Method to modify
|
||||
// fields a Location. MajorIteration calls will not modify the values in
|
||||
// the Location, but Evaluation operations will. Methods are encouraged to
|
||||
// leave Location fields untouched to allow memory re-use. If data needs to
|
||||
// be stored, the respective field should be set to nil -- Methods should
|
||||
// not allocate Location memory themselves.
|
||||
//
|
||||
// Method may have its own specific convergence criteria, which can
|
||||
// be communicated using a MethodDone operation. This will trigger a
|
||||
// PostIteration to be sent on result, and the MethodDone task will not be
|
||||
// returned on result. The Method must implement Statuser, and the
|
||||
// call to Status must return a Status other than NotTerminated.
|
||||
//
|
||||
// The operation and result tasks are guaranteed to have a buffer length
|
||||
// equal to the return from Init.
|
||||
Run(operation chan<- Task, result <-chan Task, tasks []Task)
|
||||
// Uses checks if the Method is suited to the optimization problem. The
|
||||
// input is the available functions in Problem to call, and the returns are
|
||||
// the functions which may be used and an error if there is a mismatch
|
||||
// between the Problem and the Method's capabilities.
|
||||
Uses(has Available) (uses Available, err error)
|
||||
}
|
||||
|
||||
// Minimize uses an optimizer to search for a minimum of a function. A
|
||||
// maximization problem can be transformed into a minimization problem by
|
||||
// multiplying the function by -1.
|
||||
//
|
||||
// The first argument represents the problem to be minimized. Its fields are
|
||||
// routines that evaluate the objective function, gradient, and other
|
||||
// quantities related to the problem. The objective function, p.Func, must not
|
||||
// be nil. The optimization method used may require other fields to be non-nil
|
||||
// as specified by method.Needs. Minimize will panic if these are not met. The
|
||||
// method can be determined automatically from the supplied problem which is
|
||||
// described below.
|
||||
//
|
||||
// If p.Status is not nil, it is called before every evaluation. If the
|
||||
// returned Status is other than NotTerminated or if the error is not nil, the
|
||||
// optimization run is terminated.
|
||||
//
|
||||
// The second argument specifies the initial location for the optimization.
|
||||
// Some Methods do not require an initial location, but initX must still be
|
||||
// specified for the dimension of the optimization problem.
|
||||
//
|
||||
// The third argument contains the settings for the minimization. If settings
|
||||
// is nil, the zero value will be used, see the documentation of the Settings
|
||||
// type for more information, and see the warning below. All settings will be
|
||||
// honored for all Methods, even if that setting is counter-productive to the
|
||||
// method. Minimize cannot guarantee strict adherence to the evaluation bounds
|
||||
// specified when performing concurrent evaluations and updates.
|
||||
//
|
||||
// The final argument is the optimization method to use. If method == nil, then
|
||||
// an appropriate default is chosen based on the properties of the other arguments
|
||||
// (dimension, gradient-free or gradient-based, etc.). If method is not nil,
|
||||
// Minimize panics if the Problem is not consistent with the Method (Uses
|
||||
// returns an error).
|
||||
//
|
||||
// Minimize returns a Result struct and any error that occurred. See the
|
||||
// documentation of Result for more information.
|
||||
//
|
||||
// See the documentation for Method for the details on implementing a method.
|
||||
//
|
||||
// Be aware that the default settings of Minimize are to accurately find the
|
||||
// minimum. For certain functions and optimization methods, this can take many
|
||||
// function evaluations. The Settings input struct can be used to limit this,
|
||||
// for example by modifying the maximum function evaluations or gradient tolerance.
|
||||
func Minimize(p Problem, initX []float64, settings *Settings, method Method) (*Result, error) {
|
||||
startTime := time.Now()
|
||||
if method == nil {
|
||||
method = getDefaultMethod(&p)
|
||||
}
|
||||
if settings == nil {
|
||||
settings = &Settings{}
|
||||
}
|
||||
stats := &Stats{}
|
||||
dim := len(initX)
|
||||
err := checkOptimization(p, dim, settings.Recorder)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
optLoc := newLocation(dim) // This must have an allocated X field.
|
||||
optLoc.F = math.Inf(1)
|
||||
|
||||
initOp, initLoc := getInitLocation(dim, initX, settings.InitValues)
|
||||
|
||||
converger := settings.Converger
|
||||
if converger == nil {
|
||||
converger = defaultFunctionConverge()
|
||||
}
|
||||
converger.Init(dim)
|
||||
|
||||
stats.Runtime = time.Since(startTime)
|
||||
|
||||
// Send initial location to Recorder
|
||||
if settings.Recorder != nil {
|
||||
err = settings.Recorder.Record(optLoc, InitIteration, stats)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// Run optimization
|
||||
var status Status
|
||||
status, err = minimize(&p, method, settings, converger, stats, initOp, initLoc, optLoc, startTime)
|
||||
|
||||
// Cleanup and collect results
|
||||
if settings.Recorder != nil && err == nil {
|
||||
err = settings.Recorder.Record(optLoc, PostIteration, stats)
|
||||
}
|
||||
stats.Runtime = time.Since(startTime)
|
||||
return &Result{
|
||||
Location: *optLoc,
|
||||
Stats: *stats,
|
||||
Status: status,
|
||||
}, err
|
||||
}
|
||||
|
||||
func getDefaultMethod(p *Problem) Method {
|
||||
if p.Grad != nil {
|
||||
return &LBFGS{}
|
||||
}
|
||||
return &NelderMead{}
|
||||
}
|
||||
|
||||
// minimize performs an optimization. minimize updates the settings and optLoc,
|
||||
// and returns the final Status and error.
|
||||
func minimize(prob *Problem, method Method, settings *Settings, converger Converger, stats *Stats, initOp Operation, initLoc, optLoc *Location, startTime time.Time) (Status, error) {
|
||||
dim := len(optLoc.X)
|
||||
nTasks := settings.Concurrent
|
||||
if nTasks == 0 {
|
||||
nTasks = 1
|
||||
}
|
||||
has := availFromProblem(*prob)
|
||||
_, initErr := method.Uses(has)
|
||||
if initErr != nil {
|
||||
panic(fmt.Sprintf("optimize: specified method inconsistent with Problem: %v", initErr))
|
||||
}
|
||||
newNTasks := method.Init(dim, nTasks)
|
||||
if newNTasks > nTasks {
|
||||
panic("optimize: too many tasks returned by Method")
|
||||
}
|
||||
nTasks = newNTasks
|
||||
|
||||
// Launch the method. The method communicates tasks using the operations
|
||||
// channel, and results is used to return the evaluated results.
|
||||
operations := make(chan Task, nTasks)
|
||||
results := make(chan Task, nTasks)
|
||||
go func() {
|
||||
tasks := make([]Task, nTasks)
|
||||
tasks[0].Location = initLoc
|
||||
tasks[0].Op = initOp
|
||||
for i := 1; i < len(tasks); i++ {
|
||||
tasks[i].Location = newLocation(dim)
|
||||
}
|
||||
method.Run(operations, results, tasks)
|
||||
}()
|
||||
|
||||
// Algorithmic Overview:
|
||||
// There are three pieces to performing a concurrent optimization,
|
||||
// the distributor, the workers, and the stats combiner. At a high level,
|
||||
// the distributor reads in tasks sent by method, sending evaluations to the
|
||||
// workers, and forwarding other operations to the statsCombiner. The workers
|
||||
// read these forwarded evaluation tasks, evaluate the relevant parts of Problem
|
||||
// and forward the results on to the stats combiner. The stats combiner reads
|
||||
// in results from the workers, as well as tasks from the distributor, and
|
||||
// uses them to update optimization statistics (function evaluations, etc.)
|
||||
// and to check optimization convergence.
|
||||
//
|
||||
// The complicated part is correctly shutting down the optimization. The
|
||||
// procedure is as follows. First, the stats combiner closes done and sends
|
||||
// a PostIteration to the method. The distributor then reads that done has
|
||||
// been closed, and closes the channel with the workers. At this point, no
|
||||
// more evaluation operations will be executed. As the workers finish their
|
||||
// evaluations, they forward the results onto the stats combiner, and then
|
||||
// signal their shutdown to the stats combiner. When all workers have successfully
|
||||
// finished, the stats combiner closes the results channel, signaling to the
|
||||
// method that all results have been collected. At this point, the method
|
||||
// may send MajorIteration(s) to update an optimum location based on these
|
||||
// last returned results, and then the method will close the operations channel.
|
||||
// The Method must ensure that the closing of results happens before the
|
||||
// closing of operations in order to ensure proper shutdown order.
|
||||
// Now that no more tasks will be commanded by the method, the distributor
|
||||
// closes statsChan, and with no more statistics to update the optimization
|
||||
// concludes.
|
||||
|
||||
workerChan := make(chan Task) // Delegate tasks to the workers.
|
||||
statsChan := make(chan Task) // Send evaluation updates.
|
||||
done := make(chan struct{}) // Communicate the optimization is done.
|
||||
|
||||
// Read tasks from the method and distribute as appropriate.
|
||||
distributor := func() {
|
||||
for {
|
||||
select {
|
||||
case task := <-operations:
|
||||
switch task.Op {
|
||||
case InitIteration:
|
||||
panic("optimize: Method returned InitIteration")
|
||||
case PostIteration:
|
||||
panic("optimize: Method returned PostIteration")
|
||||
case NoOperation, MajorIteration, MethodDone:
|
||||
statsChan <- task
|
||||
default:
|
||||
if !task.Op.isEvaluation() {
|
||||
panic("optimize: expecting evaluation operation")
|
||||
}
|
||||
workerChan <- task
|
||||
}
|
||||
case <-done:
|
||||
// No more evaluations will be sent, shut down the workers, and
|
||||
// read the final tasks.
|
||||
close(workerChan)
|
||||
for task := range operations {
|
||||
if task.Op == MajorIteration {
|
||||
statsChan <- task
|
||||
}
|
||||
}
|
||||
close(statsChan)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
go distributor()
|
||||
|
||||
// Evaluate the Problem concurrently.
|
||||
worker := func() {
|
||||
x := make([]float64, dim)
|
||||
for task := range workerChan {
|
||||
evaluate(prob, task.Location, task.Op, x)
|
||||
statsChan <- task
|
||||
}
|
||||
// Signal successful worker completion.
|
||||
statsChan <- Task{Op: signalDone}
|
||||
}
|
||||
for i := 0; i < nTasks; i++ {
|
||||
go worker()
|
||||
}
|
||||
|
||||
var (
|
||||
workersDone int // effective wg for the workers
|
||||
status Status
|
||||
err error
|
||||
finalStatus Status
|
||||
finalError error
|
||||
)
|
||||
|
||||
// Update optimization statistics and check convergence.
|
||||
var methodDone bool
|
||||
for task := range statsChan {
|
||||
switch task.Op {
|
||||
default:
|
||||
if !task.Op.isEvaluation() {
|
||||
panic("minimize: evaluation task expected")
|
||||
}
|
||||
updateEvaluationStats(stats, task.Op)
|
||||
status, err = checkEvaluationLimits(prob, stats, settings)
|
||||
case signalDone:
|
||||
workersDone++
|
||||
if workersDone == nTasks {
|
||||
close(results)
|
||||
}
|
||||
continue
|
||||
case NoOperation:
|
||||
// Just send the task back.
|
||||
case MajorIteration:
|
||||
status = performMajorIteration(optLoc, task.Location, stats, converger, startTime, settings)
|
||||
case MethodDone:
|
||||
methodDone = true
|
||||
status = MethodConverge
|
||||
}
|
||||
if settings.Recorder != nil && status == NotTerminated && err == nil {
|
||||
stats.Runtime = time.Since(startTime)
|
||||
// Allow err to be overloaded if the Recorder fails.
|
||||
err = settings.Recorder.Record(task.Location, task.Op, stats)
|
||||
if err != nil {
|
||||
status = Failure
|
||||
}
|
||||
}
|
||||
// If this is the first termination status, trigger the conclusion of
|
||||
// the optimization.
|
||||
if status != NotTerminated || err != nil {
|
||||
select {
|
||||
case <-done:
|
||||
default:
|
||||
finalStatus = status
|
||||
finalError = err
|
||||
results <- Task{
|
||||
Op: PostIteration,
|
||||
}
|
||||
close(done)
|
||||
}
|
||||
}
|
||||
|
||||
// Send the result back to the Problem if there are still active workers.
|
||||
if workersDone != nTasks && task.Op != MethodDone {
|
||||
results <- task
|
||||
}
|
||||
}
|
||||
// This code block is here rather than above to ensure Status() is not called
|
||||
// before Method.Run closes operations.
|
||||
if methodDone {
|
||||
statuser, ok := method.(Statuser)
|
||||
if !ok {
|
||||
panic("optimize: method returned MethodDone but is not a Statuser")
|
||||
}
|
||||
finalStatus, finalError = statuser.Status()
|
||||
if finalStatus == NotTerminated {
|
||||
panic("optimize: method returned MethodDone but a NotTerminated status")
|
||||
}
|
||||
}
|
||||
return finalStatus, finalError
|
||||
}
|
||||
|
||||
func defaultFunctionConverge() *FunctionConverge {
|
||||
return &FunctionConverge{
|
||||
Absolute: 1e-10,
|
||||
Iterations: 100,
|
||||
}
|
||||
}
|
||||
|
||||
// newLocation allocates a new location structure with an X field of the
|
||||
// appropriate size.
|
||||
func newLocation(dim int) *Location {
|
||||
return &Location{
|
||||
X: make([]float64, dim),
|
||||
}
|
||||
}
|
||||
|
||||
// getInitLocation checks the validity of initLocation and initOperation and
|
||||
// returns the initial values as a *Location.
|
||||
func getInitLocation(dim int, initX []float64, initValues *Location) (Operation, *Location) {
|
||||
loc := newLocation(dim)
|
||||
if initX == nil {
|
||||
if initValues != nil {
|
||||
panic("optimize: initValues is non-nil but no initial location specified")
|
||||
}
|
||||
return NoOperation, loc
|
||||
}
|
||||
copy(loc.X, initX)
|
||||
if initValues == nil {
|
||||
return NoOperation, loc
|
||||
} else {
|
||||
if initValues.X != nil {
|
||||
panic("optimize: location specified in InitValues (only use InitX)")
|
||||
}
|
||||
}
|
||||
loc.F = initValues.F
|
||||
op := FuncEvaluation
|
||||
if initValues.Gradient != nil {
|
||||
if len(initValues.Gradient) != dim {
|
||||
panic("optimize: initial gradient does not match problem dimension")
|
||||
}
|
||||
loc.Gradient = initValues.Gradient
|
||||
op |= GradEvaluation
|
||||
}
|
||||
if initValues.Hessian != nil {
|
||||
if initValues.Hessian.SymmetricDim() != dim {
|
||||
panic("optimize: initial Hessian does not match problem dimension")
|
||||
}
|
||||
loc.Hessian = initValues.Hessian
|
||||
op |= HessEvaluation
|
||||
}
|
||||
return op, loc
|
||||
}
|
||||
|
||||
func checkOptimization(p Problem, dim int, recorder Recorder) error {
|
||||
if p.Func == nil {
|
||||
panic(badProblem)
|
||||
}
|
||||
if dim <= 0 {
|
||||
panic("optimize: impossible problem dimension")
|
||||
}
|
||||
if p.Status != nil {
|
||||
		_, err := p.Status()
		if err != nil {
			return err
		}
	}
	if recorder != nil {
		err := recorder.Init()
		if err != nil {
			return err
		}
	}
	return nil
}

// evaluate evaluates the routines specified by the Operation at loc.X, and stores
// the answer into loc. loc.X is copied into x before evaluating in order to
// prevent the routines from modifying it.
func evaluate(p *Problem, loc *Location, op Operation, x []float64) {
	if !op.isEvaluation() {
		panic(fmt.Sprintf("optimize: invalid evaluation %v", op))
	}
	copy(x, loc.X)
	if op&FuncEvaluation != 0 {
		loc.F = p.Func(x)
	}
	if op&GradEvaluation != 0 {
		// Make sure we have a destination in which to place the gradient.
		if len(loc.Gradient) == 0 {
			if cap(loc.Gradient) < len(x) {
				loc.Gradient = make([]float64, len(x))
			} else {
				loc.Gradient = loc.Gradient[:len(x)]
			}
		}
		p.Grad(loc.Gradient, x)
	}
	if op&HessEvaluation != 0 {
		// Make sure we have a destination in which to place the Hessian.
		switch {
		case loc.Hessian == nil:
			loc.Hessian = mat.NewSymDense(len(x), nil)
		case loc.Hessian.IsEmpty():
			loc.Hessian.ReuseAsSym(len(x))
		}
		p.Hess(loc.Hessian, x)
	}
}

// updateEvaluationStats updates the statistics based on the operation.
func updateEvaluationStats(stats *Stats, op Operation) {
	if op&FuncEvaluation != 0 {
		stats.FuncEvaluations++
	}
	if op&GradEvaluation != 0 {
		stats.GradEvaluations++
	}
	if op&HessEvaluation != 0 {
		stats.HessEvaluations++
	}
}

// checkLocationConvergence checks if the current optimal location satisfies
// any of the convergence criteria based on the function location.
//
// checkLocationConvergence returns NotTerminated if the Location does not satisfy
// the convergence criteria given by settings. Otherwise a corresponding status is
// returned.
// Unlike the limit checks, checkLocationConvergence is called only at MajorIterations.
func checkLocationConvergence(loc *Location, settings *Settings, converger Converger) Status {
	if math.IsInf(loc.F, -1) {
		return FunctionNegativeInfinity
	}
	if loc.Gradient != nil && settings.GradientThreshold > 0 {
		norm := floats.Norm(loc.Gradient, math.Inf(1))
		if norm < settings.GradientThreshold {
			return GradientThreshold
		}
	}
	return converger.Converged(loc)
}

// checkEvaluationLimits checks the optimization limits after an evaluation
// Operation. It checks the number of evaluations (of various kinds) and checks
// the status of the Problem, if applicable.
func checkEvaluationLimits(p *Problem, stats *Stats, settings *Settings) (Status, error) {
	if p.Status != nil {
		status, err := p.Status()
		if err != nil || status != NotTerminated {
			return status, err
		}
	}
	if settings.FuncEvaluations > 0 && stats.FuncEvaluations >= settings.FuncEvaluations {
		return FunctionEvaluationLimit, nil
	}
	if settings.GradEvaluations > 0 && stats.GradEvaluations >= settings.GradEvaluations {
		return GradientEvaluationLimit, nil
	}
	if settings.HessEvaluations > 0 && stats.HessEvaluations >= settings.HessEvaluations {
		return HessianEvaluationLimit, nil
	}
	return NotTerminated, nil
}

// checkIterationLimits checks the limits on iterations affected by MajorIteration.
func checkIterationLimits(loc *Location, stats *Stats, settings *Settings) Status {
	if settings.MajorIterations > 0 && stats.MajorIterations >= settings.MajorIterations {
		return IterationLimit
	}
	if settings.Runtime > 0 && stats.Runtime >= settings.Runtime {
		return RuntimeLimit
	}
	return NotTerminated
}

// performMajorIteration does all of the steps needed to perform a MajorIteration.
// It increments the iteration count, updates the optimal location, and checks
// the necessary convergence criteria.
func performMajorIteration(optLoc, loc *Location, stats *Stats, converger Converger, startTime time.Time, settings *Settings) Status {
	optLoc.F = loc.F
	copy(optLoc.X, loc.X)
	if loc.Gradient == nil {
		optLoc.Gradient = nil
	} else {
		if optLoc.Gradient == nil {
			optLoc.Gradient = make([]float64, len(loc.Gradient))
		}
		copy(optLoc.Gradient, loc.Gradient)
	}
	stats.MajorIterations++
	stats.Runtime = time.Since(startTime)
	status := checkLocationConvergence(optLoc, settings, converger)
	if status != NotTerminated {
		return status
	}
	return checkIterationLimits(optLoc, stats, settings)
}
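Editor's note: the evaluation- and iteration-limit checks above map one-to-one onto Settings fields and Status values. The following is an illustrative sketch, not part of the vendored source; capping calls to Func makes Minimize stop with FunctionEvaluationLimit. The quadratic objective and the chosen limit are hypothetical.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/optimize"
)

func main() {
	problem := optimize.Problem{
		// Simple convex objective with its minimum at (1, 2).
		Func: func(x []float64) float64 {
			return (x[0]-1)*(x[0]-1) + (x[1]-2)*(x[1]-2)
		},
	}
	// Allow at most 10 calls to Func; checkEvaluationLimits then returns
	// FunctionEvaluationLimit, which Minimize surfaces as an error.
	settings := &optimize.Settings{FuncEvaluations: 10}
	result, err := optimize.Minimize(problem, []float64{0, 0}, settings, &optimize.NelderMead{})
	if err != nil {
		fmt.Println("stopped early:", err)
	}
	if result != nil {
		fmt.Println("status:", result.Status, "Func calls:", result.FuncEvaluations)
	}
}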
387
vendor/gonum.org/v1/gonum/optimize/morethuente.go
generated
vendored
Normal file
@@ -0,0 +1,387 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import "math"

var _ Linesearcher = (*MoreThuente)(nil)

// MoreThuente is a Linesearcher that finds steps that satisfy both the
// sufficient decrease and curvature conditions (the strong Wolfe conditions).
//
// References:
//   - More, J.J. and D.J. Thuente: Line Search Algorithms with Guaranteed Sufficient
//     Decrease. ACM Transactions on Mathematical Software 20(3) (1994), 286-307
type MoreThuente struct {
	// DecreaseFactor is the constant factor in the sufficient decrease
	// (Armijo) condition.
	// It must be in the interval [0, 1). The default value is 0.
	DecreaseFactor float64
	// CurvatureFactor is the constant factor in the Wolfe conditions. Smaller
	// values result in a more exact line search.
	// A set value must be in the interval (0, 1). If it is zero, it will be
	// defaulted to 0.9.
	CurvatureFactor float64
	// StepTolerance sets the minimum acceptable width for the linesearch
	// interval. If the relative interval length is less than this value,
	// ErrLinesearcherFailure is returned.
	// It must be non-negative. If it is zero, it will be defaulted to 1e-10.
	StepTolerance float64

	// MinimumStep is the minimum step that the linesearcher will take.
	// It must be non-negative and less than MaximumStep. Defaults to no
	// minimum (a value of 0).
	MinimumStep float64
	// MaximumStep is the maximum step that the linesearcher will take.
	// It must be greater than MinimumStep. If it is zero, it will be defaulted
	// to 1e20.
	MaximumStep float64

	bracketed bool    // Indicates if a minimum has been bracketed.
	fInit     float64 // Function value at step = 0.
	gInit     float64 // Derivative value at step = 0.

	// When stage is 1, the algorithm updates the interval given by x and y
	// so that it contains a minimizer of the modified function
	//  psi(step) = f(step) - f(0) - DecreaseFactor * step * f'(0).
	// When stage is 2, the interval is updated so that it contains a minimizer
	// of f.
	stage int

	step         float64    // Current step.
	lower, upper float64    // Lower and upper bounds on the next step.
	x            float64    // Endpoint of the interval with a lower function value.
	fx, gx       float64    // Data at x.
	y            float64    // The other endpoint.
	fy, gy       float64    // Data at y.
	width        [2]float64 // Width of the interval at two previous iterations.
}

const (
	mtMinGrowthFactor float64 = 1.1
	mtMaxGrowthFactor float64 = 4
)

func (mt *MoreThuente) Init(f, g float64, step float64) Operation {
	// Based on the original Fortran code that is available, for example, from
	//  http://ftp.mcs.anl.gov/pub/MINPACK-2/csrch/
	// as part of
	//  MINPACK-2 Project. November 1993.
	//  Argonne National Laboratory and University of Minnesota.
	//  Brett M. Averick, Richard G. Carter, and Jorge J. Moré.

	if g >= 0 {
		panic("morethuente: initial derivative is non-negative")
	}
	if step <= 0 {
		panic("morethuente: invalid initial step")
	}

	if mt.CurvatureFactor == 0 {
		mt.CurvatureFactor = 0.9
	}
	if mt.StepTolerance == 0 {
		mt.StepTolerance = 1e-10
	}
	if mt.MaximumStep == 0 {
		mt.MaximumStep = 1e20
	}

	if mt.MinimumStep < 0 {
		panic("morethuente: minimum step is negative")
	}
	if mt.MaximumStep <= mt.MinimumStep {
		panic("morethuente: maximum step is not greater than minimum step")
	}
	if mt.DecreaseFactor < 0 || mt.DecreaseFactor >= 1 {
		panic("morethuente: invalid decrease factor")
	}
	if mt.CurvatureFactor <= 0 || mt.CurvatureFactor >= 1 {
		panic("morethuente: invalid curvature factor")
	}
	if mt.StepTolerance <= 0 {
		panic("morethuente: step tolerance is not positive")
	}

	if step < mt.MinimumStep {
		step = mt.MinimumStep
	}
	if step > mt.MaximumStep {
		step = mt.MaximumStep
	}

	mt.bracketed = false
	mt.stage = 1
	mt.fInit = f
	mt.gInit = g

	mt.x, mt.fx, mt.gx = 0, f, g
	mt.y, mt.fy, mt.gy = 0, f, g

	mt.lower = 0
	mt.upper = step + mtMaxGrowthFactor*step

	mt.width[0] = mt.MaximumStep - mt.MinimumStep
	mt.width[1] = 2 * mt.width[0]

	mt.step = step
	return FuncEvaluation | GradEvaluation
}

func (mt *MoreThuente) Iterate(f, g float64) (Operation, float64, error) {
	if mt.stage == 0 {
		panic("morethuente: Init has not been called")
	}

	gTest := mt.DecreaseFactor * mt.gInit
	fTest := mt.fInit + mt.step*gTest

	if mt.bracketed {
		if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper {
			// step contains the best step found (see below).
			return NoOperation, mt.step, ErrLinesearcherFailure
		}
	}
	if mt.step == mt.MaximumStep && f <= fTest && g <= gTest {
		return NoOperation, mt.step, ErrLinesearcherBound
	}
	if mt.step == mt.MinimumStep && (f > fTest || g >= gTest) {
		return NoOperation, mt.step, ErrLinesearcherFailure
	}

	// Test for convergence.
	if f <= fTest && math.Abs(g) <= mt.CurvatureFactor*(-mt.gInit) {
		mt.stage = 0
		return MajorIteration, mt.step, nil
	}

	if mt.stage == 1 && f <= fTest && g >= 0 {
		mt.stage = 2
	}

	if mt.stage == 1 && f <= mt.fx && f > fTest {
		// Lower function value but the decrease is not sufficient.

		// Compute values and derivatives of the modified function at step, x, y.
		fm := f - mt.step*gTest
		fxm := mt.fx - mt.x*gTest
		fym := mt.fy - mt.y*gTest
		gm := g - gTest
		gxm := mt.gx - gTest
		gym := mt.gy - gTest
		// Update x, y and step.
		mt.nextStep(fxm, gxm, fym, gym, fm, gm)
		// Recover values and derivatives of the non-modified function at x and y.
		mt.fx = fxm + mt.x*gTest
		mt.fy = fym + mt.y*gTest
		mt.gx = gxm + gTest
		mt.gy = gym + gTest
	} else {
		// Update x, y and step.
		mt.nextStep(mt.fx, mt.gx, mt.fy, mt.gy, f, g)
	}

	if mt.bracketed {
		// Monitor the length of the bracketing interval. If the interval has
		// not been reduced sufficiently after two steps, use bisection to
		// force its length to zero.
		width := mt.y - mt.x
		if math.Abs(width) >= 2.0/3*mt.width[1] {
			mt.step = mt.x + 0.5*width
		}
		mt.width[0], mt.width[1] = math.Abs(width), mt.width[0]
	}

	if mt.bracketed {
		mt.lower = math.Min(mt.x, mt.y)
		mt.upper = math.Max(mt.x, mt.y)
	} else {
		mt.lower = mt.step + mtMinGrowthFactor*(mt.step-mt.x)
		mt.upper = mt.step + mtMaxGrowthFactor*(mt.step-mt.x)
	}

	// Force the step to be in [MinimumStep, MaximumStep].
	mt.step = math.Max(mt.MinimumStep, math.Min(mt.step, mt.MaximumStep))

	if mt.bracketed {
		if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper {
			// If further progress is not possible, set step to the best step
			// obtained during the search.
			mt.step = mt.x
		}
	}

	return FuncEvaluation | GradEvaluation, mt.step, nil
}

// nextStep computes the next safeguarded step and updates the interval that
// contains a step that satisfies the sufficient decrease and curvature
// conditions.
func (mt *MoreThuente) nextStep(fx, gx, fy, gy, f, g float64) {
	x := mt.x
	y := mt.y
	step := mt.step

	gNeg := g < 0
	if gx < 0 {
		gNeg = !gNeg
	}

	var next float64
	var bracketed bool
	switch {
	case f > fx:
		// A higher function value. The minimum is bracketed between x and step.
		// We want the next step to be closer to x because the function value
		// there is lower.

		theta := 3*(fx-f)/(step-x) + gx + g
		s := math.Max(math.Abs(gx), math.Abs(g))
		s = math.Max(s, math.Abs(theta))
		gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s))
		if step < x {
			gamma *= -1
		}
		p := gamma - gx + theta
		q := gamma - gx + gamma + g
		r := p / q
		stpc := x + r*(step-x)
		stpq := x + gx/((fx-f)/(step-x)+gx)/2*(step-x)

		if math.Abs(stpc-x) < math.Abs(stpq-x) {
			// The cubic step is closer to x than the quadratic step.
			// Take the cubic step.
			next = stpc
		} else {
			// If f is much larger than fx, then the quadratic step may be too
			// close to x. Therefore heuristically take the average of the
			// cubic and quadratic steps.
			next = stpc + (stpq-stpc)/2
		}
		bracketed = true

	case gNeg:
		// A lower function value and derivatives of opposite sign. The minimum
		// is bracketed between x and step. If we choose a step that is far
		// from step, the next iteration will also likely fall in this case.

		theta := 3*(fx-f)/(step-x) + gx + g
		s := math.Max(math.Abs(gx), math.Abs(g))
		s = math.Max(s, math.Abs(theta))
		gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s))
		if step > x {
			gamma *= -1
		}
		p := gamma - g + theta
		q := gamma - g + gamma + gx
		r := p / q
		stpc := step + r*(x-step)
		stpq := step + g/(g-gx)*(x-step)

		if math.Abs(stpc-step) > math.Abs(stpq-step) {
			// The cubic step is farther from x than the quadratic step.
			// Take the cubic step.
			next = stpc
		} else {
			// Take the quadratic step.
			next = stpq
		}
		bracketed = true

	case math.Abs(g) < math.Abs(gx):
		// A lower function value, derivatives of the same sign, and the
		// magnitude of the derivative decreases. Extrapolate function values
		// at x and step so that the next step lies between step and y.

		theta := 3*(fx-f)/(step-x) + gx + g
		s := math.Max(math.Abs(gx), math.Abs(g))
		s = math.Max(s, math.Abs(theta))
		gamma := s * math.Sqrt(math.Max(0, (theta/s)*(theta/s)-(gx/s)*(g/s)))
		if step > x {
			gamma *= -1
		}
		p := gamma - g + theta
		q := gamma + gx - g + gamma
		r := p / q
		var stpc float64
		switch {
		case r < 0 && gamma != 0:
			stpc = step + r*(x-step)
		case step > x:
			stpc = mt.upper
		default:
			stpc = mt.lower
		}
		stpq := step + g/(g-gx)*(x-step)

		if mt.bracketed {
			// We are extrapolating so be cautious and take the step that
			// is closer to step.
			if math.Abs(stpc-step) < math.Abs(stpq-step) {
				next = stpc
			} else {
				next = stpq
			}
			// Modify next if it is close to or beyond y.
			if step > x {
				next = math.Min(step+2.0/3*(y-step), next)
			} else {
				next = math.Max(step+2.0/3*(y-step), next)
			}
		} else {
			// Minimum has not been bracketed so take the larger step...
			if math.Abs(stpc-step) > math.Abs(stpq-step) {
				next = stpc
			} else {
				next = stpq
			}
			// ...but within reason.
			next = math.Max(mt.lower, math.Min(next, mt.upper))
		}

	default:
		// A lower function value, derivatives of the same sign, and the
		// magnitude of the derivative does not decrease. The function seems to
		// decrease rapidly in the direction of the step.

		switch {
		case mt.bracketed:
			theta := 3*(f-fy)/(y-step) + gy + g
			s := math.Max(math.Abs(gy), math.Abs(g))
			s = math.Max(s, math.Abs(theta))
			gamma := s * math.Sqrt((theta/s)*(theta/s)-(gy/s)*(g/s))
			if step > y {
				gamma *= -1
			}
			p := gamma - g + theta
			q := gamma - g + gamma + gy
			r := p / q
			next = step + r*(y-step)
		case step > x:
			next = mt.upper
		default:
			next = mt.lower
		}
	}

	if f > fx {
		// x is still the best step.
		mt.y = step
		mt.fy = f
		mt.gy = g
	} else {
		// step is the new best step.
		if gNeg {
			mt.y = x
			mt.fy = fx
			mt.gy = gx
		}
		mt.x = step
		mt.fx = f
		mt.gx = g
	}
	mt.bracketed = bracketed
	mt.step = next
}
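Editor's note, an illustrative sketch rather than vendored source: because MoreThuente enforces the strong Wolfe conditions, it can back any gradient-based Method that accepts a Linesearcher; here it is assumed to plug into the package's LBFGS via its Linesearcher field. The objective is hypothetical.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/optimize"
)

func main() {
	problem := optimize.Problem{
		Func: func(x []float64) float64 { return x[0]*x[0] + 4*x[1]*x[1] },
		Grad: func(grad, x []float64) {
			grad[0] = 2 * x[0]
			grad[1] = 8 * x[1]
		},
	}
	// Lowering CurvatureFactor from its default of 0.9 requests a more
	// exact line search, at the cost of extra Func/Grad evaluations.
	method := &optimize.LBFGS{Linesearcher: &optimize.MoreThuente{CurvatureFactor: 0.1}}
	result, err := optimize.Minimize(problem, []float64{1, 1}, nil, method)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("minimum at", result.X)
}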
348
vendor/gonum.org/v1/gonum/optimize/neldermead.go
generated
vendored
Normal file
@@ -0,0 +1,348 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"
	"sort"

	"gonum.org/v1/gonum/floats"
)

// nmIterType is a Nelder-Mead evaluation kind.
type nmIterType int

const (
	nmReflected = iota
	nmExpanded
	nmContractedInside
	nmContractedOutside
	nmInitialize
	nmShrink
	nmMajor
)

type nmVertexSorter struct {
	vertices [][]float64
	values   []float64
}

func (n nmVertexSorter) Len() int {
	return len(n.values)
}

func (n nmVertexSorter) Less(i, j int) bool {
	return n.values[i] < n.values[j]
}

func (n nmVertexSorter) Swap(i, j int) {
	n.values[i], n.values[j] = n.values[j], n.values[i]
	n.vertices[i], n.vertices[j] = n.vertices[j], n.vertices[i]
}

var _ Method = (*NelderMead)(nil)

// NelderMead is an implementation of the Nelder-Mead simplex algorithm for
// gradient-free nonlinear optimization (not to be confused with Dantzig's
// simplex algorithm for linear programming). The implementation follows the
// algorithm described in
//
//	http://epubs.siam.org/doi/pdf/10.1137/S1052623496303470
//
// If an initial simplex is provided, it is used and initLoc is ignored. If
// InitialVertices and InitialValues are both nil, an initial simplex will be
// generated automatically using the initial location as one vertex, and each
// additional vertex as SimplexSize away in one dimension.
//
// If the simplex update parameters (Reflection, etc.)
// are zero, they will be set automatically based on the dimension according to
// the recommendations in
//
//	http://www.webpages.uidaho.edu/~fuchang/res/ANMS.pdf
type NelderMead struct {
	InitialVertices [][]float64
	InitialValues   []float64
	Reflection      float64 // Reflection parameter (>0)
	Expansion       float64 // Expansion parameter (>1)
	Contraction     float64 // Contraction parameter (>0, <1)
	Shrink          float64 // Shrink parameter (>0, <1)
	SimplexSize     float64 // size of auto-constructed initial simplex

	status Status
	err    error

	reflection  float64
	expansion   float64
	contraction float64
	shrink      float64

	vertices [][]float64 // location of the vertices sorted in ascending f
	values   []float64   // function values at the vertices sorted in ascending f
	centroid []float64   // centroid of all but the worst vertex

	fillIdx        int        // index for filling the simplex during initialization and shrinking
	lastIter       nmIterType // Last iteration
	reflectedPoint []float64  // Storage of the reflected point location
	reflectedValue float64    // Value at the last reflection point
}

func (n *NelderMead) Status() (Status, error) {
	return n.status, n.err
}

func (*NelderMead) Uses(has Available) (uses Available, err error) {
	return has.function()
}

func (n *NelderMead) Init(dim, tasks int) int {
	n.status = NotTerminated
	n.err = nil
	return 1
}

func (n *NelderMead) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	n.status, n.err = localOptimizer{}.run(n, math.NaN(), operation, result, tasks)
	close(operation)
}

func (n *NelderMead) initLocal(loc *Location) (Operation, error) {
	dim := len(loc.X)
	if cap(n.vertices) < dim+1 {
		n.vertices = make([][]float64, dim+1)
	}
	n.vertices = n.vertices[:dim+1]
	for i := range n.vertices {
		n.vertices[i] = resize(n.vertices[i], dim)
	}
	n.values = resize(n.values, dim+1)
	n.centroid = resize(n.centroid, dim)
	n.reflectedPoint = resize(n.reflectedPoint, dim)

	if n.SimplexSize == 0 {
		n.SimplexSize = 0.05
	}

	// Default parameter choices are chosen in a dimension-dependent way
	// from http://www.webpages.uidaho.edu/~fuchang/res/ANMS.pdf
	n.reflection = n.Reflection
	if n.reflection == 0 {
		n.reflection = 1
	}
	n.expansion = n.Expansion
	if n.expansion == 0 {
		n.expansion = 1 + 2/float64(dim)
		if dim == 1 {
			n.expansion = 2
		}
	}
	n.contraction = n.Contraction
	if n.contraction == 0 {
		n.contraction = 0.75 - 1/(2*float64(dim))
		if dim == 1 {
			n.contraction = 0.5
		}
	}
	n.shrink = n.Shrink
	if n.shrink == 0 {
		n.shrink = 1 - 1/float64(dim)
		if dim == 1 {
			n.shrink = 0.5
		}
	}

	if n.InitialVertices != nil {
		// Initial simplex provided. Copy the locations and values, and sort them.
		if len(n.InitialVertices) != dim+1 {
			panic("neldermead: incorrect number of vertices in initial simplex")
		}
		if len(n.InitialValues) != dim+1 {
			panic("neldermead: incorrect number of values in initial simplex")
		}
		for i := range n.InitialVertices {
			if len(n.InitialVertices[i]) != dim {
				panic("neldermead: vertex size mismatch")
			}
			copy(n.vertices[i], n.InitialVertices[i])
		}
		copy(n.values, n.InitialValues)
		sort.Sort(nmVertexSorter{n.vertices, n.values})
		computeCentroid(n.vertices, n.centroid)
		return n.returnNext(nmMajor, loc)
	}

	// No simplex provided. Begin initializing initial simplex. First simplex
	// entry is the initial location, then step SimplexSize in each direction.
	copy(n.vertices[dim], loc.X)
	n.values[dim] = loc.F
	n.fillIdx = 0
	loc.X[n.fillIdx] += n.SimplexSize
	n.lastIter = nmInitialize
	return FuncEvaluation, nil
}

// computeCentroid computes the centroid of all the simplex vertices except the
// final one.
func computeCentroid(vertices [][]float64, centroid []float64) {
	dim := len(centroid)
	for i := range centroid {
		centroid[i] = 0
	}
	for i := 0; i < dim; i++ {
		vertex := vertices[i]
		for j, v := range vertex {
			centroid[j] += v
		}
	}
	for i := range centroid {
		centroid[i] /= float64(dim)
	}
}

func (n *NelderMead) iterateLocal(loc *Location) (Operation, error) {
	dim := len(loc.X)
	switch n.lastIter {
	case nmInitialize:
		n.values[n.fillIdx] = loc.F
		copy(n.vertices[n.fillIdx], loc.X)
		n.fillIdx++
		if n.fillIdx == dim {
			// Successfully finished building initial simplex.
			sort.Sort(nmVertexSorter{n.vertices, n.values})
			computeCentroid(n.vertices, n.centroid)
			return n.returnNext(nmMajor, loc)
		}
		copy(loc.X, n.vertices[dim])
		loc.X[n.fillIdx] += n.SimplexSize
		return FuncEvaluation, nil
	case nmMajor:
		// Nelder-Mead iterations start with a reflection step.
		return n.returnNext(nmReflected, loc)
	case nmReflected:
		n.reflectedValue = loc.F
		switch {
		case loc.F >= n.values[0] && loc.F < n.values[dim-1]:
			n.replaceWorst(loc.X, loc.F)
			return n.returnNext(nmMajor, loc)
		case loc.F < n.values[0]:
			return n.returnNext(nmExpanded, loc)
		default:
			if loc.F < n.values[dim] {
				return n.returnNext(nmContractedOutside, loc)
			}
			return n.returnNext(nmContractedInside, loc)
		}
	case nmExpanded:
		if loc.F < n.reflectedValue {
			n.replaceWorst(loc.X, loc.F)
		} else {
			n.replaceWorst(n.reflectedPoint, n.reflectedValue)
		}
		return n.returnNext(nmMajor, loc)
	case nmContractedOutside:
		if loc.F <= n.reflectedValue {
			n.replaceWorst(loc.X, loc.F)
			return n.returnNext(nmMajor, loc)
		}
		n.fillIdx = 1
		return n.returnNext(nmShrink, loc)
	case nmContractedInside:
		if loc.F < n.values[dim] {
			n.replaceWorst(loc.X, loc.F)
			return n.returnNext(nmMajor, loc)
		}
		n.fillIdx = 1
		return n.returnNext(nmShrink, loc)
	case nmShrink:
		copy(n.vertices[n.fillIdx], loc.X)
		n.values[n.fillIdx] = loc.F
		n.fillIdx++
		if n.fillIdx != dim+1 {
			return n.returnNext(nmShrink, loc)
		}
		sort.Sort(nmVertexSorter{n.vertices, n.values})
		computeCentroid(n.vertices, n.centroid)
		return n.returnNext(nmMajor, loc)
	default:
		panic("unreachable")
	}
}

// returnNext updates the location based on the iteration type and the current
// simplex, and returns the next operation.
func (n *NelderMead) returnNext(iter nmIterType, loc *Location) (Operation, error) {
	n.lastIter = iter
	switch iter {
	case nmMajor:
		// Fill loc with the current best point and value,
		// and command a convergence check.
		copy(loc.X, n.vertices[0])
		loc.F = n.values[0]
		return MajorIteration, nil
	case nmReflected, nmExpanded, nmContractedOutside, nmContractedInside:
		// x_new = x_centroid + scale * (x_centroid - x_worst)
		var scale float64
		switch iter {
		case nmReflected:
			scale = n.reflection
		case nmExpanded:
			scale = n.reflection * n.expansion
		case nmContractedOutside:
			scale = n.reflection * n.contraction
		case nmContractedInside:
			scale = -n.contraction
		}
		dim := len(loc.X)
		floats.SubTo(loc.X, n.centroid, n.vertices[dim])
		floats.Scale(scale, loc.X)
		floats.Add(loc.X, n.centroid)
		if iter == nmReflected {
			copy(n.reflectedPoint, loc.X)
		}
		return FuncEvaluation, nil
	case nmShrink:
		// x_shrink = x_best + delta * (x_i - x_best)
		floats.SubTo(loc.X, n.vertices[n.fillIdx], n.vertices[0])
		floats.Scale(n.shrink, loc.X)
		floats.Add(loc.X, n.vertices[0])
		return FuncEvaluation, nil
	default:
		panic("unreachable")
	}
}

// replaceWorst removes the worst location in the simplex and adds the new
// {x, f} pair while maintaining sorting.
func (n *NelderMead) replaceWorst(x []float64, f float64) {
	dim := len(x)
	if f >= n.values[dim] {
		panic("increase in simplex value")
	}
	copy(n.vertices[dim], x)
	n.values[dim] = f

	// Sort the newly-added value.
	for i := dim - 1; i >= 0; i-- {
		if n.values[i] < f {
			break
		}
		n.vertices[i], n.vertices[i+1] = n.vertices[i+1], n.vertices[i]
		n.values[i], n.values[i+1] = n.values[i+1], n.values[i]
	}

	// Update the location of the centroid. Only one point has been replaced, so
	// subtract the worst point and add the new one.
	floats.AddScaled(n.centroid, -1/float64(dim), n.vertices[dim])
	floats.AddScaled(n.centroid, 1/float64(dim), x)
}

func (*NelderMead) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{false, false}
}
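A minimal usage sketch (not part of the vendored source): NelderMead needs only function values, so it suits non-smooth objectives where no Grad can be supplied. The absolute-value objective below is hypothetical.

package main

import (
	"fmt"
	"math"

	"gonum.org/v1/gonum/optimize"
)

func main() {
	problem := optimize.Problem{
		// Non-smooth objective with its minimum at (0, 0); no Grad is needed.
		Func: func(x []float64) float64 {
			return math.Abs(x[0]) + math.Abs(x[1])
		},
	}
	result, err := optimize.Minimize(problem, []float64{3, -2}, nil, &optimize.NelderMead{})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("F =", result.F, "at", result.X)
}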
182
vendor/gonum.org/v1/gonum/optimize/newton.go
generated
vendored
Normal file
@@ -0,0 +1,182 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

const maxNewtonModifications = 20

var (
	_ Method          = (*Newton)(nil)
	_ localMethod     = (*Newton)(nil)
	_ NextDirectioner = (*Newton)(nil)
)

// Newton implements a modified Newton's method for Hessian-based unconstrained
// minimization. It applies regularization when the Hessian is not positive
// definite, and it can converge to a local minimum from any starting point.
//
// Newton iteratively forms a quadratic model to the objective function f and
// tries to minimize this approximate model. It generates a sequence of
// locations x_k by means of
//
//	solve H_k d_k = -∇f_k for d_k,
//	x_{k+1} = x_k + α_k d_k,
//
// where H_k is the Hessian matrix of f at x_k and α_k is a step size found by
// a line search.
//
// Away from a minimizer H_k may not be positive definite and d_k may not be a
// descent direction. Newton implements a Hessian modification strategy that
// adds successively larger multiples of identity to H_k until it becomes
// positive definite. Note that the repeated trial factorization of the
// modified Hessian involved in this process can be computationally expensive.
//
// If the Hessian matrix cannot be formed explicitly or if the computational
// cost of its factorization is prohibitive, the BFGS or L-BFGS quasi-Newton
// methods can be used instead.
type Newton struct {
	// Linesearcher is used for selecting suitable steps along the descent
	// direction d. Accepted steps should satisfy at least one of the Wolfe,
	// Goldstein or Armijo conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// Increase is the factor by which a scalar tau is successively increased
	// so that (H + tau*I) is positive definite. Larger values reduce the
	// number of trial Hessian factorizations, but also reduce the second-order
	// information in H.
	// Increase must be greater than 1. If Increase is 0, it is defaulted to 5.
	Increase float64
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	hess *mat.SymDense // Storage for a copy of the Hessian matrix.
	chol mat.Cholesky  // Storage for the Cholesky factorization.
	tau  float64
}

func (n *Newton) Status() (Status, error) {
	return n.status, n.err
}

func (*Newton) Uses(has Available) (uses Available, err error) {
	return has.hessian()
}

func (n *Newton) Init(dim, tasks int) int {
	n.status = NotTerminated
	n.err = nil
	return 1
}

func (n *Newton) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	n.status, n.err = localOptimizer{}.run(n, n.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (n *Newton) initLocal(loc *Location) (Operation, error) {
	if n.Increase == 0 {
		n.Increase = 5
	}
	if n.Increase <= 1 {
		panic("optimize: Newton.Increase must be greater than 1")
	}
	if n.Linesearcher == nil {
		n.Linesearcher = &Bisection{}
	}
	if n.ls == nil {
		n.ls = &LinesearchMethod{}
	}
	n.ls.Linesearcher = n.Linesearcher
	n.ls.NextDirectioner = n
	return n.ls.Init(loc)
}

func (n *Newton) iterateLocal(loc *Location) (Operation, error) {
	return n.ls.Iterate(loc)
}

func (n *Newton) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	n.hess = resizeSymDense(n.hess, dim)
	n.tau = 0
	return n.NextDirection(loc, dir)
}

func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// This method implements Algorithm 3.3 (Cholesky with Added Multiple of
	// the Identity) from Nocedal, Wright (2006), 2nd edition.

	dim := len(loc.X)
	d := mat.NewVecDense(dim, dir)
	grad := mat.NewVecDense(dim, loc.Gradient)
	n.hess.CopySym(loc.Hessian)

	// Find the smallest diagonal entry of the Hessian.
	minA := n.hess.At(0, 0)
	for i := 1; i < dim; i++ {
		a := n.hess.At(i, i)
		if a < minA {
			minA = a
		}
	}
	// If the smallest diagonal entry is positive, the Hessian may be positive
	// definite, and so first attempt to apply the Cholesky factorization to
	// the un-modified Hessian. If the smallest entry is negative, use the
	// final tau from the last iteration if regularization was needed,
	// otherwise guess an appropriate value for tau.
	if minA > 0 {
		n.tau = 0
	} else if n.tau == 0 {
		n.tau = -minA + 0.001
	}

	for k := 0; k < maxNewtonModifications; k++ {
		if n.tau != 0 {
			// Add a multiple of identity to the Hessian.
			for i := 0; i < dim; i++ {
				n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau)
			}
		}
		// Try to apply the Cholesky factorization.
		pd := n.chol.Factorize(n.hess)
		if pd {
			// Store the solution in d's backing array, dir.
			err := n.chol.SolveVecTo(d, grad)
			if err == nil {
				d.ScaleVec(-1, d)
				return 1
			}
		}
		// Modified Hessian is not PD, so increase tau.
		n.tau = math.Max(n.Increase*n.tau, 0.001)
	}

	// Hessian modification failed to get a PD matrix. Return the negative
	// gradient as the descent direction.
	d.ScaleVec(-1, grad)
	return 1
}

func (n *Newton) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, true}
}
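An illustrative sketch, not vendored source: Newton requires Func, Grad, and Hess. For this convex quadratic the first Cholesky factorization succeeds, so no identity shift is ever added. The objective is hypothetical.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/mat"
	"gonum.org/v1/gonum/optimize"
)

func main() {
	// Convex quadratic f(x) = x0^2 + 2*x1^2 with its minimum at the origin.
	problem := optimize.Problem{
		Func: func(x []float64) float64 { return x[0]*x[0] + 2*x[1]*x[1] },
		Grad: func(grad, x []float64) {
			grad[0] = 2 * x[0]
			grad[1] = 4 * x[1]
		},
		Hess: func(hess *mat.SymDense, x []float64) {
			// The constant Hessian of the quadratic; hess is pre-sized by evaluate.
			hess.SetSym(0, 0, 2)
			hess.SetSym(0, 1, 0)
			hess.SetSym(1, 1, 4)
		},
	}
	result, err := optimize.Minimize(problem, []float64{5, 5}, nil, &optimize.Newton{})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("minimum at", result.X)
}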
108
vendor/gonum.org/v1/gonum/optimize/printer.go
generated
vendored
Normal file
@@ -0,0 +1,108 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"fmt"
	"io"
	"math"
	"os"
	"time"

	"gonum.org/v1/gonum/floats"
)

var printerHeadings = [...]string{
	"Iter",
	"Runtime",
	"FuncEvals",
	"Func",
	"GradEvals",
	"|Gradient|∞",
	"HessEvals",
}

const (
	printerBaseTmpl = "%9v %16v %9v %22v" // Base template for headings and values that are always printed.
	printerGradTmpl = " %9v %22v"         // Appended to base template when loc.Gradient != nil.
	printerHessTmpl = " %9v"              // Appended to base template when loc.Hessian != nil.
)

var _ Recorder = (*Printer)(nil)

// Printer writes column-format output to the specified writer as the optimization
// progresses. By default, it writes to os.Stdout.
type Printer struct {
	Writer          io.Writer
	HeadingInterval int
	ValueInterval   time.Duration

	lastHeading int
	lastValue   time.Time
}

func NewPrinter() *Printer {
	return &Printer{
		Writer:          os.Stdout,
		HeadingInterval: 30,
		ValueInterval:   500 * time.Millisecond,
	}
}

func (p *Printer) Init() error {
	p.lastHeading = p.HeadingInterval              // So the headings are printed the first time.
	p.lastValue = time.Now().Add(-p.ValueInterval) // So the values are printed the first time.
	return nil
}

func (p *Printer) Record(loc *Location, op Operation, stats *Stats) error {
	if op != MajorIteration && op != InitIteration && op != PostIteration {
		return nil
	}

	// Print values always on PostIteration or when ValueInterval has elapsed.
	printValues := time.Since(p.lastValue) > p.ValueInterval || op == PostIteration
	if !printValues {
		// Return early if not printing anything.
		return nil
	}

	// Print heading when HeadingInterval lines have been printed, but never on PostIteration.
	printHeading := p.lastHeading >= p.HeadingInterval && op != PostIteration
	if printHeading {
		p.lastHeading = 1
	} else {
		p.lastHeading++
	}

	if printHeading {
		headings := "\n" + fmt.Sprintf(printerBaseTmpl, printerHeadings[0], printerHeadings[1], printerHeadings[2], printerHeadings[3])
		if loc.Gradient != nil {
			headings += fmt.Sprintf(printerGradTmpl, printerHeadings[4], printerHeadings[5])
		}
		if loc.Hessian != nil {
			headings += fmt.Sprintf(printerHessTmpl, printerHeadings[6])
		}
		_, err := fmt.Fprintln(p.Writer, headings)
		if err != nil {
			return err
		}
	}

	values := fmt.Sprintf(printerBaseTmpl, stats.MajorIterations, stats.Runtime, stats.FuncEvaluations, loc.F)
	if loc.Gradient != nil {
		values += fmt.Sprintf(printerGradTmpl, stats.GradEvaluations, floats.Norm(loc.Gradient, math.Inf(1)))
	}
	if loc.Hessian != nil {
		values += fmt.Sprintf(printerHessTmpl, stats.HessEvaluations)
	}
	_, err := fmt.Fprintln(p.Writer, values)
	if err != nil {
		return err
	}

	p.lastValue = time.Now()
	return nil
}
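A usage sketch (not part of the vendored source): Printer is attached through Settings.Recorder; passing a nil Method is assumed to let Minimize pick a default gradient-based method. The objective is hypothetical.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/optimize"
)

func main() {
	problem := optimize.Problem{
		Func: func(x []float64) float64 { return x[0]*x[0] + x[1]*x[1] },
		Grad: func(grad, x []float64) {
			grad[0] = 2 * x[0]
			grad[1] = 2 * x[1]
		},
	}
	// Attach the Printer through Settings so progress rows are written to
	// os.Stdout while the optimization runs.
	settings := &optimize.Settings{Recorder: optimize.NewPrinter()}
	result, err := optimize.Minimize(problem, []float64{10, 10}, settings, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("done:", result.Status)
}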
194
vendor/gonum.org/v1/gonum/optimize/stepsizers.go
generated
vendored
Normal file
@@ -0,0 +1,194 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/floats"
	"gonum.org/v1/gonum/floats/scalar"
)

const (
	initialStepFactor = 1

	quadraticMinimumStepSize = 1e-3
	quadraticMaximumStepSize = 1
	quadraticThreshold       = 1e-12

	firstOrderMinimumStepSize = quadraticMinimumStepSize
	firstOrderMaximumStepSize = quadraticMaximumStepSize
)

var (
	_ StepSizer = ConstantStepSize{}
	_ StepSizer = (*QuadraticStepSize)(nil)
	_ StepSizer = (*FirstOrderStepSize)(nil)
)

// ConstantStepSize is a StepSizer that returns the same step size for
// every iteration.
type ConstantStepSize struct {
	Size float64
}

func (c ConstantStepSize) Init(_ *Location, _ []float64) float64 {
	return c.Size
}

func (c ConstantStepSize) StepSize(_ *Location, _ []float64) float64 {
	return c.Size
}

// QuadraticStepSize estimates the initial line search step size as the minimum
// of a quadratic that interpolates f(x_{k-1}), f(x_k) and ∇f_k⋅p_k.
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
// The step size is bounded away from zero.
type QuadraticStepSize struct {
	// Threshold determines that the initial step size should be estimated by
	// quadratic interpolation when the relative change in the objective
	// function is larger than Threshold. Otherwise the initial step size is
	// set to 2*previous step size.
	// If Threshold is zero, it will be set to 1e-12.
	Threshold float64
	// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
	// If InitialStepFactor is zero, it will be set to one.
	InitialStepFactor float64
	// MinStepSize is the lower bound on the estimated step size.
	// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
	// If MinStepSize is zero, it will be set to 1e-3.
	MinStepSize float64
	// MaxStepSize is the upper bound on the estimated step size.
	// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	fPrev        float64
	dirPrevNorm  float64
	projGradPrev float64
	xPrev        []float64
}

func (q *QuadraticStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
	if q.Threshold == 0 {
		q.Threshold = quadraticThreshold
	}
	if q.InitialStepFactor == 0 {
		q.InitialStepFactor = initialStepFactor
	}
	if q.MinStepSize == 0 {
		q.MinStepSize = quadraticMinimumStepSize
	}
	if q.MaxStepSize == 0 {
		q.MaxStepSize = quadraticMaximumStepSize
	}
	if q.MaxStepSize <= q.MinStepSize {
		panic("optimize: MinStepSize not smaller than MaxStepSize")
	}

	gNorm := floats.Norm(loc.Gradient, math.Inf(1))
	stepSize = math.Max(q.MinStepSize, math.Min(q.InitialStepFactor/gNorm, q.MaxStepSize))

	q.fPrev = loc.F
	q.dirPrevNorm = floats.Norm(dir, 2)
	q.projGradPrev = floats.Dot(loc.Gradient, dir)
	q.xPrev = resize(q.xPrev, len(loc.X))
	copy(q.xPrev, loc.X)
	return stepSize
}

func (q *QuadraticStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
	stepSizePrev := floats.Distance(loc.X, q.xPrev, 2) / q.dirPrevNorm
	projGrad := floats.Dot(loc.Gradient, dir)

	stepSize = 2 * stepSizePrev
	if !scalar.EqualWithinRel(q.fPrev, loc.F, q.Threshold) {
		// Two consecutive function values are not relatively equal, so
		// computing the minimum of a quadratic interpolant might make sense.

		df := (loc.F - q.fPrev) / stepSizePrev
		quadTest := df - q.projGradPrev
		if quadTest > 0 {
			// There is a chance of approximating the function well by a
			// quadratic only if the finite difference (f_k-f_{k-1})/stepSizePrev
			// is larger than ∇f_{k-1}⋅p_{k-1}.

			// Set the step size to the minimizer of the quadratic function that
			// interpolates f_{k-1}, ∇f_{k-1}⋅p_{k-1} and f_k.
			stepSize = -q.projGradPrev * stepSizePrev / quadTest / 2
		}
	}
	// Bound the step size to lie in [MinStepSize, MaxStepSize].
	stepSize = math.Max(q.MinStepSize, math.Min(stepSize, q.MaxStepSize))

	q.fPrev = loc.F
	q.dirPrevNorm = floats.Norm(dir, 2)
	q.projGradPrev = projGrad
	copy(q.xPrev, loc.X)
	return stepSize
}

// FirstOrderStepSize estimates the initial line search step size based on the
// assumption that the first-order change in the function will be the same as
// that obtained at the previous iteration. That is, the initial step size s^0_k
// is chosen so that
//
//	s^0_k ∇f_k⋅p_k = s_{k-1} ∇f_{k-1}⋅p_{k-1}
//
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
type FirstOrderStepSize struct {
	// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
	// If InitialStepFactor is zero, it will be set to one.
	InitialStepFactor float64
	// MinStepSize is the lower bound on the estimated step size.
	// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
	// If MinStepSize is zero, it will be set to 1e-3.
	MinStepSize float64
	// MaxStepSize is the upper bound on the estimated step size.
	// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	dirPrevNorm  float64
	projGradPrev float64
	xPrev        []float64
}

func (fo *FirstOrderStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
	if fo.InitialStepFactor == 0 {
		fo.InitialStepFactor = initialStepFactor
	}
	if fo.MinStepSize == 0 {
		fo.MinStepSize = firstOrderMinimumStepSize
	}
	if fo.MaxStepSize == 0 {
		fo.MaxStepSize = firstOrderMaximumStepSize
	}
	if fo.MaxStepSize <= fo.MinStepSize {
		panic("optimize: MinStepSize not smaller than MaxStepSize")
	}

	gNorm := floats.Norm(loc.Gradient, math.Inf(1))
	stepSize = math.Max(fo.MinStepSize, math.Min(fo.InitialStepFactor/gNorm, fo.MaxStepSize))

	fo.dirPrevNorm = floats.Norm(dir, 2)
	fo.projGradPrev = floats.Dot(loc.Gradient, dir)
	fo.xPrev = resize(fo.xPrev, len(loc.X))
	copy(fo.xPrev, loc.X)
	return stepSize
}

func (fo *FirstOrderStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
	stepSizePrev := floats.Distance(loc.X, fo.xPrev, 2) / fo.dirPrevNorm
	projGrad := floats.Dot(loc.Gradient, dir)

	stepSize = stepSizePrev * fo.projGradPrev / projGrad
	stepSize = math.Max(fo.MinStepSize, math.Min(stepSize, fo.MaxStepSize))

	fo.dirPrevNorm = floats.Norm(dir, 2)
	fo.projGradPrev = floats.Dot(loc.Gradient, dir)
	copy(fo.xPrev, loc.X)
	return stepSize
}
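An illustrative sketch, not vendored source: a StepSizer such as QuadraticStepSize is assumed to plug into the package's GradientDescent through its StepSizer field, which is exactly the poorly-scaled-direction case the doc comment above describes. The objective is hypothetical.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/optimize"
)

func main() {
	problem := optimize.Problem{
		// Badly scaled quadratic; raw gradient steps are poorly sized.
		Func: func(x []float64) float64 { return 100*x[0]*x[0] + x[1]*x[1] },
		Grad: func(grad, x []float64) {
			grad[0] = 200 * x[0]
			grad[1] = 2 * x[1]
		},
	}
	// QuadraticStepSize guesses each initial line search step from a
	// quadratic interpolant of the previous iteration's data.
	method := &optimize.GradientDescent{StepSizer: &optimize.QuadraticStepSize{}}
	result, err := optimize.Minimize(problem, []float64{1, 1}, nil, method)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("minimum at", result.X)
}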
123
vendor/gonum.org/v1/gonum/optimize/termination.go
generated
vendored
Normal file
@@ -0,0 +1,123 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import "errors"

// Status represents the status of the optimization. Programs
// should not rely on the underlying numeric value of the Status being constant.
type Status int

const (
	NotTerminated Status = iota
	Success
	FunctionThreshold
	FunctionConvergence
	GradientThreshold
	StepConvergence
	FunctionNegativeInfinity
	MethodConverge
	Failure
	IterationLimit
	RuntimeLimit
	FunctionEvaluationLimit
	GradientEvaluationLimit
	HessianEvaluationLimit
)

func (s Status) String() string {
	return statuses[s].name
}

// Early returns true if the status indicates that the optimization ended
// before a minimum was found. For example, if the maximum number of iterations
// was reached, a minimum was not found, but if the gradient threshold was
// reached then a minimum was found.
func (s Status) Early() bool {
	return statuses[s].early
}

// Err returns the error associated with an early ending to the minimization. If
// Early returns false, Err will return nil.
func (s Status) Err() error {
	return statuses[s].err
}

var statuses = []struct {
	name  string
	early bool
	err   error
}{
	{
		name: "NotTerminated",
	},
	{
		name: "Success",
	},
	{
		name: "FunctionThreshold",
	},
	{
		name: "FunctionConvergence",
	},
	{
		name: "GradientThreshold",
	},
	{
		name: "StepConvergence",
	},
	{
		name: "FunctionNegativeInfinity",
	},
	{
		name: "MethodConverge",
	},
	{
		name:  "Failure",
		early: true,
		err:   errors.New("optimize: termination ended in failure"),
	},
	{
		name:  "IterationLimit",
		early: true,
		err:   errors.New("optimize: maximum number of major iterations reached"),
	},
	{
		name:  "RuntimeLimit",
		early: true,
		err:   errors.New("optimize: maximum runtime reached"),
	},
	{
		name:  "FunctionEvaluationLimit",
		early: true,
		err:   errors.New("optimize: maximum number of function evaluations reached"),
	},
	{
		name:  "GradientEvaluationLimit",
		early: true,
		err:   errors.New("optimize: maximum number of gradient evaluations reached"),
	},
	{
		name:  "HessianEvaluationLimit",
		early: true,
		err:   errors.New("optimize: maximum number of Hessian evaluations reached"),
	},
}

// NewStatus returns a unique Status variable to represent a custom status.
// NewStatus is intended to be called only during package initialization, and
// calls to NewStatus are not thread safe.
//
// NewStatus takes in three arguments, the string that should be output from
// Status.String, a boolean if the status indicates early optimization conclusion,
// and the error to return from Err (if any).
func NewStatus(name string, early bool, err error) Status {
	statuses = append(statuses, struct {
		name  string
		early bool
		err   error
	}{name, early, err})
	return Status(len(statuses) - 1)
}
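A sketch of a custom status (not part of the vendored source): NewStatus registers the status at package initialization, and Problem.Status reports it during the run; the failure condition simulated here is hypothetical.

package main

import (
	"errors"
	"fmt"

	"gonum.org/v1/gonum/optimize"
)

// dataUnavailable is a custom status registered during package initialization.
var dataUnavailable = optimize.NewStatus("DataUnavailable", true, errors.New("objective data went away"))

func main() {
	gone := false // Flip to true to simulate the data source failing mid-run.
	problem := optimize.Problem{
		Func: func(x []float64) float64 { return x[0] * x[0] },
		// Status is polled by the evaluation-limit checks; returning a
		// non-NotTerminated status stops the optimization.
		Status: func() (optimize.Status, error) {
			if gone {
				return dataUnavailable, dataUnavailable.Err()
			}
			return optimize.NotTerminated, nil
		},
	}
	result, err := optimize.Minimize(problem, []float64{3}, nil, &optimize.NelderMead{})
	fmt.Println(result.Status, err)
}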
273
vendor/gonum.org/v1/gonum/optimize/types.go
generated
vendored
Normal file
@@ -0,0 +1,273 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"fmt"
	"time"

	"gonum.org/v1/gonum/mat"
)

const defaultGradientAbsTol = 1e-12

// Operation represents the set of operations commanded by Method at each
// iteration. It is a bitmap of various Iteration and Evaluation constants.
// Individual constants must NOT be combined together by the binary OR operator
// except for the Evaluation operations.
type Operation uint64

// Supported Operations.
const (
	// NoOperation specifies that no evaluation or convergence check should
	// take place.
	NoOperation Operation = 0
	// InitIteration is sent to Recorder to indicate the initial location.
	// All fields of the location to record must be valid.
	// Method must not return it.
	InitIteration Operation = 1 << (iota - 1)
	// PostIteration is sent to Recorder to indicate the final location
	// reached during an optimization run.
	// All fields of the location to record must be valid.
	// Method must not return it.
	PostIteration
	// MajorIteration indicates that the next candidate location for
	// an optimum has been found and convergence should be checked.
	MajorIteration
	// MethodDone declares that the method is done running. A method must
	// be a Statuser in order to use this iteration, and after returning
	// MethodDone, the Status must return other than NotTerminated.
	MethodDone
	// FuncEvaluation specifies that the objective function
	// should be evaluated.
	FuncEvaluation
	// GradEvaluation specifies that the gradient
	// of the objective function should be evaluated.
	GradEvaluation
	// HessEvaluation specifies that the Hessian
	// of the objective function should be evaluated.
	HessEvaluation
	// signalDone is used internally to signal completion.
	signalDone

	// Mask for the evaluating operations.
	evalMask = FuncEvaluation | GradEvaluation | HessEvaluation
)

func (op Operation) isEvaluation() bool {
	return op&evalMask != 0 && op&^evalMask == 0
}

func (op Operation) String() string {
	if op&evalMask != 0 {
		return fmt.Sprintf("Evaluation(Func: %t, Grad: %t, Hess: %t, Extra: 0b%b)",
			op&FuncEvaluation != 0,
			op&GradEvaluation != 0,
			op&HessEvaluation != 0,
			op&^(evalMask))
	}
	s, ok := operationNames[op]
	if ok {
		return s
	}
	return fmt.Sprintf("Operation(%d)", op)
}

var operationNames = map[Operation]string{
	NoOperation:    "NoOperation",
	InitIteration:  "InitIteration",
	MajorIteration: "MajorIteration",
	PostIteration:  "PostIteration",
	MethodDone:     "MethodDone",
	signalDone:     "signalDone",
}

// Result represents the answer of an optimization run. It contains the optimum
// function value, X location, and gradient as well as the Status at convergence
// and Statistics taken during the run.
type Result struct {
	Location
	Stats
	Status Status
}

// Stats contains the statistics of the run.
type Stats struct {
	MajorIterations int           // Total number of major iterations
	FuncEvaluations int           // Number of evaluations of Func
	GradEvaluations int           // Number of evaluations of Grad
	HessEvaluations int           // Number of evaluations of Hess
	Runtime         time.Duration // Total runtime of the optimization
}

// complementEval returns an evaluating operation that evaluates fields of loc
// not evaluated by eval.
func complementEval(loc *Location, eval Operation) (complEval Operation) {
	if eval&FuncEvaluation == 0 {
		complEval = FuncEvaluation
	}
	if loc.Gradient != nil && eval&GradEvaluation == 0 {
		complEval |= GradEvaluation
	}
	if loc.Hessian != nil && eval&HessEvaluation == 0 {
		complEval |= HessEvaluation
	}
	return complEval
}

// Problem describes the optimization problem to be solved.
type Problem struct {
	// Func evaluates the objective function at the given location. Func
	// must not modify x.
	Func func(x []float64) float64

	// Grad evaluates the gradient at x and stores the result in grad which will
	// be the same length as x. Grad must not modify x.
	Grad func(grad, x []float64)

	// Hess evaluates the Hessian at x and stores the result in-place in hess which
	// will have dimensions matching the length of x. Hess must not modify x.
	Hess func(hess *mat.SymDense, x []float64)

	// Status reports the status of the objective function being optimized and any
	// error. This can be used to terminate early, for example when the function is
	// not able to evaluate itself. The user can use one of the pre-provided Status
	// constants, or may call NewStatus to create a custom Status value.
	Status func() (Status, error)
}
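An illustrative sketch, not vendored source: a custom Recorder that tests the Operation bitmap. It assumes the package's Recorder interface (Init() error; Record(*Location, Operation, *Stats) error) and that Record is also invoked for evaluation operations, as the Printer's own filtering of those operations suggests. The objective and the iterLogger type are hypothetical.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/optimize"
)

// iterLogger counts evaluation Operations and prints a line at each
// MajorIteration.
type iterLogger struct {
	evals int
}

func (r *iterLogger) Init() error {
	r.evals = 0
	return nil
}

func (r *iterLogger) Record(loc *optimize.Location, op optimize.Operation, stats *optimize.Stats) error {
	// Evaluation Operations may combine the Func/Grad/Hess bits, so test the
	// bitmap rather than comparing for equality.
	if op&(optimize.FuncEvaluation|optimize.GradEvaluation|optimize.HessEvaluation) != 0 {
		r.evals++
	}
	if op == optimize.MajorIteration {
		fmt.Printf("major iteration %d: F=%v after %d evaluation ops\n",
			stats.MajorIterations, loc.F, r.evals)
	}
	return nil
}

func main() {
	problem := optimize.Problem{
		Func: func(x []float64) float64 { return (x[0] - 3) * (x[0] - 3) },
	}
	settings := &optimize.Settings{Recorder: &iterLogger{}}
	if _, err := optimize.Minimize(problem, []float64{0}, settings, &optimize.NelderMead{}); err != nil {
		fmt.Println(err)
	}
}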
// Available describes the functions available to call in Problem.
type Available struct {
	Grad bool
	Hess bool
}

func availFromProblem(prob Problem) Available {
	return Available{Grad: prob.Grad != nil, Hess: prob.Hess != nil}
}

// function tests if the Problem described by the receiver is suitable for an
// unconstrained Method that only calls the function, and returns the result.
func (has Available) function() (uses Available, err error) {
	// TODO(btracey): This needs to be modified when optimize supports
	// constrained optimization.
	return Available{}, nil
}

// gradient tests if the Problem described by the receiver is suitable for an
// unconstrained gradient-based Method, and returns the result.
func (has Available) gradient() (uses Available, err error) {
	// TODO(btracey): This needs to be modified when optimize supports
	// constrained optimization.
	if !has.Grad {
		return Available{}, ErrMissingGrad
	}
	return Available{Grad: true}, nil
}

// hessian tests if the Problem described by the receiver is suitable for an
// unconstrained Hessian-based Method, and returns the result.
func (has Available) hessian() (uses Available, err error) {
	// TODO(btracey): This needs to be modified when optimize supports
	// constrained optimization.
	if !has.Grad {
		return Available{}, ErrMissingGrad
	}
	if !has.Hess {
		return Available{}, ErrMissingHess
	}
	return Available{Grad: true, Hess: true}, nil
}

// Settings represents settings of the optimization run. It contains initial
// settings, convergence information, and Recorder information. Convergence
// settings are only checked at MajorIterations, while Evaluation thresholds
// are checked at every Operation. See the field comments for default values.
type Settings struct {
	// InitValues specifies properties (function value, gradient, etc.) known
	// at the initial location passed to Minimize. If InitValues is non-nil, then
	// the function value F must be provided, the location X must not be specified
	// and other fields may be specified. The values in Location may be modified
	// during the call to Minimize.
	InitValues *Location

	// GradientThreshold stops optimization with GradientThreshold status if the
	// infinity norm of the gradient is less than this value. This defaults to
	// a value of 0 (and so gradient convergence is not checked), however note
	// that many Methods (LBFGS, CG, etc.) will converge with a small value of
	// the gradient, and so to fully disable this setting the Method may need to
	// be modified.
	// This setting has no effect if the gradient is not used by the Method.
	GradientThreshold float64

	// Converger checks if the optimization has converged based on the (history
	// of) locations found during the optimization. Minimize will pass the
	// Location at every MajorIteration to the Converger.
	//
	// If the Converger is nil, a default value of
	//  FunctionConverge {
	//      Absolute: 1e-10,
	//      Iterations: 100,
	//  }
	// will be used. NeverTerminated can be used to always return a
	// NotTerminated status.
	Converger Converger

	// MajorIterations is the maximum number of iterations allowed.
	// IterationLimit status is returned if the number of major iterations
	// equals or exceeds this value.
	// If it equals zero, this setting has no effect.
	// The default value is 0.
	MajorIterations int

	// Runtime is the maximum runtime allowed. RuntimeLimit status is returned
	// if the duration of the run is longer than this value. Runtime is only
	// checked at MajorIterations of the Method.
	// If it equals zero, this setting has no effect.
	// The default value is 0.
	Runtime time.Duration

	// FuncEvaluations is the maximum allowed number of function evaluations.
	// FunctionEvaluationLimit status is returned if the total number of calls
	// to Func equals or exceeds this number.
	// If it equals zero, this setting has no effect.
	// The default value is 0.
	FuncEvaluations int

	// GradEvaluations is the maximum allowed number of gradient evaluations.
	// GradientEvaluationLimit status is returned if the total number of calls
	// to Grad equals or exceeds this number.
	// If it equals zero, this setting has no effect.
	// The default value is 0.
	GradEvaluations int

	// HessEvaluations is the maximum allowed number of Hessian evaluations.
	// HessianEvaluationLimit status is returned if the total number of calls
	// to Hess equals or exceeds this number.
	// If it equals zero, this setting has no effect.
	// The default value is 0.
	HessEvaluations int

	Recorder Recorder

	// Concurrent represents how many concurrent evaluations are possible.
	Concurrent int
}

// resize takes x and returns a slice of length dim. It returns a resliced x
// if cap(x) >= dim, and a new slice otherwise.
func resize(x []float64, dim int) []float64 {
	if dim > cap(x) {
		return make([]float64, dim)
	}
	return x[:dim]
}

func resizeSymDense(m *mat.SymDense, dim int) *mat.SymDense {
	if m == nil || cap(m.RawSymmetric().Data) < dim*dim {
		return mat.NewSymDense(dim, nil)
	}
	return mat.NewSymDense(dim, m.RawSymmetric().Data[:dim*dim])
}
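A closing usage sketch (not part of the vendored source): Settings wiring the documented defaults explicitly, assuming the package's FunctionConverge Converger with the Absolute and Iterations fields shown in the comment above. The objective is hypothetical.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/optimize"
)

func main() {
	problem := optimize.Problem{
		Func: func(x []float64) float64 { return x[0]*x[0] + x[1]*x[1] },
		Grad: func(grad, x []float64) {
			grad[0] = 2 * x[0]
			grad[1] = 2 * x[1]
		},
	}
	settings := &optimize.Settings{
		MajorIterations: 200, // Stop with IterationLimit if convergence is slow.
		Converger: &optimize.FunctionConverge{
			Absolute:   1e-10,
			Iterations: 100,
		},
	}
	result, err := optimize.Minimize(problem, []float64{1, 1}, settings, nil)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(result.Status, result.X)
}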