fixed dependencies

2024-10-24 15:46:01 +08:00
parent d16a5bd9c0
commit 1161e8d054
2005 changed files with 690883 additions and 0 deletions
--- a/vendor/gonum.org/v1/gonum/optimize/cmaes.go
+++ b/vendor/gonum.org/v1/gonum/optimize/cmaes.go
@@ -0,0 +1,469 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package optimize
+
+import (
+	"math"
+	"sort"
+
+	"golang.org/x/exp/rand"
+
+	"gonum.org/v1/gonum/floats"
+	"gonum.org/v1/gonum/mat"
+	"gonum.org/v1/gonum/stat/distmv"
+)
+
+var _ Method = (*CmaEsChol)(nil)
+
+// TODO(btracey): If we ever implement the traditional CMA-ES algorithm, provide
+// the base explanation there, and modify this description to just
+// describe the differences.
+
+// CmaEsChol implements the covariance matrix adaptation evolution strategy (CMA-ES)
+// based on the Cholesky decomposition. The full algorithm is described in
+//
+//	Krause, Oswin, Dídac Rodríguez Arbonès, and Christian Igel. "CMA-ES with
+//	optimal covariance update and storage complexity." Advances in Neural
+//	Information Processing Systems. 2016.
+//	https://papers.nips.cc/paper/6457-cma-es-with-optimal-covariance-update-and-storage-complexity.pdf
+//
+// CMA-ES is a global optimization method that progressively adapts a population
+// of samples. CMA-ES combines techniques from local optimization with global
+// optimization. Specifically, the CMA-ES algorithm uses an initial multivariate
+// normal distribution to generate a population of input locations. The input locations
+// with the lowest function values are used to update the parameters of the normal
+// distribution, a new set of input locations are generated, and this procedure
+// is iterated until convergence. The initial sampling distribution will have
+// a mean specified by the initial x location, and a covariance specified by
+// the InitCholesky field.
+//
+// As the normal distribution is progressively updated according to the best samples,
+// it can be that the mean of the distribution is updated in a gradient-descent
+// like fashion, followed by a shrinking covariance.
+// It is recommended that the algorithm be run multiple times (with different
+// InitMean) to have a better chance of finding the global minimum.
+//
+// The CMA-ES-Chol algorithm differs from the standard CMA-ES algorithm in that
+// it directly updates the Cholesky decomposition of the normal distribution.
+// This changes the runtime from O(dimension^3) to O(dimension^2*population)
+// The evolution of the multi-variate normal will be similar to the baseline
+// CMA-ES algorithm, but the covariance update equation is not identical.
+//
+// For more information about the CMA-ES algorithm, see
+//
+//	https://en.wikipedia.org/wiki/CMA-ES
+//	https://arxiv.org/pdf/1604.00772.pdf
+type CmaEsChol struct {
+	// InitStepSize sets the initial size of the covariance matrix adaptation.
+	// If InitStepSize is 0, a default value of 0.5 is used. InitStepSize cannot
+	// be negative, or CmaEsChol will panic.
+	InitStepSize float64
+	// Population sets the population size for the algorithm. If Population is
+	// 0, a default value of 4 + math.Floor(3*math.Log(float64(dim))) is used.
+	// Population cannot be negative or CmaEsChol will panic.
+	Population int
+	// InitCholesky specifies the Cholesky decomposition of the covariance
+	// matrix for the initial sampling distribution. If InitCholesky is nil,
+	// a default value of I is used. If it is non-nil, then it must have
+	// InitCholesky.Size() be equal to the problem dimension.
+	InitCholesky *mat.Cholesky
+	// StopLogDet sets the threshold for stopping the optimization if the
+	// distribution becomes too peaked. The log determinant is a measure of the
+	// (log) "volume" of the normal distribution, and when it is too small
+	// the samples are almost the same. If the log determinant of the covariance
+	// matrix becomes less than StopLogDet, the optimization run is concluded.
+	// If StopLogDet is 0, a default value of dim*log(1e-16) is used.
+	// If StopLogDet is NaN, the stopping criterion is not used, though
+	// this can cause numeric instabilities in the algorithm.
+	StopLogDet float64
+	// ForgetBest, when true, does not track the best overall function value found,
+	// instead returning the new best sample in each iteration. If ForgetBest
+	// is false, then the minimum value returned will be the lowest across all
+	// iterations, regardless of when that sample was generated.
+	ForgetBest bool
+	// Src allows a random number generator to be supplied for generating samples.
+	// If Src is nil the generator in golang.org/x/math/rand is used.
+	Src rand.Source
+
+	// Fixed algorithm parameters.
+	dim                 int
+	pop                 int
+	weights             []float64
+	muEff               float64
+	cc, cs, c1, cmu, ds float64
+	eChi                float64
+
+	// Function data.
+	xs *mat.Dense
+	fs []float64
+
+	// Adaptive algorithm parameters.
+	invSigma float64 // inverse of the sigma parameter
+	pc, ps   []float64
+	mean     []float64
+	chol     mat.Cholesky
+
+	// Overall best.
+	bestX []float64
+	bestF float64
+
+	// Synchronization.
+	sentIdx     int
+	receivedIdx int
+	operation   chan<- Task
+	updateErr   error
+}
+
+var (
+	_ Statuser = (*CmaEsChol)(nil)
+	_ Method   = (*CmaEsChol)(nil)
+)
+
+func (cma *CmaEsChol) methodConverged() Status {
+	sd := cma.StopLogDet
+	switch {
+	case math.IsNaN(sd):
+		return NotTerminated
+	case sd == 0:
+		sd = float64(cma.dim) * -36.8413614879 // ln(1e-16)
+	}
+	if cma.chol.LogDet() < sd {
+		return MethodConverge
+	}
+	return NotTerminated
+}
+
+// Status returns the status of the method.
+func (cma *CmaEsChol) Status() (Status, error) {
+	if cma.updateErr != nil {
+		return Failure, cma.updateErr
+	}
+	return cma.methodConverged(), nil
+}
+
+func (*CmaEsChol) Uses(has Available) (uses Available, err error) {
+	return has.function()
+}
+
+func (cma *CmaEsChol) Init(dim, tasks int) int {
+	if dim <= 0 {
+		panic(nonpositiveDimension)
+	}
+	if tasks < 0 {
+		panic(negativeTasks)
+	}
+
+	// Set fixed algorithm parameters.
+	// Parameter values are from https://arxiv.org/pdf/1604.00772.pdf .
+	cma.dim = dim
+	cma.pop = cma.Population
+	n := float64(dim)
+	if cma.pop == 0 {
+		cma.pop = 4 + int(3*math.Log(n)) // Note the implicit floor.
+	} else if cma.pop < 0 {
+		panic("cma-es-chol: negative population size")
+	}
+	mu := cma.pop / 2
+	cma.weights = resize(cma.weights, mu)
+	for i := range cma.weights {
+		v := math.Log(float64(mu)+0.5) - math.Log(float64(i)+1)
+		cma.weights[i] = v
+	}
+	floats.Scale(1/floats.Sum(cma.weights), cma.weights)
+	cma.muEff = 0
+	for _, v := range cma.weights {
+		cma.muEff += v * v
+	}
+	cma.muEff = 1 / cma.muEff
+
+	cma.cc = (4 + cma.muEff/n) / (n + 4 + 2*cma.muEff/n)
+	cma.cs = (cma.muEff + 2) / (n + cma.muEff + 5)
+	cma.c1 = 2 / ((n+1.3)*(n+1.3) + cma.muEff)
+	cma.cmu = math.Min(1-cma.c1, 2*(cma.muEff-2+1/cma.muEff)/((n+2)*(n+2)+cma.muEff))
+	cma.ds = 1 + 2*math.Max(0, math.Sqrt((cma.muEff-1)/(n+1))-1) + cma.cs
+	// E[chi] is taken from https://en.wikipedia.org/wiki/CMA-ES (there
+	// listed as E[||N(0,1)||]).
+	cma.eChi = math.Sqrt(n) * (1 - 1.0/(4*n) + 1/(21*n*n))
+
+	// Allocate memory for function data.
+	cma.xs = mat.NewDense(cma.pop, dim, nil)
+	cma.fs = resize(cma.fs, cma.pop)
+
+	// Allocate and initialize adaptive parameters.
+	cma.invSigma = 1 / cma.InitStepSize
+	if cma.InitStepSize == 0 {
+		cma.invSigma = 10.0 / 3
+	} else if cma.InitStepSize < 0 {
+		panic("cma-es-chol: negative initial step size")
+	}
+	cma.pc = resize(cma.pc, dim)
+	for i := range cma.pc {
+		cma.pc[i] = 0
+	}
+	cma.ps = resize(cma.ps, dim)
+	for i := range cma.ps {
+		cma.ps[i] = 0
+	}
+	cma.mean = resize(cma.mean, dim) // mean location initialized at the start of Run
+
+	if cma.InitCholesky != nil {
+		if cma.InitCholesky.SymmetricDim() != dim {
+			panic("cma-es-chol: incorrect InitCholesky size")
+		}
+		cma.chol.Clone(cma.InitCholesky)
+	} else {
+		// Set the initial Cholesky to I.
+		b := mat.NewDiagDense(dim, nil)
+		for i := 0; i < dim; i++ {
+			b.SetDiag(i, 1)
+		}
+		var chol mat.Cholesky
+		ok := chol.Factorize(b)
+		if !ok {
+			panic("cma-es-chol: bad cholesky. shouldn't happen")
+		}
+		cma.chol = chol
+	}
+
+	cma.bestX = resize(cma.bestX, dim)
+	cma.bestF = math.Inf(1)
+
+	cma.sentIdx = 0
+	cma.receivedIdx = 0
+	cma.operation = nil
+	cma.updateErr = nil
+	t := min(tasks, cma.pop)
+	return t
+}
+
+func (cma *CmaEsChol) sendInitTasks(tasks []Task) {
+	for i, task := range tasks {
+		cma.sendTask(i, task)
+	}
+	cma.sentIdx = len(tasks)
+}
+
+// sendTask generates a sample and sends the task. It does not update the cma index.
+func (cma *CmaEsChol) sendTask(idx int, task Task) {
+	task.ID = idx
+	task.Op = FuncEvaluation
+	distmv.NormalRand(cma.xs.RawRowView(idx), cma.mean, &cma.chol, cma.Src)
+	copy(task.X, cma.xs.RawRowView(idx))
+	cma.operation <- task
+}
+
+// bestIdx returns the best index in the functions. Returns -1 if all values
+// are NaN.
+func (cma *CmaEsChol) bestIdx() int {
+	best := -1
+	bestVal := math.Inf(1)
+	for i, v := range cma.fs {
+		if math.IsNaN(v) {
+			continue
+		}
+		// Use equality in case somewhere evaluates to +inf.
+		if v <= bestVal {
+			best = i
+			bestVal = v
+		}
+	}
+	return best
+}
+
+// findBestAndUpdateTask finds the best task in the current list, updates the
+// new best overall, and then stores the best location into task.
+func (cma *CmaEsChol) findBestAndUpdateTask(task Task) Task {
+	// Find and update the best location.
+	// Don't use floats because there may be NaN values.
+	best := cma.bestIdx()
+	bestF := math.NaN()
+	bestX := cma.xs.RawRowView(0)
+	if best != -1 {
+		bestF = cma.fs[best]
+		bestX = cma.xs.RawRowView(best)
+	}
+	if cma.ForgetBest {
+		task.F = bestF
+		copy(task.X, bestX)
+	} else {
+		if bestF < cma.bestF {
+			cma.bestF = bestF
+			copy(cma.bestX, bestX)
+		}
+		task.F = cma.bestF
+		copy(task.X, cma.bestX)
+	}
+	return task
+}
+
+func (cma *CmaEsChol) Run(operations chan<- Task, results <-chan Task, tasks []Task) {
+	copy(cma.mean, tasks[0].X)
+	cma.operation = operations
+	// Send the initial tasks. We know there are at most as many tasks as elements
+	// of the population.
+	cma.sendInitTasks(tasks)
+
+Loop:
+	for {
+		result := <-results
+		switch result.Op {
+		default:
+			panic("unknown operation")
+		case PostIteration:
+			break Loop
+		case MajorIteration:
+			// The last thing we did was update all of the tasks and send the
+			// major iteration. Now we can send a group of tasks again.
+			cma.sendInitTasks(tasks)
+		case FuncEvaluation:
+			cma.receivedIdx++
+			cma.fs[result.ID] = result.F
+			switch {
+			case cma.sentIdx < cma.pop:
+				// There are still tasks to evaluate. Send the next.
+				cma.sendTask(cma.sentIdx, result)
+				cma.sentIdx++
+			case cma.receivedIdx < cma.pop:
+				// All the tasks have been sent, but not all of them have been received.
+				// Need to wait until all are back.
+				continue Loop
+			default:
+				// All of the evaluations have been received.
+				if cma.receivedIdx != cma.pop {
+					panic("bad logic")
+				}
+				cma.receivedIdx = 0
+				cma.sentIdx = 0
+
+				task := cma.findBestAndUpdateTask(result)
+				// Update the parameters and send a MajorIteration or a convergence.
+				err := cma.update()
+				// Kill the existing data.
+				for i := range cma.fs {
+					cma.fs[i] = math.NaN()
+					cma.xs.Set(i, 0, math.NaN())
+				}
+				switch {
+				case err != nil:
+					cma.updateErr = err
+					task.Op = MethodDone
+				case cma.methodConverged() != NotTerminated:
+					task.Op = MethodDone
+				default:
+					task.Op = MajorIteration
+					task.ID = -1
+				}
+				operations <- task
+			}
+		}
+	}
+
+	// Been told to stop. Clean up.
+	// Need to see best of our evaluated tasks so far. Should instead just
+	// collect, then see.
+	for task := range results {
+		switch task.Op {
+		case MajorIteration:
+		case FuncEvaluation:
+			cma.fs[task.ID] = task.F
+		default:
+			panic("unknown operation")
+		}
+	}
+	// Send the new best value if the evaluation is better than any we've
+	// found so far. Keep this separate from findBestAndUpdateTask so that
+	// we only send an iteration if we find a better location.
+	if !cma.ForgetBest {
+		best := cma.bestIdx()
+		if best != -1 && cma.fs[best] < cma.bestF {
+			task := tasks[0]
+			task.F = cma.fs[best]
+			copy(task.X, cma.xs.RawRowView(best))
+			task.Op = MajorIteration
+			task.ID = -1
+			operations <- task
+		}
+	}
+	close(operations)
+}
+
+// update computes the new parameters (mean, cholesky, etc.). Does not update
+// any of the synchronization parameters (taskIdx).
+func (cma *CmaEsChol) update() error {
+	// Sort the function values to find the elite samples.
+	ftmp := make([]float64, cma.pop)
+	copy(ftmp, cma.fs)
+	indexes := make([]int, cma.pop)
+	for i := range indexes {
+		indexes[i] = i
+	}
+	sort.Sort(bestSorter{F: ftmp, Idx: indexes})
+
+	meanOld := make([]float64, len(cma.mean))
+	copy(meanOld, cma.mean)
+
+	// m_{t+1} = \sum_{i=1}^mu w_i x_i
+	for i := range cma.mean {
+		cma.mean[i] = 0
+	}
+	for i, w := range cma.weights {
+		idx := indexes[i] // index of teh 1337 sample.
+		floats.AddScaled(cma.mean, w, cma.xs.RawRowView(idx))
+	}
+	meanDiff := make([]float64, len(cma.mean))
+	floats.SubTo(meanDiff, cma.mean, meanOld)
+
+	// p_{c,t+1} = (1-c_c) p_{c,t} + \sqrt(c_c*(2-c_c)*mueff) (m_{t+1}-m_t)/sigma_t
+	floats.Scale(1-cma.cc, cma.pc)
+	scaleC := math.Sqrt(cma.cc*(2-cma.cc)*cma.muEff) * cma.invSigma
+	floats.AddScaled(cma.pc, scaleC, meanDiff)
+
+	// p_{sigma, t+1} = (1-c_sigma) p_{sigma,t} + \sqrt(c_s*(2-c_s)*mueff) A_t^-1 (m_{t+1}-m_t)/sigma_t
+	floats.Scale(1-cma.cs, cma.ps)
+	// First compute A_t^-1 (m_{t+1}-m_t), then add the scaled vector.
+	tmp := make([]float64, cma.dim)
+	tmpVec := mat.NewVecDense(cma.dim, tmp)
+	diffVec := mat.NewVecDense(cma.dim, meanDiff)
+	err := tmpVec.SolveVec(cma.chol.RawU().T(), diffVec)
+	if err != nil {
+		return err
+	}
+	scaleS := math.Sqrt(cma.cs*(2-cma.cs)*cma.muEff) * cma.invSigma
+	floats.AddScaled(cma.ps, scaleS, tmp)
+
+	// Compute the update to A.
+	scaleChol := 1 - cma.c1 - cma.cmu
+	if scaleChol == 0 {
+		scaleChol = math.SmallestNonzeroFloat64 // enough to kill the old data, but still non-zero.
+	}
+	cma.chol.Scale(scaleChol, &cma.chol)
+	cma.chol.SymRankOne(&cma.chol, cma.c1, mat.NewVecDense(cma.dim, cma.pc))
+	for i, w := range cma.weights {
+		idx := indexes[i]
+		floats.SubTo(tmp, cma.xs.RawRowView(idx), meanOld)
+		cma.chol.SymRankOne(&cma.chol, cma.cmu*w*cma.invSigma, tmpVec)
+	}
+
+	// sigma_{t+1} = sigma_t exp(c_sigma/d_sigma * norm(p_{sigma,t+1}/ E[chi] -1)
+	normPs := floats.Norm(cma.ps, 2)
+	cma.invSigma /= math.Exp(cma.cs / cma.ds * (normPs/cma.eChi - 1))
+	return nil
+}
+
+type bestSorter struct {
+	F   []float64
+	Idx []int
+}
+
+func (b bestSorter) Len() int {
+	return len(b.F)
+}
+func (b bestSorter) Less(i, j int) bool {
+	return b.F[i] < b.F[j]
+}
+func (b bestSorter) Swap(i, j int) {
+	b.F[i], b.F[j] = b.F[j], b.F[i]
+	b.Idx[i], b.Idx[j] = b.Idx[j], b.Idx[i]
+}