WIP: complete overhaul #17

Open · wants to merge 1 commit into base: master
src/cg.jl (12 changes: 8 additions & 4 deletions)

```diff
@@ -11,7 +11,7 @@ struct ConjugateGradient{F<:CGFlavor,T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
 end
 ConjugateGradient(; flavor = HagerZhang(), maxiter = typemax(Int), gradtol::Real = 1e-8,
                     restart = typemax(Int), verbosity::Int = 0,
-                    linesearch::AbstractLineSearch = HagerZhangLineSearch(;verbosity = verbosity - 2)) =
+                    linesearch::AbstractLineSearch = HagerZhangLineSearch()) =
     ConjugateGradient(flavor, maxiter, gradtol, linesearch, restart, verbosity)
 
 function optimize(fg, x, alg::ConjugateGradient;
@@ -29,8 +29,12 @@ function optimize(fg, x, alg::ConjugateGradient;
     normgradhistory = [normgrad]
 
     # compute here once to define initial value of α in scale-invariant way
-    Pg = precondition(x, g)
-    normPg = sqrt(inner(x, Pg, Pg))
+    if precondition === _precondition
+        Pg = g
+    else
+        Pg = precondition(x, deepcopy(g))
+    end
+    normPg = sqrt(abs(inner(x, g, Pg)))
     α = 1/(normPg) # initial guess: scale invariant
     # α = one(normgrad)
 
@@ -66,7 +70,7 @@ function optimize(fg, x, alg::ConjugateGradient;
         _glast[] = g
         _dlast[] = η
         x, f, g, ξ, α, nfg = alg.linesearch(fg, x, η, (f, g);
-                                            initialguess = α, retract = retract, inner = inner)
+                                            initialguess = α, retract = retract, inner = inner, verbosity = verbosity - 2)
         numfg += nfg
         numiter += 1
         x, f, g = finalize!(x, f, g, numiter)
```
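Two behavioral changes land here: the default (trivial) preconditioner is now detected by identity with `_precondition`, so the common unpreconditioned case no longer copies the gradient, and a custom preconditioner now receives `deepcopy(g)`, implying it is allowed to mutate its argument. The initial step size is also derived from `sqrt(abs(inner(x, g, Pg)))`, the preconditioned gradient norm. A minimal sketch of plugging in a custom preconditioner, assuming the OptimKit.jl-style API (`optimize` accepting a `precondition(x, g)` keyword and returning `x, f, g, numfg, normgradhistory`); `P` and `myprecondition` are illustrative names, not part of the PR:

```julia
using LinearAlgebra
using OptimKit  # assumption: this PR appears to target OptimKit.jl

# Quadratic test problem: f(x) = ‖x‖²/2 with gradient ∇f(x) = x.
fg(x) = (sum(abs2, x) / 2, copy(x))

# Hypothetical fixed diagonal preconditioner. `ldiv!(P, g)` overwrites g with
# P \ g and returns it; mutating g is safe here because the diff above hands a
# non-default preconditioner a `deepcopy` of the gradient.
P = Diagonal(collect(1.0:4.0))
myprecondition(x, g) = ldiv!(P, g)

alg = ConjugateGradient(; gradtol = 1e-10, verbosity = 1)
x, f, g, numfg, history = optimize(fg, randn(4), alg; precondition = myprecondition)
```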
src/gd.jl (4 changes: 2 additions & 2 deletions)

```diff
@@ -6,7 +6,7 @@ struct GradientDescent{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
 end
 GradientDescent(; maxiter = typemax(Int), gradtol::Real = 1e-8,
                   verbosity::Int = 0,
-                  linesearch::AbstractLineSearch = HagerZhangLineSearch(;verbosity = verbosity - 2)) =
+                  linesearch::AbstractLineSearch = HagerZhangLineSearch()) =
     GradientDescent(maxiter, gradtol, linesearch, verbosity)
 
 function optimize(fg, x, alg::GradientDescent;
@@ -41,7 +41,7 @@ function optimize(fg, x, alg::GradientDescent;
         _glast[] = g
         _dlast[] = η
         x, f, g, ξ, α, nfg = alg.linesearch(fg, x, η, (f, g);
-                                            initialguess = α, retract = retract, inner = inner)
+                                            initialguess = α, retract = retract, inner = inner, verbosity = verbosity - 2)
         numfg += nfg
         numiter += 1
         x, f, g = finalize!(x, f, g, numiter)
```
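The change mirrors src/cg.jl: the default `HagerZhangLineSearch()` is now constructed without a baked-in verbosity, and the line search instead inherits `verbosity - 2` at each call. Assuming `verbosity` inside `optimize` tracks the algorithm's own verbosity setting, line-search output only appears once that setting exceeds 2. A usage sketch under that assumption:

```julia
using OptimKit  # assumption: this PR appears to target OptimKit.jl

fg(x) = (sum(abs2, x) / 2, copy(x))

# With verbosity = 2 the algorithm itself reports progress, but the line search
# stays quiet, since it now runs at verbosity - 2 = 0; raising verbosity to 3
# or more would also enable line-search output (assumption based on the
# `verbosity - 2` forwarding in the diffs above).
alg = GradientDescent(; maxiter = 100, gradtol = 1e-8, verbosity = 2)
x, f, g, numfg, history = optimize(fg, randn(4), alg)
```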
src/lbfgs.jl (51 changes: 48 additions & 3 deletions)

```diff
@@ -1,3 +1,28 @@
+"""
+    struct LBFGS{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
+    LBFGS(m::Int = 8; maxiter = typemax(Int), gradtol::Real = 1e-8,
+          acceptfirst::Bool = true, verbosity::Int = 0,
+          linesearch::AbstractLineSearch = HagerZhangLineSearch())
+
+
+
+
+LBFGS optimization algorithm.
+
+## Fields
+- `m::Int`: The number of previous iterations to store for the limited memory BFGS approximation.
+- `maxiter::Int`: The maximum number of iterations.
+- `gradtol::T`: The tolerance for the norm of the gradient.
+- `acceptfirst::Bool`: Whether to accept the first step of the line search.
+- `linesearch::L`: The line search algorithm to use.
+- `verbosity::Int`: The verbosity level.
+
+## Constructors
+- `LBFGS(m::Int = 8; maxiter = typemax(Int), gradtol::Real = 1e-8, acceptfirst::Bool = true,
+  verbosity::Int = 0,
+  linesearch::AbstractLineSearch = HagerZhangLineSearch())`: Construct an LBFGS object with the specified parameters.
+
+"""
 struct LBFGS{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
     m::Int
     maxiter::Int
@@ -6,9 +31,29 @@ struct LBFGS{T<:Real,L<:AbstractLineSearch} <: OptimizationAlgorithm
     linesearch::L
     verbosity::Int
 end
 
+"""
+    LBFGS(m::Int = 8; maxiter = typemax(Int), gradtol::Real = 1e-8, acceptfirst::Bool = true,
+          verbosity::Int = 0,
+          linesearch::AbstractLineSearch = HagerZhangLineSearch())
+
+Construct an LBFGS object with the specified parameters.
+
+## Arguments
+- `m::Int = 8`: The number of previous iterations to store for the limited memory BFGS approximation.
+- `maxiter::Int = typemax(Int)`: The maximum number of iterations.
+- `gradtol::Real = 1e-8`: The tolerance for the norm of the gradient.
+- `acceptfirst::Bool = true`: Whether to accept the first step of the line search.
+- `verbosity::Int = 0`: The verbosity level.
+- `linesearch::AbstractLineSearch = HagerZhangLineSearch()`: The line search algorithm to use.
+
+## Returns
+- `LBFGS`: The LBFGS object.
+
+"""
 LBFGS(m::Int = 8; maxiter = typemax(Int), gradtol::Real = 1e-8, acceptfirst::Bool = true,
       verbosity::Int = 0,
-      linesearch::AbstractLineSearch = HagerZhangLineSearch(;verbosity = verbosity - 2)) =
+      linesearch::AbstractLineSearch = HagerZhangLineSearch()) =
     LBFGS(m, maxiter, gradtol, acceptfirst, linesearch, verbosity)
 
 function optimize(fg, x, alg::LBFGS;
@@ -44,7 +89,7 @@ function optimize(fg, x, alg::LBFGS;
     else
         Pg = precondition(x, deepcopy(g))
         normPg = sqrt(inner(x, Pg, Pg))
-        η = scale!(Pg, -1/normPg) # initial guess: scale invariant
+        η = scale!(Pg, -0.01/normPg) # initial guess: scale invariant
     end
 
     # store current quantities as previous quantities
@@ -59,7 +104,7 @@ function optimize(fg, x, alg::LBFGS;
         x, f, g, ξ, α, nfg = alg.linesearch(fg, x, η, (f, g);
                                             initialguess = one(f), acceptfirst = alg.acceptfirst,
                                             # for some reason, line search seems to converge to solution alpha = 2 in most cases if acceptfirst = false. If acceptfirst = true, the initial value of alpha can immediately be accepted. This typically leads to a more erratic convergence of normgrad, but to less function evaluations in the end.
-                                            retract = retract, inner = inner)
+                                            retract = retract, inner = inner, verbosity = verbosity - 2)
         numfg += nfg
         numiter += 1
         x, f, g = finalize!(x, f, g, numiter)
```
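Besides the docstrings, the initial search direction is now scaled by `-0.01/normPg` instead of `-1/normPg`, a much more conservative first step. The new docstrings make the constructor signature explicit, so a usage example follows directly from them. A sketch, again assuming the OptimKit.jl-style `optimize` return value; the test function is illustrative:

```julia
using OptimKit  # assumption: this PR appears to target OptimKit.jl

# Shifted quadratic with minimum at x = ones(4): f(x) = ‖x - 1‖²/2, ∇f(x) = x - 1.
fg(x) = (sum(abs2, x .- 1) / 2, x .- 1)

# m = 8 history pairs; acceptfirst = true lets the line search accept its first
# trial step, which (per the comment retained in the diff) tends to cost fewer
# function evaluations at the price of a more erratic normgrad history.
alg = LBFGS(8; maxiter = 1000, gradtol = 1e-10, acceptfirst = true, verbosity = 1)
x, f, g, numfg, history = optimize(fg, zeros(4), alg)
```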