EVE: stochastic gradient descent with feedback #144

Open · wants to merge 4 commits into base: master
97 changes: 97 additions & 0 deletions eve.lua
@@ -0,0 +1,97 @@
--[[ EVE implementation https://arxiv.org/pdf/1611.01505v1.pdf

ARGS:
- `opfunc` : a function that takes a single input (X), the point
             of evaluation, and returns f(X) and df/dX
- `x` : the initial point
- `config` : a table with configuration parameters for the optimizer
- `config.learningRate` : learning rate
- `config.learningRateDecay` : learning rate decay
- `config.beta1` : first moment coefficient
- `config.beta2` : second moment coefficient
- `config.beta3` : exponential decay rate for the relative-change feedback term
- `config.epsilon` : for numerical stability
- `config.thl` : lower threshold on the relative change of the objective
- `config.thu` : upper threshold on the relative change of the objective
- `config.weightDecay` : weight decay
- `state` : a table describing the state of the optimizer; after each
            call the state is modified
RETURN:
- `x` : the new x vector
- `f(x)` : the function, evaluated before the update
]]
function optim.eve(opfunc, x, config, state)
   -- (0) get/update state
   if config == nil and state == nil then
      print('no state table, EVE initializing')
   end

   local config = config or {}
   local state = state or {}

   local lr = config.learningRate or 1e-3
   local lrd = config.learningRateDecay or 0
   local beta1 = config.beta1 or 0.9
   local beta2 = config.beta2 or 0.999
   local beta3 = config.beta3 or 0.999
   local eps = config.epsilon or 1e-8
   local thl = config.thl or 0.1
   local thu = config.thu or 10
   local wd = config.weightDecay or 0

   -- (1) evaluate f(x) and df/dx
   local fx, dfdx = opfunc(x)

   -- (2) weight decay
   if wd ~= 0 then
      dfdx:add(wd, x)
   end

   -- initialize state
   state.d = state.d or 1
   state.t = state.t or 0
   state.fhat = state.fhat or 0

   -- (3) learning rate decay (annealing)
   local clr = lr / (1 + state.t*lrd)
   state.t = state.t + 1

   -- decay the first and second moment running average coefficients
   state.m = state.m or x.new(dfdx:size()):zero()
   state.m:mul(beta1):add(1-beta1, dfdx)

   state.v = state.v or x.new(dfdx:size()):zero()
   state.v:mul(beta2):addcmul(1-beta2, dfdx, dfdx)

   state.denom = state.denom or x.new(dfdx:size()):zero()

   if state.t == 1 then
      -- first step: no feedback available yet
      state.d = 1
      state.fhat = fx
   else
      -- clip the ratio fx / fhat so the relative change stays in [thl, thu]
      local l, u
      if fx > state.fhat then
         l, u = thl + 1, thu + 1
      else
         l, u = 1 / (thu + 1), 1 / (thl + 1)
      end
      local fhat = state.fhat * math.min(math.max(l, fx / state.fhat), u)
      local r = math.abs(fhat - state.fhat) / math.min(fhat, state.fhat)
      state.fhat = fhat
      -- decay the relative change
      state.d = beta3 * state.d + (1 - beta3) * r
   end

   local biasCorrection1 = 1 - beta1^state.t
   local biasCorrection2 = 1 - beta2^state.t
   local alpha = clr * math.sqrt(biasCorrection2) / biasCorrection1 / state.d
   state.denom:copy(state.v):sqrt():add(eps)

   -- (4) update x
   x:addcdiv(-alpha, state.m, state.denom)

   -- return x*, f(x) before optimization
   return x, {fx}
end
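
For reference, the feedback quantities computed above correspond to the following recurrences from the EVE paper (a sketch in the paper's notation, with k = `thl`, K = `thu`, and d_1 = 1, f̂_1 = f_1):

$$
\begin{aligned}
(l, u) &= \begin{cases} (k+1,\ K+1) & \text{if } f_t > \hat{f}_{t-1} \\ \left(\tfrac{1}{K+1},\ \tfrac{1}{k+1}\right) & \text{otherwise,} \end{cases} \\
\hat{f}_t &= \hat{f}_{t-1}\,\min\!\left(\max\!\left(l,\ \frac{f_t}{\hat{f}_{t-1}}\right),\ u\right), \qquad
r_t = \frac{|\hat{f}_t - \hat{f}_{t-1}|}{\min(\hat{f}_t,\ \hat{f}_{t-1})}, \\
d_t &= \beta_3\, d_{t-1} + (1-\beta_3)\, r_t, \qquad
x_{t+1} = x_t - \frac{\alpha_t}{d_t}\cdot\frac{\sqrt{1-\beta_2^t}}{1-\beta_1^t}\cdot\frac{m_t}{\sqrt{v_t}+\epsilon}.
\end{aligned}
$$

Clipping the ratio f_t / f̂_{t-1} to [l, u] keeps r_t within [k, K], so d_t stays bounded and the effective step size clr / d_t cannot blow up on noisy objective values.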
1 change: 1 addition & 0 deletions init.lua
@@ -18,6 +18,7 @@ require('optim.rmsprop')
require('optim.adadelta')
require('optim.cmaes')
require('optim.de')
require('optim.eve')

-- line search functions
require('optim.lswolfe')
22 changes: 22 additions & 0 deletions test/test_eve.lua
@@ -0,0 +1,22 @@
require 'torch'
require 'optim'
require 'rosenbrock'
require 'l2'
x = torch.Tensor(2):fill(0)
fx = {}
config = {thl = 1e-1, thu = 10}
state = {}
for i = 1, 10001 do
   x, f = optim.eve(rosenbrock, x, config, state)
   if (i-1) % 1000 == 0 then
      table.insert(fx, f[1])
   end
end
print()
print('Rosenbrock test')
print()
print('x='); print(x)
print('fx=')
for i = 1, #fx do
   print((i-1)*1000+1, fx[i])
end
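
For a quick sense of how `optim.eve` plugs into a real training loop beyond the Rosenbrock test, here is a minimal sketch using a typical torch/nn setup; the model, data, and hyperparameters are illustrative placeholders, not part of this PR:

-- Minimal sketch (not part of this PR): optim.eve in a standard
-- torch/nn training loop. Model, data, and settings are illustrative.
require 'torch'
require 'nn'
require 'optim'

local model = nn.Sequential():add(nn.Linear(10, 1))
local criterion = nn.MSECriterion()
local params, gradParams = model:getParameters()

local config = {learningRate = 1e-3, beta3 = 0.999, thl = 0.1, thu = 10}
local state = {}

local input, target = torch.randn(10), torch.randn(1)

-- closure returning f(x) and df/dx, as expected by optim.* methods
local function feval(p)
   if p ~= params then params:copy(p) end
   gradParams:zero()
   local output = model:forward(input)
   local loss = criterion:forward(output, target)
   local dloss = criterion:backward(output, target)
   model:backward(input, dloss)
   return loss, gradParams
end

for i = 1, 100 do
   optim.eve(feval, params, config, state)
end

As with the other optim methods, `state` must be the same table across iterations: the feedback term `state.d` and the smoothed objective `state.fhat` are carried over from step to step.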