adapted dispersion fitting in numpy, added optional code for wolfe linesearch
davidsebfischer committed Dec 1, 2019
1 parent f50e79f commit e699985
Showing 1 changed file with 31 additions and 10 deletions.
41 changes: 31 additions & 10 deletions batchglm/train/numpy/base_glm/estimator.py
@@ -52,7 +52,7 @@ def train(
             update_b_freq: int = 5,
             ftol_b: float = 1e-8,
             lr_b: float = 1e-2,
-            max_iter_b: int = 100,
+            max_iter_b: int = 1000,
             nproc: int = 3,
             **kwargs
     ):
@@ -250,7 +250,7 @@ def iwls_step(
         :return: (inferred param x features)
         """
-        w = self.model.fim_weight_j(j=idx_update)  # (observations x features)
+        w = self.model.fim_weight_aa_j(j=idx_update)  # (observations x features)
         ybar = self.model.ybar_j(j=idx_update)  # (observations x features)
         # Translate to problem of form ax = b for each feature:
         # (in the following, X=design and Y=counts)
@@ -368,6 +368,7 @@ def _b_step_gd(
 
     def optim_handle(
             self,
+            b_j,
             data_j,
             eta_loc_j,
             xh_scale,
@@ -381,17 +382,33 @@ def optim_handle(
             data_j = np.expand_dims(data_j, axis=-1)
 
         ll = self.model.ll_handle()
+        lb, ub = self.model.param_bounds(dtype=data_j.dtype)
+        lb_bracket = np.max([lb["b_var"], b_j - 20])
+        ub_bracket = np.min([ub["b_var"], b_j + 20])
 
         def cost_b_var(x, data_jj, eta_loc_jj, xh_scale_jj):
-            x = np.array([[x]])
+            x = np.clip(np.array([[x]]), lb["b_var"], ub["b_var"])
             return - np.sum(ll(data_jj, eta_loc_jj, x, xh_scale_jj))
 
+        # jac_b = self.model.jac_b_handle()
+        # def cost_b_var_prime(x, data_jj, eta_loc_jj, xh_scale_jj):
+        #     x = np.clip(np.array([[x]]), lb["b_var"], ub["b_var"])
+        #     return - np.sum(jac_b(data_jj, eta_loc_jj, x, xh_scale_jj))
+        # return scipy.optimize.line_search(
+        #     f=cost_b_var,
+        #     myfprime=cost_b_var_prime,
+        #     args=(data_j, eta_loc_j, xh_scale),
+        #     maxiter=max_iter,
+        #     xk=b_j+5,
+        #     pk=-np.ones_like(b_j)
+        # )
+
         return scipy.optimize.brent(
             func=cost_b_var,
             args=(data_j, eta_loc_j, xh_scale),
             maxiter=max_iter,
             tol=ftol,
-            brack=(-5, 5),
+            brack=(lb_bracket, ub_bracket),
             full_output=True
         )
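Note on the bracketing change above: scipy.optimize.brent treats brack as a seed interval for its downhill bracket search, not as a hard constraint, which is why the cost function clips its argument to the parameter bounds and why the bracket is now centered on the current estimate b_j instead of the fixed (-5, 5). A minimal, self-contained sketch of this pattern (toy cost and bounds, not batchglm's API):

import numpy as np
import scipy.optimize

lb, ub = -30.0, 30.0  # hypothetical parameter bounds, standing in for lb["b_var"], ub["b_var"]
b_j = 0.5             # hypothetical current per-feature dispersion estimate

def cost(x):
    # brent may probe points outside the bracket, hence the clip, as in the diff
    x = np.clip(x, lb, ub)
    return (x - 2.0) ** 2  # stand-in for the negative log-likelihood

xmin, fval, n_iter, n_funcalls = scipy.optimize.brent(
    func=cost,
    brack=(max(lb, b_j - 20.0), min(ub, b_j + 20.0)),  # bracket centered on the current estimate
    tol=1e-8,
    maxiter=1000,
    full_output=True
)
print(xmin)  # ~2.0

The commented-out alternative uses scipy.optimize.line_search, which searches along direction pk from point xk for a step size satisfying the strong Wolfe conditions and therefore needs the analytic gradient (myfprime); it is left in as optional code rather than replacing the brent path.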

@@ -407,13 +424,13 @@ def _b_step_loop(
         :return:
         """
-        x0 = -10
         delta_theta = np.zeros_like(self.model.b_var)
         if isinstance(delta_theta, dask.array.core.Array):
             delta_theta = delta_theta.compute()
 
         xh_scale = np.matmul(self.model.design_scale, self.model.constraints_scale).compute()
-        if nproc > 1:
+        b_var = self.model.b_var.compute()
+        if nproc > 1 and len(idx_update) > nproc:
             sys.stdout.write('\rFitting %i dispersion models: (progress not available with multiprocessing)' % len(idx_update))
             sys.stdout.flush()
             with multiprocessing.Pool(processes=nproc) as pool:
@@ -422,6 +439,7 @@
                 results = pool.starmap(
                     self.optim_handle,
                     [(
+                        b_var[0, j],
                         x[:, [j]],
                         eta_loc[:, [j]],
                         xh_scale,
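Note on the parallel branch: each feature's dispersion fit is independent, so the loop fans out over a multiprocessing.Pool via starmap, which unpacks one argument tuple per feature; the new per-feature seed b_var[0, j] is simply packed into each tuple, and pooling is now only engaged when there are more features to fit than workers (len(idx_update) > nproc). A minimal sketch of the pattern (hypothetical fit_one, not batchglm code):

import multiprocessing

def fit_one(b_j, data_j):
    # placeholder for the per-feature brent fit sketched above
    return b_j + data_j

if __name__ == "__main__":
    # one (seed, data) tuple per feature, mirroring (b_var[0, j], x[:, [j]], ...)
    tasks = [(float(j), 0.1 * j) for j in range(8)]
    with multiprocessing.Pool(processes=3) as pool:
        results = pool.starmap(fit_one, tasks)
    print(results)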
@@ -452,12 +470,16 @@
                     data = data.todense()
 
                 ll = self.model.ll_handle()
+                lb, ub = self.model.param_bounds(dtype=data.dtype)
+                lb_bracket = np.max([lb["b_var"], b_var[0, j] - 20])
+                ub_bracket = np.min([ub["b_var"], b_var[0, j] + 20])
 
                 def cost_b_var(x, data_j, eta_loc_j, xh_scale_j):
+                    x = np.clip(np.array([[x]]), lb["b_var"], ub["b_var"])
                     return - np.sum(ll(
                         data_j,
                         eta_loc_j,
-                        np.array([[x]]),
+                        x,
                         xh_scale_j
                     ))
 
@@ -466,7 +488,7 @@ def cost_b_var(x, data_j, eta_loc_j, xh_scale_j):
                         args=(data, eta_loc, xh_scale),
                         maxiter=max_iter,
                         tol=ftol,
-                        brack=(-5, 5),
+                        brack=(lb_bracket, ub_bracket),
                         full_output=False
                     )
                 else:
@@ -489,8 +511,7 @@ def finalize(self):
         transfers relevant attributes.
         """
         # Read from numpy-IRLS estimator specific model:
-
-        self._hessian = self.model.hessian.compute()
+        self._hessian = - self.model.fim.compute()
         self._fisher_inv = np.linalg.inv(- self._hessian)
         self._jacobian = np.sum(np.abs(self.model.jac.compute() / self.model.x.shape[0]), axis=1)
         self._log_likelihood = self.model.ll_byfeature.compute()
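Background on the finalize change (a standard identity, not stated in the diff): for maximum-likelihood GLM fits, the Fisher information matrix is the negative expected Hessian of the log-likelihood,

\mathcal{I}(\theta) = -\,\mathbb{E}\left[\nabla_\theta^2 \ell(\theta)\right],

so storing - self.model.fim.compute() as the Hessian substitutes the expected Hessian for the observed one, and the downstream np.linalg.inv(- self._hessian) reduces to the inverse Fisher information, the usual asymptotic covariance of the parameter estimates.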