diff --git a/descent/optim/_lm.py b/descent/optim/_lm.py
index ed5c6f0..a51d8c1 100644
--- a/descent/optim/_lm.py
+++ b/descent/optim/_lm.py
@@ -529,7 +529,7 @@ def levenberg_marquardt(
     loss_history = []
     has_converged = False
 
-    best_x, best_loss = x.clone(), closure_prev[0]
+    best_x, best_loss = x.clone().detach(), closure_prev[0]
 
     for step in range(config.max_steps):
         loss_prev, gradient_prev, hessian_prev = closure_prev
@@ -537,6 +537,7 @@
         dx, expected_improvement, damping_adjusted, damping_factor = _step(
             gradient_prev, hessian_prev, trust_radius, config
         )
+        _LOGGER.info(f"The current step is {dx.detach().cpu().numpy()}")
 
         if config.mode.lower() == _HESSIAN_SEARCH:
             dx, expected_improvement = _hessian_diagonal_search(
@@ -553,6 +554,7 @@
         _LOGGER.info(f"{config.mode} step found (length {dx_norm:.4e})")
 
         x_next = correct_fn(x + dx).requires_grad_(x.requires_grad)
+        _LOGGER.info(f"The next set of parameters to try {x_next.detach().cpu().numpy()}")
 
         loss, gradient, hessian = closure_fn(x_next, True, True)
         loss_delta = loss - loss_prev
diff --git a/descent/targets/thermo.py b/descent/targets/thermo.py
index 3cae352..b009057 100644
--- a/descent/targets/thermo.py
+++ b/descent/targets/thermo.py
@@ -318,32 +318,25 @@ def _bulk_config(temperature: float, pressure: float) -> SimulationConfig:
     pressure = pressure * openmm.unit.atmosphere
 
     return SimulationConfig(
-        max_mols=256,
+        max_mols=1000,
         gen_coords=smee.mm.GenerateCoordsConfig(),
         equilibrate=[
             smee.mm.MinimizationConfig(),
-            # short NVT equilibration simulation
-            smee.mm.SimulationConfig(
-                temperature=temperature,
-                pressure=None,
-                n_steps=50000,
-                timestep=1.0 * openmm.unit.femtosecond,
-            ),
             # short NPT equilibration simulation
             smee.mm.SimulationConfig(
                 temperature=temperature,
                 pressure=pressure,
-                n_steps=50000,
-                timestep=1.0 * openmm.unit.femtosecond,
+                n_steps=100000,
+                timestep=2.0 * openmm.unit.femtosecond,
             ),
         ],
         production=smee.mm.SimulationConfig(
             temperature=temperature,
             pressure=pressure,
-            n_steps=500000,
+            n_steps=1000000,
             timestep=2.0 * openmm.unit.femtosecond,
         ),
-        production_frequency=1000,
+        production_frequency=2000,
     )
@@ -758,23 +751,41 @@ def closure_fn(
         compute_hessian: bool,
     ):
         force_field = trainable.to_force_field(x)
-
-        y_ref, _, y_pred, _ = descent.targets.thermo.predict(
-            dataset,
-            force_field,
-            topologies,
-            pathlib.Path.cwd(),
-            None,
-            per_type_scales,
-            verbose,
-        )
-        loss, gradient, hessian = ((y_pred - y_ref) ** 2).sum(), None, None
-
-        if compute_hessian:
-            hessian = descent.utils.loss.approximate_hessian(x, y_pred)
-        if compute_gradient:
-            gradient = torch.autograd.grad(loss, x, retain_graph=True)[0].detach()
-
-        return loss.detach(), gradient, hessian
+        total_loss, grad, hess = torch.zeros(size=(1,), device=x.device.type), None, None
+        for i in range(len(dataset)):
+            y_ref, _, y_pred, _ = descent.targets.thermo.predict(
+                dataset.select(indices=[i]),
+                force_field,
+                topologies,
+                pathlib.Path.cwd(),
+                None,
+                per_type_scales,
+                verbose,
+            )
+            loss = (y_pred - y_ref) ** 2
+
+            if compute_hessian:
+                hessian = descent.utils.loss.approximate_hessian(x, y_pred).detach()
+                if hess is None:
+                    hess = hessian
+                else:
+                    hess += hessian
+            if compute_gradient:
+                gradient = torch.autograd.grad(loss, x, retain_graph=True)[0].detach()
+                if grad is None:
+                    grad = gradient
+                else:
+                    grad += gradient
+
+            total_loss += loss.detach()
+            # clear the graph
+            torch.cuda.empty_cache()
+
+        # if compute_gradient:
+        #     grad = sum(grad[1:], grad[0]).detach()
+        # if compute_hessian:
+        #     hess = sum(hess[1:], hess[0]).detach()
+        # total_loss = sum(total_loss[1:], total_loss[0]).detach()
+        return total_loss, grad, hess
 
     return closure_fn
diff --git a/descent/utils/loss.py b/descent/utils/loss.py
index 5b019ab..d4f8f33 100644
--- a/descent/utils/loss.py
+++ b/descent/utils/loss.py
@@ -104,7 +104,7 @@ def combined_closure_fn(
 
             if verbose:
                 verbose_rows.append(
-                    {"target": name, "loss": float(f"{local_loss:.5f}")}
+                    {"target": name, "loss": float(f"{local_loss.item():.5f}")}
                 )
 
         loss = sum(loss[1:], loss[0])
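
The reworked `closure_fn` in `descent/targets/thermo.py` trades the single batched `predict` call for a per-datapoint loop, so that each datapoint's autograd graph can be dropped (and the CUDA cache released) before the next prediction starts. Below is a minimal, self-contained sketch of that accumulation pattern; `toy_dataset` and `predict_one` are illustrative stand-ins for the real thermo dataset and `descent.targets.thermo.predict`, not part of descent.

```python
import torch

# Illustrative stand-ins; the real code calls descent.targets.thermo.predict
# on dataset.select(indices=[i]) instead.
toy_dataset = [torch.tensor([1.0]), torch.tensor([2.0]), torch.tensor([3.0])]


def predict_one(entry: torch.Tensor, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
    """Return (reference value, predicted value) for a single datapoint."""
    return entry, x * entry


def closure_fn(x: torch.Tensor, compute_gradient: bool = True):
    total_loss, grad = torch.zeros(1, device=x.device), None

    for entry in toy_dataset:
        y_ref, y_pred = predict_one(entry, x)
        loss = ((y_pred - y_ref) ** 2).sum()

        if compute_gradient:
            # differentiate this datapoint's loss only, then detach so the
            # accumulated gradient no longer references the graph
            g = torch.autograd.grad(loss, x, retain_graph=True)[0].detach()
            grad = g if grad is None else grad + g

        # accumulate a detached value so the running loss does not keep the
        # graph alive either
        total_loss += loss.detach()
        if torch.cuda.is_available():
            # release cached blocks belonging to the now-unreferenced graph
            torch.cuda.empty_cache()

    return total_loss, grad


x = torch.tensor([0.5], requires_grad=True)
print(closure_fn(x))  # loss summed over datapoints, gradient accumulated per datapoint
```

Keeping `retain_graph=True` mirrors the patch, where the same graph is also consumed by `descent.utils.loss.approximate_hessian`; in this reduced sketch it is not strictly required.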
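The `descent/utils/loss.py` change avoids passing a tensor straight into an f-string format spec. Assuming `local_loss` arrives as a one-element tensor rather than a true 0-dim scalar, formatting it directly raises `TypeError: unsupported format string passed to Tensor.__format__`; extracting the value with `.item()` sidesteps that. A small illustration:

```python
import torch

local_loss = torch.tensor([0.123456])  # one-element tensor, as a summed loss can be

# f"{local_loss:.5f}" would raise TypeError for a non-scalar tensor;
# .item() converts it to a plain Python float that formats cleanly.
print(float(f"{local_loss.item():.5f}"))  # 0.12346
```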