diff --git a/arc/family/arc_families_test.py b/arc/family/arc_families_test.py new file mode 100644 index 0000000000..662e30d26e --- /dev/null +++ b/arc/family/arc_families_test.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +# encoding: utf-8 + +""" +This module contains unit tests for the kinetic families defined under arc.data.families. +""" + +import unittest +import os + +from arc.family.family import ReactionFamily, get_reaction_family_products, get_recipe_actions +from arc.imports import settings +from arc.reaction.reaction import ARCReaction +from arc.species.species import ARCSpecies + +ARC_FAMILIES_PATH = settings['ARC_FAMILIES_PATH'] + + +class TestCarbonylBasedHydrolysisReactionFamily(unittest.TestCase): + """ + Contains unit tests for the carbonyl-based hydrolysis reaction family. + """ + + @classmethod + def setUpClass(cls): + """Set up the test by defining the carbonyl-based hydrolysis reaction family.""" + cls.family = ReactionFamily('carbonyl_based_hydrolysis') + + def test_carbonyl_based_hydrolysis_reaction(self): + """Test if carbonyl_based hydrolysis products are correctly generated.""" + carbonyl = ARCSpecies(label='carbonyl', smiles='CC(=O)OC') + water = ARCSpecies(label='H2O', smiles='O') + acid = ARCSpecies(label='acid', smiles='CC(=O)O') + alcohol = ARCSpecies(label='alcohol', smiles='CO') + rxn = ARCReaction(r_species=[carbonyl, water], p_species=[acid, alcohol]) + products = get_reaction_family_products(rxn) + product_smiles = [p.to_smiles() for p in products[0]['products']] + expected_product_smiles = ['CC(=O)O', 'CO'] + self.assertEqual(product_smiles, expected_product_smiles) + + def test_recipe_actions(self): + """Test if the reaction recipe is applied correctly.""" + groups_file_path = os.path.join(ARC_FAMILIES_PATH, 'carbonyl_based_hydrolysis.py') + with open(groups_file_path, 'r') as f: + groups_as_lines = f.readlines() + actions = get_recipe_actions(groups_as_lines) + expected_actions = [ + ['BREAK_BOND', '*1', 1, '*2'], + ['BREAK_BOND', '*3', 1, '*4'], + ['FORM_BOND', '*1', 1, '*4'], + ['FORM_BOND', '*2', 1, '*3'], + ] + self.assertEqual(actions, expected_actions) + + def test_carbonyl_based_hydrolysis_withP(self): + """Test if carbonyl-based hydrolysis products are correctly generated.""" + carbonyl= ARCSpecies(label='carbonyl', smiles='CP(=O)(OC)O') + water = ARCSpecies(label='H2O', smiles='O') + acid = ARCSpecies(label='acid', smiles='CP(=O)(O)O') + alcohol = ARCSpecies(label='alcohol', smiles='CO') + rxn = ARCReaction(r_species=[carbonyl, water], p_species=[acid, alcohol]) + products = get_reaction_family_products(rxn) + product_smiles = [p.to_smiles() for p in products[0]['products']] + expected_product_smiles = ['CP(=O)(O)O', 'CO'] + self.assertEqual(product_smiles, expected_product_smiles) + + +class TestNitrileHydrolysisReactionFamily(unittest.TestCase): + """ + Contains unit tests for the nitrile hydrolysis reaction family. + """ + + @classmethod + def setUpClass(cls): + """Set up the test by defining the nitrile hydrolysis reaction family.""" + cls.family = ReactionFamily('nitrile_hydrolysis') + + def test_nitrile_hydrolysis_reaction(self): + """Test if nitrile hydrolysis products are correctly generated.""" + nitrile = ARCSpecies(label='nitrile', smiles='CC#N') + water = ARCSpecies(label='H2O', smiles='O') + acid = ARCSpecies(label='acid', smiles='CC(=N)O') + rxn = ARCReaction(r_species=[nitrile, water], p_species=[acid]) + products = get_reaction_family_products(rxn) + product_smiles = [p.to_smiles() for p in products[0]['products']] + expected_product_smiles = ['CC(=N)O'] + self.assertEqual(product_smiles, expected_product_smiles) + + def test_recipe_actions(self): + """Test if the reaction recipe is applied correctly for nitrile hydrolysis.""" + groups_file_path = os.path.join(ARC_FAMILIES_PATH, 'nitrile_hydrolysis.py') + with open(groups_file_path, 'r') as f: + groups_as_lines = f.readlines() + actions = get_recipe_actions(groups_as_lines) + expected_actions =[ + ['CHANGE_BOND', '*1', -1, '*2'], + ['BREAK_BOND', '*3', 1, '*4'], + ['FORM_BOND', '*1', 1, '*4'], + ['FORM_BOND', '*2', 1, '*3'], + ] + self.assertEqual(actions, expected_actions) + + +class TestEtherHydrolysisReactionFamily(unittest.TestCase): + """ + Contains unit tests for the ether hydrolysis reaction family. + """ + + @classmethod + def setUpClass(cls): + """Set up the test by defining the ether hydrolysis reaction family.""" + cls.family = ReactionFamily('ether_hydrolysis') + + def test_ether_hydrolysis_reaction(self): + """Test if ether hydrolysis products are correctly generated.""" + ether = ARCSpecies(label='ether', smiles='CCOC') + water = ARCSpecies(label='H2O', smiles='O') + alcohol1 = ARCSpecies(label='alcohol1', smiles='CCO') + alcohol2 = ARCSpecies(label='alcohol2', smiles='CO') + rxn = ARCReaction(r_species=[ether, water], p_species=[alcohol1, alcohol2]) + products = get_reaction_family_products(rxn) + product_smiles = [p.to_smiles() for p in products[0]['products']] + expected_product_smiles = ['CCO', 'CO'] + self.assertEqual(product_smiles, expected_product_smiles) + + def test_recipe_actions(self): + """Test if the reaction recipe is applied correctly.""" + groups_file_path = os.path.join(ARC_FAMILIES_PATH, 'ether_hydrolysis.py') + with open(groups_file_path, 'r') as f: + groups_as_lines = f.readlines() + actions = get_recipe_actions(groups_as_lines) + expected_actions = [ + ['BREAK_BOND', '*1', 1, '*2'], + ['BREAK_BOND', '*3', 1, '*4'], + ['FORM_BOND', '*1', 1, '*4'], + ['FORM_BOND', '*2', 1, '*3'], + ] + self.assertEqual(actions, expected_actions) + + +if __name__ == '__main__': + unittest.main() diff --git a/arc/family/family.py b/arc/family/family.py index f48d5af9d7..1b9b1edd46 100644 --- a/arc/family/family.py +++ b/arc/family/family.py @@ -588,11 +588,12 @@ def get_all_families(rmg_family_set: Union[List[str], str] = 'default', rmg_families.extend(list(families)) else: rmg_families = list(family_sets[rmg_family_set]) \ - if isinstance(rmg_family_set, str) and rmg_family_set in family_sets else rmg_family_set + if isinstance(rmg_family_set, str) and rmg_family_set in family_sets else [rmg_family_set] if consider_arc_families: - arc_families = [os.path.splitext(family)[0] for family in os.listdir(ARC_FAMILIES_PATH)] - rmg_families = [rmg_families] if isinstance(rmg_families, str) else rmg_families - arc_families = [arc_families] if isinstance(arc_families, str) else arc_families + for family in os.listdir(ARC_FAMILIES_PATH): + if family.startswith('.') or family.startswith('_'): + continue + arc_families.append(os.path.splitext(family)[0]) return rmg_families + arc_families if rmg_families is not None else arc_families @@ -862,3 +863,18 @@ def isomorphic_products(rxn: 'ARCReaction', """ p_species = rxn.get_reactants_and_products(return_copies=True)[1] return check_product_isomorphism(products, p_species) + +def check_family_name(family: str + ) -> bool: + """ + Check whether the family name is defined. + + Args: + family (str): The family name. + + Returns: + bool: Whether the family is defined. + """ + if not isinstance(family, str) and family is not None: + raise TypeError("Family name must be a string or None.") + return family in get_all_families() or family is None diff --git a/arc/family/family_test.py b/arc/family/family_test.py index 92dcc36d78..c0cf838174 100644 --- a/arc/family/family_test.py +++ b/arc/family/family_test.py @@ -28,6 +28,7 @@ get_rmg_recommended_family_sets, is_own_reverse, is_reversible, + check_family_name ) from arc.molecule import Group, Molecule from arc.molecule.resonance import generate_resonance_structures_safely @@ -701,7 +702,9 @@ def test_get_all_families(self): self.assertIn('intra_OH_migration', families) families = get_all_families(consider_rmg_families=False) self.assertIsInstance(families, list) - self.assertIn('hydrolysis', families) + self.assertIn('carbonyl_based_hydrolysis', families) + self.assertIn('ether_hydrolysis', families) + self.assertIn('nitrile_hydrolysis', families) families = get_all_families(rmg_family_set=['H_Abstraction']) self.assertEqual(families, ['H_Abstraction']) @@ -1059,6 +1062,16 @@ def test_get_isomorphic_subgraph(self): ) self.assertEqual(isomorphic_subgraph, {0: '*3', 4: '*1', 7: '*2'}) + def test_check_family_name(self): + """Test check family name function""" + self.assertTrue(check_family_name('H_Abstraction')) + self.assertTrue(check_family_name('ether_hydrolysis')) + self.assertFalse(check_family_name('etherhydrolysis')) + self.assertFalse(check_family_name('amine_hydrolysis')) + self.assertTrue(check_family_name(None)) + with self.assertRaises(TypeError): + check_family_name(123) + if __name__ == '__main__': unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) diff --git a/arc/job/adapters/common.py b/arc/job/adapters/common.py index a351d4c01d..8fb331522c 100644 --- a/arc/job/adapters/common.py +++ b/arc/job/adapters/common.py @@ -26,7 +26,6 @@ default_job_settings, global_ess_settings, rotor_scan_resolution = \ settings['default_job_settings'], settings['global_ess_settings'], settings['rotor_scan_resolution'] - ts_adapters_by_rmg_family = {'1+2_Cycloaddition': ['kinbot'], '1,2_Insertion_CO': ['kinbot'], '1,2_Insertion_carbene': ['kinbot'], @@ -43,6 +42,9 @@ 'Cyclopentadiene_scission': ['gcn', 'xtb_gsm'], 'Diels_alder_addition': ['kinbot'], 'H_Abstraction': ['heuristics', 'autotst'], + 'carbonyl_based_hydrolysis': ['heuristics'], + 'ether_hydrolysis': ['heuristics'], + 'nitrile_hydrolysis': ['heuristics'], 'HO2_Elimination_from_PeroxyRadical': ['kinbot'], 'Intra_2+2_cycloaddition_Cd': ['gcn', 'xtb_gsm'], 'Intra_5_membered_conjugated_C=C_C=C_addition': ['gcn', 'xtb_gsm'], @@ -117,7 +119,7 @@ def _initialize_adapter(obj: 'JobAdapter', times_rerun: int = 0, torsions: Optional[List[List[int]]] = None, tsg: Optional[int] = None, - xyz: Optional[Union[dict,List[dict]]] = None, + xyz: Optional[Union[dict, List[dict]]] = None, ): """ A common Job adapter initializer function. @@ -161,7 +163,7 @@ def _initialize_adapter(obj: 'JobAdapter', obj.job_num = job_num obj.job_server_name = job_server_name obj.job_status = job_status \ - or ['initializing', {'status': 'initializing', 'keywords': list(), 'error': '', 'line': ''}] + or ['initializing', {'status': 'initializing', 'keywords': list(), 'error': '', 'line': ''}] obj.job_type = job_type if isinstance(job_type, str) else job_type[0] # always a string obj.job_types = job_type if isinstance(job_type, list) else [job_type] # always a list # When restarting ARC and re-setting the jobs, ``level`` is a string, convert it to a Level object instance @@ -211,7 +213,7 @@ def _initialize_adapter(obj: 'JobAdapter', obj.is_ts = obj.species[0].is_ts obj.species_label = list() for spc in obj.species: - obj.charge.append(spc.charge) + obj.charge.append(spc.charge) obj.multiplicity.append(spc.multiplicity) obj.species_label.append(spc.label) elif obj.reactions is not None: @@ -286,9 +288,9 @@ def is_species_restricted(obj: 'JobAdapter', bool: Whether to run as restricted (``True``) or not (``False``). """ - if obj.level.method_type in ['force_field','composite','semiempirical']: + if obj.level.method_type in ['force_field', 'composite', 'semiempirical']: return True - + multiplicity = obj.multiplicity if species is None else species.multiplicity number_of_radicals = obj.species[0].number_of_radicals if species is None else species.number_of_radicals species_label = obj.species[0].label if species is None else species.label @@ -322,7 +324,8 @@ def check_argument_consistency(obj: 'JobAdapter'): raise NotImplementedError(f'The {obj.job_adapter} job adapter does not support ESS scans.') if obj.job_type == 'scan' and divmod(360, obj.scan_res)[1]: raise ValueError(f'Got an illegal rotor scan resolution of {obj.scan_res}.') - if obj.job_type == 'scan' and ((not obj.species[0].rotors_dict or obj.rotor_index is None) and obj.torsions is None): + if obj.job_type == 'scan' and ( + (not obj.species[0].rotors_dict or obj.rotor_index is None) and obj.torsions is None): # If this is a scan job type and species.rotors_dict is empty (e.g., via pipe), then torsions must be set up. raise ValueError('Either torsions or a species rotors_dict along with a rotor_index argument ' 'must be specified for an ESS scan job.') @@ -406,7 +409,7 @@ def update_input_dict_with_args(args: dict, else: if 'keywords' not in input_dict.keys(): input_dict['keywords'] = '' - # Check if input_dict['keywords'] already contains a value + # Check if input_dict['keywords'] already contains a value if input_dict['keywords']: input_dict['keywords'] += f' {value}' else: @@ -444,6 +447,7 @@ def update_input_dict_with_args(args: dict, return input_dict + def input_dict_strip(input_dict: dict) -> dict: """ Strip all values in the input dict of leading and trailing whitespace. @@ -536,6 +540,7 @@ def which(command: Union[str, list], else: return ans + def combine_parameters(input_dict: dict, terms: list) -> Tuple[dict, List]: """ Extract and combine specific parameters from a dictionary's string values based on a list of terms. diff --git a/arc/job/adapters/ts/heuristics.py b/arc/job/adapters/ts/heuristics.py index aa281542ae..9031aa9ec3 100644 --- a/arc/job/adapters/ts/heuristics.py +++ b/arc/job/adapters/ts/heuristics.py @@ -15,28 +15,39 @@ Can/should we try both? """ +import copy import datetime import itertools +import os from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -from arc.common import almost_equal_coords, get_logger, is_angle_linear, is_xyz_linear, key_by_val +from arc.common import (ARC_PATH, almost_equal_coords, get_angle_in_180_range, get_logger, is_angle_linear, + is_xyz_linear, key_by_val, read_yaml_file) +from arc.family import get_reaction_family_products from arc.job.adapter import JobAdapter from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family from arc.job.factory import register_job_adapter from arc.plotter import save_geo -from arc.species.converter import compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz +from arc.species.converter import (compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz, + add_atom_to_xyz_using_internal_coords, sorted_distances_of_atom) from arc.mapping.engine import map_two_species from arc.molecule.molecule import Molecule from arc.species.species import ARCSpecies, TSGuess, SpeciesError, colliding_atoms -from arc.species.zmat import get_parameter_from_atom_indices, remove_zmat_atom_0, up_param +from arc.species.zmat import get_parameter_from_atom_indices, remove_zmat_atom_0, up_param, xyz_to_zmat +from arc.species.vectors import calculate_angle if TYPE_CHECKING: from arc.level import Level from arc.reaction import ARCReaction +FAMILY_SETS = {'hydrolysis_set_1': ['carbonyl_based_hydrolysis', 'ether_hydrolysis'], + 'hydrolysis_set_2': ['nitrile_hydrolysis']} + DIHEDRAL_INCREMENT = 30 +ELECTRONEGATIVITIES = read_yaml_file(os.path.join(ARC_PATH, 'data', 'electronegativity.yml')) + logger = get_logger() @@ -248,13 +259,25 @@ def execute_incore(self): ) xyzs = list() - tsg = None + tsg, families = None, None if rxn.family == 'H_Abstraction': tsg = TSGuess(method='Heuristics') tsg.tic() xyzs = h_abstraction(reaction=rxn, dihedral_increment=self.dihedral_increment) tsg.tok() + if rxn.family in FAMILY_SETS['hydrolysis_set_1'] or rxn.family in FAMILY_SETS['hydrolysis_set_2']: + try: + tsg = TSGuess(method='Heuristics') + tsg.tic() + xyzs, families, indices = hydrolysis(reaction=rxn) + tsg.tok() + if not xyzs: + logger.warning(f'Heuristics TS search failed to generate any valid TS guesses for {rxn.label}.') + continue + except ValueError: + continue + for method_index, xyz in enumerate(xyzs): unique = True for other_tsg in rxn.ts_species.ts_guesses: @@ -270,7 +293,7 @@ def execute_incore(self): t0=tsg.t0, execution_time=tsg.execution_time, success=True, - family=rxn.family, + family=rxn.family if families is None else families[method_index], xyz=xyz, ) rxn.ts_species.ts_guesses.append(ts_guess) @@ -933,4 +956,732 @@ def h_abstraction(reaction: 'ARCReaction', return xyz_guesses +def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[int]]: + """ + Generate TS guesses for reactions of the ARC "hydrolysis" families. + + Args: + reaction (ARCReaction): The reaction to process. + + Returns: + Tuple containing: + - List[dict]: Cartesian coordinates of TS guesses. + - List[dict]: Reaction families of the TS guesses. + - List[int]: Indices of the generated TS guesses. + """ + xyz_guesses_total, zmats_total, reaction_families, guesses_indices = [], [], [], [] + product_dicts, carbonyl_based_and_ether_families = get_products_and_check_families(reaction) + hydrolysis_parameters = load_hydrolysis_parameters() + dihedrals_to_change_num, max_dihedrals_found = 0, 0 + if reaction._family is not None: + product_dicts = [pd for pd in product_dicts if pd.get("family") == reaction._family] + if not product_dicts: + raise ValueError(f"Specified target family '{reaction._family}' not found in reaction products.") + condition_met = False + while not xyz_guesses_total or not condition_met: + if dihedrals_to_change_num >= max_dihedrals_found > 0: + break + dihedrals_to_change_num += 1 + for product_dict in product_dicts: + reaction_family = product_dict["family"] + is_set_1 = reaction_family in hydrolysis_parameters["family_sets"]["set_1"] + is_set_2 = reaction_family in hydrolysis_parameters["family_sets"]["set_2"] + + main_reactant, water, initial_xyz, xyz_indices = extract_reactant_and_indices(reaction, + product_dict, + is_set_1) + base_xyz_indices = { + "a": xyz_indices["a"], + "b": xyz_indices["b"], + "e": xyz_indices["e"], + "o": xyz_indices["o"], + "h1": xyz_indices["h1"], + } + adjustments_to_try = [False, True] if dihedrals_to_change_num == 1 else [True] + for adjust_dihedral in adjustments_to_try: + chosen_xyz_indices, xyz_guesses, zmats_total, n_dihedrals_found = process_chosen_d_indices(initial_xyz, base_xyz_indices, xyz_indices, + hydrolysis_parameters,reaction_family, water, zmats_total, is_set_1, is_set_2, + dihedrals_to_change_num, should_adjust_dihedral=adjust_dihedral) + max_dihedrals_found = max(max_dihedrals_found, n_dihedrals_found) + if xyz_guesses: + xyz_guesses_total.extend(xyz_guesses) + reaction_families.extend([reaction_family] * len(xyz_guesses)) + guesses_indices.extend([list(chosen_xyz_indices.values()) for _ in range(len(xyz_guesses))]) + if reaction._family is not None: + condition_met = any(fam == reaction._family for fam in reaction_families) + elif carbonyl_based_and_ether_families: + condition_met = has_carbonyl_based_hydrolysis(reaction_families) + else: + condition_met = len(xyz_guesses_total) > 0 + + nitrile_in_inputs = any( + (pd.get("family") == "nitrile_hydrolysis") or + (isinstance(pd.get("family"), list) and "nitrile_hydrolysis" in pd.get("family")) + for pd in product_dicts + ) + nitrile_already_found = any(fam == "nitrile_hydrolysis" for fam in reaction_families) + + if nitrile_in_inputs and not nitrile_already_found: + dihedrals_to_change_num, max_dihedrals_found = 0, 0 + for product_dict in product_dicts: + _fam = product_dict["family"] + reaction_family = _fam[0] if isinstance(_fam, list) else _fam + if reaction_family != "nitrile_hydrolysis": + continue + + is_set_1 = reaction_family in hydrolysis_parameters["family_sets"]["set_1"] + is_set_2 = reaction_family in hydrolysis_parameters["family_sets"]["set_2"] + + main_reactant, water, initial_xyz, xyz_indices = extract_reactant_and_indices(reaction, + product_dict, + is_set_1) + base_xyz_indices = { + "a": xyz_indices["a"], + "b": xyz_indices["b"], + "e": xyz_indices["e"], + "o": xyz_indices["o"], + "h1": xyz_indices["h1"], + } + + while True: + if dihedrals_to_change_num >= max_dihedrals_found > 0: + break + dihedrals_to_change_num += 1 + chosen_xyz_indices, xyz_guesses, zmats_total, n_dihedrals_found = process_chosen_d_indices( + initial_xyz, base_xyz_indices, xyz_indices, + hydrolysis_parameters, reaction_family, water, zmats_total, is_set_1, is_set_2, + dihedrals_to_change_num, should_adjust_dihedral=True, + allow_nitrile_dihedrals=True + ) + max_dihedrals_found = max(max_dihedrals_found, n_dihedrals_found) + + if xyz_guesses: + xyz_guesses_total.extend(xyz_guesses) + reaction_families.extend([reaction_family] * len(xyz_guesses)) + guesses_indices.extend([list(chosen_xyz_indices.values()) for _ in range(len(xyz_guesses))]) + break + + return xyz_guesses_total, reaction_families, guesses_indices + + +def get_products_and_check_families(reaction: 'ARCReaction') -> Tuple[List[dict], bool]: + """ + Get all reaction products and determine if both carbonyl-based and ether hydrolysis families are present. + + Args: + reaction: An ARCReaction instance. + + Returns: + Tuple containing: + - List[dict]: Product dictionaries with reaction family information + - bool: True if both carbonyl-based and ether hydrolysis families are present + """ + product_dicts = get_reaction_family_products( + rxn=reaction, + rmg_family_set="default", + consider_rmg_families=False, + consider_arc_families=True, + ) + carbonyl_based_present = any( + "carbonyl_based_hydrolysis" in (d.get("family", []) if isinstance(d.get("family"), list) else [d.get("family")]) + for d in product_dicts + ) + ether_present = any( + "ether_hydrolysis" in (d.get("family", []) if isinstance(d.get("family"), list) else [d.get("family")]) + for d in product_dicts + ) + + return product_dicts, (carbonyl_based_present and ether_present) + + +def load_hydrolysis_parameters() -> dict: + """ + Load parameters for hydrolysis reactions from the YAML configuration file. + + Returns: + dict: Hydrolysis parameters loaded from the configuration file containing + family sets and specific parameters for different reaction types. + """ + return read_yaml_file(os.path.join(ARC_PATH, "data", "hydrolysis_families_parameters.yml")) + + +def has_carbonyl_based_hydrolysis(reaction_families: List[dict]) -> bool: + """ + Check if carbonyl-based hydrolysis is present in the generated transition state guesses. + + Args: + reaction_families: Reaction families of the generated TS guesses. + + Returns: + bool: True if carbonyl-based hydrolysis is present in any of the transition state guesses. + """ + return any(family == "carbonyl_based_hydrolysis" for family in reaction_families) + + +def extract_reactant_and_indices(reaction: 'ARCReaction', + product_dict: dict, + is_set_1: bool) -> Tuple[ARCSpecies, ARCSpecies, dict, dict]: + """ + Extract the reactant molecules and relevant atomic indices (a,b,e,d,o,h1) for the hydrolysis reaction. + + Atom scheme for the TS structure of the hydrolysis reaction: + + e + | + d — a — — b + | | + | | + o — — h1 + | + h2 + Description: + - a: Electrophilic center (receives nucleophilic attack from o). + - b: Leaving group atom (accepts proton h1). + - e, d: Atoms bonded to a (used to define geometry). + - o: Oxygen from water + - h1: The reactive hydrogen from water + - h2: The other hydrogen from water (not reactive). + + Args: + reaction: An ARCReaction instance. + product_dict: Dictionary containing reaction product information and atom mappings. + is_set_1: Whether the reaction is in the first set of hydrolysis families. + + Returns: + Tuple containing: + - main_reactant(ARCSpecies): The main reactant molecule + - water(ARCSpecies): The water molecule + - initial_xyz(dict): Initial XYZ coordinates of the main reactant + - xyz_indices(dict): Dictionary mapping a, b, e, d , o, h1 atoms to their indices in the xyz dictionary + """ + main_reactant, water = get_main_reactant_and_water_from_hydrolysis_reaction(reaction) + a_xyz_index = product_dict["r_label_map"]["*1"] + b_xyz_index = product_dict["r_label_map"]["*2"] + two_neighbors = is_set_1 + try: + e_xyz_index, d_xyz_indices = get_neighbors_by_electronegativity( + main_reactant, + a_xyz_index, + b_xyz_index, + two_neighbors + ) + except ValueError as e: + raise ValueError(f"Failed to determine neighbors by electronegativity for atom {a_xyz_index} " + f"in species {main_reactant.label}: {e}") + o_index = len(main_reactant.mol.atoms) + h1_index = o_index + 1 + + initial_xyz = main_reactant.get_xyz() + xyz_indices = { + "a": a_xyz_index, + "b": b_xyz_index, + "e": e_xyz_index, + "d": d_xyz_indices, + "o": o_index, + "h1": h1_index + } + + return main_reactant, water, initial_xyz, xyz_indices + + +def process_chosen_d_indices(initial_xyz: dict, + base_xyz_indices: dict, + xyz_indices: dict, + hydrolysis_parameters: dict, + reaction_family: str, + water: 'ARCSpecies', + zmats_total: List[dict], + is_set_1: bool, + is_set_2: bool, + dihedrals_to_change_num: int, + should_adjust_dihedral: bool, + allow_nitrile_dihedrals: bool = False + ) -> Tuple[Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]], int]: + """ + Iterates over the 'd' indices to process TS guess generation. + + Args: + initial_xyz (dict): Initial Cartesian coordinates. + base_xyz_indices (Dict[str, int]): Base indices for TS generation. + xyz_indices (Dict[str, List[int]]): All relevant indices including 'd'. + hydrolysis_parameters (Dict[str, Any]): Hydrolysis-specific parameters. + reaction_family (str): The reaction family. + water ('ARCSpecies'): Water molecule info. + zmats_total (List[Dict[str, Any]]): List to accumulate Z-matrices. + is_set_1 (bool): Flag indicating if reaction_family is in set 1. + is_set_2 (bool): Flag indicating if reaction_family is in set 2. + dihedrals_to_change_num (int): The current iteration for adjusting dihedrals. + should_adjust_dihedral (bool): Whether to adjust dihedral angles. + allow_nitrile_dihedrals (bool, optional): Force-enable dihedral adjustments for nitriles. Defaults to False. + + + Returns: + Tuple[Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]]]: + - Chosen indices for TS generation. + - List of generated transition state (TS) guesses. + - Updated list of Z-matrices. + - Integer indicating maximum number of dihedrals found. + """ + max_dihedrals_found = 0 + for d_index in xyz_indices.get("d", []) or [None]: + chosen_xyz_indices = {**base_xyz_indices, "d": d_index} if d_index is not None else {**base_xyz_indices, + "d": None} + current_zmat, zmat_indices = setup_zmat_indices(initial_xyz, chosen_xyz_indices) + matches = get_matching_dihedrals(current_zmat, zmat_indices['a'], zmat_indices['b'], + zmat_indices['e'], zmat_indices['d']) + max_dihedrals_found = max(max_dihedrals_found, len(matches)) + if should_adjust_dihedral and dihedrals_to_change_num > len(matches): + continue + + stretch_ab_bond(current_zmat, chosen_xyz_indices, zmat_indices, hydrolysis_parameters, reaction_family) + zmats_to_process = [current_zmat] + attempted_dihedral_adjustments = False + + if should_adjust_dihedral and (reaction_family != 'nitrile_hydrolysis' or allow_nitrile_dihedrals): + attempted_dihedral_adjustments = True + adjustment_factors = [15, 25, 35, 45, 55] + indices_list = matches[:dihedrals_to_change_num] + adjusted_zmats = [] + for indices in indices_list: + zmat_variants = generate_dihedral_variants(current_zmat, indices, adjustment_factors) + if zmat_variants: + adjusted_zmats.extend(zmat_variants) + if not adjusted_zmats: + pass + else: + zmats_to_process = adjusted_zmats + + ts_guesses_list = [] + for zmat_to_process in zmats_to_process: + ts_guesses, updated_zmats = process_family_specific_adjustments( + is_set_1, is_set_2, reaction_family, hydrolysis_parameters, + zmat_to_process, water, chosen_xyz_indices, zmats_total) + zmats_total = updated_zmats + ts_guesses_list.extend(ts_guesses) + + if attempted_dihedral_adjustments and not ts_guesses_list and ( + reaction_family != 'nitrile_hydrolysis' or allow_nitrile_dihedrals): + flipped_zmats= [] + adjustment_factors = [15, 25, 35, 45, 55] + for indices in indices_list: + flipped_variants = generate_dihedral_variants(current_zmat, indices, adjustment_factors, flip=True) + flipped_zmats.extend(flipped_variants) + + for zmat_to_process in flipped_zmats: + ts_guesses, updated_zmats = process_family_specific_adjustments( + is_set_1, is_set_2, reaction_family, hydrolysis_parameters, + zmat_to_process, water, chosen_xyz_indices, zmats_total + ) + zmats_total = updated_zmats + ts_guesses_list.extend(ts_guesses) + + if ts_guesses_list: + return chosen_xyz_indices, ts_guesses_list, zmats_total, max_dihedrals_found + + return {}, [], zmats_total, max_dihedrals_found + + +def get_main_reactant_and_water_from_hydrolysis_reaction(reaction: 'ARCReaction') -> Tuple['ARCSpecies', 'ARCSpecies']: + """ + Get main reactant and water species from a given hydrolysis reaction family. + + Args: + reaction: An ARCReaction instance. + + Returns: + Tuple containing: + - ARCSpecies: The main reactant + - ARCSpecies: The water molecule + + Raises: + ValueError: If reactants don't include both water and a non-water molecule. + """ + arc_reactants, _ = reaction.get_reactants_and_products() + arc_reactant, water = None, None + + for spc in arc_reactants: + if spc.is_water(): + water = spc + else: + arc_reactant = spc + + if arc_reactant is None or water is None: + raise ValueError("Reactants must include a non-water molecule and water.") + + return arc_reactant, water + + +def get_neighbors_by_electronegativity(spc: 'ARCSpecies', + atom_index: int, + exclude_index: int, + two_neighbors: bool = True) -> Tuple[int, List[int]]: + """ + Retrieve the top two neighbors of a given atom in a species, sorted by their effective electronegativity, + excluding a specified neighbor. + Effective electronegativity is calculated as: + Effective Electronegativity = Electronegativity of the neighbor * Bond order. + Sorting rules: + 1. Neighbors are sorted in descending order of their effective electronegativity. + 2. If two neighbors have the same effective electronegativity, the tie is broken by comparing the + sum of the effective electronegativities of their own bonded neighbors. + The neighbor with the higher sum will be ranked first. + + Args: + spc (ARCSpecies): The species containing the atoms. + atom_index (int): Index of the central atom. + exclude_index (int): Index of atom to exclude from neighbors list. + two_neighbors (bool): Whether to return two neighbors (True) or one (False). + + Returns: + Tuple[int, List[int]]: A tuple where: + - The first element is the index of the most electronegative neighbor. + - The second element is a list of the remaining ranked neighbors (empty if only one neighbor is requested). + + Raises: + ValueError: If the atom has no valid neighbors. + """ + neighbors = [neighbor for neighbor in spc.mol.atoms[atom_index].edges.keys() + if spc.mol.atoms.index(neighbor) != exclude_index] + + if not neighbors: + raise ValueError(f"Atom at index {atom_index} has no valid neighbors.") + + def get_neighbor_total_electronegativity(neighbor: 'Atom') -> float: + """ + Calculate the total electronegativity of a neighbor based on its bonded neighbors. + Args: + neighbor (Atom): The atom to calculate the total electronegativity for. + Returns: + float: The total electronegativity of the neighbor + """ + return sum( + ELECTRONEGATIVITIES[n.symbol] * neighbor.edges[n].order + for n in neighbor.edges.keys() + ) + + effective_electronegativities = [(ELECTRONEGATIVITIES[n.symbol] * spc.mol.atoms[atom_index].edges[n].order, + get_neighbor_total_electronegativity(n), n ) for n in neighbors] + effective_electronegativities.sort(reverse=True, key=lambda x: (x[0], x[1])) + sorted_neighbors = [spc.mol.atoms.index(n[2]) for n in effective_electronegativities] + most_electronegative = sorted_neighbors[0] + remaining_neighbors = sorted_neighbors[1:] if two_neighbors else [] + return most_electronegative, remaining_neighbors + + +def setup_zmat_indices(initial_xyz: dict, + xyz_indices: dict) -> Tuple[dict, dict]: + """ + Convert XYZ coordinates to Z-matrix format and set up corresponding indices. + + Args: + initial_xyz (dict): XYZ coordinates of the molecule. + xyz_indices (dict): Dictionary mapping atom types to their XYZ indices. + + Returns: + tuple: A tuple containing: + - dict: Z-matrix representation of the molecule + - dict: Dictionary mapping atom types to their Z-matrix indices + """ + initial_zmat = zmat_from_xyz(initial_xyz, consolidate=False) + zmat_indices = { + 'a': key_by_val(initial_zmat.get('map', {}), xyz_indices['a']), + 'b': key_by_val(initial_zmat.get('map', {}), xyz_indices['b']), + 'e': key_by_val(initial_zmat.get('map', {}), xyz_indices['e']), + 'd': key_by_val(initial_zmat.get('map', {}), xyz_indices['d']) if xyz_indices['d'] is not None else None + } + return initial_zmat, zmat_indices + + +def generate_dihedral_variants(zmat: dict, + indices: List[int], + adjustment_factors: List[float], + flip: bool = False, + tolerance_degrees: float = 10.0) -> List[dict]: + """ + Create variants of a Z-matrix by adjusting dihedral angles using multiple adjustment factors. + + This function creates variants of the Z-matrix using different adjustment factors: + 1. Retrieve the current dihedral value and normalize it to the (-180°, 180°] range. + 2. For each adjustment factor, slightly push the angle away from 0° or ±180° to avoid + unstable, boundary configurations. + 3. If `flip=True`, the same procedure is applied starting from a flipped + (180°-shifted) baseline angle. + 4. Each adjusted or flipped variant is deep-copied to ensure independence. + + Args: + zmat (dict): The initial Z-matrix. + indices (List[int]): The indices defining the dihedral angle. + adjustment_factors (List[float], optional): List of factors to try. + flip (bool, optional): Whether to start from a flipped (180°) baseline dihedral angle. + Defaults to False. + tolerance_degrees (float, optional): Tolerance (in degrees) for detecting angles near 0° or ±180°. Defaults to 10.0. + + Returns: + List[dict]: List of Z-matrix variants with adjusted dihedral angles. + """ + variants = [] + parameter_name = get_parameter_from_atom_indices(zmat=zmat, indices=indices, xyz_indexed=False) + current_value = zmat['vars'].get(parameter_name, 0) + normalized_value = get_angle_in_180_range(current_value) + if tolerance_degrees < 0: + raise ValueError("tolerance_degrees must be non-negative.") + + def push_up_dihedral(val: float, adj_factor: float) -> float: + """Scale the dihedral away from 0°/180° using the given factor.""" + if abs(val) < tolerance_degrees: + new_value = (val + 360 + adj_factor) - 360 + elif val < 0: + new_value = val + adj_factor + else: + new_value = val - adj_factor + normalized_new_value = get_angle_in_180_range(new_value) + return normalized_new_value + + seed_value = normalized_value + if flip: + seed_value = get_angle_in_180_range(normalized_value + 180.0) + boundary_like = ((abs(seed_value) < tolerance_degrees) + or (180 - tolerance_degrees <= abs(seed_value) <= 180+tolerance_degrees)) + if boundary_like: + for factor in adjustment_factors: + variant = copy.deepcopy(zmat) + variant["vars"][parameter_name] = push_up_dihedral(seed_value, factor) + variants.append(variant) + return variants + + +def get_matching_dihedrals(zmat: dict, + a: int, + b: int, + e: int, + d: Optional[int]) -> List[List[int]]: + """ + Retrieve all dihedral angles in the Z-matrix that match the given atom indices. + This function scans the Z-matrix for dihedral parameters (keys starting with 'D_' or 'DX_') + and collects those whose indices match the specified atoms. + + Args: + zmat (dict): The Z-matrix containing atomic coordinates and parameters. + a (int): The first atom index to match. + b (int): The second atom index to match. + e (int): The third atom index to match. + d (Optional[int]): The fourth atom index to match (optional). + + Returns: + List[List[int]]: A list of lists, where each sublist contains the indices of a matching dihedral. + Returns an empty list if no matches are found. + """ + matches = [] + for key in zmat['vars']: + if key.startswith('D_') or key.startswith('DX_'): + indices = [int(idx) for idx in key.split('_')[1:]] + if d is not None: + if a in indices and b in indices and (e in indices or d in indices): + matches.append(indices) + else: + if a in indices and b in indices and e in indices: + matches.append(indices) + return matches + + +def stretch_ab_bond(initial_zmat: 'dict', + xyz_indices: 'dict', + zmat_indices: 'dict', + hydrolysis_parameters: 'dict', + reaction_family: str) -> None: + """ + Stretch the bond between atoms a and b in the Z-matrix based on the reaction family parameters. + + Args: + initial_zmat (dict): The Z-matrix to modify. + xyz_indices (dict): Dictionary containing atom indices in XYZ coordinates. + zmat_indices (dict): Dictionary containing atom indices in Z-matrix coordinates. + hydrolysis_parameters (dict): Parameters for hydrolysis reactions. + reaction_family (str): The type of reaction family, used to get specific stretch parameters. + + Returns: + None: Modifies the Z-matrix in place. + + Note: + The stretching direction is determined by comparing atom positions in both + coordinate systems to ensure consistent bond modifications. The stretch degree + is obtained from the hydrolysis parameters for the specific reaction family. + """ + a_before_b_xyz = xyz_indices['a'] < xyz_indices['b'] + a_before_b_zmat = zmat_indices['a'] < zmat_indices['b'] + stretch_degree = hydrolysis_parameters['family_parameters'][str(reaction_family)]['stretch'] + + if a_before_b_zmat != a_before_b_xyz: + indices = (min(xyz_indices['a'], xyz_indices['b']), max(xyz_indices['a'], xyz_indices['b'])) + else: + indices = (max(xyz_indices['a'], xyz_indices['b']), min(xyz_indices['a'], xyz_indices['b'])) + + stretch_zmat_bond(zmat=initial_zmat, indices=indices, stretch=stretch_degree) + + +def process_family_specific_adjustments(is_set_1: bool, + is_set_2: bool, + reaction_family: str, + hydrolysis_parameters: dict, + initial_zmat: dict, + water: 'ARCSpecies', + xyz_indices: dict, + zmats_total: List[dict]) -> Tuple[List[dict], List[dict]]: + """ + Process specific adjustments for different hydrolysis reaction families if needed, then generate TS guesses . + + Args: + is_set_1 (bool): Whether the reaction belongs to parameter set 1. + is_set_2 (bool): Whether the reaction belongs to parameter set 2. + reaction_family (str): Type of hydrolysis reaction ('ether_hydrolysis' or others). + hydrolysis_parameters (dict): Parameters for different hydrolysis families. + initial_zmat (dict): Initial Z-matrix of the molecule. + water (ARCSpecies): Water molecule for the reaction. + xyz_indices (dict): Dictionary of atom indices in XYZ coordinates. + zmats_total (List[dict]): List of existing Z-matrices. + + Returns: + Tuple[List[dict], List[dict]]: Generated XYZ guesses and updated Z-matrices list. + + Raises: + ValueError: If the reaction family is not supported. + """ + a_xyz, b_xyz, e_xyz, o_xyz, h1_xyz, d_xyz= xyz_indices.values() + r_atoms = [a_xyz, o_xyz, o_xyz] + a_atoms = [[b_xyz, a_xyz], [a_xyz, o_xyz], [h1_xyz, o_xyz]] + d_atoms = ([[e_xyz, d_xyz, a_xyz], [b_xyz, a_xyz, o_xyz], [a_xyz, h1_xyz, o_xyz]] + if d_xyz is not None else + [[e_xyz, b_xyz, a_xyz], [b_xyz, a_xyz, o_xyz], [a_xyz, h1_xyz, o_xyz]]) + r_value = hydrolysis_parameters['family_parameters'][str(reaction_family)]['r_value'] + a_value = hydrolysis_parameters['family_parameters'][str(reaction_family)]['a_value'] + d_values = hydrolysis_parameters['family_parameters'][str(reaction_family)]['d_values'] + + if is_set_1 or is_set_2: + initial_xyz = zmat_to_xyz(initial_zmat) + return generate_hydrolysis_ts_guess(initial_xyz, xyz_indices.values(), water, r_atoms, a_atoms, d_atoms, + r_value, a_value, d_values, zmats_total, is_set_1, + threshold=0.6 if reaction_family == 'nitrile_hydrolysis' else 0.8) + else: + raise ValueError(f"Family {reaction_family} not supported for hydrolysis TS guess generation.") + + +def generate_hydrolysis_ts_guess(initial_xyz: dict, + xyz_indices: List[int], + water: 'ARCSpecies', + r_atoms: List[int], + a_atoms: List[List[int]], + d_atoms: List[List[int]], + r_value: List[float], + a_value: List[float], + d_values: List[List[float]], + zmats_total: List[dict], + is_set_1: bool, + threshold: float + ) -> Tuple[List[dict], List[dict]]: + """ + Generate Z-matrices and Cartesian coordinates for transition state (TS) guesses. + + Args: + initial_xyz (dict): The initial coordinates of the reactant. + xyz_indices (List[int]): The indices of the atoms in the initial coordinates. + water (ARCSpecies): The water molecule involved in the reaction. + r_atoms (List[int]): Atom pairs for defining bond distances. + a_atoms (List[List[int]]): Atom triplets for defining bond angles. + d_atoms (List[List[int]]): Atom quartets for defining dihedral angles. + r_value (List[float]): Bond distances for each atom pair. + a_value (List[float]): Bond angles for each atom triplet. + d_values (List[List[float]]): Sets of dihedral angles for TS guesses. + zmats_total (List[dict]): Existing Z-matrices to avoid duplicates. + is_set_1 (bool): Whether the reaction belongs to parameter set 1. + threshold (float): Threshold for atom collision checking. + + Returns: + Tuple[List[dict], List[dict]]: Unique TS guesses (XYZ coords and Z-matrices). + """ + xyz_guesses = [] + + for index, d_value in enumerate(d_values): + xyz_guess = copy.deepcopy(initial_xyz) + for i in range(3): + xyz_guess = add_atom_to_xyz_using_internal_coords( + xyz=xyz_guess, + element=water.mol.atoms[i].element.symbol, + r_index=r_atoms[i], + a_indices=a_atoms[i], + d_indices=d_atoms[i], + r_value=r_value[i], + a_value=a_value[i], + d_value=d_value[i] + ) + + a_xyz, b_xyz, e_xyz, o_xyz, h1_xyz, d_xyz= xyz_indices + are_valid_bonds=check_ts_bonds(xyz_guess, [o_xyz, h1_xyz, h1_xyz+1, a_xyz, b_xyz]) + colliding=colliding_atoms(xyz_guess, threshold=threshold) + duplicate = any(compare_zmats(existing, xyz_to_zmat(xyz_guess)) for existing in zmats_total) + if is_set_1: + dihedral_edao=[e_xyz, d_xyz, a_xyz, o_xyz] + dao_is_linear=check_dao_angle(dihedral_edao, xyz_guess) + else: + dao_is_linear=False + if xyz_guess is not None and not colliding and not duplicate and are_valid_bonds and not dao_is_linear: + xyz_guesses.append(xyz_guess) + zmats_total.append(xyz_to_zmat(xyz_guess)) + + + return xyz_guesses, zmats_total + + +def check_dao_angle(d_indices: List[int], xyz_guess: dict) -> bool: + """ + Check if the angle DAO is close to 0 or 180 degrees in the given XYZ coordinates. + + Args: + d_indices (List[int]): The indices of atoms defining the angle. + xyz_guess (dict): The XYZ coordinates of the molecule. + + Returns: + bool: True if DAO angle is close to 0 or 180 degrees, False otherwise. + """ + angle_indices = [d_indices[1], d_indices[2], d_indices[3]] + angle_value = calculate_angle(xyz_guess, angle_indices) + norm_value=(angle_value + 180) % 180 + return (norm_value < 10) or (norm_value > 170) + + +def check_ts_bonds(transition_state_xyz: dict, tested_atom_indices: list) -> bool: + """ + Check if the transition state guess has the correct bonds between water atoms. + + Args: + transition_state_xyz (dict): The transition state guess in XYZ format. + tested_atom_indices (list): the needed atom indices for the test. + + Returns: + bool: Whether the transition state guess has the expected water-related bonds. + """ + oxygen_index, h1_index, h2_index, a_index, b_index= tested_atom_indices + oxygen_bonds = sorted_distances_of_atom(transition_state_xyz, oxygen_index) + h1_bonds = sorted_distances_of_atom(transition_state_xyz, h1_index) + h2_bonds = sorted_distances_of_atom(transition_state_xyz, h2_index) + + def check_oxygen_bonds(bonds): + if len(bonds) < 3: + return False + bonded_atoms = [bonds[1][0], bonds[2][0]] + if h1_index in bonded_atoms and a_index in bonded_atoms: + return True + a_position = next((i for i, bond in enumerate(bonds) if bond[0] == a_index), None) + if a_position is None: + return False + if all(transition_state_xyz['symbols'][bonds[i][0]] == 'H' for i in range(2, a_position)): + rel_error = abs(bonds[a_position][1] - bonds[2][1]) / bonds[a_position][1] + return rel_error <= 0.1 + return False + + oxygen_has_valid_bonds = (oxygen_bonds[0][0] == h2_index and check_oxygen_bonds(oxygen_bonds)) + h1_has_valid_bonds = (h1_bonds[0][0] in {oxygen_index, b_index}and h1_bonds[1][0] in {oxygen_index, b_index}) + h2_has_valid_bonds = h2_bonds[0][0] == oxygen_index + return oxygen_has_valid_bonds and h1_has_valid_bonds and h2_has_valid_bonds + + register_job_adapter('heuristics', HeuristicsAdapter) diff --git a/arc/job/adapters/ts/heuristics_test.py b/arc/job/adapters/ts/heuristics_test.py index a765d4aa80..e692a231ac 100644 --- a/arc/job/adapters/ts/heuristics_test.py +++ b/arc/job/adapters/ts/heuristics_test.py @@ -8,8 +8,8 @@ import copy import itertools import os -import unittest import shutil +import unittest from arc.common import ARC_PATH, almost_equal_coords from arc.family import get_reaction_family_products @@ -23,11 +23,18 @@ get_new_map_based_on_zmat_1, get_new_zmat_2_map, stretch_zmat_bond, + get_main_reactant_and_water_from_hydrolysis_reaction, + setup_zmat_indices, + get_neighbors_by_electronegativity, + get_matching_dihedrals, + generate_dihedral_variants, + check_dao_angle, + check_ts_bonds, ) from arc.reaction import ARCReaction -from arc.species.converter import str_to_xyz, zmat_to_xyz +from arc.species.converter import str_to_xyz, zmat_to_xyz, zmat_from_xyz from arc.species.species import ARCSpecies -from arc.species.zmat import _compare_zmats +from arc.species.zmat import _compare_zmats, get_parameter_from_atom_indices class TestHeuristicsAdapter(unittest.TestCase): @@ -125,37 +132,232 @@ def setUpClass(cls): H -1.02943316 -0.30449156 1.00193709 H -0.60052507 -0.86954495 -0.63086438 H 0.30391344 2.59629139 0.17435159""") + + cls.water = ARCSpecies(label='H2O', smiles='O', xyz="""O -0.00032700 0.39565700 0.00000000 + H -0.75690800 -0.19845300 0.00000000 + H 0.75723500 -0.19720400 0.00000000""") + cls.methylformate = ARCSpecies(label='ester', smiles='COC=O', xyz="""C -1.01765390 -0.08355112 0.05206009 + O 0.22303684 -0.79051481 0.05294172 + C 0.35773087 -1.66017412 -0.97863090 + O -0.45608483 -1.87500387 -1.86208833 + H -1.82486467 -0.81522856 0.14629516 + H -1.06962462 0.60119223 0.90442455 + H -1.14968688 0.45844916 -0.88969505 + H 1.33643417 -2.15859899 -0.90083808""") + cls.acetic_acid = ARCSpecies(label='acid', smiles='CC(=O)O', xyz="""C -0.92388497 0.26665029 -0.14915313 + C 0.41299734 -0.37841629 0.00959894 + O 0.62774816 -1.53407965 0.33161574 + O 1.42589890 0.46969647 -0.24689306 + H -1.01860792 1.10010001 0.55120865 + H -1.04837584 0.61282781 -1.17812429 + H -1.70667782 -0.46488649 0.07043813 + H 2.23089150 -0.07206674 -0.10807745""") + cls.ethanol = ARCSpecies(label='alcohol', smiles='OCC', xyz="""O 1.19051468 0.74721872 0.55745278 + C 0.42396685 -0.33421819 0.04897215 + C -0.98542075 0.14578863 -0.22414249 + H 2.08706846 0.40878057 0.72232827 + H 0.41841326 -1.14061638 0.78839856 + H 0.89171403 -0.69551584 -0.87175392 + H -0.97955985 0.96896352 -0.94625607 + H -1.44657152 0.52976777 0.69182626 + H -1.60463449 -0.66494303 -0.61804700""") + cls.formicacid = ARCSpecies(label='formicacid', smiles='C(=O)O', xyz="""C -0.39120646 0.07781977 0.13864035 + O -0.92483455 -0.71970938 -0.60912134 + O 0.93691157 0.26838190 0.13942568 + H -0.89084150 0.72004713 0.87848577 + H 1.26997096 -0.34653955 -0.54743033""") + cls.methanol = ARCSpecies(label='methanol', smiles='CO', xyz="""C -0.36862686 -0.00871354 0.04292587 + O 0.98182901 -0.04902010 0.46594709 + H -0.57257378 0.95163086 -0.43693396 + H -0.55632373 -0.82564527 -0.65815446 + H -1.01755588 -0.12311763 0.91437513 + H 1.10435907 0.67758465 1.10037299""") + cls.propionitrile = ARCSpecies(label='nitrile', smiles='CC#N', xyz="""C -0.48629842 0.00448354 0.00136213 + C 0.97554967 -0.00899430 -0.00273253 + N 2.13574353 -0.01969098 -0.00598223 + H -0.88318669 -0.63966273 -0.78887729 + H -0.87565097 -0.35336611 0.95910491 + H -0.86615712 1.01723058 -0.16287498""") + cls.imidic_acid = ARCSpecies(label='acid', smiles='CC(O)=N', xyz="""C -0.95184918 -0.34383640 0.27749809 + C 0.40787045 0.04165652 -0.17951964 + O 0.49758652 1.21928238 -0.79192045 + N 1.48315686 -0.64715910 -0.06425624 + H -1.56199325 -0.61442732 -0.58808675 + H -0.89746501 -1.19773120 0.95792817 + H -1.41661007 0.49427007 0.80380287 + H 1.43056708 1.33384843 -1.04192555 + H 1.28070929 -1.53312020 0.41216154""") + cls.ethyleneoxide = ARCSpecies(label='ether', smiles='C1CO1', xyz="""C -0.73103000 -0.04646300 0.06435400 +C 0.72760500 0.10564700 0.01160200 +O 0.04187500 -0.72362100 -0.92868600 +H -1.36764100 0.75367000 -0.30587200 +H -1.17678900 -0.68478600 0.82356100 +H 1.15756500 1.01700400 -0.39719600 +H 1.34841600 -0.42145200 0.73223700""") + cls.ethyleneglycol = ARCSpecies(label='alcohol', smiles='C(CO)O', xyz="""O -1.53938900 0.86956400 0.16926200 +C -0.90062800 -0.37363000 -0.14226000 +C -1.78595900 -1.46045700 0.43864000 +O -1.91351700 -1.32061800 1.84404200 +H -0.90863900 1.58212100 0.03809800 +H 0.09166600 -0.42853000 0.32293400 +H -0.79439500 -0.50559300 -1.22812100 +H -1.34671000 -2.44297700 0.24952500 +H -2.76759800 -1.42346700 -0.05607100 +H -2.15881000 -0.39901700 1.98917200""") + cls.allylmethylether = ARCSpecies(label='ether', smiles='C=CCOC', xyz="""C 2.24051202 1.04153068 0.19486347 + C 1.10659712 0.58234118 0.74083019 + C 0.16338489 -0.36827342 0.07674238 + O -0.00738172 -1.48018240 0.95029996 + C -0.89152333 -2.44569234 0.39890889 + H 2.55998781 0.74063014 -0.79771591 + H 2.88053617 1.72597545 0.74310056 + H 0.83267744 0.90476855 1.74255103 + H 0.55255009 -0.71024239 -0.88913402 + H -0.80072247 0.12806836 -0.08052245 + H -0.49138857 -2.84032162 -0.54006077 + H -0.98849098 -3.27013487 1.11037749 + H -1.88123946 -2.00923795 0.23313156""") + cls.allylalcohol = ARCSpecies(label='alcohol', smiles='C=CCO', xyz="""C -1.52682541 -0.19194750 -0.17281541 + C -0.40630181 0.18362433 0.45744638 + C 0.89960438 0.44544042 -0.21619860 + O 1.26209057 1.79490593 0.02459363 + H -2.44231366 -0.36966852 0.38380790 + H -1.55956509 -0.33307605 -1.24882770 + H -0.42113902 0.30719547 1.53825844 + H 1.67685800 -0.20286001 0.19855306 + H 0.85438611 0.28223717 -1.29754528 + H 0.50870998 2.34042848 -0.25927076""") + cls.acetamide = ARCSpecies(label='amide', smiles='CC(=O)N', xyz="""C 1.10760981 -0.06195096 -0.07465698 +C -0.28279459 -0.19244661 0.47836893 +O -0.76333777 -1.25950344 0.84075145 +N -0.99437508 0.96533352 0.55638164 +H 1.19468276 -0.68174655 -0.97108544 +H 1.34171196 0.97277779 -0.33936488 +H 1.82419932 -0.40246705 0.67757751 +H -1.93524371 0.91299784 0.92464616 +H -0.63325570 1.86305111 0.26809395""") + cls.ammonia = ARCSpecies(label='amine', smiles='N', xyz="""N 0.00064924 -0.00099698 0.29559292 +H -0.41786606 0.84210396 -0.09477452 +H -0.52039228 -0.78225292 -0.10002797 +H 0.93760911 -0.05885406 -0.10079043""") + cls.benzamide = ARCSpecies(label='amide', smiles='C1=CC=CC=C1C(=O)N', xyz="""C 0.02406042 1.07877013 0.26424337 +C 1.38891822 1.37139283 0.28510228 +C 2.32165679 0.35574124 0.08135062 +C 1.89229414 -0.95101202 -0.14974810 +C 0.52668674 -1.24826306 -0.17038981 +C -0.41489992 -0.23412560 0.04744473 +C -1.88074492 -0.48932167 0.03531130 +O -2.68819802 0.41054791 -0.16101181 +N -2.30823742 -1.76367784 0.28239630 +H -0.69919556 1.87725702 0.41979936 +H 1.72181259 2.39197169 0.45694294 +H 3.38453807 0.58491031 0.09426306 +H 2.62280279 -1.73763731 -0.32294062 +H 0.22046525 -2.26713606 -0.38344205 +H -1.70408144 -2.52799066 0.54098462 +H -3.30777598 -1.92019120 0.27981193""") + cls.benzoyl = ARCSpecies(label='benzoyl', smiles='C1=CC=CC=C1C(=O)O', xyz="""C -0.18985385 1.27034645 -0.23363322 +C -1.57890492 1.17186499 -0.13319938 +C -2.18068609 -0.07844120 0.00742639 +C -1.39707546 -1.23185055 0.04799168 +C -0.00710015 -1.13763032 -0.05216040 +C 0.60204045 0.11559795 -0.19351299 +C 2.07279801 0.26826941 -0.30374600 +O 2.67411260 1.31925133 -0.42831386 +O 2.72649982 -0.90613791 -0.25283572 +H 0.27276852 2.24928995 -0.34316862 +H -2.19058154 2.07017174 -0.16469933 +H -3.26265825 -0.15423252 0.08558343 +H -1.86840430 -2.20550765 0.15768451 +H 0.59058844 -2.04515889 -0.01898448 +H 3.66918927 -0.65134306 -0.33610950""") + cls.malononitrile = ARCSpecies(label='nitrile', smiles='N#CC(C#N)', xyz="""N -2.18112669 1.02419451 0.00876053 +C -1.21207265 0.38522802 0.00538987 +C -0.01011698 -0.45119476 0.00133359 +C 1.22812060 0.33047430 -0.00750528 +N 2.22484025 0.92533224 -0.01452270 +H -0.02012675 -1.10452741 0.88124864 +H -0.02951777 -1.10950691 -0.87470464""") + cls.iminopropanoic_acid = ARCSpecies(label='imine', smiles='N=C(O)C(C#N)', xyz="""N 1.08761470 1.20942794 0.02869931 +C 0.72094617 -0.02042197 -0.00900129 +O 1.69914939 -0.91426006 -0.18471210 +C -0.63647764 -0.61808894 0.18150798 +C -1.70229213 0.30165773 -0.20783131 +N -2.54435820 1.04519579 -0.49922098 +H 0.29194728 1.84186248 0.18467011 +H 2.52767273 -0.40935065 -0.27491806 +H -0.70494480 -1.53618745 -0.41219569 +H -0.73664727 -0.89373946 1.23654320""") + cls.phenylethanimine = ARCSpecies(label='imine', smiles='c1ccccc1C(=N)C', xyz="""C 0.45780349 1.06857125 -0.19224526 + C 1.81425409 1.33449779 0.01894805 +C 2.66519037 0.32552995 0.46751606 +C 2.16360565 -0.95203351 0.70642685 +C 0.80932992 -1.22043991 0.49635350 +C -0.05427922 -0.21287994 0.04552340 +C -1.49752902 -0.53941187 -0.16746471 +N -1.94201576 -1.73606811 0.06159544 +C -2.43653310 0.54060141 -0.65348173 +H -0.17512162 1.87800283 -0.54174004 +H 2.20753530 2.33099177 -0.16697472 +H 3.71944447 0.53443580 0.63106141 +H 2.82337950 -1.74186579 1.05624804 +H 0.42917332 -2.22229939 0.68645387 +H -2.94698099 -1.80207650 -0.13009056 +H -2.10721900 0.90816936 -1.62946617 +H -2.46053024 1.36395217 0.06597366 +H -3.45360689 0.15275707 -0.76116277""") + cls.ethylamine = ARCSpecies(label='amine', smiles='CCN', xyz="""C -1.14264899 -0.06058026 -0.03332469 + C 0.26912880 0.49496284 0.03304382 + N 1.21966642 -0.56664747 0.33628965 + H -1.43371056 -0.51780317 0.91844437 + H -1.23525080 -0.81736666 -0.81966584 + H -1.85393441 0.74205986 -0.25306388 + H 0.32681273 1.26512119 0.80885729 + H 0.53039286 0.96148310 -0.92252276 + H 2.16545120 -0.18719811 0.32509986 + H 1.18773917 -1.27609387 -0.39480684""") cls.zmat_1 = {'symbols': ('C', 'C', 'H', 'H', 'H', 'H', 'H', 'O', 'O', 'H'), - 'coords': ((None, None, None), ('R_1_0', None, None), ('R_2_0', 'A_2_0_1', None), - ('R_3_0', 'A_3_0_1', 'D_3_0_1_2'), ('R_4_0', 'A_4_0_1', 'D_4_0_1_3'), - ('R_5_1', 'A_5_1_0', 'D_5_1_0_4'), ('R_6_1', 'A_6_1_0', 'D_6_1_0_5'), - ('R_7_1', 'A_7_1_0', 'D_7_1_0_6'), ('R_8_7', 'A_8_7_1', 'D_8_7_1_0'), + 'coords': ((None, None, None), ('R_1_0', None, None), + ('R_2_0', 'A_2_0_1', None), + ('R_3_0', 'A_3_0_1', 'D_3_0_1_2'), + ('R_4_0', 'A_4_0_1', 'D_4_0_1_3'), + ('R_5_1', 'A_5_1_0', 'D_5_1_0_4'), + ('R_6_1', 'A_6_1_0', 'D_6_1_0_5'), + ('R_7_1', 'A_7_1_0', 'D_7_1_0_6'), + ('R_8_7', 'A_8_7_1', 'D_8_7_1_0'), ('R_9_8', 'A_9_8_7', 'D_9_8_7_1')), 'vars': {'R_1_0': 1.514747977256775, 'R_2_0': 1.0950205326080322, 'A_2_0_1': 110.62464141845703, 'R_3_0': 1.093567967414856, 'A_3_0_1': 110.91426086425781, - 'D_3_0_1_2': 120.87266977773987, 'R_4_0': 1.0950090885162354, - 'A_4_0_1': 110.62271118164062, 'D_4_0_1_3': 120.87301274044218, + 'D_3_0_1_2': 120.87266977773987, + 'R_4_0': 1.0950090885162354, 'A_4_0_1': 110.62271118164062, + 'D_4_0_1_3': 120.87301274044218, 'R_5_1': 1.0951433181762695, 'A_5_1_0': 110.20822143554688, - 'D_5_1_0_4': 181.16392677464202, 'R_6_1': 1.095141053199768, - 'A_6_1_0': 110.2014389038086, 'D_6_1_0_5': 239.4199964284852, + 'D_5_1_0_4': 181.16392677464202, + 'R_6_1': 1.095141053199768, 'A_6_1_0': 110.2014389038086, 'D_6_1_0_5': 239.4199964284852, 'R_7_1': 1.4265729188919067, 'A_7_1_0': 108.63387298583984, - 'D_7_1_0_6': 240.2886512602055, 'R_8_7': 1.455925464630127, - 'A_8_7_1': 105.58023071289062, 'D_8_7_1_0': 179.9922243050821, + 'D_7_1_0_6': 240.2886512602055, + 'R_8_7': 1.455925464630127, 'A_8_7_1': 105.58023071289062, + 'D_8_7_1_0': 179.9922243050821, 'R_9_8': 0.9741224646568298, 'A_9_8_7': 96.3006591796875, 'D_9_8_7_1': 242.3527063196313}, 'map': {0: 0, 1: 1, 2: 4, 3: 5, 4: 6, 5: 7, 6: 8, 7: 2, 8: 3, 9: 9}} cls.zmat_2 = {'symbols': ('H', 'C', 'C', 'H', 'H', 'H', 'H', 'H'), - 'coords': ((None, None, None), ('R_1_0', None, None), ('R_2_1', 'A_2_1_0', None), - ('R_3_1', 'A_3_1_2', 'D_3_1_2_0'), ('R_4_1', 'A_4_1_2', 'D_4_1_2_3'), - ('R_5_2', 'A_5_2_1', 'D_5_2_1_4'), ('R_6_2', 'A_6_2_1', 'D_6_2_1_5'), + 'coords': ((None, None, None), ('R_1_0', None, None), + ('R_2_1', 'A_2_1_0', None), + ('R_3_1', 'A_3_1_2', 'D_3_1_2_0'), + ('R_4_1', 'A_4_1_2', 'D_4_1_2_3'), + ('R_5_2', 'A_5_2_1', 'D_5_2_1_4'), + ('R_6_2', 'A_6_2_1', 'D_6_2_1_5'), ('R_7_2', 'A_7_2_1', 'D_7_2_1_6')), 'vars': {'R_1_0': 1.0940775871276855, 'R_2_1': 1.5120487213134766, 'A_2_1_0': 110.5680160522461, 'R_3_1': 1.0940725803375244, 'A_3_1_2': 110.56890869140625, - 'D_3_1_2_0': 239.99938309284218, 'R_4_1': 1.0940817594528198, - 'A_4_1_2': 110.56755065917969, 'D_4_1_2_3': 239.9997190582892, + 'D_3_1_2_0': 239.99938309284218, + 'R_4_1': 1.0940817594528198, 'A_4_1_2': 110.56755065917969, + 'D_4_1_2_3': 239.9997190582892, 'R_5_2': 1.0940725803375244, 'A_5_2_1': 110.56890869140625, - 'D_5_2_1_4': 59.99971758419434, 'R_6_2': 1.0940840244293213, - 'A_6_2_1': 110.56790924072266, 'D_6_2_1_5': 239.99905123159166, + 'D_5_2_1_4': 59.99971758419434, + 'R_6_2': 1.0940840244293213, 'A_6_2_1': 110.56790924072266, + 'D_6_2_1_5': 239.99905123159166, 'R_7_2': 1.0940817594528198, 'A_7_2_1': 110.56755065917969, 'D_7_2_1_6': 240.00122783407815}, 'map': {0: 3, 1: 0, 2: 1, 3: 2, 4: 4, 5: 5, 6: 6, 7: 7}} @@ -166,13 +368,20 @@ def setUpClass(cls): ('R_7_1', 'A_7_1_0', 'D_7_1_0_6'), ('R_8_1', 'A_8_1_0', 'D_8_1_0_7'), ('R_9_3', 'A_9_3_2', 'D_9_3_2_1'), ('RX_10_9', 'AX_10_9_3', 'DX_10_9_3_2')), 'vars': {'R_1_0': 1.5147479951212197, 'R_2_1': 1.4265728986680748, 'A_2_1_0': 108.63387152978416, - 'R_3_2': 1.4559254886404387, 'A_3_2_1': 105.58023544826183, 'D_3_2_1_0': 179.9922243050821, - 'R_4_0': 1.0950205915944824, 'A_4_0_1': 110.62463321031589, 'D_4_0_1_3': 59.13545080998071, - 'R_5_0': 1.093567969297245, 'A_5_0_1': 110.91425998596507, 'D_5_0_1_4': 120.87266977773987, - 'R_6_0': 1.0950091062890002, 'A_6_0_1': 110.62270362433773, 'D_6_0_1_5': 120.87301274044218, - 'R_7_1': 1.0951433842986755, 'A_7_1_0': 110.20822115119915, 'D_7_1_0_6': 181.16392677464265, - 'R_8_1': 1.0951410439636102, 'A_8_1_0': 110.20143800025897, 'D_8_1_0_7': 239.4199964284852, - 'R_9_3': 1.1689469645782498, 'A_9_3_2': 96.30065819269021, 'D_9_3_2_1': 242.3527063196313, + 'R_3_2': 1.4559254886404387, 'A_3_2_1': 105.58023544826183, + 'D_3_2_1_0': 179.9922243050821, + 'R_4_0': 1.0950205915944824, 'A_4_0_1': 110.62463321031589, + 'D_4_0_1_3': 59.13545080998071, + 'R_5_0': 1.093567969297245, 'A_5_0_1': 110.91425998596507, + 'D_5_0_1_4': 120.87266977773987, + 'R_6_0': 1.0950091062890002, 'A_6_0_1': 110.62270362433773, + 'D_6_0_1_5': 120.87301274044218, + 'R_7_1': 1.0951433842986755, 'A_7_1_0': 110.20822115119915, + 'D_7_1_0_6': 181.16392677464265, + 'R_8_1': 1.0951410439636102, 'A_8_1_0': 110.20143800025897, + 'D_8_1_0_7': 239.4199964284852, + 'R_9_3': 1.1689469645782498, 'A_9_3_2': 96.30065819269021, + 'D_9_3_2_1': 242.3527063196313, 'RX_10_9': 1.0, 'AX_10_9_3': 90.0, 'DX_10_9_3_2': 0}, 'map': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 'X10'}} cls.zmat_4 = {'symbols': ('H', 'C', 'C', 'H', 'H', 'H', 'H', 'H'), @@ -181,11 +390,16 @@ def setUpClass(cls): ('R_5_2', 'A_5_2_1', 'D_5_2_1_4'), ('R_6_2', 'A_6_2_1', 'D_6_2_1_5'), ('R_7_2', 'A_7_2_1', 'D_7_2_1_6')), 'vars': {'R_1_0': 1.3128870801982788, 'R_2_1': 1.5120487296562577, 'A_2_1_0': 110.56890700195424, - 'R_3_1': 1.0940775789443724, 'A_3_1_2': 110.56801921096591, 'D_3_1_2_0': 120.00061587714492, - 'R_4_1': 1.0940817193677925, 'A_4_1_2': 110.56754686774481, 'D_4_1_2_3': 119.99910067703652, - 'R_5_2': 1.0940725668318991, 'A_5_2_1': 110.56890700195424, 'D_5_2_1_4': 59.99971758419434, - 'R_6_2': 1.0940840619688397, 'A_6_2_1': 110.56790845138725, 'D_6_2_1_5': 239.99905123159166, - 'R_7_2': 1.0940817193677925, 'A_7_2_1': 110.56754686774481, 'D_7_2_1_6': 240.00122783407815}, + 'R_3_1': 1.0940775789443724, 'A_3_1_2': 110.56801921096591, + 'D_3_1_2_0': 120.00061587714492, + 'R_4_1': 1.0940817193677925, 'A_4_1_2': 110.56754686774481, + 'D_4_1_2_3': 119.99910067703652, + 'R_5_2': 1.0940725668318991, 'A_5_2_1': 110.56890700195424, + 'D_5_2_1_4': 59.99971758419434, + 'R_6_2': 1.0940840619688397, 'A_6_2_1': 110.56790845138725, + 'D_6_2_1_5': 239.99905123159166, + 'R_7_2': 1.0940817193677925, 'A_7_2_1': 110.56754686774481, + 'D_7_2_1_6': 240.00122783407815}, 'map': {0: 2, 1: 0, 2: 1, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7}} cls.zmat_5 = {'symbols': ('C', 'C', 'X', 'C', 'C', 'C', 'X', 'C', 'X', 'H', 'H', 'H', 'H', 'H', 'X', 'H', 'X'), 'coords': ((None, None, None), ('R_1_0', None, None), ('RX_2_1', 'AX_2_1_0', None), @@ -196,22 +410,26 @@ def setUpClass(cls): ('R_11_4', 'A_11_4_3', 'D_11_4_3_1'), ('R_12_4', 'A_12_4_3', 'D_12_4_3_1'), ('R_13_4', 'A_13_4_3', 'D_13_4_3_1'), ('RX_14_7', 'AX_14_7_5', 'DX_14_7_5_1'), ('R_15_7', 'AX_15_7_14', 'DX_15_7_14_5'), ('RX_16_10', 'AX_16_10_3', 'DX_16_10_3_1')), - 'vars': {'R_1_0': 1.2014696201892123, 'RX_2_1': 1.0, 'AX_2_1_0': 90.0, 'R_3_1': 1.4764335616394748, + 'vars': {'R_1_0': 1.2014696201892123, 'RX_2_1': 1.0, 'AX_2_1_0': 90.0, + 'R_3_1': 1.4764335616394748, 'AX_3_1_2': 90.0, 'DX_3_1_2_0': 180.0, 'R_4_3': 1.528403848430877, - 'A_4_3_1': 110.10474745663315, 'DX_4_3_1_2': 250.79698398730164, 'R_5_3': 1.4764334808980883, + 'A_4_3_1': 110.10474745663315, 'DX_4_3_1_2': 250.79698398730164, + 'R_5_3': 1.4764334808980883, 'A_5_3_1': 112.26383853893992, 'D_5_3_1_4': 123.05092674196338, 'RX_6_5': 1.0, 'AX_6_5_3': 90.0, 'DX_6_5_3_1': 180, 'R_7_5': 1.201469520969646, 'AX_7_5_6': 90.0, 'DX_7_5_6_3': 180.0, 'RX_8_0': 1.0, 'AX_8_0_1': 90.0, 'DX_8_0_1_7': 180, 'R_9_0': 1.065642981503376, 'AX_9_0_8': 90.0, 'DX_9_0_8_1': 180.0, 'R_10_3': 1.3169771399805865, 'A_10_3_1': 108.17388195099538, 'D_10_3_1_7': 119.20794850242746, 'R_11_4': 1.0969184758191393, - 'A_11_4_3': 111.59730790975621, 'D_11_4_3_1': 62.15337627950438, 'R_12_4': 1.096052090430251, + 'A_11_4_3': 111.59730790975621, 'D_11_4_3_1': 62.15337627950438, + 'R_12_4': 1.096052090430251, 'A_12_4_3': 111.0304823817703, 'D_12_4_3_1': 302.14665453695886, 'R_13_4': 1.0960521991926764, 'A_13_4_3': 111.03046862714851, 'D_13_4_3_1': 182.16006499876246, 'RX_14_7': 1.0, 'AX_14_7_5': 90.0, 'DX_14_7_5_1': 180, 'R_15_7': 1.0656433171015254, 'AX_15_7_14': 90.0, 'DX_15_7_14_5': 180.0, 'RX_16_10': 1.0, 'AX_16_10_3': 90.0, 'DX_16_10_3_1': 0}, - 'map': {0: 0, 1: 1, 2: 'X12', 3: 2, 4: 3, 5: 4, 6: 'X13', 7: 5, 8: 'X14', 9: 6, 10: 7, 11: 8, 12: 9, + 'map': {0: 0, 1: 1, 2: 'X12', 3: 2, 4: 3, 5: 4, 6: 'X13', 7: 5, 8: 'X14', 9: 6, 10: 7, 11: 8, + 12: 9, 13: 10, 14: 'X15', 15: 11, 16: 'X16'}} cls.zmat_6 = {'symbols': ('H', 'C', 'C', 'C', 'X', 'C', 'X', 'C', 'X', 'H', 'H', 'X', 'H'), 'coords': ((None, None, None), ('R_1_0', None, None), ('R_2_1', 'A_2_1_0', None), @@ -221,15 +439,18 @@ def setUpClass(cls): ('R_9_5', 'AX_9_5_8', 'DX_9_5_8_2'), ('R_10_1', 'A_10_1_2', 'D_10_1_2_7'), ('RX_11_7', 'AX_11_7_3', 'DX_11_7_3_2'), ('R_12_7', 'AX_12_7_11', 'DX_12_7_11_3')), 'vars': {'R_1_0': 1.3155122903491134, 'R_2_1': 1.4707410587114869, 'A_2_1_0': 109.22799244788278, - 'R_3_1': 1.4707410587114869, 'A_3_1_2': 113.21235050581743, 'D_3_1_2_0': 121.94357782706227, + 'R_3_1': 1.4707410587114869, 'A_3_1_2': 113.21235050581743, + 'D_3_1_2_0': 121.94357782706227, 'RX_4_2': 1.0, 'AX_4_2_1': 90.0, 'DX_4_2_1_3': 180, 'R_5_2': 1.2013089233618282, - 'AX_5_2_4': 90.0, 'DX_5_2_4_1': 180.0, 'RX_6_3': 1.0, 'AX_6_3_1': 90.0, 'DX_6_3_1_2': 180, + 'AX_5_2_4': 90.0, 'DX_5_2_4_1': 180.0, 'RX_6_3': 1.0, 'AX_6_3_1': 90.0, + 'DX_6_3_1_2': 180, 'R_7_3': 1.2013088241289895, 'AX_7_3_6': 90.0, 'DX_7_3_6_1': 180.0, 'RX_8_5': 1.0, 'AX_8_5_2': 90.0, 'DX_8_5_2_7': 180, 'R_9_5': 1.06567033240585, 'AX_9_5_8': 90.0, 'DX_9_5_8_2': 180.0, 'R_10_1': 1.0962601875867035, 'A_10_1_2': 109.22799322222649, 'D_10_1_2_7': 121.94358050468233, 'RX_11_7': 1.0, 'AX_11_7_3': 90.0, 'DX_11_7_3_2': 180, 'R_12_7': 1.0656705002006313, 'AX_12_7_11': 90.0, 'DX_12_7_11_3': 180.0}, - 'map': {0: 7, 1: 2, 2: 1, 3: 3, 4: 'X9', 5: 0, 6: 'X10', 7: 4, 8: 'X11', 9: 5, 10: 6, 11: 'X12', 12: 8}} + 'map': {0: 7, 1: 2, 2: 1, 3: 3, 4: 'X9', 5: 0, 6: 'X10', 7: 4, 8: 'X11', 9: 5, 10: 6, 11: 'X12', + 12: 8}} def test_heuristics_for_h_abstraction_1(self): """ @@ -372,9 +593,12 @@ def test_heuristics_for_h_abstraction_2(self): self.assertEqual(rxn5.ts_species.charge, 0) self.assertEqual(rxn5.ts_species.multiplicity, 2) self.assertEqual(len(rxn5.ts_species.ts_guesses), 18) - self.assertEqual(rxn5.ts_species.ts_guesses[0].initial_xyz['symbols'], ('C', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'O', 'O', 'H')) - self.assertEqual(rxn5.ts_species.ts_guesses[1].initial_xyz['symbols'], ('C', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'O', 'O', 'H')) - self.assertEqual(rxn5.ts_species.ts_guesses[2].initial_xyz['symbols'], ('C', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'O', 'O', 'H')) + self.assertEqual(rxn5.ts_species.ts_guesses[0].initial_xyz['symbols'], + ('C', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'O', 'O', 'H')) + self.assertEqual(rxn5.ts_species.ts_guesses[1].initial_xyz['symbols'], + ('C', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'O', 'O', 'H')) + self.assertEqual(rxn5.ts_species.ts_guesses[2].initial_xyz['symbols'], + ('C', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'O', 'O', 'H')) self.assertTrue(rxn5.ts_species.ts_guesses[0].success) self.assertTrue(rxn5.ts_species.ts_guesses[1].success) self.assertTrue(rxn5.ts_species.ts_guesses[2].success) @@ -491,29 +715,35 @@ def test_heuristics_for_h_abstraction_5(self): self.assertEqual(rxn8.ts_species.multiplicity, 3) self.assertEqual(len(rxn8.ts_species.ts_guesses), 6) self.assertTrue(almost_equal_coords(rxn8.ts_species.ts_guesses[0].initial_xyz, - {'symbols': ('N', 'C', 'O', 'N', 'H', 'H'), 'isotopes': (14, 12, 16, 14, 1, 1), - 'coords': ((-3.657596721635545e-09, 0.08876698337705413, 0.9329034620293603), - (-3.657596721635545e-09, 0.8029731964901674, 0.005816759497990542), - (9.609228590831403e-09, 1.4947776654181317, -0.9420542025729876), - (-3.657596721635545e-09, -2.2452231277287726, 0.16101001965294726), - (-3.657596721635545e-09, -1.0762904363668742, 0.547597552628467), - (-3.657596721635545e-09, -2.2452231277287726, -0.8649899620469162))})) + {'symbols': ('N', 'C', 'O', 'N', 'H', 'H'), + 'isotopes': (14, 12, 16, 14, 1, 1), + 'coords': ( + (-3.657596721635545e-09, 0.08876698337705413, 0.9329034620293603), + (-3.657596721635545e-09, 0.8029731964901674, 0.005816759497990542), + (9.609228590831403e-09, 1.4947776654181317, -0.9420542025729876), + (-3.657596721635545e-09, -2.2452231277287726, 0.16101001965294726), + (-3.657596721635545e-09, -1.0762904363668742, 0.547597552628467), + (-3.657596721635545e-09, -2.2452231277287726, -0.8649899620469162))})) self.assertTrue(almost_equal_coords(rxn8.ts_species.ts_guesses[1].initial_xyz, - {'symbols': ('N', 'C', 'O', 'N', 'H', 'H'), 'isotopes': (14, 12, 16, 14, 1, 1), + {'symbols': ('N', 'C', 'O', 'N', 'H', 'H'), + 'isotopes': (14, 12, 16, 14, 1, 1), 'coords': ((0.7304309896785263, 0.4813753237349452, -0.26044855417424406), (-0.22605518455485318, 0.6753956743209286, 0.3853614572987496), (-1.2013936879956266, 0.8535814918821567, 1.0130695121236126), (0.7304309896785263, -1.8526147873708816, -1.032341996550657), (0.7304309896785263, -0.6836820960089831, -0.6457544635751373), - (0.7304309896785263, -1.8526147873708816, -2.0583419782505206))})) + (0.7304309896785263, -1.8526147873708816, + -2.0583419782505206))})) self.assertTrue(almost_equal_coords(rxn8.ts_species.ts_guesses[2].initial_xyz, - {'symbols': ('N', 'C', 'O', 'N', 'H', 'H'), 'isotopes': (14, 12, 16, 14, 1, 1), - 'coords': ((-0.7304309882994099, 0.48137532979234665, -0.26044855803662603), - (0.22605518593396912, 0.6753956803783296, 0.3853614534363681), - (1.201393684372416, 0.8535814759681686, 1.0130695222708512), - (-0.7304309882994099, -1.8526147813134801, -1.032342000413039), - (-0.7304309882994099, -0.6836820899515816, -0.6457544674375193), - (-0.7304309882994099, -1.8526147813134801, -2.0583419821129025))})) + {'symbols': ('N', 'C', 'O', 'N', 'H', 'H'), + 'isotopes': (14, 12, 16, 14, 1, 1), + 'coords': ( + (-0.7304309882994099, 0.48137532979234665, -0.26044855803662603), + (0.22605518593396912, 0.6753956803783296, 0.3853614534363681), + (1.201393684372416, 0.8535814759681686, 1.0130695222708512), + (-0.7304309882994099, -1.8526147813134801, -1.032342000413039), + (-0.7304309882994099, -0.6836820899515816, -0.6457544674375193), + (-0.7304309882994099, -1.8526147813134801, -2.0583419821129025))})) def test_heuristics_for_h_abstraction_6(self): # butenylnebzene + CCOO <=> butenylnebzene_rad + CCOOH @@ -603,8 +833,10 @@ def test_heuristics_for_h_abstraction_6(self): butenylnebzene = ARCSpecies(label='butenylnebzene', smiles='c1ccccc1CCC=C', xyz=butenylnebzene_xyz) peroxyl = ARCSpecies(label='CCOO', smiles='CCO[O]', xyz=peroxyl_xyz) peroxide = ARCSpecies(label='CCOOH', smiles='CCOO', xyz=peroxide_xyz) - butenylnebzene_rad1 = ARCSpecies(label='butenylnebzene_rad1', smiles='c1ccccc1C[CH]C=C', xyz=butenylnebzene_rad1_xyz) - butenylnebzene_rad2 = ARCSpecies(label='butenylnebzene_rad2', smiles='c1ccccc1[CH]CC=C', xyz=butenylnebzene_rad2_xyz) + butenylnebzene_rad1 = ARCSpecies(label='butenylnebzene_rad1', smiles='c1ccccc1C[CH]C=C', + xyz=butenylnebzene_rad1_xyz) + butenylnebzene_rad2 = ARCSpecies(label='butenylnebzene_rad2', smiles='c1ccccc1[CH]CC=C', + xyz=butenylnebzene_rad2_xyz) rxn9 = ARCReaction(r_species=[butenylnebzene, peroxyl], p_species=[peroxide, butenylnebzene_rad1]) rxn10 = ARCReaction(r_species=[butenylnebzene, peroxyl], p_species=[peroxide, butenylnebzene_rad2]) self.assertEqual(rxn9.family, 'H_Abstraction') @@ -893,6 +1125,65 @@ def test_heuristics_for_h_abstraction_13(self): heuristics_1.execute_incore() self.assertEqual(len(rxn1.ts_species.ts_guesses), 12) + def test_heuristics_for_hydrolysis(self): + """ + Test that ARC can generate TS guesses based on heuristics for different hydrolysis families reactions. + """ + # Carbonyl-based hydrolysis + # C2H4O2 + H2O <=> CH2O2 + CH4O + methylformate = self.methylformate + formicacid = self.formicacid + methanol = self.methanol + water = self.water + rxn1 = ARCReaction(r_species=[methylformate, water], p_species=[formicacid, methanol], + family='carbonyl_based_hydrolysis') + heuristics_1 = HeuristicsAdapter(job_type='tsg', + reactions=[rxn1], + testing=True, + project='test', + project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'heuristics') + ) + heuristics_1.execute_incore() + self.assertEqual(rxn1.family, 'carbonyl_based_hydrolysis') + self.assertTrue(rxn1.ts_species.is_ts) + self.assertEqual(rxn1.ts_species.ts_guesses[0].initial_xyz['symbols'], + ('C', 'O', 'C', 'O', 'H', 'H', 'H', 'H', 'O', 'H', 'H')) + self.assertEqual(len(rxn1.ts_species.ts_guesses), 6) + # Ether hydrolysis + # C3H8O + H2O <=> C2H6O + CH4O + allylmethylether = self.allylmethylether + allylalcohol = self.allylalcohol + methanol = self.methanol + rxn2 = ARCReaction(r_species=[allylmethylether, water], p_species=[allylalcohol, methanol]) + heuristics_2 = HeuristicsAdapter(job_type='tsg', + reactions=[rxn2], + testing=True, + project='test', + project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'heuristics') + ) + heuristics_2.execute_incore() + self.assertEqual(rxn2.family, 'ether_hydrolysis') + self.assertTrue(rxn2.ts_species.is_ts) + self.assertEqual(rxn2.ts_species.ts_guesses[0].initial_xyz['symbols'], + ('C', 'C', 'C', 'O', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'O', 'H', 'H')) + self.assertEqual(len(rxn2.ts_species.ts_guesses), 4) + # Nitrile hydrolysis + # N#CC(C#N) + H2O <=> N=C(O)C(C#N) + malononitrile = self.malononitrile + iminopropanoic_acid = self.iminopropanoic_acid + rxn4 = ARCReaction(r_species=[malononitrile, water], p_species=[iminopropanoic_acid]) + heuristics_4 = HeuristicsAdapter(job_type='tsg', + reactions=[rxn4], + testing=True, + project='test', + project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'heuristics') + ) + heuristics_4.execute_incore() + self.assertEqual(rxn4.family, 'nitrile_hydrolysis') + self.assertTrue(rxn4.ts_species.is_ts) + self.assertEqual(rxn4.ts_species.ts_guesses[0].initial_xyz['symbols'], + ('N', 'C', 'C', 'C', 'N', 'H', 'H', 'O', 'H', 'H')) + def test_keeping_atom_order_in_ts(self): """Test that the generated TS has the same atom order as in the reactants""" # reactant_reversed, products_reversed = False, False @@ -977,7 +1268,7 @@ def test_keeping_atom_order_in_ts(self): self.assertIn(tuple(rxn_4.atom_map[7:9]), itertools.permutations([7, 8])) self.assertEqual(rxn_4.atom_map[9:11], [11, 10]) self.assertIn(tuple(rxn_4.atom_map[11:14]), itertools.permutations([9, 15, 16])) - self.assertIn(tuple(rxn_4.atom_map[14:]), itertools.permutations([12, 13, 14 ])) + self.assertIn(tuple(rxn_4.atom_map[14:]), itertools.permutations([12, 13, 14])) heuristics_4 = HeuristicsAdapter(job_type='tsg', reactions=[rxn_4], testing=True, @@ -1007,32 +1298,36 @@ def test_combine_coordinates_with_redundant_atoms(self): d3=0, reactants_reversed=False, ) - expected_xyz = {'symbols': ('C', 'C', 'O', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'H', 'H', 'H', 'H', 'H'), - 'isotopes': (12, 12, 16, 16, 1, 1, 1, 1, 1, 1, 12, 12, 1, 1, 1, 1, 1), - 'coords': ((1.5410341894056017, 0.03255278579527917, -2.1958803093604273), - (1.5410341894056017, 0.03255278579527917, -0.6811323321036524), - (0.3807836408624652, 0.7262089976674171, -0.22531436706462338), - (0.44725483370102004, 0.6862474252614342, 1.2285438144001), - (1.5410341894056017, 1.0573914061715723, -2.581594930574602), - (2.417813888620787, -0.491621171880301, -2.5862518253483486), - (0.6382978847026384, -0.45259259592322265, -2.5815563738259377), - (2.436246144172599, 0.5373544263141657, -0.30283384265673163), - (1.5201959575389625, -0.995008683824121, -0.3029562875823373), - (-0.5371462088963908, 0.07564215831498397, 1.3852370974886976), - (-2.2867840076236203, -0.9301984769015986, 0.23115065682441438), - (-1.6427630361819756, -0.6101509572468562, 1.5612250566727561), - (-3.208131363694941, -1.5016927398697992, 0.3778072894777962), - (-1.609416630920172, -1.5224707666144137, -0.3912654967124345), - (-2.5339216518555596, -0.010500763584261032, -0.3074446156924231), - (-1.3956300152903744, -1.5298551170593468, 2.099824519093628), - (-2.320130400140207, -0.017878639343882265, 2.183641197255159))} + expected_xyz = { + 'symbols': ('C', 'C', 'O', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'H', 'H', 'H', 'H', 'H'), + 'isotopes': (12, 12, 16, 16, 1, 1, 1, 1, 1, 1, 12, 12, 1, 1, 1, 1, 1), + 'coords': ((1.5410341894056017, 0.03255278579527917, -2.1958803093604273), + (1.5410341894056017, 0.03255278579527917, -0.6811323321036524), + (0.3807836408624652, 0.7262089976674171, -0.22531436706462338), + (0.44725483370102004, 0.6862474252614342, 1.2285438144001), + (1.5410341894056017, 1.0573914061715723, -2.581594930574602), + (2.417813888620787, -0.491621171880301, -2.5862518253483486), + (0.6382978847026384, -0.45259259592322265, -2.5815563738259377), + (2.436246144172599, 0.5373544263141657, -0.30283384265673163), + (1.5201959575389625, -0.995008683824121, -0.3029562875823373), + (-0.5371462088963908, 0.07564215831498397, 1.3852370974886976), + (-2.2867840076236203, -0.9301984769015986, 0.23115065682441438), + (-1.6427630361819756, -0.6101509572468562, 1.5612250566727561), + (-3.208131363694941, -1.5016927398697992, 0.3778072894777962), + (-1.609416630920172, -1.5224707666144137, -0.3912654967124345), + (-2.5339216518555596, -0.010500763584261032, -0.3074446156924231), + (-1.3956300152903744, -1.5298551170593468, 2.099824519093628), + (-2.320130400140207, -0.017878639343882265, 2.183641197255159))} self.assertTrue(almost_equal_coords(ts_xyz, expected_xyz)) ts_xyz = combine_coordinates_with_redundant_atoms(xyz_1=self.ccooh_xyz, xyz_2=self.c2h6_xyz, - mol_1=ARCSpecies(label='CCOOH', smiles='CCOO', xyz=self.ccooh_xyz).mol, - mol_2=ARCSpecies(label='C2H6', smiles='CC', xyz=self.c2h6_xyz).mol, - reactant_2=ARCSpecies(label='C2H5', smiles='C[CH2]', xyz=self.c2h5_xyz), + mol_1=ARCSpecies(label='CCOOH', smiles='CCOO', + xyz=self.ccooh_xyz).mol, + mol_2=ARCSpecies(label='C2H6', smiles='CC', + xyz=self.c2h6_xyz).mol, + reactant_2=ARCSpecies(label='C2H5', smiles='C[CH2]', + xyz=self.c2h5_xyz), h1=9, h2=5, c=2, @@ -1042,25 +1337,26 @@ def test_combine_coordinates_with_redundant_atoms(self): d3=120, reactants_reversed=False, ) - expected_xyz = {'symbols': ('C', 'C', 'O', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'H', 'H', 'H', 'H', 'H'), - 'isotopes': (12, 12, 16, 16, 1, 1, 1, 1, 1, 1, 12, 12, 1, 1, 1, 1, 1), - 'coords': ((1.3718025969171979, 0.23742954398890095, -2.378138339962993), - (1.3718025969171979, 0.23742954398890095, -0.8633903627062183), - (0.21155204837406139, 0.9310857558610388, -0.4075723976671892), - (0.27802324121261623, 0.891124183455056, 1.0462857837975341), - (1.3718025969171979, 1.262268164365194, -2.763852961177168), - (2.2485822961323834, -0.28674441368667924, -2.7685098559509145), - (0.46906629221423457, -0.24771583772960085, -2.7638144044285036), - (2.2670145516841953, 0.7422311845077875, -0.4850918732592975), - (1.3509643650505587, -0.7901319256304993, -0.4852143181849031), - (-0.7063778013847946, 0.2805189165086057, 1.2029790668861318), - (-1.6675899857195289, -1.7647048216534997, 1.748867627178241), - (-1.5718122041529685, -0.6101944928182649, 0.7771558114650299), - (-2.3887853213596744, -2.5069659960925583, 1.3940149143273226), - (-1.9912618716664985, -1.4129403631715014, 2.7329982955164334), - (-0.6957522841048858, -2.2546439158856977, 1.860657306122405), - (-2.5436577650309165, -0.1202574787360858, 0.6653623387328302), - (-1.2481402862001663, -0.961958946530624, -0.20697484805157007))} + expected_xyz = { + 'symbols': ('C', 'C', 'O', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'H', 'H', 'H', 'H', 'H'), + 'isotopes': (12, 12, 16, 16, 1, 1, 1, 1, 1, 1, 12, 12, 1, 1, 1, 1, 1), + 'coords': ((1.3718025969171979, 0.23742954398890095, -2.378138339962993), + (1.3718025969171979, 0.23742954398890095, -0.8633903627062183), + (0.21155204837406139, 0.9310857558610388, -0.4075723976671892), + (0.27802324121261623, 0.891124183455056, 1.0462857837975341), + (1.3718025969171979, 1.262268164365194, -2.763852961177168), + (2.2485822961323834, -0.28674441368667924, -2.7685098559509145), + (0.46906629221423457, -0.24771583772960085, -2.7638144044285036), + (2.2670145516841953, 0.7422311845077875, -0.4850918732592975), + (1.3509643650505587, -0.7901319256304993, -0.4852143181849031), + (-0.7063778013847946, 0.2805189165086057, 1.2029790668861318), + (-1.6675899857195289, -1.7647048216534997, 1.748867627178241), + (-1.5718122041529685, -0.6101944928182649, 0.7771558114650299), + (-2.3887853213596744, -2.5069659960925583, 1.3940149143273226), + (-1.9912618716664985, -1.4129403631715014, 2.7329982955164334), + (-0.6957522841048858, -2.2546439158856977, 1.860657306122405), + (-2.5436577650309165, -0.1202574787360858, 0.6653623387328302), + (-1.2481402862001663, -0.961958946530624, -0.20697484805157007))} self.assertTrue(almost_equal_coords(ts_xyz, expected_xyz)) def test_get_new_zmat2_map(self): @@ -1118,9 +1414,9 @@ def test_get_new_zmat2_map(self): # expected_new_map = {0: 12, 1: 13, 2: 'X24', 3: 14, 4: 15, 5: 16, 6: 'X25', 7: 17, 8: 'X26', 9: 18, 10: 19, # 11: 20, 12: 21, 13: 22, 14: 'X27', 15: 23, 16: 'X28', 17: 2, 18: 3, 19: 1, 21: 4, 23: 0, # 25: 7, 26: 6, 28: 5, 20: 'X8', 22: 'X9', 24: 'X10', 27: 'X11'} - expected_new_map = {0: 12, 1: 13, 2: 'X24', 3: 14, 4: 15, 5: 16, 6: 'X25', 7: 17, 8: 'X26', 9: 18, 10: 19, - 11: 20, 12: 21, 13: 22, 14: 'X27', 15: 23, 16: 'X28', 17: 2, 18: 1, 19: 3, 21: 0, 23: 4, - 25: 5, 26: 6, 28: 7, 20: 'X8', 22: 'X9', 24: 'X10', 27: 'X11'} + expected_new_map = {0: 12, 1: 13, 2: 'X24', 3: 14, 4: 15, 5: 16, 6: 'X25', 7: 17, 8: 'X26', 9: 18, 10: 19, + 11: 20, 12: 21, 13: 22, 14: 'X27', 15: 23, 16: 'X28', 17: 2, 18: 1, 19: 3, 21: 0, 23: 4, + 25: 5, 26: 6, 28: 7, 20: 'X8', 22: 'X9', 24: 'X10', 27: 'X11'} self.assertEqual(new_map, expected_new_map) @@ -1170,8 +1466,10 @@ def test_generate_the_two_constrained_zmats_1(self): """Test the generate_the_two_constrained_zmats() function.""" zmat_1, zmat_2 = generate_the_two_constrained_zmats(xyz_1=self.ccooh_xyz, xyz_2=self.c2h6_xyz, - mol_1=ARCSpecies(label='CCOOH', smiles='CCOO', xyz=self.ccooh_xyz).mol, - mol_2=ARCSpecies(label='C2H6', smiles='CC', xyz=self.c2h6_xyz).mol, + mol_1=ARCSpecies(label='CCOOH', smiles='CCOO', + xyz=self.ccooh_xyz).mol, + mol_2=ARCSpecies(label='C2H6', smiles='CC', + xyz=self.c2h6_xyz).mol, h1=9, h2=3, a=3, @@ -1260,9 +1558,10 @@ def test_generate_the_two_constrained_zmats_3(self): 'A_3_2_0': 105.99800872802734, 'D_3_2_0_1': 112.36217876566015}, 'map': {0: 2, 1: 3, 2: 0, 3: 1}} expected_zmat_2 = {'symbols': ('H', 'N', 'N', 'H', 'H', 'H'), - 'coords': ((None, None, None), ('R_1_0', None, None), ('R_2_1', 'A_2_1_0', None), # R_1_0, A_2_1_0 - ('R_3_1', 'A_3_1_2', 'D_3_1_2_0'), ('R_4_2', 'A_4_2_1', 'D_4_2_1_3'), - ('R_5_2', 'A_5_2_1', 'D_5_2_1_4')), + 'coords': ( + (None, None, None), ('R_1_0', None, None), ('R_2_1', 'A_2_1_0', None), # R_1_0, A_2_1_0 + ('R_3_1', 'A_3_1_2', 'D_3_1_2_0'), ('R_4_2', 'A_4_2_1', 'D_4_2_1_3'), + ('R_5_2', 'A_5_2_1', 'D_5_2_1_4')), 'vars': {'R_1_0': 1.0239644050598145, 'R_2_1': 1.4346996545791626, 'A_2_1_0': 113.24588012695312, 'R_3_1': 1.0248854160308838, 'A_3_1_2': 111.58697509765625, 'D_3_1_2_0': 240.07077704046904, @@ -1387,7 +1686,8 @@ def test_generate_the_two_constrained_zmats_6(self): 'R_6_3': 1.0941026210784912, 'A_6_3_2': 110.54409790039062, 'D_6_3_2_5': 181.34310124526942, 'R_7_0': 0.972385048866272, 'A_7_0_3': 107.0633544921875, 'D_7_0_3_2': 179.99951584936258, - 'R_8_1': 2.509397506713867, 'A_8_1_0': 37.757564544677734, 'D_8_1_0_3': 35.458506109251914}, + 'R_8_1': 2.509397506713867, 'A_8_1_0': 37.757564544677734, + 'D_8_1_0_3': 35.458506109251914}, 'map': {0: 2, 1: 4, 2: 0, 3: 1, 4: 3, 5: 5, 6: 6, 7: 8, 8: 7}} expected_zmat_2 = {'symbols': ('H', 'C', 'O', 'O', 'H', 'H', 'H'), 'coords': ((None, None, None), ('R_1_0', None, None), ('R_2_1', 'A_2_1_0', None), @@ -1507,7 +1807,8 @@ def test_determine_glue_params(self): 'R_7_2': 1.1059615583516615, 'A_7_2_1': 110.8283376252455, 'D_7_2_1_6': 300.2088700889345, 'R_8_2': 1.1059615583516615, 'A_8_2_1': 110.8283376252455, 'D_8_2_1_7': 119.58225745206313, 'R_9_0': 1.1059615583516615, 'A_9_0_1': 110.8283376252455, 'D_9_0_1_2': 300.2088700889345, - 'R_10_0': 1.1059615583516615, 'A_10_0_1': 110.8283376252455, 'D_10_0_1_2': 59.79112991106552}, + 'R_10_0': 1.1059615583516615, 'A_10_0_1': 110.8283376252455, + 'D_10_0_1_2': 59.79112991106552}, 'map': {0: 2, 1: 0, 2: 1, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10}} param_a2, param_d2, param_d3 = determine_glue_params(zmat=zmat_2, add_dummy=False, @@ -1563,7 +1864,8 @@ def test_get_modified_params_from_zmat_2(self): d2=0, d3=120, ) - expected_new_symbols = ('C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'X', 'O', 'O', 'C', 'C', 'H', 'H', 'H', 'H', 'H') + expected_new_symbols = ( + 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'X', 'O', 'O', 'C', 'C', 'H', 'H', 'H', 'H', 'H') expected_new_coords = ( (None, None, None), ('R_1_0', None, None), ('R_2_0', 'A_2_0_1', None), ('R_3_0', 'A_3_0_1', 'D_3_0_1_2'), ('R_4_1', 'A_4_1_0', 'D_4_1_0_3'), ('R_5_1', 'A_5_1_0', 'D_5_1_0_4'), ('R_6_1', 'A_6_1_0', 'D_6_1_0_5'), @@ -1661,13 +1963,17 @@ def test_are_h_abs_wells_reversed(self): Test the are_h_abs_wells_reversed() function. The expected order is: R(*1)-H(*2) + R(*3)j <=> R(*1)j + R(*3)-H(*2) """ - rxn_1 = ARCReaction(r_species=[ARCSpecies(label='C2H6', smiles='CC'), ARCSpecies(label='OH', smiles='[OH]')], # none are reversed + rxn_1 = ARCReaction(r_species=[ARCSpecies(label='C2H6', smiles='CC'), ARCSpecies(label='OH', smiles='[OH]')], + # none are reversed p_species=[ARCSpecies(label='C2H5', smiles='[CH2]C'), ARCSpecies(label='H2O', smiles='O')]) - rxn_2 = ARCReaction(r_species=[ARCSpecies(label='OH', smiles='[OH]'), ARCSpecies(label='C2H6', smiles='CC')], # r reversed + rxn_2 = ARCReaction(r_species=[ARCSpecies(label='OH', smiles='[OH]'), ARCSpecies(label='C2H6', smiles='CC')], + # r reversed p_species=[ARCSpecies(label='C2H5', smiles='[CH2]C'), ARCSpecies(label='H2O', smiles='O')]) - rxn_3 = ARCReaction(r_species=[ARCSpecies(label='C2H6', smiles='CC'), ARCSpecies(label='OH', smiles='[OH]')], # p reversed + rxn_3 = ARCReaction(r_species=[ARCSpecies(label='C2H6', smiles='CC'), ARCSpecies(label='OH', smiles='[OH]')], + # p reversed p_species=[ARCSpecies(label='H2O', smiles='O'), ARCSpecies(label='C2H5', smiles='[CH2]C')]) - rxn_4 = ARCReaction(r_species=[ARCSpecies(label='OH', smiles='[OH]'), ARCSpecies(label='C2H6', smiles='CC')], # r and p reversed + rxn_4 = ARCReaction(r_species=[ARCSpecies(label='OH', smiles='[OH]'), ARCSpecies(label='C2H6', smiles='CC')], + # r and p reversed p_species=[ARCSpecies(label='H2O', smiles='O'), ARCSpecies(label='C2H5', smiles='[CH2]C')]) product_dicts = get_reaction_family_products(rxn=rxn_1, @@ -1710,7 +2016,8 @@ def test_are_h_abs_wells_reversed(self): self.assertTrue(r_reversed) self.assertTrue(p_reversed) - rxn_5 = ARCReaction(r_species=[ARCSpecies(label='H', smiles='[H]'), ARCSpecies(label='H2O', smiles='O')], # r and p reversed + rxn_5 = ARCReaction(r_species=[ARCSpecies(label='H', smiles='[H]'), ARCSpecies(label='H2O', smiles='O')], + # r and p reversed p_species=[ARCSpecies(label='H2', smiles='[H][H]'), ARCSpecies(label='OH', smiles='[OH]')]) product_dicts = get_reaction_family_products(rxn=rxn_5, rmg_family_set=[rxn_5.family], @@ -1722,8 +2029,10 @@ def test_are_h_abs_wells_reversed(self): self.assertTrue(r_reversed) self.assertTrue(p_reversed) - rxn_6 = ARCReaction(r_species=[ARCSpecies(label='CCCC(O)=O', smiles='CCCC(O)=O'), ARCSpecies(label='OH', smiles='[OH]')], # none are reversed - p_species=[ARCSpecies(label='CCCC([O])=O', smiles='CCCC([O])=O'), ARCSpecies(label='H2O', smiles='O')]) + rxn_6 = ARCReaction( + r_species=[ARCSpecies(label='CCCC(O)=O', smiles='CCCC(O)=O'), ARCSpecies(label='OH', smiles='[OH]')], + # none are reversed + p_species=[ARCSpecies(label='CCCC([O])=O', smiles='CCCC([O])=O'), ARCSpecies(label='H2O', smiles='O')]) product_dicts = get_reaction_family_products(rxn=rxn_6, rmg_family_set=[rxn_6.family], consider_rmg_families=True, @@ -1734,7 +2043,210 @@ def test_are_h_abs_wells_reversed(self): self.assertFalse(r_reversed) self.assertFalse(p_reversed) - + def test_process_hydrolysis_reaction(self): + """Test the process_hydrolysis_reaction() function.""" + acetamide = self.acetamide + water = self.water + acetic_acid = self.acetic_acid + ammonia = self.ammonia + rxn = ARCReaction(r_species=[acetamide, water], p_species=[acetic_acid, ammonia]) + main_reactant, water_mol = get_main_reactant_and_water_from_hydrolysis_reaction(rxn) + self.assertEqual(main_reactant.label, acetamide.label) + + benzoyl = self.benzoyl + ethylamine = self.ethylamine + benzamide = self.benzamide + ethanol = self.ethanol + rxn_no_water = ARCReaction(r_species=[benzoyl, ethylamine], p_species=[benzamide, ethanol]) + with self.assertRaises(ValueError) as cm: + get_main_reactant_and_water_from_hydrolysis_reaction(rxn_no_water) + self.assertEqual(str(cm.exception), "Reactants must include a non-water molecule and water.") + + rxn_only_water = ARCReaction(r_species=[water, water], p_species=[water, water]) + with self.assertRaises(ValueError) as cm: + get_main_reactant_and_water_from_hydrolysis_reaction(rxn_only_water) + self.assertEqual(str(cm.exception), "Reactants must include a non-water molecule and water.") + + def test_get_neighbors_by_electronegativity(self): + """Test the get_neighbors_by_electronegativity() function.""" + spc = ARCSpecies(label='H', smiles='[H]') + with self.assertRaises(ValueError) as cm: + get_neighbors_by_electronegativity(spc, 0, 0, False) + self.assertEqual(str(cm.exception), "Atom at index 0 has no valid neighbors.") + + spc = ARCSpecies(label='carbonyl', smiles='C=O') + atom_index = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_carbon()) + exclude = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_oxygen()) + neighbor1 = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_hydrogen()) + neighbor2 = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_hydrogen() and i != neighbor1) + self.assertEqual(get_neighbors_by_electronegativity(spc, atom_index, exclude), (neighbor1, [neighbor2])) + + spc = ARCSpecies(label='NH2C(=O)H', smiles='NC(=O)') + atom_index = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_carbon()) + neighbors = [neighbor for neighbor in spc.mol.atoms[atom_index].edges.keys()] + exclude = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_hydrogen() and atom in neighbors) + highest = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_oxygen()) + second_highest = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_nitrogen()) + self.assertEqual(get_neighbors_by_electronegativity(spc, atom_index, exclude), (highest, [second_highest])) + + spc = ARCSpecies(label='ClOCH2OH', smiles='ClOCO') + atom_index = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_carbon()) + neighbors = [neighbor for neighbor in spc.mol.atoms[atom_index].edges.keys()] + exclude = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_hydrogen() and atom in neighbors) + first_oxygen = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_oxygen()) + second_oxygen = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_oxygen() and i != first_oxygen) + second_hydrogen = next(i for i, atom in enumerate(spc.mol.atoms) if atom.is_hydrogen() and i != exclude) + self.assertEqual(get_neighbors_by_electronegativity(spc, atom_index, exclude), + (first_oxygen, [second_oxygen, second_hydrogen])) + + def test_setup_zmat_indices(self): + """Test the setup_zmat_indices() function.""" + initial_xyz = {'coords': ((-0.36862686, -0.00871354, 0.04292587), + (0.98182901, -0.0490201, 0.46594709), + (-0.57257378, 0.95163086, -0.43693396), + (-0.55632373, -0.82564527, -0.65815446), + (-1.01755588, -0.12311763, 0.91437513), + (1.10435907, 0.67758465, 1.10037299)), + 'isotopes': (12, 16, 1, 1, 1, 1), + 'symbols': ('C', 'O', 'H', 'H', 'H', 'H')} + xyz_indices = {'a': 0, 'b': 1, 'e': 2, 'd': 3} + initial_zmat, zmat_indices = setup_zmat_indices(initial_xyz, xyz_indices) + self.assertIsNotNone(zmat_indices['a']) + self.assertIsNotNone(zmat_indices['b']) + self.assertIsNotNone(zmat_indices['e']) + self.assertIsNotNone(zmat_indices['d']) + + xyz_indices_no_d = {'a': 0, 'b': 1, 'e': 2, 'd': None} + initial_zmat, zmat_indices = setup_zmat_indices(initial_xyz, xyz_indices_no_d) + self.assertIsNone(zmat_indices['d']) + + def test_get_matching_dihedrals(self): + """ + Test get_matching_dihedrals. + """ + zmat = {'vars': {'D_1_2_3_4': 60, + 'D_1_2_5_6': 120, + 'D_2_3_4_5': 180, + 'DX_1_2_4_7': 90, + 'DX_3_4_5_6': -60, + 'D_2_3_5_6': 150}} + matches_with_d = get_matching_dihedrals(zmat, a=1, b=2, e=3, d=4) + expected_matches_with_d = [[1, 2, 3, 4], [1, 2, 4, 7]] + self.assertEqual(matches_with_d, expected_matches_with_d, + "get_matching_dihedrals with 'd' provided failed.") + matches_without_d = get_matching_dihedrals(zmat, a=1, b=2, e=3, d=None) + expected_matches_without_d = [[1, 2, 3, 4]] + self.assertEqual(matches_without_d, expected_matches_without_d, + "get_matching_dihedrals without 'd' provided failed.") + + def test_generate_dihedral_variants(self): + """ + Test generate_dihedral_variants + """ + formicacid = self.formicacid + formicacid_coords = formicacid.get_xyz() + zmat_formicacid_1 = zmat_from_xyz(formicacid_coords) + indices = [3, 1, 0, 2] + adjustment_factors = [15, 25] + parameter_name = get_parameter_from_atom_indices(zmat=zmat_formicacid_1, indices=indices, xyz_indexed=False) + + # dihedral angle value is 180 + variants_1 = generate_dihedral_variants(zmat_formicacid_1, indices, adjustment_factors) + variant_values_1 = [variant["vars"][parameter_name] for variant in variants_1] + expected_values_1 = [165.0, 155.0] + self.assertEqual(len(variant_values_1), len(expected_values_1)) + for v, e in zip(variant_values_1, expected_values_1): + self.assertAlmostEqual(v, e, delta=0.5) + + # dihedral angel value is 0 + zmat_formicacid_2 = copy.deepcopy(zmat_formicacid_1) + zmat_formicacid_2["vars"][parameter_name] = 0 + variants_2 = generate_dihedral_variants(zmat_formicacid_2, indices, adjustment_factors) + variant_values_2 = [variant["vars"][parameter_name] for variant in variants_2] + expected_values_2 = [15.0, 25.0] + self.assertEqual(len(variant_values_2), len(expected_values_2)) + for v, e in zip(variant_values_2, expected_values_2): + self.assertAlmostEqual(v, e, delta=0.5) + + # dihedral angle value is 90 + zmat_formicacid_3 = copy.deepcopy(zmat_formicacid_1) + zmat_formicacid_3["vars"][parameter_name] = 90 + variants_3 = generate_dihedral_variants(zmat_formicacid_3, indices, adjustment_factors) + variant_values_3 = [variant["vars"][parameter_name] for variant in variants_3] + expected_values_3 = [] + self.assertEqual(len(variant_values_3), len(expected_values_3)) + for v, e in zip(variant_values_3, expected_values_3): + self.assertAlmostEqual(v, e, delta=0.5) + + # dihedral angle value is 360 + zmat_formicacid_4 = copy.deepcopy(zmat_formicacid_1) + zmat_formicacid_4["vars"][parameter_name] = 360 + variants_4 = generate_dihedral_variants(zmat_formicacid_4, indices, adjustment_factors) + variant_values_4 = [variant["vars"][parameter_name] for variant in variants_4] + expected_values_4 = [15.0, 25.0] + self.assertEqual(len(variant_values_4), len(expected_values_4)) + for v, e in zip(variant_values_4, expected_values_4): + self.assertAlmostEqual(v, e, delta=0.5) + + # dihedral angle value is -9 + zmat_formicacid_5 = copy.deepcopy(zmat_formicacid_1) + zmat_formicacid_5["vars"][parameter_name] = -9 + variants_5 = generate_dihedral_variants(zmat_formicacid_5, indices, adjustment_factors) + variant_values_5 = [variant["vars"][parameter_name] for variant in variants_5] + expected_values_5 = [6.0, 16.0] + self.assertEqual(len(variant_values_5), len(expected_values_5)) + for v, e in zip(variant_values_5, expected_values_5): + self.assertAlmostEqual(v, e, delta=0.5) + + # dihedral angle value is 180 + flip + zmat_flip = copy.deepcopy(zmat_formicacid_1) + zmat_flip["vars"][parameter_name] = 0 + variants_flip = generate_dihedral_variants(zmat_flip, indices, adjustment_factors, flip=True) + variant_values_flip = [v["vars"][parameter_name] for v in variants_flip] + expected_flip = [-165.0, -155.0] + self.assertEqual(len(variant_values_flip), len(expected_flip)) + for v, e in zip(variant_values_flip, expected_flip): + self.assertAlmostEqual(v, e, delta=0.5) + + def test_check_dao_angle(self): + """Test the check_dao_angle() function.""" + initial_xyz = {'coords': ((0.30355829, -0.66195506, -1.72681105), + (-0.43699079, 0.12678172, -0.66520607), + (-0.43699079, 0.12678172, 0.76025877), + (-0.43699079, 1.54424376, 1.2153343), + (-1.51651197, -1.74230895, -0.7476734), + (0.22760699, -1.92765268, -0.60796871), + (-0.48269046, -1.432306, -2.16308677), + (-1.2382232, 0.71323739, -1.12821542), + (0.53171526, 0.5251499, -0.98644288), + (-1.24394344, 2.15646467, 0.80128544), + (-0.52457078, 1.54152685, 2.30509001), + (0.53491089, 1.96742818, 0.94377345), + (1.09881554, -0.99733851, 0.22229234)), + 'isotopes': (12, 12, 16, 12, 1, 1, 1, 1, 1, 1, 1, 1, 16), + 'symbols': ('C', 'C', 'O', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'O')} + d1_indices = [0, 7, 1, 12] + d2_indices = [0, 8, 1, 12] + result = check_dao_angle(d1_indices, initial_xyz) + self.assertTrue(result) + result = check_dao_angle(d2_indices, initial_xyz) + self.assertFalse(result) + + def test_check_ts_bonds(self): + """Test the check_ts_bonds() function.""" + initial_xyz_str1 = """Cl -0.53400971 -1.20319371 -1.44406381 +C -0.53400971 -1.20319371 0.32841058 +C -0.53400971 0.21902198 0.83449069 +O -1.26462888 1.07894027 0.34668602 +Cl 1.53426851 1.14297915 0.85911118 +H -0.45140709 -1.91562715 1.15398835 +H -1.46761551 -1.40028827 -0.20631013 +O -0.43514883 0.94568191 2.47832518 +H 0.58786134 1.18684413 1.87883293 +H -0.30139889 0.23142254 3.12085495""" + initial_xyz = str_to_xyz(initial_xyz_str1) + result = check_ts_bonds(initial_xyz, [7, 8, 9, 2, 4]) + self.assertTrue(result) @classmethod def tearDownClass(cls): diff --git a/arc/job/adapters/xtb_test.py b/arc/job/adapters/xtb_test.py index 17af51760a..fef349c5c4 100644 --- a/arc/job/adapters/xtb_test.py +++ b/arc/job/adapters/xtb_test.py @@ -96,7 +96,7 @@ def setUpClass(cls): cls.job_9 = xTBAdapter(execution_type='queue', job_type='opt', project='test_9', - level=Level(method='xtb', solvent='water'), + level=Level(method='xtb', solvent='water', solvation_method="alpb"), project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_xTBAdapter_9'), species=[ARCSpecies(label='spc2', smiles='CC[O]')], ) diff --git a/arc/level.py b/arc/level.py index 70bb7d1010..50d2062d2a 100644 --- a/arc/level.py +++ b/arc/level.py @@ -83,10 +83,11 @@ def __init__(self, or solvation_scheme_level.solvation_scheme_level is not None): raise ValueError('Cannot represent a solvation_scheme_level which itself has solvation attributes.') self.solvation_scheme_level = solvation_scheme_level - if self.solvation_method is not None and self.solvent is None: - raise ValueError(f'Cannot represent a level of theory with a solvation method ("{self.solvation_method}") ' - f'that lacks a solvent.') - + if (self.solvation_method is None) != (self.solvent is None): + raise ValueError( + 'Both solvation method and solvent must be defined together, or both must be None. ' + f'Got solvation method = "{self.solvation_method}", solvent = "{self.solvent}".' + ) self.args = args or {'keyword': dict(), 'block': dict()} if self.repr is not None: diff --git a/arc/reaction/reaction.py b/arc/reaction/reaction.py index 2dc2dea4fa..ee09dc0c88 100644 --- a/arc/reaction/reaction.py +++ b/arc/reaction/reaction.py @@ -6,7 +6,7 @@ from arc.common import get_element_mass, get_logger from arc.exceptions import ReactionError, InputError -from arc.family.family import ReactionFamily, get_reaction_family_products +from arc.family.family import ReactionFamily, get_reaction_family_products, check_family_name from arc.molecule.resonance import generate_resonance_structures_safely from arc.species.converter import (check_xyz_dict, sort_xyz_using_indices, @@ -41,6 +41,7 @@ class ARCReaction(object): p_species (List[ARCSpecies], optional): A list of products :ref:`ARCSpecies ` objects. ts_label (str, optional): The :ref:`ARCSpecies ` label of the respective TS. ts_xyz_guess (list, optional): A list of TS XYZ user guesses, each in a string format. + family (str, optional): The reaction family, if applicable. xyz (list, optional): Identical to `ts_xyz_guess`, used as a shortcut. multiplicity (int, optional): The reaction surface multiplicity. A trivial guess will be made unless provided. charge (int, optional): The reaction surface charge. @@ -94,6 +95,7 @@ def __init__(self, p_species: Optional[List[ARCSpecies]] = None, ts_label: Optional[str] = None, ts_xyz_guess: Optional[list] = None, + family: Optional[str] = None, xyz: Optional[list] = None, multiplicity: Optional[int] = None, charge: Optional[int] = None, @@ -109,7 +111,10 @@ def __init__(self, self.kinetics = kinetics self.rmg_kinetics = None self.long_kinetic_description = '' - self._family = None + if check_family_name(family): + self.family = family + else: + raise ValueError(f"Invalid family name: {family}") self._family_own_reverse = False self.ts_label = ts_label self.dh_rxn298 = None diff --git a/arc/species/converter.py b/arc/species/converter.py index c2d488a963..e5a01a81ed 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -2039,16 +2039,16 @@ def ics_to_scan_constraints(ics: list, def add_atom_to_xyz_using_internal_coords(xyz: Union[dict, str], element: str, r_index: int, - a_indices: tuple, - d_indices: tuple, + a_indices: Union[tuple, list], + d_indices: Union[tuple, list], r_value: float, a_value: float, d_value: float, - opt_method: str = 'Nelder-Mead', + opt_methods: Optional[Union[str, List[str]]] = None, ) -> Optional[dict]: """ - Add an atom to XYZ. The new atom may have random r, a, and d index parameters - (not necessarily defined for the same respective atoms). + Add an atom to an XYZ structure based on distance, angle, and dihedral constraints. + The new atom may have random r, a, and d index parameters (not necessarily defined for the same respective atoms). This function will compute the coordinates for the new atom. This function assumes that the original xyz has at least 3 atoms. @@ -2056,79 +2056,111 @@ def add_atom_to_xyz_using_internal_coords(xyz: Union[dict, str], xyz (dict): The xyz coordinates to process in a dictionary format. element (str): The chemical element of the atom to add. r_index (int): The index of an atom R to define the distance parameter R-X w.r.t the newly added atom, X. - a_indices (tuple): The indices of two atoms, A and B, to define the angle A-B-X parameter w.r.t the newly added atom, X. - d_indices (tuple): The indices of three atoms, L M and N, to define the dihedral angle L-M-N-X parameter w.r.t the newly added atom, X. + a_indices (Union[tuple, list]): The indices of two atoms, A and B, to define the angle A-B-X parameter w.r.t the newly added atom, X. + d_indices (Union[tuple, list]): The indices of three atoms, L M and N, to define the dihedral angle L-M-N-X parameter w.r.t the newly added atom, X. r_value (float): The value of the R-X distance parameter, r. a_value (float): The value of the A-B-X angle parameter, a. d_value (float): The value of the L-M-N-X dihedral angle parameter, d. - opt_method (str, optional): The optimization method to use for finding the new atom's coordinates. - Additional options include 'trust-constr' and 'BFGS'. + opt_methods (List[str], optional): The optimization method to use for finding the new atom's coordinates. + Options include 'SLSQP', 'Nelder-Mead', 'trust-constr' and 'BFGS'. Returns: Optional[dict]: The updated xyz coordinates. """ + best_guesses = list() xyz = check_xyz_dict(xyz) + opt_methods = [opt_methods] if isinstance(opt_methods, str) else opt_methods + opt_methods = opt_methods or ['SLSQP', 'Nelder-Mead', 'BFGS'] def calculate_errors(result_coords): - r_error = np.abs(np.sqrt(np.sum((np.array(result_coords) - np.array(xyz['coords'][r_index]))**2)) - r_value) - a_error = np.sqrt(angle_constraint(xyz['coords'][a_indices[0]], xyz['coords'][a_indices[1]], a_value)(*result_coords)) - d_error = np.sqrt(dihedral_constraint(xyz['coords'][d_indices[0]], xyz['coords'][d_indices[1]], xyz['coords'][d_indices[2]], d_value)(*result_coords)) + """Calculate constraint errors for distance, angle, and dihedral""" + r_error = np.abs(np.linalg.norm(np.array(result_coords) - np.array(xyz['coords'][r_index])) - r_value) + a_error = np.abs(angle_constraint(xyz['coords'][a_indices[0]], xyz['coords'][a_indices[1]], a_value)(*result_coords)) + d_error = np.abs(dihedral_constraint(xyz['coords'][d_indices[0]], xyz['coords'][d_indices[1]], xyz['coords'][d_indices[2]], d_value)(*result_coords)) return r_error, a_error, d_error def meets_precision(result_coords): + """Check if all constraints meet precision requirements""" r_error, a_error, d_error = calculate_errors(result_coords) return r_error < DIST_PRECISION and a_error < ANGL_PRECISION and d_error < ANGL_PRECISION + def average_best_guesses_from_all_methods(atom_r_coord, r_value, atom_a_coord, atom_b_coord, a_value): + if not best_guesses: + return generate_initial_guess_r_a(atom_r_coord, r_value, atom_a_coord, atom_b_coord, a_value) + coords = np.array( + [guess_xyz['coords'][-1] for guess_xyz in best_guesses if guess_xyz is not None] + ) + return np.mean(coords, axis=0) + guess_functions = [ generate_initial_guess_r_a, generate_midpoint_initial_guess, generate_perpendicular_initial_guess, generate_bond_length_initial_guess, - generate_random_initial_guess, generate_shifted_initial_guess, + average_best_guesses_from_all_methods, ] - closest_result = None - closest_error = float('inf') - - for attempt, guess_func in enumerate(guess_functions, start=1): - initial_guess = guess_func( - atom_r_coord=xyz['coords'][r_index], - r_value=r_value, - atom_a_coord=xyz['coords'][a_indices[0]], - atom_b_coord=xyz['coords'][a_indices[1]], - a_value=a_value - ) - - try: - updated_xyz = _add_atom_to_xyz_using_internal_coords( - xyz, element, r_index, a_indices, d_indices, r_value, a_value, d_value, initial_guess, opt_method, + best_error = float('inf') + best_guess_and_errors = dict() + + for opt_method in opt_methods: + best_guess = None + for guess_func in guess_functions: + initial_guess = guess_func( + atom_r_coord=xyz['coords'][r_index], + r_value=r_value, + atom_a_coord=xyz['coords'][a_indices[0]], + atom_b_coord=xyz['coords'][a_indices[1]], + a_value=a_value, ) - new_coord = updated_xyz['coords'][-1] - - r_error, a_error, d_error = calculate_errors(new_coord) - total_error = r_error + a_error + d_error - - print(f"Attempt {attempt}: r_error={r_error}, a_error={a_error}, d_error={d_error}, total_error={total_error}") - - if meets_precision(new_coord): - print("Precision criteria met. Returning result.") - return updated_xyz - if total_error < closest_error: - print(f"Updating closest result. Previous closest_error={closest_error}, new total_error={total_error}") - closest_result = updated_xyz - closest_error = total_error - - except Exception as e: - print(f"Attempt {attempt} with {guess_func.__name__} failed due to exception: {e}") - - if closest_result is not None: - print("Returning closest result as no guess met precision criteria.") - return closest_result - - print("No valid solution was found.") - return None + try: + updated_xyz = _add_atom_to_xyz_using_internal_coords( + xyz, element, r_index, a_indices, d_indices, r_value, a_value, d_value, initial_guess, opt_method) + new_coord = updated_xyz['coords'][-1] + r_error, a_error, d_error = calculate_errors(new_coord) + total_error = r_error + a_error + d_error + + if meets_precision(new_coord): + print("Precision criteria met. Returning result.") + return updated_xyz + + if total_error < best_error: + print(f"Updating closest result. Previous best_error={best_error}, new total_error={total_error}") + best_error = total_error + best_guess = updated_xyz + best_guess_and_errors[best_error] = best_guess + + except Exception as e: + print(f"Optimization failed with {guess_func.__name__}: {e}") + best_guesses.append(best_guess) + + print("No valid solution was found, returning the closest solution after brute force modifications.") + lowest_key = None + for key, val in best_guess_and_errors.items(): + if lowest_key is None or key < lowest_key: + lowest_key = key + xyz = best_guess_and_errors[lowest_key] + xyz = modify_coords(coords=xyz, + indices=[r_index, len(xyz['coords']) - 1], + new_value=r_value, + modification_type='atom', + index=0, + ) + xyz = modify_coords(coords=xyz, + indices=[a_indices[0], a_indices[1], len(xyz['coords']) - 1], + new_value=a_value, + modification_type='atom', + index=0, + ) + xyz = modify_coords(coords=xyz, + indices=[d_indices[0], d_indices[1], d_indices[2], len(xyz['coords']) - 1], + new_value=d_value, + modification_type='atom', + index=0, + ) + return xyz def _add_atom_to_xyz_using_internal_coords(xyz: dict, @@ -2140,10 +2172,12 @@ def _add_atom_to_xyz_using_internal_coords(xyz: dict, a_value: float, d_value: float, initial_guess: np.ndarray, - opt_method: str = 'Nelder-Mead', + opt_method: str = 'SLSQP', ) -> dict: """ - Add an atom to XYZ. The new atom may have random r, a, and d index parameters + Optimize the placement of a new atom in an XYZ structure while satisfying + distance, angle, and dihedral constraints. + The new atom may have random r, a, and d index parameters (not necessarily defined for the same respective atoms). This function will compute the coordinates for the new atom. This function assumes that the original xyz has at least 3 atoms. @@ -2159,7 +2193,7 @@ def _add_atom_to_xyz_using_internal_coords(xyz: dict, d_value (float): The value of the L-M-N-X dihedral angle parameter, d. initial_guess (np.ndarray): The initial guess for the new atom's coordinates. opt_method (str, optional): The optimization method to use for finding the new atom's coordinates. - Additional options include 'trust-constr' and 'BFGS'. + Additional options include 'Nelder-Mead', 'trust-constr' and 'BFGS'. Returns: dict: The updated xyz coordinates. @@ -2230,7 +2264,7 @@ def distance_constraint(reference_coord: tuple, distance: float): def angle_constraint(atom_a: tuple, atom_b: tuple, angle: float): """ Generate the angle constraint for a new atom with two other atoms in Cartesian space. - This constants atom X to a circle defined by a certain hight on a cone (looking for half angle) + This constants atom X to a circle defined by a certain height on a cone (looking for half angle). Args: atom_a (tuple): Cartesian coordinates of the first reference atom (A). @@ -2358,6 +2392,7 @@ def generate_initial_guess_r_a(atom_r_coord: tuple, X_position = np.array(atom_r_coord) + r_value * X_direction return X_position + def generate_midpoint_initial_guess(atom_r_coord, r_value, atom_a_coord, atom_b_coord, a_value): """Generate an initial guess midway between the two reference atoms.""" midpoint = (np.array(atom_a_coord) + np.array(atom_b_coord)) / 2.0 @@ -2365,6 +2400,7 @@ def generate_midpoint_initial_guess(atom_r_coord, r_value, atom_a_coord, atom_b_ direction /= np.linalg.norm(direction) return midpoint + r_value * direction + def generate_perpendicular_initial_guess(atom_r_coord, r_value, atom_a_coord, atom_b_coord, a_value): """Generate an initial guess that is perpendicular to the plane defined by the reference atoms.""" BA = np.array(atom_a_coord) - np.array(atom_b_coord) @@ -2377,18 +2413,36 @@ def generate_perpendicular_initial_guess(atom_r_coord, r_value, atom_a_coord, at perpendicular_vector /= np.linalg.norm(perpendicular_vector) return np.array(atom_r_coord) + r_value * perpendicular_vector -def generate_random_initial_guess(atom_r_coord, r_value, atom_a_coord, atom_b_coord, a_value): - perturbation = np.random.uniform(-0.1, 0.1, 3) - base_guess = generate_initial_guess_r_a(atom_r_coord, r_value, atom_a_coord, atom_b_coord, a_value) - return base_guess + perturbation def generate_shifted_initial_guess(atom_r_coord, r_value, atom_a_coord, atom_b_coord, a_value): shift = np.array([0.1, -0.1, 0.1]) # A deterministic shift base_guess = generate_initial_guess_r_a(atom_r_coord, r_value, atom_a_coord, atom_b_coord, a_value) return base_guess + shift + def generate_bond_length_initial_guess(atom_r_coord, r_value, atom_a_coord, atom_b_coord, a_value): """Generate an initial guess considering only the bond length to the reference atom.""" direction = np.random.uniform(-1.0, 1.0, 3) # Random direction direction /= np.linalg.norm(direction) # Normalize to unit vector return np.array(atom_r_coord) + r_value * direction + + +def sorted_distances_of_atom(xyz_dict: dict, atom_index: int) -> List[Tuple[int, float]]: + """ + Given XYZ coordinates of a molecule and an atom index, return a list of + (other_atom_index, distance) tuples sorted from closest to farthest, + excluding the atom itself. + + Args: + xyz_dict (dict): The XYZ coordinates of the molecule. + atom_index (int): Index of the reference atom. + + Returns: + List[Tuple[int, float]]: Sorted list of (atom index, distance) tuples. + """ + d_matrix = xyz_to_dmat(xyz_dict) + if atom_index >= d_matrix.shape[0]: + raise IndexError(f"Atom index {atom_index} out of range for distance matrix of size {d_matrix .shape[0]}.") + + distances = [(i, d_matrix[atom_index, i]) for i in range(d_matrix.shape[0]) if i != atom_index] + return sorted(distances, key=lambda x: x[1]) diff --git a/arc/species/converter_test.py b/arc/species/converter_test.py index 940313dc8d..a7724b7455 100644 --- a/arc/species/converter_test.py +++ b/arc/species/converter_test.py @@ -4,6 +4,8 @@ """ This module contains unit tests of the arc.species.converter module """ + +import math import os import numpy as np @@ -534,6 +536,17 @@ def setUpClass(cls): 'coords': ((1.082465, -0.311042, 0.517009), (-0.000538, 0.002628, 0.064162), (-0.872035, -0.717142, -0.381683), (-0.209893, 1.025557, 0.057233))} + cls.xyz_14 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'), + 'isotopes': (12, 1, 1, 16, 1, 14, 1, 1), + 'coords': ((-3.59616665, 3.56051739, 1.66762912), + (-3.12243719, 4.49463523, 1.44875867), + (-4.24266183, 3.67770871, 2.51214684), + (-2.59966081, 2.57831173, 1.96283775), + (-3.00851627, 1.82896095, 2.40205367), + (-4.38319614, 3.12587525, 0.50462835), + (-5.34431488, 3.03821019, 0.76647869), + (-4.29744813, 3.80180325, -0.22733382))} + nh_s_adj = """1 N u0 p2 c0 {2,S} 2 H u0 p0 c0 {1,S}""" nh_s_xyz = """N 0.50949998 0.00000000 0.00000000 @@ -552,42 +565,42 @@ def test_coordinates_converter_tools_notebook(self): Note: If these tests fail, we should update the notebook as well """ zmat = {'symbols': ('N', 'C', 'C', 'C', 'N', 'O', 'C', 'O', 'X', 'C', 'X', 'C', 'X', 'N', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'), - 'coords': ((None, None, None), ('R_1_0', None, None), ('R_2_1', 'A_2_1_0', None), - ('R_3_2', 'A_3_2_1', 'D_3_2_1_0'), ('R_4_3', 'A_4_3_2', 'D_4_3_2_1'), - ('R_5_3', 'A_5_3_2', 'D_5_3_2_1'), ('R_6_4', 'A_6_4_3', 'D_6_4_3_2'), - ('R_7_5', 'A_7_5_3', 'D_7_5_3_2'), - ('RX_8|10|12_6|9|11', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), - ('R_9_6', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), - ('RX_8|10|12_6|9|11', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), - ('R_11_9', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), - ('RX_8|10|12_6|9|11', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), - ('R_13_11', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), - ('R_14_0', 'A_14_0_1', 'D_14_0_1_2'), ('R_15_0', 'A_15_0_1', 'D_15_0_1_2'), - ('R_16_1', 'A_16_1_2', 'D_16_1_2_15'), ('R_17_1', 'A_17_1_2', 'D_17_1_2_16'), ('R_18_2', 'A_18_2_1', 'D_18_2_1_17'), - ('R_19_2', 'A_19_2_1', 'D_19_2_1_18'), ('R_20_3', 'A_20_3_2', 'D_20_3_2_1'), ('R_21_7', 'A_21_7_5', 'D_21_7_5_3'), - ('R_22_4', 'A_22_4_6', 'D_22_4_6_21')), - 'vars': {'R_1_0': 1.4604942751011212, 'R_2_1': 1.527922849799549, 'A_2_1_0': 110.3065082939501, - 'R_3_2': 1.528953932777341, 'A_3_2_1': 111.18144649602384, 'D_3_2_1_0': 182.82580139682537, - 'R_4_3': 1.4513236745554712, 'A_4_3_2': 110.1816515421551, 'D_4_3_2_1': 178.77234190887103, - 'R_5_3': 1.4324895363736756, 'A_5_3_2': 108.87342377052293, 'D_5_3_2_1': 296.5360670758954, - 'R_6_4': 1.45471039292036, 'A_6_4_3': 119.52044799360947, 'D_6_4_3_2': 267.385789076988, - 'R_7_5': 1.463961241323698, 'A_7_5_3': 106.24609505276611, 'D_7_5_3_2': 169.47632562217643, - 'R_9_6': 1.2003625639923046, 'R_11_9': 1.5409261196430282, 'R_13_11': 1.1601896955504991, - 'R_14_0': 1.0199706201435979, 'A_14_0_1': 109.82434650817126, 'D_14_0_1_2': 298.4471183049676, - 'R_15_0': 1.0203338036642535, 'A_15_0_1': 109.29256268947609, 'D_15_0_1_2': 182.748124244299, - 'R_16_1': 1.0953359821757327, 'A_16_1_2': 110.03394606716385, 'D_16_1_2_15': 121.92752893614716, - 'R_17_1': 1.095192530179795, 'A_17_1_2': 110.84866373393122, 'D_17_1_2_16': 119.08345045450586, - 'R_18_2': 1.098313538677341, 'A_18_2_1': 109.08968835777581, 'D_18_2_1_17': 300.70048659688996, - 'R_19_2': 1.0975897283522487, 'A_19_2_1': 109.86987657387881, 'D_19_2_1_18': 242.83679879428865, + 'coords': ((None, None, None), ('R_1_0', None, None), ('R_2_1', 'A_2_1_0', None), + ('R_3_2', 'A_3_2_1', 'D_3_2_1_0'), ('R_4_3', 'A_4_3_2', 'D_4_3_2_1'), + ('R_5_3', 'A_5_3_2', 'D_5_3_2_1'), ('R_6_4', 'A_6_4_3', 'D_6_4_3_2'), + ('R_7_5', 'A_7_5_3', 'D_7_5_3_2'), + ('RX_8|10|12_6|9|11', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), + ('R_9_6', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), + ('RX_8|10|12_6|9|11', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), + ('R_11_9', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), + ('RX_8|10|12_6|9|11', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), + ('R_13_11', 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12', 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9'), + ('R_14_0', 'A_14_0_1', 'D_14_0_1_2'), ('R_15_0', 'A_15_0_1', 'D_15_0_1_2'), + ('R_16_1', 'A_16_1_2', 'D_16_1_2_15'), ('R_17_1', 'A_17_1_2', 'D_17_1_2_16'), ('R_18_2', 'A_18_2_1', 'D_18_2_1_17'), + ('R_19_2', 'A_19_2_1', 'D_19_2_1_18'), ('R_20_3', 'A_20_3_2', 'D_20_3_2_1'), ('R_21_7', 'A_21_7_5', 'D_21_7_5_3'), + ('R_22_4', 'A_22_4_6', 'D_22_4_6_21')), + 'vars': {'R_1_0': 1.4604942751011212, 'R_2_1': 1.527922849799549, 'A_2_1_0': 110.3065082939501, + 'R_3_2': 1.528953932777341, 'A_3_2_1': 111.18144649602384, 'D_3_2_1_0': 182.82580139682537, + 'R_4_3': 1.4513236745554712, 'A_4_3_2': 110.1816515421551, 'D_4_3_2_1': 178.77234190887103, + 'R_5_3': 1.4324895363736756, 'A_5_3_2': 108.87342377052293, 'D_5_3_2_1': 296.5360670758954, + 'R_6_4': 1.45471039292036, 'A_6_4_3': 119.52044799360947, 'D_6_4_3_2': 267.385789076988, + 'R_7_5': 1.463961241323698, 'A_7_5_3': 106.24609505276611, 'D_7_5_3_2': 169.47632562217643, + 'R_9_6': 1.2003625639923046, 'R_11_9': 1.5409261196430282, 'R_13_11': 1.1601896955504991, + 'R_14_0': 1.0199706201435979, 'A_14_0_1': 109.82434650817126, 'D_14_0_1_2': 298.4471183049676, + 'R_15_0': 1.0203338036642535, 'A_15_0_1': 109.29256268947609, 'D_15_0_1_2': 182.748124244299, + 'R_16_1': 1.0953359821757327, 'A_16_1_2': 110.03394606716385, 'D_16_1_2_15': 121.92752893614716, + 'R_17_1': 1.095192530179795, 'A_17_1_2': 110.84866373393122, 'D_17_1_2_16': 119.08345045450586, + 'R_18_2': 1.098313538677341, 'A_18_2_1': 109.08968835777581, 'D_18_2_1_17': 300.70048659688996, + 'R_19_2': 1.0975897283522487, 'A_19_2_1': 109.86987657387881, 'D_19_2_1_18': 242.83679879428865, 'R_20_3': 1.0977229848103394, 'A_20_3_2': 110.25024873242359, 'D_20_3_2_1': 55.735631534996074, - 'R_21_7': 0.9770937580834932, 'A_21_7_5': 96.48404094647259, 'D_21_7_5_3': 73.0418671453161, - 'R_22_4': 1.0156335096736346, 'A_22_4_6': 120.95423666407609, 'D_22_4_6_21': 114.47617942015948, - 'RX_8|10|12_6|9|11': 1.0, - 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12': 90.0, - 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9': 180.0}, - 'map': {0: 0, 1: 1, 2: 2, 3: 3, 4: 6, 5: 4, 6: 7, 7: 5, 8: 'X20', 9: 8, 10: 'X21', 11: 9, 12: 'X22', + 'R_21_7': 0.9770937580834932, 'A_21_7_5': 96.48404094647259, 'D_21_7_5_3': 73.0418671453161, + 'R_22_4': 1.0156335096736346, 'A_22_4_6': 120.95423666407609, 'D_22_4_6_21': 114.47617942015948, + 'RX_8|10|12_6|9|11': 1.0, + 'AX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12': 90.0, + 'DX_8|9|10|11|12|13_6|6|9|9|11|11_4|8|6|10|9|12_7|4|7|6|7|9': 180.0}, + 'map': {0: 0, 1: 1, 2: 2, 3: 3, 4: 6, 5: 4, 6: 7, 7: 5, 8: 'X20', 9: 8, 10: 'X21', 11: 9, 12: 'X22', 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19}} - + expected_xyz = {'coords': ((-0.8213900082688513, -2.10323641901248, -3.4787571562478665), (-0.8213900082688513, -2.10323641901248, -2.0182628811467453), (-0.8213900082688513, -0.6702747081300946, -1.488009196497028), @@ -4524,17 +4537,7 @@ def test_check_isomorphism(self): self.assertTrue(converter.check_isomorphism(mol1, mol2)) def test_cluster_confs_by_rmsd(self): - nco_1 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'), - 'isotopes': (12, 1, 1, 16, 1, 14, 1, 1), - 'coords': ((-3.59616665, 3.56051739, 1.66762912), - (-3.12243719, 4.49463523, 1.44875867), - (-4.24266183, 3.67770871, 2.51214684), - (-2.59966081, 2.57831173, 1.96283775), - (-3.00851627, 1.82896095, 2.40205367), - (-4.38319614, 3.12587525, 0.50462835), - (-5.34431488, 3.03821019, 0.76647869), - (-4.29744813, 3.80180325, -0.22733382))} - + """Test clustering conformers by RMSD""" # translation of nco_1 nco_2 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'), 'isotopes': (12, 1, 1, 16, 1, 14, 1, 1), @@ -4631,17 +4634,107 @@ def test_cluster_confs_by_rmsd(self): (-3.69226847, 4.12970364, 1.0893621), (-4.52773502, 3.18138419, 0.05526659))} - xyzs1 = [nco_1, nco_2, nco_3, nco_4, nco_5] + xyzs1 = [self.xyz_14, nco_2, nco_3, nco_4, nco_5] self.assertEqual(len(converter.cluster_confs_by_rmsd(xyzs1)), 1) - xyzs2 = [nco_1, nco_2, nco_3, nco_4, nco_5, nco_6] + xyzs2 = [self.xyz_14, nco_2, nco_3, nco_4, nco_5, nco_6] self.assertEqual(len(converter.cluster_confs_by_rmsd(xyzs2)), 2) - xyzs3 = [nco_1, nco_2, nco_6, nco_7, nco_8, nco_9] + xyzs3 = [self.xyz_14, nco_2, nco_6, nco_7, nco_8, nco_9] self.assertEqual(len(converter.cluster_confs_by_rmsd(xyzs3)), 4) + def test_distance_constraint(self): + """Test the distance_constraint() function""" + sphere_eq = converter.distance_constraint(reference_coord=self.xyz_14['coords'][0], distance=0.0) + sphere_error = sphere_eq(*self.xyz_14['coords'][0]) + self.assertEqual(sphere_error, 0.0) + sphere_eq = converter.distance_constraint(reference_coord=self.xyz_14['coords'][0], distance=1.0) + sphere_error = sphere_eq(*self.xyz_14['coords'][0]) + self.assertEqual(abs(sphere_error), 1.0) + distance = calculate_param(self.xyz_14['coords'], atoms=[0, 1]) + self.assertAlmostEqual(distance, 1.06999992, places=4) + sphere_eq = converter.distance_constraint(reference_coord=self.xyz_14['coords'][1], + distance=distance) + sphere_error = sphere_eq(*self.xyz_14['coords'][0]) + self.assertAlmostEqual(abs(sphere_error), 0.0, places=4) + + def test_angle_constraint(self): + """Test the angle_constraint() function""" + plane_eq = converter.angle_constraint(atom_a=self.xyz_14['coords'][0], + atom_b=self.xyz_14['coords'][1], + angle=0.0) + plane_error = plane_eq(*self.xyz_14['coords'][0]) + self.assertEqual(plane_error, 0.0) + plane_eq = converter.angle_constraint(atom_a=self.xyz_14['coords'][0], + atom_b=self.xyz_14['coords'][1], + angle=90.0) + plane_error = plane_eq(*self.xyz_14['coords'][0]) + self.assertAlmostEqual(abs(math.degrees(plane_error)), 90.0, places=4) + angle = calculate_param(self.xyz_14['coords'], atoms=[0, 1, 2]) + self.assertAlmostEqual(abs(angle), 35.2643963, places=4) + plane_eq = converter.angle_constraint(atom_a=self.xyz_14['coords'][0], + atom_b=self.xyz_14['coords'][1], + angle=angle) + plane_error = plane_eq(*self.xyz_14['coords'][0]) + self.assertAlmostEqual(abs(math.degrees(plane_error)), angle, places=4) + plane_error = plane_eq(*self.xyz_14['coords'][2]) + self.assertAlmostEqual(abs(math.degrees(plane_error)), 0.0, places=4) + + def test_dihedral_constraint(self): + """Test the dihedral_constraint() function""" + torsion_eq = converter.dihedral_constraint(atom_a=self.xyz_14['coords'][0], + atom_b=self.xyz_14['coords'][1], + atom_c=self.xyz_14['coords'][2], + dihedral=0.0) + torsion_error = torsion_eq(*self.xyz_14['coords'][0]) + self.assertAlmostEqual(torsion_error, 0.0, places=10) + torsion_eq = converter.dihedral_constraint(atom_a=self.xyz_14['coords'][0], + atom_b=self.xyz_14['coords'][1], + atom_c=self.xyz_14['coords'][2], + dihedral=60.0) + torsion_error = torsion_eq(*self.xyz_14['coords'][0]) + self.assertAlmostEqual(math.degrees(torsion_error), 60.0, delta=3) + dihedral = calculate_param(self.xyz_14['coords'], atoms=[0, 1, 2, 3]) + self.assertAlmostEqual(dihedral, 38.970413, delta=3) + torsion_eq = converter.dihedral_constraint(atom_a=self.xyz_14['coords'][0], + atom_b=self.xyz_14['coords'][1], + atom_c=self.xyz_14['coords'][2], + dihedral=dihedral) + torsion_error = torsion_eq(*self.xyz_14['coords'][0]) + self.assertAlmostEqual(math.degrees(torsion_error), dihedral, delta=3) + torsion_error = torsion_eq(*self.xyz_14['coords'][3]) + self.assertAlmostEqual(math.degrees(torsion_error), 0.0, delta=3) + def test_add_atom_to_xyz_using_internal_coords(self): """Test the add_atom_to_xyz_using_internal_coords() function.""" + def check_distance(coords, atoms, expected, places=0): + """ + Checks if the calculated distance between atoms is close to the expected value. + """ + value = calculate_param(coords=coords, atoms=atoms) + self.assertAlmostEqual(value, expected, places=places) + + def check_angle(coords, atoms, expected, places=None, delta=None): + """ + Checks if the calculated angle is within the acceptable deviation (delta) from the expected angle. + """ + value = calculate_param(coords=coords, atoms=atoms) + if delta is not None: + self.assertAlmostEqual(value, expected, delta=delta) + elif places is not None: + self.assertAlmostEqual(value, expected, places=places) + + def check_dihedral(coords, atoms, expected, places=None, delta=None): + """ + Checks if the calculated dihedral angle (normalized) is within the acceptable deviation (delta) from the expected angle. + """ + value = calculate_param(coords=coords, atoms=atoms) + normalized = abs((value + 180) % 360 - 180) + if delta is not None: + self.assertAlmostEqual(normalized, expected, delta=delta) + elif places is not None: + self.assertAlmostEqual(normalized, expected, places=places) + xyz_1 = """ C -3.63243985 -0.48299420 -0.05541310 H -3.27244945 -1.49054926 -0.06723326 H -3.24128971 0.04543562 -0.89960709 @@ -4670,9 +4763,9 @@ def test_add_atom_to_xyz_using_internal_coords(self): (-1.24509731, 0.74268637, 0.46176133), (-1.3183353, 0.6960459, 2.20690531), (-1.0398666203453495, -1.4521333553792743, 1.2864271004043641))} self.assertTrue(almost_equal_coords_lists(new_xyz_1, expected_xyz_1)) - self.assertAlmostEqual(calculate_param(coords=new_xyz_1['coords'], atoms=[7, 10]), 1.77, places=2) - self.assertAlmostEqual(calculate_param(coords=new_xyz_1['coords'], atoms=[4, 7, 10]), 109.5, places=1) - self.assertAlmostEqual(calculate_param(coords=new_xyz_1['coords'], atoms=[0, 4, 7, 10]), 300, places=1) + check_distance(coords=new_xyz_1['coords'], atoms=[7, 10], expected=1.77, places=1) + check_angle(coords=new_xyz_1['coords'], atoms=[4, 7, 10], expected=109.5, places=1) + check_dihedral(coords=new_xyz_1['coords'], atoms=[0, 4, 7, 10], expected=60, places=1) new_xyz_2 = converter.add_atom_to_xyz_using_internal_coords(xyz=xyz_1, element='Cl', @@ -4682,14 +4775,10 @@ def test_add_atom_to_xyz_using_internal_coords(self): r_value=2.70, a_value=61.46, d_value=-60.0, - opt_method='BFGS', ) - self.assertAlmostEqual(new_xyz_2['coords'][-1][0], -1.0407054376010356, places=2) - self.assertAlmostEqual(new_xyz_2['coords'][-1][1], -1.4428236405325816, places=2) - self.assertAlmostEqual(new_xyz_2['coords'][-1][2], 1.2912758496528232, places=2) - self.assertAlmostEqual(calculate_param(coords=new_xyz_1['coords'], atoms=[4, 10]), 2.70, places=1) - self.assertAlmostEqual(calculate_param(coords=new_xyz_1['coords'], atoms=[4, 0, 10]), 61.46, places=0) - self.assertAlmostEqual(calculate_param(coords=new_xyz_1['coords'], atoms=[0, 4, 7, 10]), 300, places=1) + check_distance(coords=new_xyz_2['coords'], atoms=[4, 10], expected=2.70, places=1) + check_angle(coords=new_xyz_2['coords'], atoms=[4, 0, 10], expected=61.46, places=1) + check_dihedral(coords=new_xyz_2['coords'], atoms=[0, 4, 7, 10], expected=60, places=1) xyz_3 = """C -1.01765390 -0.08355112 0.05206009 O 0.22303684 -0.79051481 0.05294172 @@ -4716,9 +4805,9 @@ def test_add_atom_to_xyz_using_internal_coords(self): (-1.14968688, 0.45844916, -0.88969505), (1.33643417, -2.15859899, -0.90083808), (1.4828150949768655, -2.377060198658591, 0.30306573909787815))} self.assertTrue(almost_equal_coords(new_xyz_3, expected_xyz)) - self.assertAlmostEqual(calculate_param(coords=new_xyz_3['coords'], atoms=[2, 8]), 1.85, places=2) - self.assertAlmostEqual(calculate_param(coords=new_xyz_3['coords'], atoms=[1, 2, 8]), 77.4, places=1) - self.assertAlmostEqual(calculate_param(coords=new_xyz_3['coords'], atoms=[3, 7, 2, 8]), 140, places=1) + check_distance(coords=new_xyz_3['coords'], atoms=[2, 8], expected=1.85, places=1) + check_angle(coords=new_xyz_3['coords'], atoms=[1, 2, 8], expected=77.4, places=1) + check_dihedral(coords=new_xyz_3['coords'], atoms=[3, 7, 2, 8], expected=140, places=1) xyz_4 = """C 2.44505336 0.33426556 -0.05839486 C 1.22268719 -0.52813666 0.01896600 @@ -4746,10 +4835,136 @@ def test_add_atom_to_xyz_using_internal_coords(self): self.assertAlmostEqual(new_xyz_4['coords'][-1][0], 1.8986166253764283, places=2) self.assertAlmostEqual(new_xyz_4['coords'][-1][1], 0.9987236974936107, places=2) self.assertAlmostEqual(new_xyz_4['coords'][-1][2], 0.8154061174277444, places=2) - self.assertAlmostEqual(calculate_param(coords=new_xyz_4['coords'], atoms=[1, 14]), 1.85, places=2) - self.assertAlmostEqual(calculate_param(coords=new_xyz_4['coords'], atoms=[3, 1, 14]), 77.4, places=1) - self.assertAlmostEqual(calculate_param(coords=new_xyz_4['coords'], atoms=[2, 0, 1, 14]), 140, places=1) + check_distance(coords=new_xyz_4['coords'], atoms=[1, 14], expected=1.85, places=2) + check_angle(coords=new_xyz_4['coords'], atoms=[3, 1, 14], expected=77.4, places=1) + check_dihedral(coords=new_xyz_4['coords'], atoms=[2, 0, 1, 14], expected=140, places=1) + + xyz_5="""C -1.01765390 -0.08355112 0.05206009 + O 0.22303684 -0.79051481 0.05294172 + C 0.35773087 -1.66017412 -0.97863090 + O -0.45608483 -1.87500387 -1.86208833 + H -1.82486467 -0.81522856 0.14629516 + H -1.06962462 0.60119223 0.90442455 + H -1.14968688 0.45844916 -0.88969505 + H 1.33643417 -2.15859899 -0.90083808""" + new_xyz_5= converter.add_atom_to_xyz_using_internal_coords(xyz=xyz_5, + element='O', + r_index=2, + a_indices=(1, 2), + d_indices=(3, 7, 2), + r_value=1.8, + a_value=77, + d_value=140, + ) + check_distance(coords=new_xyz_5['coords'], atoms=[2, 8], expected=1.8, places=1) + check_angle(coords=new_xyz_5['coords'], atoms=[1, 2, 8], expected=77, delta=5) + check_dihedral(coords=new_xyz_5['coords'], atoms=[3, 7, 2, 8], expected=140,delta=5) + + xyz_6 = """C -1.79090496 0.16195344 0.54586762 + C -0.29750770 0.40075866 0.64716134 + O 0.37622588 -0.66484334 -0.01806424 + C 1.78768038 -0.50729501 0.03637639 + H -2.05996050 -0.79657732 1.00186393 + H -2.10367702 0.11491160 -0.50249559 + H -2.34837353 0.95933379 1.04519886 + H 0.00442248 0.42939362 1.69991935 + H -0.03994121 1.35437631 0.17328998 + H 2.13363258 -0.50935390 1.07451700 + H 2.24870467 -1.34913122 -0.48708664 + H 2.08904465 0.42028683 -0.45980377""" + new_xyz_6=converter.add_atom_to_xyz_using_internal_coords(xyz=xyz_6, + element='O', + r_index=1, + a_indices=(2, 1), + d_indices=(0, 7, 1), + r_value=2.1, + a_value=65, + d_value=98.25 + ) + + check_distance(coords=new_xyz_6['coords'], atoms=[1, 12], expected=2.1, places=1) + check_angle(coords=new_xyz_6['coords'], atoms=[2, 1, 12], expected=65, delta=6) + check_dihedral(coords=new_xyz_6['coords'], atoms=[0, 7, 1, 12], expected=98.25, delta=5) + + xyz_7= """C 2.97058781 -0.21070737 -0.09207851 +O 1.61612389 -0.30531019 -0.51325496 +C 0.71770473 0.06060475 0.53401367 +C -0.71758641 -0.05709177 0.03392758 +O -1.61601140 0.30874025 1.08121962 +C -2.97047936 0.21401778 0.66008169 +H 3.15985016 -0.88941299 0.74516152 +H 3.21214164 0.81763976 0.19355007 +H 3.61217288 -0.50062986 -0.92848057 +H 0.86387097 -0.59504248 1.40088174 +H 0.91542802 1.09002507 0.85634557 +H -0.91528083 -1.08649709 -0.28846850 +H -0.86376919 0.59860060 -0.83290320 +H -3.21193982 -0.81434774 0.37443570 +H -3.61206186 0.50385697 1.49651591 +H -3.15982655 0.89272913 -0.17713488""" + new_xyz_7=converter.add_atom_to_xyz_using_internal_coords(xyz=xyz_7, + element='O', + r_index=0, + a_indices=(1, 0), + d_indices=(6, 7, 0), + r_value=2.1, + a_value=65, + d_value=-98.25 + ) + check_distance(coords=new_xyz_7['coords'], atoms=[0, 16], expected=2.1, places=1) + check_angle(coords=new_xyz_7['coords'], atoms=[1, 0, 16], expected=65, delta=6) + check_dihedral(coords=new_xyz_7['coords'], atoms=[6, 7, 0, 16], expected=98.25, delta=5) + + xyz_8="""C 2.24051202 1.04153068 0.19486347 +C 1.10659712 0.58234118 0.74083019 +C 0.16338489 -0.36827342 0.07674238 +O -0.00738172 -1.48018240 0.95029996 +C -0.89152333 -2.44569234 0.39890889 +H 2.55998781 0.74063014 -0.79771591 +H 2.88053617 1.72597545 0.74310056 +H 0.83267744 0.90476855 1.74255103 +H 0.55255009 -0.71024239 -0.88913402 +H -0.80072247 0.12806836 -0.08052245 +H -0.49138857 -2.84032162 -0.54006077 +H -0.98849098 -3.27013487 1.11037749 +H -1.88123946 -2.00923795 0.23313156""" + new_xyz_8=converter.add_atom_to_xyz_using_internal_coords(xyz=xyz_8, + element='O', + r_index=2, + a_indices=(3, 2), + d_indices=(1, 8, 2), + r_value=2.1, + a_value=65, + d_value=98.25 + ) + check_distance(coords=new_xyz_8['coords'], atoms=[2, 13], expected=2.1, places=1) + check_angle(coords=new_xyz_8['coords'], atoms=[3, 2, 13], expected=65, delta=6) + check_dihedral(coords=new_xyz_8['coords'], atoms=[1, 8, 2, 13], expected=98.25, delta=5) + + def test_sorted_distances_of_atom(self): + """Test the test_sorted_distances_of_atom function""" + xyz_dict = { + 'symbols': ['O', 'H', 'H'], + 'isotopes': [16, 1, 1], + 'coords': [ + [0.0, 0.0, 0.0], # O + [0.758, 0.0, 0.504], # H1 + [-0.758, 0.0, 0.504], # H2 + ] + } + result = converter.sorted_distances_of_atom(xyz_dict, 0) + result_indices = [i for i, d in result] + self.assertEqual(result_indices, [1, 2]) + self.assertAlmostEqual(result[0][1], result[1][1], delta=0.01) + + result = converter.sorted_distances_of_atom(xyz_dict, 1) + result_indices = [i for i, d in result] + self.assertEqual(result_indices, [0, 2]) + self.assertLess(result[0][1], result[1][1]) + + with self.assertRaises(IndexError): + converter.sorted_distances_of_atom(xyz_dict, 5) @classmethod def tearDownClass(cls): diff --git a/arc/species/species.py b/arc/species/species.py index 6fc8f7a85f..53d53631c6 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -631,6 +631,22 @@ def copy(self): species_dict = self.as_dict(reset_atom_ids=True) return ARCSpecies(species_dict=species_dict) + def is_water(self) -> bool: + """ + Check whether this species is a water molecule (H2O). + """ + if self.mol is None: + return False + if len(self.mol.atoms) != 3: + return False + o_count, h_count = 0, 0 + for atom in self.mol.atoms: + if atom.is_oxygen(): + o_count += 1 + elif atom.is_hydrogen(): + h_count += 1 + return o_count == 1 and h_count == 2 + def as_dict(self, reset_atom_ids: bool = False, ) -> dict: diff --git a/arc/species/species_test.py b/arc/species/species_test.py index 80bdad7753..dc9506e693 100644 --- a/arc/species/species_test.py +++ b/arc/species/species_test.py @@ -119,6 +119,20 @@ def setUpClass(cls): 'isotopes': (14, 1, 14, 1), 'symbols': ('N', 'N', 'H', 'H')} + cls.water = ARCSpecies(label='H2O', smiles='O', xyz="""O -0.00032700 0.39565700 0.00000000 + H -0.75690800 -0.19845300 0.00000000 + H 0.75723500 -0.19720400 0.00000000""") + + cls.ethanol = ARCSpecies(label='alcohol', smiles='OCC', xyz="""O 1.19051468 0.74721872 0.55745278 + C 0.42396685 -0.33421819 0.04897215 + C -0.98542075 0.14578863 -0.22414249 + H 2.08706846 0.40878057 0.72232827 + H 0.41841326 -1.14061638 0.78839856 + H 0.89171403 -0.69551584 -0.87175392 + H -0.97955985 0.96896352 -0.94625607 + H -1.44657152 0.52976777 0.69182626 + H -1.60463449 -0.66494303 -0.61804700""") + def test_from_yml_file(self): """Test that an ARCSpecies object can successfully be loaded from an Arkane YAML file""" n4h6_adj_list = """1 N u0 p1 c0 {2,S} {3,S} {4,S} @@ -1233,6 +1247,15 @@ def test_copy(self): self.assertEqual(spc_1_copy.mol.to_smiles(), spc_1.mol.to_smiles()) atom_labels = [atom.label for atom in spc_1_copy.mol.atoms] + def test_is_water(self): + """Test the ARCSpecies.is_water() method.""" + water = self.water + self.assertTrue(water.is_water()) + methane_species = ARCSpecies(label='methane', smiles='C') + self.assertFalse(methane_species.is_water()) + ethanol = self.ethanol + self.assertFalse(ethanol.is_water()) + def test_mol_dict_repr_round_trip(self): """Test that a Molecule object survives the as_dict() and from_dict() round trip with emphasis on atom IDs.""" mol = Molecule(smiles='NCC') diff --git a/data/electronegativity.yml b/data/electronegativity.yml new file mode 100644 index 0000000000..9dae7ad08d --- /dev/null +++ b/data/electronegativity.yml @@ -0,0 +1,65 @@ +# Atoms electronegativity values according to Pauling scale : +# Source: National Center for Biotechnology Information (2025). Electronegativity in the Periodic Table of Elements. +# Retrieved by May 7, 2025: https://pubchem.ncbi.nlm.nih.gov/periodic-table/electronegativity. + +H: 2.2 +He: 0.0 +Li: 0.98 +Be: 1.57 +B: 2.04 +C: 2.55 +N: 3.04 +O: 3.44 +F: 3.98 +Ne: 0.0 +Na: 0.93 +Mg: 1.31 +Al: 1.61 +Si: 1.9 +P: 2.19 +S: 2.58 +Cl: 3.16 +Ar: 0.0 +K: 0.82 +Ca: 1.0 +Sc: 1.36 +Ti: 1.54 +V: 1.63 +Cr: 1.66 +Mn: 1.55 +Fe: 1.83 +Co: 1.88 +Ni: 1.91 +Cu: 1.9 +Zn: 1.65 +Ga: 1.81 +Ge: 2.01 +As: 2.18 +Se: 2.55 +Br: 2.96 +Kr: 0.0 +Rb: 0.82 +Sr: 0.95 +Y: 1.22 +Zr: 1.33 +Nb: 1.6 +Mo: 2.16 +Tc: 1.9 +Ru: 2.2 +Rh: 2.28 +Pd: 2.2 +Ag: 1.93 +Cd: 1.69 +In: 1.78 +Sn: 1.96 +Sb: 2.05 +Te: 2.1 +I: 2.66 +Xe: 0.0 +Cs: 0.79 +Ba: 0.89 +La: 1.1 +Ce: 1.12 +Pr: 1.13 +Nd: 1.14 +Pm: 1.13 diff --git a/data/families/carbonyl_based_hydrolysis.py b/data/families/carbonyl_based_hydrolysis.py new file mode 100644 index 0000000000..d01a5fe95f --- /dev/null +++ b/data/families/carbonyl_based_hydrolysis.py @@ -0,0 +1,61 @@ +name = "carbonyl_based_hydrolysis/groups" +shortDesc = u"carbonyl_based_hydrolysis" +longDesc = u""" +A generic bimolecular carbonyl_based hydrolysis reaction: R-A(=O)-B + H2O <=> R-A(=O)-OH + B-H + +Where: +- A can be C (carbonyl) or P (phosphoryl). +- B can be O, N, or a halide (X). +- R represents substituent groups or hydrogen depending on the structure of the ester. + +RA(*1)(=O)B(*2) + H(*3)O(*4)H <=> RA(*1)(=O)O(*4)H + B(*2)H(*3) + + +This family encompasses hydrolysis reactions for a range of carbonyl_based classes, including esters, amides, and acyl halides. +""" + +template(reactants=["carbonyl_group", "H2O"], products=["acid", "alcohol"], ownReverse=False) + +reverse = "condensation" + +reversible = True + +recipe(actions=[ + ['BREAK_BOND', '*1', 1, '*2'], + ['BREAK_BOND', '*3', 1, '*4'], + ['FORM_BOND', '*1', 1, '*4'], + ['FORM_BOND', '*2', 1, '*3'], +]) + +entry( + index = 0, + label = "carbonyl_group", + group = +""" +1 R u0 {2,S} +2 *1 [C,P] u0 {1,S} {3,D} {4,S} +3 O u0 {2,D} +4 *2 [O,N,Cl,Br,F,I] u0 {2,S} +""", + kinetics = None, +) + +entry( + index = 1, + label = "H2O", + group = +""" +1 *4 O u0 p2 c0 {2,S} {3,S} +2 *3 H u0 p0 c0 {1,S} +3 H u0 p0 c0 {1,S} +""", + kinetics = None, +) + + +tree( +""" +L1: carbonyl_group +L1: H2O +""" +) diff --git a/data/families/ether_hydrolysis.py b/data/families/ether_hydrolysis.py new file mode 100644 index 0000000000..cb7dd5608b --- /dev/null +++ b/data/families/ether_hydrolysis.py @@ -0,0 +1,58 @@ +name = "ether_hydrolysis/groups" +shortDesc = u"ether_hydrolysis" +longDesc = u""" +A generic bimolecular ether hydrolysis reaction: A-B-R + H2O <=> A-OH + B(R)-H + +Where: +- A can be C +- B can be O +- R represents an alkyl (R'') or aryl (Ar) group. + +A(*1)B(*2)-R + H(*3)O(*4)H <=> A(*1)O(*4)H + B(*2)(R)H(*3) + +""" + +template(reactants=["ether", "H2O"], products=["alcohol1", "alcohol2"], ownReverse=False) + +reverse = "condensation" + +reversible = True + +recipe(actions=[ + ['BREAK_BOND', '*1', 1, '*2'], + ['BREAK_BOND', '*3', 1, '*4'], + ['FORM_BOND', '*1', 1, '*4'], + ['FORM_BOND', '*2', 1, '*3'], +]) + +entry( + index = 0, + label = "ether", + group = +""" +1 *1 C u0 {2,S} +2 *2 O u0 {1,S} {3,S} +3 C u0 {2,S} +""", + kinetics = None, +) + +entry( + index = 1, + label = "H2O", + group = +""" +1 *4 O u0 p2 c0 {2,S} {3,S} +2 *3 H u0 p0 c0 {1,S} +3 H u0 p0 c0 {1,S} +""", + kinetics = None, +) + + +tree( +""" +L1: ether +L1: H2O +""" +) diff --git a/data/families/hydrolysis.py b/data/families/hydrolysis.py deleted file mode 100644 index 853d088c00..0000000000 --- a/data/families/hydrolysis.py +++ /dev/null @@ -1,54 +0,0 @@ -name = "hydrolysis/groups" -shortDesc = u"hydrolysis" -longDesc = u""" -A generic bimolecular hydrolysis reaction: AB + H2O <=> AH + BOH - -R1(*1)[O/N](*3)R2(*2) + H(*4)O(*5)H <=> R1(*1)[O/N](*3)H(*4) + R2(*2)O(*5)H - -""" - -template(reactants=["R1ONR2", "H2O"], products=["R1ONH", "R2OH"], ownReverse=False) - -reverse = "condensation" - -reversible = True - -recipe(actions=[ - ['BREAK_BOND', '*3', 1, '*2'], - ['BREAK_BOND', '*4', 1, '*5'], - ['FORM_BOND', '*3', 1, '*4'], - ['FORM_BOND', '*2', 1, '*5'], -]) - -entry( - index = 0, - label = "R1ONR2", - group = -""" -1 *1 R!H u0 p0 c0 {2,S} -2 *3 [O2s,N3s] u0 p[1,2] c0 {1,S} {3,S} -3 *2 R!H u0 p0 c0 {2,S} -""", - kinetics = None, -) - -entry( - index = 1, - label = "H2O", - group = -""" -1 *4 O u0 p2 c0 {2,S} {3,S} -2 *5 H u0 p0 c0 {1,S} -3 H u0 p0 c0 {1,S} -""", - kinetics = None, -) - - -tree( -""" -L1: R1ONR2 -L1: H2O -""" -) - diff --git a/data/families/nitrile_hydrolysis.py b/data/families/nitrile_hydrolysis.py new file mode 100644 index 0000000000..d31edd0d59 --- /dev/null +++ b/data/families/nitrile_hydrolysis.py @@ -0,0 +1,57 @@ +name = "nitrile/groups" +shortDesc = u"nitrile_hydrolysis" +longDesc = u""" +A generic bimolecular ester hydrolysis reaction: R1-A#N + H2O <=> R1-A(OH)=NH + +Where: +- A can be C (carbonyl) or P (phosphoryl). +- R1 represent substituent groups or hydrogen depending on the structure of the ester. + +R1-A(*1)#N(*2) + H(*3)O(*4)H <=> R1-A(O(*4)H)=N(*2)H(*3) + +""" + +template(reactants=["nitrile", "H2O"], products=["acid"], ownReverse=False) + +reverse = "condensation" + +reversible = True + +recipe(actions=[ + ['CHANGE_BOND', '*1', -1, '*2'], + ['BREAK_BOND', '*3', 1, '*4'], + ['FORM_BOND', '*1', 1, '*4'], + ['FORM_BOND', '*2', 1, '*3'], +]) + +entry( + index = 0, + label = "nitrile", + group = +""" +1 R u0 {2,S} +2 *1 [C,P] u0 {1,S} {3,T} +3 *2 N u0 {2,T} +""", + kinetics = None, +) + +entry( + index = 1, + label = "H2O", + group = +""" +1 *4 O u0 p2 c0 {2,S} {3,S} +2 *3 H u0 p0 c0 {1,S} +3 H u0 p0 c0 {1,S} +""", + kinetics = None, +) + + +tree( +""" +L1: nitrile +L1: H2O +""" +) diff --git a/data/hydrolysis_families_parameters.yml b/data/hydrolysis_families_parameters.yml new file mode 100644 index 0000000000..72ea26e752 --- /dev/null +++ b/data/hydrolysis_families_parameters.yml @@ -0,0 +1,52 @@ +# Configuration file for hydrolysis reaction parameters +# The parameters are taken from +#Leen Fahoum and Alon Grinberg Dana,“Automated Reaction Transition State Search for Neutral Hydrolysis Reactions.” + + +# Family sets definition +family_sets: + set_1: + - carbonyl_based_hydrolysis + - ether_hydrolysis + set_2: + - nitrile_hydrolysis + +# Family-specific parameters for different reaction types +family_parameters: + # Set 1 Families + ether_hydrolysis: + stretch: 1.5 + r_value: [2.14, 1.1, 0.97] + a_value: [66, 72.8, 107] + d_values: + - [101.8, 0.3, 102.2] + - [-101.8, 0.3, 102.2] + - [101.8, 0.3, -102.2] + - [-101.8, 0.3, -102.2] + + carbonyl_based_hydrolysis: + stretch: 1.3 + r_value: [1.85, 1.21, 0.97] + a_value: [77, 76, 112] + d_values: + - [140, 1.64, 103] + - [-140, 1.64, 103] + - [140, 1.64, -103] + - [-140, 1.64, -103] + + # Set 2 Families + nitrile_hydrolysis: + stretch: 1.04 + r_value: [1.8, 1.3, 0.97] + a_value: [97, 58, 114] + d_values: + - [174, -0.0154, 104] + - [-174, -0.0154, 104] + - [174, -0.0154, -104] + - [-174, -0.0154, -104] + +# General reference information +units: + distances: Angstroms + angles: degrees + stretching_factors: dimensionless