Source code for emle._backends._orca

#######################################################################
# EMLE-Engine: https://github.com/chemle/emle-engine
#
# Copyright: 2023-2025
#
# Authors: Lester Hedges   <lester.hedges@gmail.com>
#          Kirill Zinovjev <kzinovjev@gmail.com>
#
# EMLE-Engine is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# EMLE-Engine is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with EMLE-Engine. If not, see <http://www.gnu.org/licenses/>.
#####################################################################

"""ORCA in-vacuo backend implementation."""

__all__ = ["ORCA"]

import ase as _ase
import numpy as _np
import os as _os
import shlex as _shlex
import shutil as _shutil
import subprocess as _subprocess
import tempfile as _tempfile

from ._backend import Backend as _Backend


[docs] class ORCA(_Backend): """ ORCA in-vacuo backend implementation. """ def __init__(self, exe, template=None): """ Constructor. Parameters ---------- exe: str The path to the ORCA executable. template: str The path to the ORCA template file. """ if not isinstance(exe, str): raise TypeError("'exe' must be of type 'str'") if not _os.path.isfile(exe): raise IOError(f"Unable to locate ORCA executable: '{exe}'") if template is not None: if not isinstance(template, str): raise TypeError("'template' must be of type 'str'") if not _os.path.isfile(template): raise IOError(f"Unable to locate ORCA template file: '{template}'") # Read the ORCA template file to check for the presence of the # '*xyzfile' directive. Also store the charge and spin multiplicity. lines = [] with open(template, "r") as f: for line in f: if "*xyzfile" in line: is_xyzfile = True else: lines.append(line) if not is_xyzfile: raise ValueError( "ORCA template file must contain '*xyzfile' directive!" ) # Try to extract the charge and spin multiplicity from the line. try: _, charge, mult, _ = line.split() except: raise ValueError( "Unable to parse charge and spin multiplicity from ORCA template file!" ) self._template_lines = lines self._charge = int(charge) self._mult = int(mult) self._exe = exe self._template = template
[docs] def calculate(self, atomic_numbers, xyz, forces=True): """ Compute the energy and forces. Parameters ---------- atomic_numbers: numpy.ndarray, (N_BATCH, N_QM_ATOMS,) The atomic numbers of the atoms in the QM region. xyz: numpy.ndarray, (N_BATCH, N_QM_ATOMS, 3) The coordinates of the atoms in the QM region in Angstrom. forces: bool Whether to calculate and return forces. Returns ------- energy: float The in-vacuo energy in Eh. forces: numpy.ndarray The in-vacuo forces in Eh/Bohr. """ if not isinstance(atomic_numbers, _np.ndarray): raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'") if atomic_numbers.dtype != _np.int64: raise TypeError("'atomic_numbers' must have dtype 'int32'.") if not isinstance(xyz, _np.ndarray): raise TypeError("'xyz' must be of type 'numpy.ndarray'") if xyz.dtype != _np.float64: raise TypeError("'xyz' must have dtype 'float64'.") if len(atomic_numbers) != len(xyz): raise ValueError( f"Length of 'atomic_numbers' ({len(atomic_numbers)}) does not " f"match length of 'xyz' ({len(xyz)})" ) # Convert to batched NumPy arrays. if len(atomic_numbers.shape) == 1: atomic_numbers = _np.expand_dims(atomic_numbers, axis=0) xyz = _np.expand_dims(xyz, axis=0) # Lists to store results. results_energy = [] results_forces = [] # Loop over batches. for i, (atomic_numbers_i, xyz_i) in enumerate(zip(atomic_numbers, xyz)): if len(atomic_numbers_i) != len(xyz_i): raise ValueError( f"Length of 'atomic_numbers' ({len(atomic_numbers_i)}) does not " f"match length of 'xyz' ({len(xyz_i)}) for index {i}" ) # Create a temporary working directory. with _tempfile.TemporaryDirectory() as tmp: # Workout the name of the input and xyz files. inp_name = f"{tmp}/input.inp" xyz_name = f"{tmp}/input.xyz" # Write the input file. with open(inp_name, "w") as f: for line in self._template_lines: f.write(line) f.write(f"*xyzfile {self._charge} {self._mult} {xyz_name}\n") # Work out the elements. elements = [_ase.Atom(id).symbol for id in atomic_numbers_i] # Write the xyz file. with open(xyz_name, "w") as f: f.write(f"{len(elements):5d}\n\n") for elem, pos in zip(elements, xyz_i): f.write( f"{elem:<3s} {pos[0]:20.16f} {pos[1]:20.16f} {pos[2]:20.16f}\n" ) # Run the ORCA calculation. e, f = self.calculate_sander(xyz_name, inp_name, forces=forces) # Store the results. results_energy.append(e) results_forces.append(f) # Convert the results to NumPy arrays. results_energy = _np.array(results_energy) results_forces = _np.array(results_forces) return results_energy, results_forces if forces else results_energy
[docs] def calculate_sander(self, xyz_file, orca_input, forces=True): """ Internal function to compute in vacuo energies and forces using ORCA via input written by sander. Parameters ---------- xyz_file: str The path to the xyz coordinate file for the QM region. orca_input: str The path to the ORCA input file. forces: bool Whether to compute and return the forces. Returns ------- energy: float The in vacuo QM energy in Eh. forces: numpy.array The in vacuo QM forces in Eh/Bohr. """ if not isinstance(xyz_file, str): raise TypeError("'xyz_file' must be of type 'str'") if not _os.path.isfile(xyz_file): raise IOError(f"Unable to locate the ORCA QM xyz file: {xyz_file}") if not isinstance(orca_input, str): raise TypeError("'orca_input' must be of type 'str'") if not _os.path.isfile(orca_input): raise IOError(f"Unable to locate the ORCA input file: {orca_input}") # Create a temporary working directory. with _tempfile.TemporaryDirectory() as tmp: # Work out the name of the input files. inp_name = f"{tmp}/{_os.path.basename(orca_input)}" xyz_name = f"{tmp}/{_os.path.basename(xyz_file)}" # Copy the files to the working directory. _shutil.copyfile(orca_input, inp_name) _shutil.copyfile(xyz_file, xyz_name) # Edit the input file to remove the point charges. lines = [] with open(inp_name, "r") as f: for line in f: if not line.startswith("%pointcharges"): lines.append(line) with open(inp_name, "w") as f: for line in lines: f.write(line) # Create the ORCA command. command = f"{self._exe} {inp_name}" # Run the command as a sub-process. proc = _subprocess.run( _shlex.split(command), cwd=tmp, shell=False, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, ) if proc.returncode != 0: raise RuntimeError("ORCA job failed!") # Parse the output file for the energies and gradients. engrad = ( f"{tmp}/{_os.path.splitext(_os.path.basename(orca_input))[0]}.engrad" ) if not _os.path.isfile(engrad): raise IOError(f"Unable to locate ORCA engrad file: {engrad}") with open(engrad, "r") as f: is_nrg = False is_grad = False gradient = [] for line in f: if line.startswith("# The current total"): is_nrg = True count = 0 elif line.startswith("# The current gradient"): is_grad = True count = 0 else: # This is an energy record. These start two lines after # the header, following a comment. So we need to count # one line forward. if is_nrg and count == 1 and not line.startswith("#"): try: energy = float(line.strip()) except: IOError("Unable to parse ORCA energy record!") # This is a gradient record. These start two lines after # the header, following a comment. So we need to count # one line forward. elif is_grad and count == 1 and not line.startswith("#"): try: gradient.append(float(line.strip())) except: IOError("Unable to parse ORCA gradient record!") else: if is_nrg: # We've hit the end of the records, abort. if count == 1: is_nrg = False # Increment the line count since the header. else: count += 1 if is_grad: # We've hit the end of the records, abort. if count == 1: is_grad = False # Increment the line count since the header. else: count += 1 if not forces: return energy # Convert the gradient to a NumPy array and reshape. (Read as a single # column, convert to x, y, z components for each atom.) try: gradient = _np.array(gradient).reshape(int(len(gradient) / 3), 3) except: raise IOError("Number of ORCA gradient records isn't a multiple of 3!") return energy, -gradient