Source code for emle._backends._sqm

#######################################################################
# EMLE-Engine: https://github.com/chemle/emle-engine
#
# Copyright: 2023-2025
#
# Authors: Lester Hedges   <lester.hedges@gmail.com>
#          Kirill Zinovjev <kzinovjev@gmail.com>
#
# EMLE-Engine is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# EMLE-Engine is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with EMLE-Engine. If not, see <http://www.gnu.org/licenses/>.
#####################################################################

"""SQM in-vacuo backend implementation."""

__all__ = ["SQM"]

import numpy as _np
import os as _os
import shlex as _shlex
import subprocess as _subprocess
import tempfile as _tempfile

from .._units import _KCAL_MOL_TO_HARTREE, _BOHR_TO_ANGSTROM

from ._backend import Backend as _Backend



[docs]
class SQM(_Backend):
    """
    SQM in-vacuo backend implementation.
    """

    def __init__(self, parm7, theory="DFTB3", qm_charge=0):
        """
        Constructor.

        Parameters
        ----------

        parm7: str
            The path to the AMBER topology file for the QM region.

        theory: str
            The SQM theory to use.

        qm_charge: int
            The charge on the QM region.
        """

        # Make sure a topology file has been set.
        if parm7 is None:
            raise ValueError("'parm7' must be specified when using the SQM backend")

        try:
            from sander import AmberParm as _AmberParm

            amber_parm = _AmberParm(parm7)
        except:
            raise IOError(f"Unable to load AMBER topology file: '{parm7}'")
        self._parm7 = parm7

        if not isinstance(theory, str):
            raise TypeError("'theory' must be of type 'str'.")

        # Store the atom names for the QM region.
        self._atom_names = [atom.name for atom in amber_parm.atoms]

        # Strip whitespace.
        self._theory = theory.replace(" ", "")

        # Validate the QM charge.
        if not isinstance(qm_charge, int):
            raise TypeError("'qm_charge' must be of type 'int'.")
        self._qm_charge = qm_charge


[docs]
    def calculate(self, atomic_numbers, xyz, forces=True, qm_charge=None):
        """
        Compute the energy and forces.

        Parameters
        ----------

        atomic_numbers: numpy.ndarray
            The atomic numbers of the atoms in the QM region.

        xyz: numpy.ndarray
            The coordinates of the atoms in the QM region in Angstrom.

        forces: bool
            Whether to calculate and return forces.

        qm_charge: int
            The charge on the QM region.

        Returns
        -------

        energy: float
            The in-vacuo energy in Eh.

        forces: numpy.ndarray
            The in-vacuo gradient in Eh/Bohr.
        """

        if not isinstance(xyz, _np.ndarray):
            raise TypeError("'xyz' must be of type 'numpy.ndarray'")
        if xyz.dtype != _np.float64:
            raise TypeError("'xyz' must have dtype 'float64'.")

        if not isinstance(atomic_numbers, _np.ndarray):
            raise TypeError("'atomic_numbers' must be of type 'numpy.ndarray'")

        if qm_charge is None:
            qm_charge = self._qm_charge
        else:
            if not isinstance(qm_charge, int):
                raise TypeError("'qm_charge' must be of type 'int'.")

        if len(atomic_numbers) != len(xyz):
            raise ValueError(
                f"Length of 'atomic_numbers' ({len(atomic_numbers)}) does not "
                f"match length of 'xyz' ({len(xyz)})"
            )

        # Convert to batched NumPy arrays.
        if len(atomic_numbers.shape) == 1:
            atomic_numbers = _np.expand_dims(atomic_numbers, axis=0)
            xyz = _np.expand_dims(xyz, axis=0)

        # Lists to store results.
        results_energy = []
        results_forces = []

        # Loop over batches.
        for i, (atomic_numbers_i, xyz_i) in enumerate(zip(atomic_numbers, xyz)):
            if len(atomic_numbers_i) != len(xyz_i):
                raise ValueError(
                    f"Length of 'atomic_numbers' ({len(atomic_numbers_i)}) does not "
                    f"match length of 'xyz' ({len(xyz_i)}) for index {i}"
                )

            # Store the number of QM atoms.
            num_qm = len(atomic_numbers_i)

            # Create a temporary working directory.
            with _tempfile.TemporaryDirectory() as tmp:
                # Work out the name of the input files.
                inp_name = f"{tmp}/sqm.in"
                out_name = f"{tmp}/sqm.out"

                # Write the input file.
                with open(inp_name, "w") as f:
                    # Write the header.
                    f.write("Run semi-empirical minimization\n")
                    f.write(" &qmmm\n")
                    f.write(f" qm_theory='{self._theory}',\n")
                    f.write(f" qmcharge={qm_charge},\n")
                    f.write(" maxcyc=0,\n")
                    f.write(" verbosity=4,\n")
                    f.write(f" /\n")

                    # Write the QM region coordinates.
                    for num, name, xyz_qm in zip(
                        atomic_numbers_i, self._atom_names, xyz_i
                    ):
                        x, y, z = xyz_qm
                        f.write(f" {num} {name} {x:.4f} {y:.4f} {z:.4f}\n")

                # Create the SQM command.
                command = f"sqm -i {inp_name} -o {out_name}"

                # Run the command as a sub-process.
                proc = _subprocess.run(
                    _shlex.split(command),
                    shell=False,
                    stdout=_subprocess.PIPE,
                    stderr=_subprocess.PIPE,
                )

                if proc.returncode != 0:
                    raise RuntimeError("SQM job failed!")

                if not _os.path.isfile(out_name):
                    raise IOError(f"Unable to locate SQM output file: {out_name}")

                with open(out_name, "r") as f:
                    is_converged = False
                    is_force = False
                    num_forces = 0
                    forces = []
                    for line in f:
                        # Skip lines prior to convergence.
                        if line.startswith(
                            " QMMM SCC-DFTB: SCC-DFTB for step     0 converged"
                        ):
                            is_converged = True
                            continue

                        # Now process the final energy and force records.
                        if is_converged:
                            if line.startswith(" Total SCF energy"):
                                try:
                                    energy = float(line.split()[4])
                                except:
                                    raise IOError(
                                        f"Unable to parse SCF energy record: {line}"
                                    )
                            elif line.startswith(
                                "QMMM: Forces on QM atoms from SCF calculation"
                            ):
                                # Flag that force records are coming.
                                is_force = True
                            elif is_force:
                                try:
                                    force = [float(x) for x in line.split()[3:6]]
                                except:
                                    raise IOError(
                                        f"Unable to parse SCF gradient record: {line}"
                                    )

                                # Update the forces.
                                forces.append(force)
                                num_forces += 1

                                # Exit if we've got all the forces.
                                if num_forces == num_qm:
                                    is_force = False
                                    break

            if num_forces != num_qm:
                raise IOError(
                    "Didn't find force records for all QM atoms in the SQM output!"
                )

            # Convert units.
            results_energy.append(energy * _KCAL_MOL_TO_HARTREE)

            # Convert the gradient to a NumPy array and reshape. Misleading comment
            # in sqm output, the "forces" are actually gradients so need to multiply by -1
            results_forces.append(
                -_np.array(forces) * _KCAL_MOL_TO_HARTREE * _BOHR_TO_ANGSTROM
            )

        # Convert to NumPy arrays.
        results_energy = _np.array(results_energy)
        results_forces = _np.array(results_forces)

        return results_energy, results_forces if forces else results_energy