Source code for chemsmart.jobs.orca.settings

"""
ORCA job settings implementation.

This module contains the settings classes
for configuring ORCA quantum chemistry
calculations, including general settings
and specialized settings for transition
state and IRC calculations.
"""

import copy
import logging
import os
import re

from chemsmart.io.orca import ORCA_ALL_SOLVENT_MODELS
from chemsmart.jobs.settings import MolecularJobSettings
from chemsmart.utils.utils import (
    deduplicate_string_keywords,
    get_list_from_string_range,
    get_prepend_string_list_from_modred_free_format,
)

logger = logging.getLogger(__name__)


class ORCAJobSettings(MolecularJobSettings):
    """
    Settings for ORCA quantum chemistry jobs.

    This class handles the configuration of ORCA calculations including
    method selection, basis sets, solvation, convergence criteria, and
    various calculation types.

    Attributes:
        ab_initio (str | None): Ab initio method (e.g., 'HF', 'MP2').
        functional (str | None): DFT functional (e.g., 'B3LYP', 'PBE0').
        dispersion (str | None): Dispersion correction (e.g., 'D3BJ').
        basis (str | None): Orbital basis set (e.g., 'def2-TZVP').
        aux_basis (str | None): Auxiliary basis set for RI/DF methods.
        extrapolation_basis (str | None): Basis for extrapolation schemes.
        defgrid (str | None): Grid quality keyword (e.g., 'defgrid2').
        scf_tol (str | float | None): SCF convergence tolerance.
        scf_algorithm (str | None): SCF algorithm.
        scf_maxiter (int | None): Maximum SCF iterations.
        scf_convergence (str | None): SCF convergence criteria.
        charge (int | None): Molecular charge.
        multiplicity (int | None): Spin multiplicity.
        gbw (bool): Write GBW file.
        freq (bool): Perform vibrational frequency calculation.
        numfreq (bool): Use numerical frequencies.
        dipole (bool): Compute dipole moment.
        quadrupole (bool): Compute quadrupole moment.
        mdci_cutoff (float | None): MDCI cutoff.
        mdci_density (float | None): MDCI density.
        jobtype (str | None): Calculation type (e.g., 'opt', 'sp').
        title (str | None): Job title string.
        solvent_model (str | None): Solvation model identifier.
        solvent_id (str | None): Solvent identifier.
        additional_solvent_options (str | None): Extra solvent options written
            inside the ``%cpcm`` block (e.g. ``'Epsilon 78.36'``).
        solventfilename (str | None): Path to a solvent file for use with the
            ``cosmors`` model. If in .cosmorsxyz, this file will be used directly,
            else, CHEMSMART will convert it into .cosmorsxyz format. The basename
            (without the ``.cosmorsxyz`` extension) is written as ``solventfilename
            "name"`` inside the ``%cosmors`` block, and the file itself is copied to
            the running directory (scratch or job folder) so that ORCA can locate it.
            Set via the ``-sf``/``--solventfilename`` CLI option.
        additional_route_parameters (str | None): Extra route parameters.
        route_to_be_written (str | None): Custom route string to write.
        modred (list | dict | None): Modredundant coordinates specification.
        gen_genecp_file (str | None): Path to gen/genecp file to include.
        heavy_elements (list | None): Heavy elements list for genecp.
        heavy_elements_basis (str | None): Basis for heavy elements.
        light_elements_basis (str | None): Basis for light elements.
        custom_solvent (str | None): Custom solvent parameters written
            inside the ``%cpcm`` block.  In ORCA, custom dielectric constants
            are specified as ``%cpcm`` block keywords rather than appended at
            the end of the input (as in Gaussian).  Example YAML entry::

                custom_solvent : |
                  Epsilon 16.7
                  Refrac 1.275

            This produces ``! CPCM B3LYP def2-SVP`` in the route line and
            a ``%cpcm`` block containing the Epsilon and Refrac lines.
        forces (bool): Calculate forces.
        input_string (str | None): Predefined input content to write directly.
        invert_constraints (bool): Invert modred constraints if True.
    """

    def __init__(
        self,
        ab_initio=None,
        functional=None,
        dispersion=None,
        basis=None,
        aux_basis=None,
        extrapolation_basis=None,
        defgrid=None,
        scf_tol=None,
        scf_algorithm=None,
        scf_maxiter=None,
        scf_convergence=None,
        charge=None,
        multiplicity=None,
        gbw=True,
        freq=False,
        numfreq=False,
        dipole=False,
        quadrupole=False,
        mdci_cutoff=None,
        mdci_density=None,
        jobtype=None,
        title=None,
        solvent_model=None,
        solvent_id=None,
        additional_solvent_options=None,
        solventfilename=None,
        additional_route_parameters=None,
        route_to_be_written=None,
        modred=None,
        gen_genecp_file=None,
        heavy_elements=None,
        heavy_elements_basis=None,
        light_elements_basis=None,
        custom_solvent=None,
        forces=False,
        input_string=None,
        invert_constraints=False,
        **kwargs,
    ):
        """
        Initialize ORCA job settings.

        Args:
            ab_initio: Ab initio method (e.g., 'HF', 'MP2')
            functional: DFT functional (e.g., 'B3LYP', 'PBE0')
            dispersion: Dispersion correction (e.g., 'D3BJ')
            basis: Basis set (e.g., 'def2-TZVP')
            aux_basis: Auxiliary basis set
            extrapolation_basis: Extrapolation basis set
            defgrid: Grid quality (e.g., 'defgrid2')
            scf_tol: SCF convergence tolerance
            scf_algorithm: SCF algorithm
            scf_maxiter: Maximum SCF iterations
            scf_convergence: SCF convergence criteria
            charge: Molecular charge
            multiplicity: Spin multiplicity
            gbw: Whether to write GBW file
            freq: Whether to calculate frequencies
            numfreq: Whether to use numerical frequencies
            dipole: Whether to calculate dipole moment
            quadrupole: Whether to calculate quadrupole moment
            mdci_cutoff: MDCI cutoff
            mdci_density: MDCI density
            jobtype: Type of calculation
            title: Job title
            solvent_model: Solvation model
            solvent_id: Solvent identifier
            additional_solvent_options: Additional solvent options written
                inside the ``%cpcm`` block (e.g. ``'Epsilon 78.36'``).
            solventfilename: Path to a ``.cosmorsxyz`` file for the ``cosmors``
                model.  The file is copied to the running directory and its
                basename (without extension) is written as
                ``solventfilename "name"`` in the ``%cosmors`` block.
            additional_route_parameters: Additional route parameters
            route_to_be_written: Custom route string
            modred: Modified redundant coordinates
            gen_genecp_file: General ECP file
            heavy_elements: Heavy elements list
            heavy_elements_basis: Basis for heavy elements
            light_elements_basis: Basis for light elements
            custom_solvent: Custom solvent parameters
            forces: Whether to calculate forces
            input_string: Custom input string
            invert_constraints: Whether to invert constraints
            **kwargs: Additional keyword arguments
        """
        super().__init__(
            ab_initio=ab_initio,
            functional=functional,
            dispersion=dispersion,
            basis=basis,
            defgrid=defgrid,
            charge=charge,
            multiplicity=multiplicity,
            freq=freq,
            numfreq=numfreq,
            jobtype=jobtype,
            title=title,
            solvent_model=solvent_model,
            solvent_id=solvent_id,
            additional_route_parameters=additional_route_parameters,
            route_to_be_written=route_to_be_written,
            modred=modred,
            gen_genecp_file=gen_genecp_file,
            heavy_elements=heavy_elements,
            heavy_elements_basis=heavy_elements_basis,
            light_elements_basis=light_elements_basis,
            custom_solvent=custom_solvent,
            forces=forces,
            input_string=input_string,
            **kwargs,
        )

        # ORCA-specific parameters
        self.aux_basis = aux_basis
        self.extrapolation_basis = extrapolation_basis
        self.scf_tol = scf_tol
        self.scf_algorithm = scf_algorithm
        self.scf_maxiter = scf_maxiter
        self.scf_convergence = scf_convergence
        self.gbw = gbw
        self.mdci_cutoff = mdci_cutoff
        self.mdci_density = mdci_density
        self.dipole = dipole
        self.quadrupole = quadrupole
        self.invert_constraints = invert_constraints
        self.additional_solvent_options = additional_solvent_options
        self.solventfilename = solventfilename

        # Validate frequency and force settings
        if forces is True and (freq is True or numfreq is True):
            raise ValueError(
                "Frequency and Force calculations cannot be performed by "
                "Orca at the same time!\n"
                'Such an input file will give "Illegal IType or MSType '
                'generated by parse." error.'
            )

    def merge(
        self, other, keywords=("charge", "multiplicity"), merge_all=False
    ):
        """
        Merge this settings object with another.

        Args:
            other: Settings object or dictionary to merge with
            keywords: Specific keywords to merge if merge_all is False
            merge_all: Whether to merge all attributes

        Returns:
            ORCAJobSettings: New merged settings object
        """

        other_dict = other if isinstance(other, dict) else other.__dict__

        if merge_all:
            # Update self with other for all
            merged_dict = self.__dict__.copy()
            merged_dict.update(other_dict)
            return type(self)(**merged_dict)

        if keywords is not None:
            other_dict = {
                k: other_dict[k] for k in keywords if k in other_dict
            }
        # Update self with other
        merged_dict = self.__dict__.copy()
        merged_dict.update(other_dict)
        return type(self)(**merged_dict)

    def copy(self):
        """
        Create a deep copy of the settings object.

        Returns:
            ORCAJobSettings: Deep copy of this settings object
        """
        return copy.deepcopy(self)

    def __getitem__(self, key):
        """
        Get settings attribute by key.

        Args:
            key: Attribute name to retrieve

        Returns:
            Value of the specified attribute
        """
        return self.__dict__[key]

    def __eq__(self, other):
        """
        Check equality with another settings object.

        Args:
            other: Another settings object to compare

        Returns:
            bool: True if settings are equal, False otherwise
        """
        if type(self) is not type(other):
            return NotImplemented

        # Exclude append_additional_info from the comparison
        self_dict = self.__dict__
        self_dict.pop("append_additional_info")

        other_dict = other.__dict__
        other_dict.pop("append_additional_info")

        return self_dict == other_dict

    @classmethod
    def from_comfile(cls, com_path):
        """
        Create ORCA job settings from a Gaussian .com file.

        Args:
            com_path: Path to the Gaussian .com file

        Returns:
            ORCAJobSettings: Settings object from Gaussian file
        """
        com_path = os.path.abspath(com_path)
        from chemsmart.io.gaussian.input import Gaussian16Input

        logger.info(f"Return Settings object from {com_path}")
        gaussian_settings_from_comfile = Gaussian16Input(
            filename=com_path
        ).read_settings()
        orca_default_settings = cls.default()
        return orca_default_settings.merge(
            gaussian_settings_from_comfile, merge_all=True
        )

    @classmethod
    def from_logfile(cls, log_path, **kwargs):
        """
        Create ORCA settings from Gaussian output .log file.

        Args:
            log_path: Path to the .log file
            **kwargs: Additional arguments for Gaussian16Output class

        Returns:
            ORCAJobSetting: Settings object from log file
        """
        log_path = os.path.abspath(log_path)
        from chemsmart.io.gaussian.output import Gaussian16Output

        logger.info(f"Return Settings object from {log_path}")
        gaussian_settings_from_logfile = Gaussian16Output(
            log_path
        ).read_settings()
        orca_default_settings = cls.default()
        orca_settings_from_logfile = orca_default_settings.merge(
            gaussian_settings_from_logfile, merge_all=True
        )
        # Convert def2 basis set naming
        if (
            "def2" in orca_settings_from_logfile.basis
            and "def2-" not in orca_settings_from_logfile.basis
        ):
            orca_settings_from_logfile.basis = (
                orca_settings_from_logfile.basis.replace("def2", "def2-")
            )
        return orca_settings_from_logfile

    @classmethod
    def from_inpfile(cls, inp_path):
        """
        Create ORCA job settings from ORCA .inp file.

        Args:
            inp_path: Path to the ORCA .inp file

        Returns:
            ORCAJobSettings: Settings object from input file
        """
        inp_path = os.path.abspath(inp_path)
        from chemsmart.io.orca.input import ORCAInput

        logger.info(f"Return settings object from {inp_path}")
        orca_settings_from_inpfile = ORCAInput(
            filename=inp_path
        ).read_settings()
        logger.info(f"with settings: {orca_settings_from_inpfile.__dict__}")
        return orca_settings_from_inpfile

    @classmethod
    def from_outfile(cls, out_path):
        """
        Create ORCA job settings from ORCA .out file.

        Args:
            out_path: Path to the ORCA .out file

        Returns:
            ORCAJobSettings: Settings object from output file
        """
        out_path = os.path.abspath(out_path)
        from chemsmart.io.orca.output import ORCAOutput

        logger.info(f"Return Settings object from {out_path}")
        return ORCAOutput(filename=out_path).read_settings()

    @classmethod
    def from_xyzfile(cls):
        """
        Create default ORCA job settings for .xyz file input.

        Returns:
            ORCAJobSettings: Default ORCA settings for xyz input
        """
        return ORCAJobSettings.default()

    @classmethod
    def from_filepath(cls, filepath, **kwargs):
        """
        Create settings from any supported file type.

        Args:
            filepath: Path to the input file
            **kwargs: Additional keyword arguments

        Returns:
            ORCAJobSettings or None: Settings object if file type supported
        """

        if ".com" in filepath or ".gjf" in filepath:
            return cls.from_comfile(filepath)

        if ".log" in filepath:
            return cls.from_logfile(filepath)

        if ".inp" in filepath:
            return cls.from_inpfile(filepath)

        if ".out" in filepath:
            return cls.from_outfile(filepath)

        if ".xyz" in filepath:
            return cls.from_xyzfile()

        return None

    @classmethod
    def default(cls):
        """
        Create default ORCA job settings.

        Returns:
            ORCAJobSettings: Default settings object
        """
        return cls(
            ab_initio=None,
            functional=None,
            dispersion=None,
            basis=None,
            aux_basis=None,
            extrapolation_basis=None,
            defgrid=None,
            scf_tol=None,
            scf_algorithm=None,
            scf_maxiter=None,
            scf_convergence=None,
            charge=None,
            multiplicity=None,
            gbw=True,
            freq=True,
            numfreq=False,
            dipole=False,
            quadrupole=False,
            mdci_cutoff=None,
            mdci_density=None,
            jobtype=None,
            title=None,
            solvent_model=None,
            solvent_id=None,
            additional_solvent_options=None,
            solventfilename=None,
            additional_route_parameters=None,
            route_to_be_written=None,
            modred=None,
            gen_genecp_file=None,
            heavy_elements=None,
            heavy_elements_basis=None,
            light_elements_basis=None,
            custom_solvent=None,
            forces=False,
            input_string=None,
            invert_constraints=False,
        )

    @property
    def route_string(self):
        """
        Get the ORCA route string for the job.

        Returns:
            str: ORCA route string based on job settings
        """
        if self.route_to_be_written is not None:
            route_string = self._get_route_string_from_user_input()
        else:
            route_string = self._get_route_string_from_jobtype()
        logger.debug(f"Route for settings {self}: {route_string}")
        return route_string

    def _get_route_string_from_user_input(self):
        """
        Get route string from user-provided input.

        Returns:
            str: User-specified route string with '!' prefix if needed
        """
        route_string = self.route_to_be_written
        if not route_string.startswith("!"):
            route_string = f"! {route_string}"
        return route_string

    def _get_route_string_from_jobtype(self):
        """
        Generate ORCA route string based on job type and settings.

        Returns:
            str: Generated ORCA route string

        Raises:
            ValueError: If invalid combination of settings is specified
        """
        route_string = ""
        if not route_string.startswith("!"):
            route_string += "! "

        # route string depends on job type
        # determine if route string requires 'opt' keyword
        if self.jobtype in ("opt", "modred", "scan"):
            route_string += "Opt"
        elif self.jobtype == "ts":
            route_string += (
                "OptTS"  # Orca keyword for transition state optimization
            )
        elif self.jobtype == "irc":
            route_string += "IRC"
        elif self.jobtype == "sp":
            route_string += ""

        # add frequency calculation
        # not okay if both freq and numfreq are True
        if self.freq and self.numfreq:
            raise ValueError("Cannot specify both freq and numfreq!")

        if self.freq:
            route_string += " Freq"
        elif self.numfreq:
            route_string += " NumFreq"  # requires numerical frequency,
            # e.g., in SMD model where analytic Hessian is not available

        # write level of theory
        level_of_theory = self._get_level_of_theory()
        route_string += f" {level_of_theory}"

        # write grid information
        if self.defgrid is not None:
            route_string += (
                f" {self.defgrid}"  # default is 'defgrid2', if not specified
            )

        # write convergence criteria in simple input/route
        if self.scf_tol is not None:
            if not self.scf_tol.lower().endswith("scf"):
                self.scf_tol += "SCF"
            route_string += f" {self.scf_tol}"

        # write convergence algorithm if not default
        if self.scf_algorithm is not None:
            route_string += f" {self.scf_algorithm}"

        # write solvent if solvation is turned on
        route_kw = self._get_solvent_route_keyword()
        if self.custom_solvent is not None:
            # Custom solvent parameters will be written in the appropriate
            # solvent block (%cpcm for CPCM/CPCMC/SMD, %cosmors for COSMO-RS).
            # The route keyword depends on the model.
            if self.solvent_id is not None:
                route_string += f" {route_kw}({self.solvent_id})"
            else:
                route_string += f" {route_kw}"
        elif self.solvent_model is not None and self.solvent_id is not None:
            # Each model uses its own route keyword:
            #   cpcm    → CPCM(solvent)
            #   cpcmc   → CPCMC(solvent)
            #   smd     → SMD(solvent)
            #   cosmors → COSMORS(solvent)
            route_string += f" {route_kw}({self.solvent_id})"
        elif self.solvent_model is not None and self.solvent_id is None:
            # Custom solvent case (e.g. user-specified Epsilon/Refrac via
            # additional_solvent_options): write bare keyword without a
            # solvent name.  ORCA reads the dielectric parameters from the
            # corresponding block.
            route_string += f" {route_kw}"
        elif self.solvent_model is None and self.solvent_id is not None:
            logger.warning(
                "Warning: Solvent identity is specified but solvent model "
                "is missing!\nDefaulting to CPCM model."
            )
            route_string += f" CPCM({self.solvent_id})"
        else:
            pass

        # Deduplication: if solvent model appears twice,
        # the first time it appears is removed
        if (
            self.solvent_model is not None
            and len(
                re.findall(
                    rf"\b{re.escape(self.solvent_model)}\b",
                    route_string,
                    re.IGNORECASE,
                )
            )
            > 1
        ):
            route_string = deduplicate_string_keywords(
                route_string, self.solvent_model
            )

        return route_string

    def _get_level_of_theory(self):
        """
        Get the level of theory string.

        Returns:
            str: Level of theory specification

        Raises:
            ValueError: If invalid method combination or missing parameters
        """
        level_of_theory = ""

        # detect whether this is a QMMM-type job to relax some validations
        if (
            self.jobtype is not None
            and "qm" in str(self.jobtype).lower()
            and "mm" in str(self.jobtype).lower()
        ):
            is_qmmm = True
        else:
            is_qmmm = False

        if self.ab_initio is not None and self.functional is not None:
            raise ValueError(
                "Warning: both ab initio and DFT are specified!\n"
                "Please specify only one method!"
            )

        if self.ab_initio is None and self.functional is None:
            # Allow missing ab_initio/functional for ORCA QMMM-type jobs.
            if not is_qmmm:
                raise ValueError(
                    "Warning: neither ab initio nor DFT is specified!\n"
                    "Please specify one method!"
                )
        if self.ab_initio is not None:
            level_of_theory += f"{self.ab_initio}"
        elif self.functional is not None:
            level_of_theory += f"{self.functional}"

        if self.basis is not None:
            level_of_theory += f" {self.basis}"
        elif self.basis is None:
            # allow missing basis for QMMM-type jobs where basis may be
            # provided per-layer (or omitted)
            if not is_qmmm:
                raise ValueError("Warning: basis is missing!")

        if self.aux_basis is not None:
            level_of_theory += f" {self.aux_basis}"

        if self.extrapolation_basis is not None:
            level_of_theory += f" {self.extrapolation_basis}"
        return level_of_theory

    def _write_geometry(self, f, atoms):
        """
        Write molecular geometry to input file.

        Args:
            f: File object to write to
            atoms: Atoms object containing molecular geometry

        Raises:
            AssertionError: If charge, multiplicity, or geometry is missing
        """
        # check that both charge and multiplicity are specified
        assert self.charge is not None, "No charge found!"
        assert self.multiplicity is not None, "No multiplicity found!"
        f.write(f"* xyz {self.charge} {self.multiplicity}\n")

        # check that a molecular geometry is given
        assert atoms is not None, "No molecular geometry found!"
        logger.info(f"Molecule given is: {atoms}")

        coordinates = ""
        for _i, (s, (x, y, z)) in enumerate(
            zip(atoms.symbols, atoms.positions, strict=False)
        ):
            string = f"{s:5} {float(x):15.10f} {float(y):15.10f} {float(z):15.10f}\n"
            coordinates += string
        f.write(coordinates)
        f.write("*\n")

    def _get_solvent_route_keyword(self):
        """Return the ORCA simple-input keyword for the active solvent model.

        Mapping (per ORCA 6.0 manual):

        * ``cpcm``    → ``CPCM``   (C-PCM with CPCM epsilon function)
        * ``cpcmc``   → ``CPCMC``  (C-PCM with COSMO epsilon function;
          replaces the legacy ``COSMO`` keyword removed in ORCA 4.0)
        * ``smd``     → ``SMD``    (invokes C-PCM internally; canonical
          simple-input is ``!SMD(solvent)``)
        * ``cosmors`` → ``COSMORS`` (openCOSMO-RS interface; route is
          ``!COSMORS(solvent)``)

        When no model is set the default is ``CPCM``.

        Returns:
            str: One of ``"CPCM"``, ``"CPCMC"``, ``"SMD"``, or ``"COSMORS"``
        """
        model_lower = (self.solvent_model or "").lower()
        if model_lower == "cpcmc":
            return "CPCMC"
        if model_lower == "smd":
            return "SMD"
        if model_lower == "cosmors":
            return "COSMORS"
        return "CPCM"

    def _check_solvent(self, solvent_model):
        """
        Validate solvent model specification.

        Args:
            solvent_model: Solvent model name to validate

        Raises:
            ValueError: If solvent model is not supported
        """
        if solvent_model.lower() not in ORCA_ALL_SOLVENT_MODELS:
            raise ValueError(
                f"The specified solvent model {solvent_model} is not in \n"
                f"the available solvent models: {ORCA_ALL_SOLVENT_MODELS}"
            )


class ORCATSJobSettings(ORCAJobSettings):
    """
    Settings for ORCA transition state calculations.

    This class extends ORCAJobSettings with specialized options for
    transition state searches and optimizations.

    Attributes:
        inhess (bool): Read initial Hessian if True.
        inhess_filename (str | None): Path to
        initial Hessian file (used when inhess=True).
        hybrid_hess (bool): Use hybrid Hessian scheme.
        hybrid_hess_atoms (list[int] | None): 1‑based
        atom indices for hybrid Hessian region.
        numhess (bool): Use numerical Hessian.
        recalc_hess (int): Frequency (in cycles) to recalculate Hessian.
        trust_radius (float | None): Trust radius for optimization.
        tssearch_type (str): TS search method ('optts' or 'scants').
        scants_modred (list | dict | None):
        Modredundant coordinates for ScanTS.
        full_scan (bool): If True, do not abort ScanTS after highest point.
    """

    def __init__(
        self,
        inhess=False,
        inhess_filename=None,
        hybrid_hess=False,
        hybrid_hess_atoms=None,
        numhess=False,
        recalc_hess=5,
        trust_radius=None,
        tssearch_type="optts",
        scants_modred=None,
        full_scan=False,
        **kwargs,
    ):
        """
        Initialize ORCA transition state job settings.

        Args:
            inhess: Whether to read initial Hessian
            inhess_filename: Filename for initial Hessian
            hybrid_hess: Whether to use hybrid Hessian
            hybrid_hess_atoms: Atoms for hybrid Hessian (1-indexed)
            numhess: Whether to use numerical Hessian
            recalc_hess: Frequency for Hessian recalculation
            trust_radius: Trust radius for optimization
            tssearch_type: Type of TS search ('optts' or 'scants')
            scants_modred: Modified coordinates for ScanTS
            full_scan: Whether to do full scan or abort after highest point
            **kwargs: Additional keyword arguments
        """
        super().__init__(**kwargs)
        self.inhess = inhess
        self.inhess_filename = inhess_filename
        self.hybrid_hess = hybrid_hess
        self.hybrid_hess_atoms = (
            hybrid_hess_atoms  # supplied a list; 1-indexed as per user
        )
        self.numhess = numhess
        self.recalc_hess = recalc_hess
        self.trust_radius = trust_radius
        self.tssearch_type = (
            tssearch_type  # methods for TS search: OptTS, ScanTS
        )
        self.scants_modred = (
            scants_modred  # modred for scanTS (as in a scan job)
        )
        self.full_scan = full_scan  # full scan or not;  do or not abort scan after highest point is reached

    @property
    def route_string(self):
        """
        Get ORCA route string for transition state job.

        Overrides parent property to handle TS-specific keywords.

        Returns:
            str: ORCA route string for TS calculation
        """
        self.jobtype = "ts"
        route_string = self._get_route_string_from_jobtype()
        if self.tssearch_type.lower() == "scants":
            route_string = route_string.replace("OptTS", "ScanTS")
        return route_string


class ORCAIRCJobSettings(ORCAJobSettings):
    """
    Settings for ORCA intrinsic reaction coordinate calculations.

    This class extends ORCAJobSettings with specialized options for
    IRC path following calculations.

    Attributes:
        maxiter (int | None): Maximum number of IRC iterations.
        printlevel (int | None): Verbosity level for IRC output.
        direction (str | None): IRC direction
        ('both', 'forward', 'backward', 'down').
        inithess (str | None): Initial Hessian specification
        ('read', 'calc_anfreq', 'calc_numfreq').
        hess_filename (str | None): Hessian filename when inithess='read'.
        hessmode (int | None): Hessian mode
        index used for initial displacement.
        init_displ (str | None): Initial displacement type ('DE' or 'length').
        scale_init_displ (float | None): Step size for initial displacement.
        de_init_displ (float | None): Target energy difference
        for initial displacement (Eh or mEh as per ORCA).
        follow_coordtype (str | None): Coordinate
        type to follow (typically 'cartesian').
        scale_displ_sd (float | None): Scaling factor for the first SD step.
        adapt_scale_displ (bool | None): Adapt SD step scaling dynamically.
        sd_parabolicfit (bool | None): Use
        parabolic fit to optimize SD step length.
        interpolate_only (bool | None): Restrict
        parabolic fit to interpolation only.
        do_sd_corr (bool | None): Apply correction to the first SD step.
        scale_displ_sd_corr (float | None):
        Scaling factor for SD correction step.
        sd_corr_parabolicfit (bool | None): Use
        parabolic fit for the correction step.
        tolrmsg (float | None): RMS gradient tolerance (a.u.).
        tolmaxg (float | None): Max gradient element tolerance (a.u.).
        monitor_internals (bool | None): Print
        selected internal coordinates during IRC.
        internal_modred (list | dict | None): Internal coordinates
        (B/A/D/I) to monitor when monitor_internals=True.
    """

    def __init__(
        self,
        maxiter=None,
        printlevel=None,
        direction=None,
        inithess=None,
        hess_filename=None,
        hessmode=None,
        init_displ=None,
        scale_init_displ=None,
        de_init_displ=None,
        follow_coordtype=None,
        scale_displ_sd=None,
        adapt_scale_displ=None,
        sd_parabolicfit=None,
        interpolate_only=None,
        do_sd_corr=None,
        scale_displ_sd_corr=None,
        sd_corr_parabolicfit=None,
        tolrmsg=None,
        tolmaxg=None,
        monitor_internals=None,
        internal_modred=None,
        **kwargs,
    ):
        """
        Initialize ORCA IRC job settings.

        Args:
            maxiter: Maximum number of IRC iterations
            printlevel: Level of output detail
            direction: IRC direction ('both', 'forward', 'backward', 'down')
            inithess: Initial Hessian specification
            hess_filename: Filename for Hessian file
            hessmode: Hessian mode for initial displacement
            init_displ: Initial displacement type
            scale_init_displ: Scaling for initial displacement
            de_init_displ: Energy difference for initial displacement
            follow_coordtype: Coordinate type to follow
            scale_displ_sd: Scaling for steepest descent displacement
            adapt_scale_displ: Whether to adapt displacement scaling
            sd_parabolicfit: Whether to use parabolic fit for SD step
            interpolate_only: Whether to only allow interpolation
            do_sd_corr: Whether to apply SD correction
            scale_displ_sd_corr: Scaling for SD correction
            sd_corr_parabolicfit: Whether to use parabolic fit for correction
            tolrmsg: RMS gradient tolerance
            tolmaxg: Maximum gradient tolerance
            monitor_internals: Whether to monitor internal coordinates
            internal_modred: Internal coordinates to monitor
            **kwargs: Additional keyword arguments
        """
        super().__init__(**kwargs)
        self.maxiter = maxiter
        self.printlevel = printlevel
        self.direction = direction
        self.inithess = inithess
        self.hess_filename = hess_filename
        self.hessmode = hessmode
        self.init_displ = init_displ
        self.scale_init_displ = scale_init_displ
        self.de_init_displ = de_init_displ
        self.follow_coordtype = follow_coordtype
        self.scale_displ_sd = scale_displ_sd
        self.adapt_scale_displ = adapt_scale_displ
        self.sd_parabolicfit = sd_parabolicfit
        self.interpolate_only = interpolate_only
        self.do_sd_corr = do_sd_corr
        self.scale_displ_sd_corr = scale_displ_sd_corr
        self.sd_corr_parabolicfit = sd_corr_parabolicfit
        self.tolrmsg = tolrmsg
        self.tolmaxg = tolmaxg
        self.monitor_internals = monitor_internals
        self.internal_modred = internal_modred

    @property
    def route_string(self):
        """
        Get ORCA route string for IRC job.

        Overrides parent property, removes frequency calculation
        as it's not compatible with IRC.

        Returns:
            str: ORCA route string for IRC calculation
        """
        self.jobtype = "irc"
        route_string = self._get_route_string_from_jobtype()
        if "freq" in route_string.lower():
            route_string = re.sub(
                r"freq", "", route_string, flags=re.IGNORECASE
            )
        return route_string

    def _write_irc_block(self, f):
        """Writes the IRC block options.

        IRC block input example below:
        ! IRC
        %irc
            MaxIter    20
            PrintLevel 1
            Direction  both # both - default
                            # forward
                            # backward
                            # down
        # Initial displacement
            InitHess read # by default ORCA uses the Hessian
            from AnFreq or NumFreq, or computes a new one
                            # read - reads the Hessian that
                            # is defined via Hess_Filename
                            # calc_anfreq  - computes the analytic Hessian
                            # calc_numfreq - computes the numeric Hessian
            Hess_Filename "h2o.hess" # Hessian for initial
            displacement, must be used together with InitHess = read
            hessMode 0 # Hessian mode that is used
            for the initial displacement. Default 0
            Init_Displ DE      # DE (default) - energy difference
                               # length       - step size
            Scale_Init_Displ 0.1 # step size for initial
            displacement from TS. Default 0.1 a.u.
            DE_Init_Displ 2.0 # energy difference
            that is expected for initial displacement
                                 #  based on provided Hessian (Default: 2 mEh)
        # Steps
            Follow_CoordType cartesian # default and only option
            Scale_Displ_SD 0.15 # Scaling
            factor for scaling the 1st SD step
            Adapt_Scale_Displ true # modify Scale_Displ_SD
            when the step size becomes smaller or larger
            SD_ParabolicFit true # Do a parabolic
            fit for finding an optimal SD step length
            Interpolate_only true # Only allow interpolation
            for parabolic fit, not extrapolation
            Do_SD_Corr        true  # Apply a correction to the 1st SD step
            Scale_Displ_SD_Corr 0.333 # Scaling factor for
            scaling the correction step to the SD step.
                                       # It is multiplied by the length
                                       # of the final 1st SD step
            SD_Corr_ParabolicFit true # Do a parabolic
            fit for finding an optimal correction
                                       # step length
        # Convergence thresholds - similar to LooseOpt
            TolRMSG   5.e-4      # RMS gradient (a.u.)
            TolMaxG   2.e-3      # Max. element of gradient (a.u.)
        # Output options
            Monitor_Internals # Up to three
            internal coordinates can be defined
                {B 0 1} # for which the values
                are printed during the IRC run.
                {B 1 5} # Possible are (B)onds,
                (A)ngles, (D)ihedrals and (I)mpropers
            end
        end.
        """
        irc_settings_keys = ORCAIRCJobSettings().__dict__.keys()
        parent_settings_keys = ORCAJobSettings().__dict__.keys()
        irc_specific_keys = set(irc_settings_keys) - set(parent_settings_keys)

        if not any(
            getattr(self, key) is not None for key in irc_specific_keys
        ):
            return

        # write irc block if any option value is not None:
        f.write("%irc\n")
        for key in irc_specific_keys:
            value = getattr(self, key)
            if value is None:
                continue  # ignore the rest of the code and go to next in the for loop
            # only write into IRC input if the value is not None
            if key == "internal_modred":
                pass  # internal_modred is not an option in ORCA IRC file
            elif key == "inithess":
                f.write(f"  {key} {value}\n")
                if value.lower() == "read":  # if initial hessian is to be read
                    assert (
                        self.hess_filename is not None
                    ), "No Hessian file is given!"
                    assert os.path.exists(
                        self.hess_filename
                    ), f"Hessian file {self.hess_filename} is not found!"
                    f.write(
                        f'  Hess_Filename "{self.hess_filename}"  # Hessian file\n'
                    )
            elif (
                key == "hess_filename"
            ):  # already used/written, if initial hessian is to be read
                pass
            elif key == "monitor_internals":
                if str(value).lower() == "true":
                    f.write(f"  {key}\n")
                    assert (
                        self.internal_modred is not None
                    ), 'No internal modred is specified for IRC job "monitor_intervals" option!'
                    prepend_string_list = (
                        get_prepend_string_list_from_modred_free_format(
                            self.internal_modred, program="orca"
                        )
                    )
                    for prepend_string in prepend_string_list:
                        f.write(f"  {{ {prepend_string} }}\n")
                    f.write("  end\n")
                else:  # monitor_internals has other value (false), then don't write it in input
                    pass
            else:  # all other keys with given values
                f.write(f"  {key} {value}\n")
        f.write("end\n")



[docs]
class ORCAQMMMJobSettings(ORCAJobSettings):
    """
    Configuration for ORCA multiscale QM/MM calculations.

    Supports five multiscale calculation types:
    1. Additive QM/MM
    2. Subtractive QM/QM2 (2-layer ONIOM)
    3. Subtractive QM/QM2/MM (3-layer ONIOM)
    4. MOL-CRYSTAL-QMMM (molecular crystals)
    5. IONIC-CRYSTAL-QMMM (semiconductors/insulators)

    Attributes:
        jobtype (str): Multiscale calculation type
        high_level_functional (str): DFT functional for high-level (QM) region
        high_level_basis (str): Basis set for high-level (QM) region
        intermediate_level_functional (str): DFT
        functional for intermediate-level (QM2) region
        intermediate_level_basis (str): Basis
        set for intermediate-level (QM2) region
        intermediate_level_method (str): Built-in
        method for intermediate-level (XTB, HF-3C, etc.)
        low_level_method (str): Method/force field for low-level (MM) region
        high_level_atoms (list): Atom indices for high-level (QM) region
        intermediate_level_atoms (list): Atom
        indices for intermediate-level (QM2) region
        charge_total (int): Total system charge
        mult_total (int): Total system multiplicity
        charge_intermediate (int): Intermediate layer charge
        mult_intermediate (int): Intermediate layer multiplicity
        charge_high (int): High-level region charge
        mult_high (int): High-level region multiplicity
        intermediate_level_solvation (str):
        Solvation model for intermediate-level region
        active_atoms (list): Active atoms for optimization
        use_active_info_from_pbc (bool): Use PDB active atom info
        optregion_fixed_atoms (list): Fixed atoms in optimization
        high_level_h_bond_length (dict): Custom high-level-H bond distances
        delete_la_double_counting (bool): Remove bend/torsion double counting
        delete_la_bond_double_counting_atoms
        (bool): Remove bond double counting
        embedding_type (str): Electronic or mechanical embedding
        conv_charges (bool): Use converged charges for crystal QM/MM
        conv_charges_max_n_cycles (int): Max charge convergence cycles
        conv_charges_conv_thresh (float): Charge convergence threshold
        scale_formal_charge_mm_atom (float): MM charge scaling factor
        n_unit_cell_atoms (int): Atoms per unit cell (MOL-CRYSTAL-QMMM)
        ecp_layer_ecp (str): ECP type for boundary region
        ecp_layer (int): Number of ECP layers
        scale_formal_charge_ecp_atom (float): ECP charge scaling factor
    """

    # Class attribute: Built-in methods supported for intermediate level (QM2)
    INTERMEDIATE_LEVEL_BUILT_IN_METHODS = [
        "XTB",
        "XTB0",
        "XTB1",
        "XTB2",
        "HF-3C",
        "PBEH-3C",
        "R2SCAN-3C",
        "PM3",
        "AM1",
    ]

    def __init__(
        self,
        jobtype=None,  # corresponding to the 5 types of jobs mentioned above
        high_level_functional=None,
        high_level_basis=None,
        intermediate_level_functional=None,
        intermediate_level_basis=None,
        intermediate_level_method=None,
        low_level_method=None,  # level-of-theory for MM
        high_level_atoms=None,
        intermediate_level_atoms=None,
        charge_total=None,
        mult_total=None,
        charge_intermediate=None,
        mult_intermediate=None,
        charge_high=None,
        mult_high=None,
        intermediate_level_solvation=None,
        intermediate_solv_scheme=None,
        active_atoms=None,
        use_active_info_from_pbc=False,
        optregion_fixed_atoms=None,
        high_level_h_bond_length=None,  # similar to scale factors in Gaussian ONIOM jobs
        delete_la_double_counting=False,
        delete_la_bond_double_counting_atoms=False,
        embedding_type=None,  # optional
        # the followings are crystal QM/MM parameters
        conv_charges=True,
        conv_charges_max_n_cycles=None,
        conv_charges_conv_thresh=None,
        scale_formal_charge_mm_atom=None,
        n_unit_cell_atoms=None,  # for MOL-CRYSTAL-QMMM jobs
        # the followings are for INONIC-CRYSTAL-QMMM jobs
        ecp_layer_ecp=None,
        ecp_layer=None,
        scale_formal_charge_ecp_atom=None,
        parent_jobtype=None,
        **kwargs,
    ):
        """
        Initialize ORCA QM/MM job settings.

        Args:
            jobtype: Type of multiscale calculation
            (QMMM, QM/QM2, QM/QM2/MM, etc.)
            high_level_functional: DFT functional for high-level (QM) region
            high_level_basis: Basis set for high-level (QM) region
            intermediate_level_functional: DFT functional
            for intermediate-level (QM2) region
            intermediate_level_basis: Basis set
            for intermediate-level (QM2) region
            intermediate_level_method: Built-in method for
            intermediate-level (XTB, HF-3C, PBEH-3C, etc.)
            low_level_method: Method/force field for
            low-level (MM) region (MMFF, AMBER, CHARMM, etc.)
            high_level_atoms: Atom indices for high-level (QM) region
            intermediate_level_atoms: Atom indices
            for intermediate-level (QM2) region
            charge_total: Total system charge
            mult_total: Total system multiplicity
            charge_intermediate: Intermediate layer charge (QM2)
            mult_intermediate: Intermediate layer multiplicity (QM2)
            charge_high: High-level region charge
            mult_high: High-level region multiplicity
            intermediate_level_solvation: Solvation model
            for intermediate-level (CPCM, SMD, etc.)
            active_atoms: Active atom indices (default: whole system)
            optregion_fixed_atoms: Fixed atom indices in optimization
            high_level_h_bond_length: Custom
            bond lengths {(atom1, atom2): length}
            delete_la_double_counting: Remove bend/torsion double counting
            delete_la_bond_double_counting_atoms: Remove bond double counting
            embedding_type: Electronic (default) or mechanical embedding
            conv_charges: Use converged charges
            conv_charges_max_n_cycles: Max cycles for charge convergence
            conv_charges_conv_thresh: Convergence threshold for charges
            scale_formal_charge_mm_atom: MM atomic charge scaling factor
            n_unit_cell_atoms: Atoms per unit
            cell (required for MOL-CRYSTAL-QMMM)
            ecp_layer_ecp: ECP type for boundary region
            ecp_layer: Number of ECP layers around QM region
            scale_formal_charge_ecp_atom: ECP atomic charge scaling factor
        """
        super().__init__(**kwargs)
        self.intermediate_solv_scheme = intermediate_solv_scheme
        self.jobtype = jobtype
        self.parent_jobtype = parent_jobtype
        self.high_level_functional = high_level_functional
        self.high_level_basis = high_level_basis
        self.intermediate_level_functional = intermediate_level_functional
        self.intermediate_level_basis = intermediate_level_basis
        self.intermediate_level_method = intermediate_level_method
        # allow legacy kwarg name from older configs
        self.low_level_method = (
            low_level_method
            if low_level_method is not None
            else kwargs.pop("low_level_force_field", None)
        )
        self.high_level_atoms = high_level_atoms
        self.intermediate_level_atoms = intermediate_level_atoms
        self.charge_total = charge_total
        self.mult_total = mult_total
        self.charge_intermediate = charge_intermediate
        self.mult_intermediate = mult_intermediate
        self.charge_high = charge_high
        self.mult_high = mult_high
        self.intermediate_level_solvation = intermediate_level_solvation
        self.active_atoms = active_atoms
        self.use_active_info_from_pbc = use_active_info_from_pbc
        self.optregion_fixed_atoms = optregion_fixed_atoms
        self.high_level_h_bond_length = high_level_h_bond_length
        self.delete_la_double_counting = delete_la_double_counting
        self.delete_la_bond_double_counting_atoms = (
            delete_la_bond_double_counting_atoms
        )
        self.embedding_type = embedding_type
        # Crystal QM/MM parameters
        self.conv_charges = conv_charges
        self.conv_charges_max_n_cycles = conv_charges_max_n_cycles
        self.conv_charges_conv_thresh = conv_charges_conv_thresh
        self.scale_formal_charge_mm_atom = scale_formal_charge_mm_atom
        self.n_unit_cell_atoms = n_unit_cell_atoms  # For MOL-CRYSTAL-QMMM
        # Ionic crystal QM/MM parameters
        self.ecp_layer_ecp = ecp_layer_ecp
        self.ecp_layer = ecp_layer
        self.scale_formal_charge_ecp_atom = scale_formal_charge_ecp_atom

        # Set parent class attributes from high-level (QM) region
        self.functional = self.high_level_functional
        self.basis = self.high_level_basis

        # Set charge/multiplicity with fallback
        # priority: high -> intermediate -> total
        if self.charge_high is not None and self.mult_high is not None:
            self.charge = self.charge_high
            self.multiplicity = self.mult_high
        elif (
            self.charge_intermediate is not None
            and self.mult_intermediate is not None
        ):
            # the charge/multiplicity of the
            # intermediate system corresponds to the
            # sum of the charge/multiplicity of
            # the high level and low level regions
            self.charge = self.charge_intermediate
            self.multiplicity = self.mult_intermediate
        elif self.charge_total is not None and self.mult_total is not None:
            self.charge = self.charge_total
            self.multiplicity = self.mult_total
        else:
            self.charge = None
            self.multiplicity = None

        # Validate intermediate-level parameters match job type
        self._validate_intermediate_parameters()


[docs]
    def re_init_and_validate(self):
        """Recompute derived fields and rerun validation after overrides."""
        # Update inherited level-of-theory aliases
        self.functional = self.high_level_functional
        self.basis = self.high_level_basis

        if self.charge_high is not None and self.mult_high is not None:
            self.charge = self.charge_high
            self.multiplicity = self.mult_high
        elif (
            self.charge_intermediate is not None
            and self.mult_intermediate is not None
        ):
            self.charge = self.charge_intermediate
            self.multiplicity = self.mult_intermediate
        elif self.charge_total is not None and self.mult_total is not None:
            self.charge = self.charge_total
            self.multiplicity = self.mult_total
        else:
            self.charge = None
            self.multiplicity = None

        self._validate_intermediate_parameters()


    def _validate_intermediate_parameters(self):
        """
        Validate that intermediate-level parameters are
        provided when needed and not provided when not needed.

        Raises:
            ValueError: If QM2 layer is required
            but intermediate parameters are missing,
                       or if intermediate parameters are
                       provided but QM2 layer is not required.
        """
        # Check if intermediate parameters are provided
        has_intermediate_params = (
            self.intermediate_level_functional is not None
            and self.intermediate_level_basis is not None
        ) or self.intermediate_level_method is not None
        if has_intermediate_params:
            if self.intermediate_level_atoms is None and self.low_level_method:
                raise ValueError(
                    "When intermediate-level theory is specified, the atoms "
                    "in intermediate level layer must also be specified via "
                    "intermediate_level_atoms."
                )

        # Job types that require QM2 (intermediate) layer
        qm2_required_jobtypes = ["QM/QM2", "QM/QM2/MM"]

        # Check if this job type requires QM2 layer
        requires_qm2 = self.jobtype and self.jobtype.upper() in [
            jt.upper() for jt in qm2_required_jobtypes
        ]

        if requires_qm2 and not has_intermediate_params:
            raise ValueError(
                f"Job type '{self.jobtype}' requires QM2 (intermediate) layer, but no intermediate-level "
                f"parameters were provided. Please specify at least one of:\n"
                f"  - intermediate_level_functional and intermediate_level_basis\n"
                f"  - intermediate_level_method (e.g., 'XTB', 'HF-3C')\n"
                f"  - intermediate_level_atoms\n"
                f"  - charge_intermediate and mult_intermediate"
            )

        if not requires_qm2 and has_intermediate_params:
            provided_params = []
            if self.intermediate_level_functional is not None:
                provided_params.append("intermediate_level_functional")
            if self.intermediate_level_basis is not None:
                provided_params.append("intermediate_level_basis")
            if self.intermediate_level_method is not None:
                provided_params.append("intermediate_level_method")
            if self.intermediate_level_atoms is not None:
                provided_params.append("intermediate_level_atoms")
            if self.charge_intermediate is not None:
                provided_params.append("charge_intermediate")
            if self.mult_intermediate is not None:
                provided_params.append("mult_intermediate")
            if self.intermediate_level_solvation is not None:
                provided_params.append("intermediate_level_solvation")

            if self.low_level_method is not None:
                raise ValueError(
                    f"Job type '{self.jobtype}' does not require QM2 (intermediate) layer, but "
                    f"intermediate-level parameters were provided: {', '.join(provided_params)}.\n"
                    f"QM2 layer is only used in job types: {', '.join(qm2_required_jobtypes)}.\n"
                    f"Either:\n"
                    f"  - Change job type to 'QM/QM2' or 'QM/QM2/MM', OR\n"
                    f"  - Remove the intermediate-level parameters"
                )

    @property
    def qmmm_route_string(self):
        return self._get_level_of_theory_string()

    @property
    def qmmm_block(self):
        return self._write_qmmm_block()


[docs]
    def validate_and_assign_level(
        self, functional, basis, built_in_method, level_name
    ):
        """
        Validate and assign level of theory
        for high/intermediate/low-level layers.

        Args:
            functional: DFT functional
            basis: Basis set
            built_in_method: Built-in ORCA method (XTB, HF-3C, etc.)
            level_name: Layer name (high_level, intermediate_level, low_level)

        Returns:
            str: Validated level of theory string

        Raises:
            ValueError: If incompatible options are specified
        """
        level_of_theory = ""
        if functional and basis and built_in_method:
            raise ValueError(
                f"For {level_name} level: specify either functional/basis OR built-in method, not both!"
            )
        if built_in_method:
            assert functional is None and basis is None, (
                f"Built-in method specified for {level_name} level - "
                f"functional and basis should not be provided!"
            )
            if (
                level_name == "intermediate_level"
                and built_in_method.upper()
                in self.INTERMEDIATE_LEVEL_BUILT_IN_METHODS
            ):
                level_of_theory = built_in_method
        elif functional and basis:
            if level_name == "intermediate_level":
                level_of_theory = "QM2"
            else:
                level_of_theory = f"{functional} {basis}"
        if level_name == "low_level":
            level_of_theory = "MM"
        logger.debug(f"Level of theory for {level_name}: {level_of_theory}")
        return level_of_theory



[docs]
    def check_crystal_qmmm(self):
        """
        Validate crystal QM/MM job settings.

        Ensures required parameters are set for MOL-CRYSTAL-QMMM
        and IONIC-CRYSTAL-QMMM calculations.

        Raises:
            AssertionError: If required parameters are missing or invalid
        """
        jobtype = self.jobtype.upper()
        if jobtype in ["IONIC-CRYSTAL-QMMM", "MOL-CRYSTAL-QMMM"]:
            assert (
                self.mult_high is None
                and self.mult_intermediate is None
                and self.mult_total is None
            ), f"Multiplicity should not be specified for {jobtype} job!"
            self.multiplicity = 0  # avoid conflicts from parent class
            if self.conv_charges is False:
                assert (
                    self.low_level_method is not None
                ), "Force field file containing convergence charges is not provided!"
            if jobtype == "MOL-CRYSTAL-QMMM":
                assert (
                    self.n_unit_cell_atoms
                ), f"The number of atoms per molecular subunit for {jobtype} job is not provided!"
            else:
                assert (
                    self.ecp_layer_ecp
                ), f"cECPs used for the boundary region for {jobtype} job must be specified! "
                assert (
                    self.n_unit_cell_atoms is None
                ), "The number of atoms per molecular subunit is only applicable to MOL-CRYSTAL-QMMM!"


    def _get_level_of_theory_string(self):
        """
        Generate ORCA route string for QM/MM calculations.

        Returns:
            str: Route string (e.g., '!QM/XTB', '!QM/HF-3C/MM', '!QMMM')
        """
        if (
            self.jobtype.upper() == "IONIC-CRYSTAL-QMMM"
            or self.jobtype.upper() == "MOL-CRYSTAL-QMMM"
        ):
            level_of_theory = f"! {self.jobtype.upper()}"
        else:
            level_of_theory = "!"
            parent_jobtype = self.parent_jobtype.lower()
            if parent_jobtype in ("opt", "modred", "scan"):
                level_of_theory += " Opt"
            elif parent_jobtype == "ts":
                level_of_theory += " OptTS"
            elif parent_jobtype == "irc":
                level_of_theory += " IRC"
            elif parent_jobtype == "sp":
                level_of_theory += ""
            if self.freq is True:
                level_of_theory += " Freq"
            self.high_level_of_theory = self.validate_and_assign_level(
                self.high_level_functional,
                self.high_level_basis,
                None,
                level_name="high_level",
            )
            self.intermediate_level_of_theory = self.validate_and_assign_level(
                self.intermediate_level_functional,
                self.intermediate_level_basis,
                self.intermediate_level_method,
                level_name="intermediate_level",
            )
            self.low_level_of_theory = self.validate_and_assign_level(
                None, None, self.low_level_method, level_name="low_level"
            )
            if self.low_level_of_theory is not None:
                if self.jobtype.upper() == "QMMM":
                    level_of_theory += " QMMM"  # Additive QM/MM
                else:
                    level_of_theory += " QM"
                    # only "!QMMM" will be used for additive QMMM
                    level_of_theory += f"/{self.intermediate_level_of_theory}"
                    print(
                        f"+++++++++++\n{self.intermediate_level_of_theory}\n+++++++++++"
                    )
                    if self.low_level_method is not None:
                        level_of_theory += f"/{self.low_level_of_theory}"
            level_of_theory += f" {self.high_level_of_theory}"
            if self.solvent_model is not None:
                level_of_theory += f" {self.solvent_model}"
            self.intermediate_level_method = (
                self.intermediate_level_method or ""
            ).lower()
            if self.intermediate_level_solvation:
                if self.intermediate_level_solvation in [
                    "alpb(water)",
                    "ddcosmo(water)",
                    "cpcmx(water)",
                ]:
                    assert self.intermediate_level_method.lower() == "xtb", (
                        "The intermediate-level solvation models ALPB, DDCOSMO, CPCMX are only "
                        "compatible with XTB method!"
                    )
                    level_of_theory += f" {self.intermediate_level_solvation}"
                elif (
                    self.intermediate_level_solvation.lower() == "cpcm(water)"
                ):
                    level_of_theory += f" {self.intermediate_level_solvation}"
        return level_of_theory

    def _get_h_bond_length(self):
        """
        Generate custom high-level-H bond length specifications.

        Returns:
            str: Bond length specifications for %qmmm block

        Example output:
            Dist_C_HLA 1.09
            Dist_O_HLA 0.98
            or
            H_Dist_FileName "QM_H_dist.txt"
        """
        if isinstance(self.high_level_h_bond_length, dict):
            h_bond_length = ""
            for (
                atom_pair,
                bond_length,
            ) in self.high_level_h_bond_length.items():
                h_bond_length += (
                    f"Dist_{atom_pair[0]}_{atom_pair[1]} {bond_length}\n"
                )
            return h_bond_length
        elif isinstance(self.high_level_h_bond_length, str):
            # if the user provided a file with the d0_X-H values
            assert os.path.exists(
                self.high_level_h_bond_length
            ), f"File {self.high_level_h_bond_length} does not exist!"
            return f'H_Dist_FileName "{self.high_level_h_bond_length}"'

    def _get_embedding_type(self):
        """
        Generate embedding type specification for QM/MM calculations.

        Returns:
            str: Embedding directive (e.g., "Embedding Electronic")

        Raises:
            ValueError: If invalid embedding type specified
        """
        embedding_type = "Embedding "
        if self.embedding_type.lower() == "electronic":
            embedding_type += "Electronic"
        elif self.embedding_type.lower() == "mechanical":
            embedding_type += "Mechanical"
        else:
            raise ValueError(
                f"Invalid embedding type: {self.embedding_type}. "
                "Valid options are 'Electronic' or 'Mechanical'."
            )
        return embedding_type

    def _get_charge_and_multiplicity(self):
        """
        Generate charge and multiplicity lines for %qmmm block.

        Returns charge/multiplicity for total system
        (QM/MM) or intermediate system (QM/QM2/MM).
        ORCA specifies total/intermediate
        charge/multiplicity in %qmmm block while
        high-level charge/multiplicity are specified in the coordinate section.

        Returns:
            tuple: (charge_str, mult_str) for %qmmm block
        """
        if self.intermediate_level_atoms is not None:
            charge = f"Charge_Intermediate {self.charge_intermediate}"
            mult = f"Mult_Intermediate {self.mult_intermediate}"
        else:
            charge = f"Charge_Total {self.charge_total}"
            mult = f"Mult_Total {self.mult_total}"
        return charge, mult

    def _get_partition_string(self):
        """
        Generate atom partition specifications for
        high-level and intermediate-level regions.

        Returns:
            str: Partition block with QMAtoms and QM2Atoms directives

        Example:
            QMAtoms {1:10 15:20} end
            QM2Atoms {21:30} end
        """

        partition_string = ""
        high_fmt = self._get_formatted_partition_strings(self.high_level_atoms)
        if high_fmt is not None:
            partition_string += f"QMAtoms {{{high_fmt}}} end\n"
        intermediate_fmt = self._get_formatted_partition_strings(
            self.intermediate_level_atoms
        )
        if intermediate_fmt is not None:
            partition_string += f"QM2Atoms {{{intermediate_fmt}}} end\n"
        return partition_string

    def _get_formatted_partition_strings(self, atom_id):
        """
        Convert atom specifications to ORCA-formatted range strings.

        Normalizes various atom specification formats to sorted unique integers
        and compresses contiguous sequences into compact range notation.

        Args:
            atom_id: Atom specification (list/tuple
            of ints, string ranges, etc.)

        Returns:
            str: Formatted string with ranges and individual atoms

        Examples:
            Input: [1,2,3,5,7,8,9] → Output: "1:3 5 7:9"
            Input: "1-15,37,39" → Output: "1:15 37 39"
        """
        if atom_id is None:
            return None

        # If it's already a sequence of integers (list/tuple), coerce to ints
        if isinstance(atom_id, (list, tuple)):
            atoms = [int(x) for x in atom_id]
        elif isinstance(atom_id, str):
            # Prefer robust utility if available
            try:
                atoms = get_list_from_string_range(atom_id)
            except Exception:
                # Fallback simple parser: split on commas/spaces,
                # handle ranges like '1-5' or '1:5'
                tokens = re.split(r"[,\s]+", atom_id.strip())
                atoms = []
                for tok in tokens:
                    if not tok:
                        continue
                    if "-" in tok:
                        a, b = tok.split("-", 1)
                        atoms.extend(range(int(a), int(b) + 1))
                    elif ":" in tok:
                        a, b = tok.split(":", 1)
                        atoms.extend(range(int(a), int(b) + 1))
                    else:
                        atoms.append(int(tok))
        else:
            # Try to iterate and coerce
            try:
                atoms = [int(x) for x in atom_id]
            except Exception:
                raise TypeError(
                    "Unsupported atom specification type for partition string"
                )

        # make unique sorted list of ints
        atoms = sorted(set(int(x) for x in atoms))
        if not atoms:
            return None

        # Convert user-facing 1-indexed atom
        # ids to ORCA's 0-indexed requirement
        if min(atoms) > 0:
            atoms = [a - 1 for a in atoms]

        # compress contiguous sequences into start:end or single integer
        ranges = []
        start = prev = atoms[0]
        for a in atoms[1:]:
            if a == prev + 1:
                prev = a
                continue
            # break in sequence
            if start == prev:
                ranges.append(str(start))
            else:
                ranges.append(f"{start}:{prev}")
            start = prev = a
        # finalize last segment
        if start == prev:
            ranges.append(str(start))
        else:
            ranges.append(f"{start}:{prev}")

        return " ".join(ranges)

    def _write_qmmm_block(self):
        """
        Generate complete %qmmm block for ORCA multiscale calculations.

        Constructs the full %qmmm block containing all necessary parameters
        for QM/MM calculations including atom
        partitions, charges, force fields,
        embedding options, and crystal-specific parameters.

        Returns:
            str: Complete %qmmm block ready for ORCA input file

        Example output:
            %qmmm
            QMAtoms {16:33 68:82} end
            QM2Atoms {0:12 83:104} end
            Charge_Medium 0
            ORCAFFFilename "system.prms"
            end
        """
        full_qm_block = "%qmmm\n"

        # Add atom partition specifications
        partition_block = self._get_partition_string()
        if partition_block:
            full_qm_block += partition_block

        # Add charge/multiplicity for medium or total system
        charge_str, mult_str = self._get_charge_and_multiplicity()
        # Only include lines that do not contain the string 'None'
        if charge_str and "None" not in str(charge_str):
            full_qm_block += f"{charge_str}\n"
        if mult_str and "None" not in str(mult_str):
            full_qm_block += f"{mult_str}\n"

        # Add intermediate-level solvation if specified
        if self.intermediate_solv_scheme is not None:
            full_qm_block += f"solv_scheme {self.intermediate_solv_scheme}\n"

        # Add active atoms specification
        active_fmt = self._get_formatted_partition_strings(self.active_atoms)
        if active_fmt is not None:
            full_qm_block += f"ActiveAtoms {{{active_fmt}}} end\n"

        # Add intermediate-level method/basis specifications
        if (
            self.intermediate_level_method is not None
            and self.intermediate_level_method.strip()
        ):
            # Only write QM2CustomFile if method is NOT in the built-in list
            if (
                self.intermediate_level_method.upper()
                not in self.INTERMEDIATE_LEVEL_BUILT_IN_METHODS
            ):
                # intermediate-level method provided via external file
                full_qm_block += (
                    f'QM2CustomFile "{self.intermediate_level_method}" end\n'
                )
        else:
            # If custom intermediate-level
            # functional/basis provided, include them
            if (
                self.intermediate_level_functional is not None
                and self.intermediate_level_functional.strip()
            ):
                full_qm_block += f'QM2CUSTOMMETHOD "{self.intermediate_level_functional}" end\n'
            if (
                self.intermediate_level_basis is not None
                and self.intermediate_level_basis.strip()
            ):
                full_qm_block += (
                    f'QM2CUSTOMBASIS "{self.intermediate_level_basis}" end\n'
                )

        # Add force field for specific job types
        # assert (
        #         self.low_level_force_field is not None
        # ), f"Force field file missing for {self.jobtype} job!"
        if self.jobtype and self.jobtype.upper() in [
            "QMMM",
            "QM/MM",
            "QM/QM2/MM",
            "IONIC-CRYSTAL-QMMM",
        ]:
            assert (
                self.low_level_method is not None
            ), f"Force field file missing for {self.jobtype} job!"
            full_qm_block += f'ORCAFFFilename "{self.low_level_method}"\n'

        # Fixed atoms options
        if self.use_active_info_from_pbc:
            full_qm_block += "Use_Active_InfoFromPDB true\n"
        elif self.optregion_fixed_atoms:
            fixed_fmt = self._get_formatted_partition_strings(
                self.optregion_fixed_atoms
            )
            if fixed_fmt is not None:
                full_qm_block += f"OptRegion_FixedAtoms {{{fixed_fmt}}} end\n"

        # Add custom H-bond lengths
        if self.high_level_h_bond_length is not None:
            h_block = self._get_h_bond_length()
            if h_block:
                # _get_h_bond_length may return a multi-line string
                full_qm_block += (
                    f"{h_block}\n" if not h_block.endswith("\n") else h_block
                )

        # Add double-counting removal options
        if self.delete_la_double_counting is True:
            full_qm_block += "Delete_LA_Double_Counting true\n"
        if self.delete_la_bond_double_counting_atoms:
            full_qm_block += "DeleteLABondDoubleCounting true\n"
        if self.embedding_type is not None:
            full_qm_block += f"{self._get_embedding_type()}\n"

        # Add crystal QM/MM parameters if any
        crystal_sub = self._write_crystal_qmmm_subblock()
        if crystal_sub is not None:
            full_qm_block += crystal_sub

        full_qm_block += "end\n"
        return full_qm_block

    def _write_crystal_qmmm_subblock(self):
        """
        Generate crystal-specific parameters for %qmmm block.

        Creates parameter block for MOL-CRYSTAL-QMMM and IONIC-CRYSTAL-QMMM
        calculations including charge convergence,
        ECP settings, and scaling factors.

        Returns:
            str: Crystal QM/MM parameter block
        """
        crystal_qmmm_subblock = ""
        if not self.conv_charges:
            crystal_qmmm_subblock += "Conv_Charges False \n"
            crystal_qmmm_subblock += (
                f'ORCAFFFilename "{self.low_level_method}" \n'
            )
        if self.conv_charges_max_n_cycles is not None:
            crystal_qmmm_subblock += (
                f"Conv_Charges_MaxNCycles {self.conv_charges_max_n_cycles} \n"
            )
        if self.conv_charges_conv_thresh is not None:
            crystal_qmmm_subblock += (
                f"Conv_Charges_ConvThresh {self.conv_charges_conv_thresh} \n"
            )
        if self.scale_formal_charge_mm_atom is not None:
            crystal_qmmm_subblock += f"Scale_FormalCharge_MMAtom {self.scale_formal_charge_mm_atom} \n"
        if self.n_unit_cell_atoms is not None:
            crystal_qmmm_subblock += (
                f"NumUnitCellAtoms {self.n_unit_cell_atoms} \n"
            )
        if self.ecp_layer_ecp is not None:
            crystal_qmmm_subblock += f"cECPs {self.ecp_layer_ecp} \n"
        if self.ecp_layer is not None:
            crystal_qmmm_subblock += f"ECPLayers {self.ecp_layer} \n"
        if self.scale_formal_charge_ecp_atom is not None:
            crystal_qmmm_subblock += f"Scale_FormalCharge_ECPAtom {self.scale_formal_charge_ecp_atom} \n"
        return crystal_qmmm_subblock

    def __eq__(self, other):
        """
        Compare two ORCAQMMMJobSettings objects for equality.

        Compares all attributes between two ORCA
        QMMM settings objects, including the
        QMMM-specific attributes that are not present in the parent class.

        Args:
            other (ORCAQMMMJobSettings): Settings object to compare with.

        Returns:
            bool or NotImplemented: True if equal, False if different,
                NotImplemented if types don't match.
        """
        if type(self) is not type(other):
            return NotImplemented

        # Get dictionaries of both objects
        self_dict = self.__dict__.copy()
        other_dict = other.__dict__.copy()

        # Exclude append_additional_info from
        # the comparison (inherited behavior)
        self_dict.pop("append_additional_info", None)
        other_dict.pop("append_additional_info", None)

        return self_dict == other_dict



class ORCANEBJobSettings(ORCAJobSettings):
    """
    Settings for ORCA Nudged Elastic Band (NEB) calculations.

    NEB finds minimum energy pathways and transition states by optimizing a
    series of molecular structures (images) connecting reactant and product
    geometries. Images are connected by spring forces forming an elastic band
    that converges to the minimum energy pathway.

    Supported NEB job types:
    - NEB: Standard NEB calculation
    - NEB-CI: Climbing Image NEB for accurate transition state location
    - NEB-TS: NEB with transition state optimization
    - FAST-NEB-TS: Fast convergence variant
    - TIGHT-NEB-TS: Tight convergence criteria
    - LOOSE-NEB: Loose convergence for initial screening
    - ZOOM-NEB: Zoomed NEB for specific pathway regions
    - NEB-IDPP: Image-dependent pair potential initialization

    Attributes:
        joboption (str): NEB calculation type (NEB, NEB-CI, NEB-TS, etc.)
        nimages (int): Number of intermediate images between endpoints
        starting_xyz (str): Reactant geometry file path (inherited)
        ending_xyzfile (str): Product geometry file path
        intermediate_xyzfile (str): Initial TS guess file path (optional)
        restarting_xyzfile (str): Restart file for continuation (optional)
        preopt_ends (bool): Pre-optimize endpoint geometries
        semiempirical (str): Semiempirical method (XTB0/XTB1/XTB2)
    """

    def __init__(
        self,
        semiempirical=None,
        joboption=None,
        nimages=None,
        ending_xyzfile=None,
        intermediate_xyzfile=None,
        restarting_xyzfile=None,
        preopt_ends=False,
        **kwargs,
    ):
        """
        Initialize ORCA NEB job settings.

        Args:
            joboption (str): NEB calculation type (NEB, NEB-CI, NEB-TS, etc.)
            nimages (int): Number of intermediate images in NEB chain
            ending_xyzfile (str): Product geometry file path
            intermediate_xyzfile (str): Initial TS geometry guess file path
            restarting_xyzfile (str): Restart file path for continuation
            preopt_ends (bool): Pre-optimize endpoint geometries before NEB
            semiempirical (str): Semiempirical method (XTB0, XTB1, XTB2)
            **kwargs: Additional arguments passed to parent ORCAJobSettings

        Note:
            Reactant geometry (starting_xyz) is typically set via parent class
            from the main molecule input file.
        """
        super().__init__(**kwargs)
        self.joboption = joboption
        self.nimages = nimages
        self.ending_xyzfile = ending_xyzfile
        self.intermediate_xyzfile = intermediate_xyzfile
        self.restarting_xyzfile = restarting_xyzfile
        self.preopt_ends = preopt_ends
        self.semiempirical = semiempirical

    def __eq__(self, other):
        """
        Compare two ORCANEBJobSettings objects for equality.

        Compares all attributes between two ORCA NEB settings objects, including the
        NEB-specific attributes that are not present in the parent class.

        Args:
            other (ORCANEBJobSettings): Settings object to compare with.

        Returns:
            bool or NotImplemented: True if equal, False if different,
                NotImplemented if types don't match.
        """
        if type(self) is not type(other):
            return NotImplemented

        # Get dictionaries of both objects
        self_dict = self.__dict__.copy()
        other_dict = other.__dict__.copy()

        # Exclude append_additional_info from the comparison (inherited behavior)
        self_dict.pop("append_additional_info", None)
        other_dict.pop("append_additional_info", None)

        return self_dict == other_dict

    # populate attribute from parent class (optional)
    @property
    def route_string(self):
        """
        Generate ORCA route line for NEB calculation.

        Returns:
            str: Route string with method and NEB job type
        """
        return self._get_neb_route_string()

    def _get_neb_route_string(self):
        """
        Generate ORCA route line for NEB calculation.

        Returns:
            str: Route string combining method and NEB job type

        Examples:
            "! XTB2 NEB-TS" (with semiempirical)
            "! B3LYP def2-SVP NEB-CI" (with DFT)
        """
        route_string = ""
        if not route_string.startswith("!"):
            route_string += "! "

        # add frequency calculation
        # not okay if both freq and numfreq are True
        if self.freq and self.numfreq:
            raise ValueError("Cannot specify both freq and numfreq!")

        if self.freq:
            route_string += " Freq"
        elif self.numfreq:
            route_string += " NumFreq"  # requires numerical frequency,
            # e.g., in SMD model where analytic Hessian is not available

        # write level of theory
        if self.semiempirical:
            route_string += f" {self.semiempirical} {self.joboption}"
        else:
            route_string += f" {self._get_level_of_theory()} {self.joboption}"

        # write grid information
        if self.defgrid is not None:
            route_string += (
                f" {self.defgrid}"  # default is 'defgrid2', if not specified
            )

        # write convergence criteria in simple input/route
        if self.scf_tol is not None:
            if not self.scf_tol.lower().endswith("scf"):
                self.scf_tol += "SCF"
            route_string += f" {self.scf_tol}"

        # write convergence algorithm if not default
        if self.scf_algorithm is not None:
            route_string += f" {self.scf_algorithm}"

        # write solvent if solvation is turned on
        route_kw = self._get_solvent_route_keyword()
        if self.custom_solvent is not None:
            # Custom solvent parameters will be written in the appropriate
            # solvent block.  The route keyword depends on the model.
            if self.solvent_id is not None:
                route_string += f" {route_kw}({self.solvent_id})"
            else:
                route_string += f" {route_kw}"
        elif self.solvent_model is not None and self.solvent_id is not None:
            route_string += f" {route_kw}({self.solvent_id})"
        elif self.solvent_model is not None and self.solvent_id is None:
            route_string += f" {route_kw}"
        elif self.solvent_model is None and self.solvent_id is not None:
            logger.warning(
                "Warning: Solvent identity is specified but solvent model "
                "is missing!\nDefaulting to CPCM model."
            )
            route_string += f" CPCM({self.solvent_id})"
        else:
            pass

        # Deduplication: if solvent model appears twice,
        # the first time it appears is removed
        if (
            self.solvent_model is not None
            and len(
                re.findall(
                    rf"\b{re.escape(self.solvent_model)}\b",
                    route_string,
                    re.IGNORECASE,
                )
            )
            > 1
        ):
            route_string = deduplicate_string_keywords(
                route_string, self.solvent_model
            )

        return route_string