Source code for chemsmart.jobs.gaussian.settings

"""
Gaussian job configuration and settings management.

This module provides the GaussianJobSettings class for configuring
Gaussian computational chemistry calculations. It handles all aspects
of job setup including calculation methods, basis sets, solvent models,
optimization parameters, and advanced options like GenECP definitions.

The settings class supports merging configurations, validation of
parameter combinations, and conversion to/from various input formats.
"""

import copy
import logging
import os
import re

from chemsmart.io.gaussian import GAUSSIAN_SOLVATION_MODELS
from chemsmart.io.gaussian.gengenecp import GenGenECPSection
from chemsmart.jobs.settings import MolecularJobSettings
from chemsmart.utils.periodictable import PeriodicTable
from chemsmart.utils.repattern import (
    gaussian_freq_keywords_pattern,
    gaussian_opt_keywords_pattern,
    multiple_spaces_pattern,
)

pt = PeriodicTable()


logger = logging.getLogger(__name__)


class GaussianJobSettings(MolecularJobSettings):
    """
    Configuration settings for Gaussian computational chemistry jobs.

    Manages all parameters needed to configure Gaussian calculations
    including quantum chemistry methods, basis sets, solvation models,
    optimization options, and advanced features like pseudopotentials.
    Provides validation and merging capabilities for job configurations.

    Inherits common calculation fields from `MolecularJobSettings`, such as
    `ab_initio`, `functional`, `basis`, `semiempirical`, `charge`,
    `multiplicity`, `jobtype`, `title`, `freq`, `numfreq`, `solvent_model`,
    `solvent_id`, `additional_solvent_options`,
    `additional_opt_options_in_route`,
    `append_additional_info`, `gen_genecp_file`, `heavy_elements`,
    `heavy_elements_basis`, `light_elements_basis`, `custom_solvent`, `forces`,
    and `input_string`.

    Attributes:
        chk (bool): Whether to use checkpoint files.
        dieze_tag (str): Calculation level specification.
        additional_solvent_options (str): Extra solvent parameters.
        additional_opt_options_in_route (str): Extra optimization options.
        append_additional_info (str): Additional input file content.
        gen_genecp_file (str): Path to GenECP definition file.
    """

    def __init__(
        self,
        ab_initio=None,
        functional=None,
        basis=None,
        semiempirical=None,
        charge=None,
        multiplicity=None,
        chk=True,
        jobtype=None,
        title=None,
        freq=False,
        numfreq=False,
        dieze_tag=None,
        solvent_model=None,
        solvent_id=None,
        additional_solvent_options=None,
        additional_opt_options_in_route=None,
        additional_route_parameters=None,
        route_to_be_written=None,
        modred=None,
        gen_genecp_file=None,
        heavy_elements=None,
        heavy_elements_basis=None,
        light_elements_basis=None,
        custom_solvent=None,
        append_additional_info=None,
        forces=False,
        input_string=None,
        **kwargs,
    ):
        """
        Initialize Gaussian job settings with calculation parameters.

        Sets up all configuration options for Gaussian calculations
        including validation of parameter combinations and path expansion.

        Args:
            ab_initio (str, optional): Ab initio method (e.g., 'HF', 'MP2').
            functional (str, optional): DFT functional (e.g., 'B3LYP').
            basis (str, optional): Basis set (e.g., '6-31G*').
            semiempirical (str, optional): Semi-empirical method.
            charge (int, optional): Molecular charge.
            multiplicity (int, optional): Spin multiplicity.
            chk (bool): Use checkpoint files (default True).
            jobtype (str, optional): Calculation type (e.g., 'opt', 'freq').
            title (str, optional): Job title.
            freq (bool): Include frequency calculations.
            numfreq (bool): Use numerical frequencies.
            dieze_tag (str, optional): Calculation level tag.
            solvent_model (str, optional): Solvation model.
            solvent_id (str, optional): Solvent identifier.
            additional_solvent_options (str, optional): Extra solvent options.
            additional_opt_options_in_route (str, optional): Extra opt options.
            additional_route_parameters (str, optional): Extra route params.
            route_to_be_written (str, optional): Custom route string.
            modred (dict, optional): Modified redundant coordinates.
            gen_genecp_file (str, optional): GenECP definition file path.
            heavy_elements (list, optional): Heavy element symbols.
            heavy_elements_basis (str, optional): Heavy element basis set.
            light_elements_basis (str, optional): Light element basis set.
            custom_solvent (dict, optional): Custom solvent parameters.
            append_additional_info (str, optional): Additional input content.
            forces (bool): Calculate forces.
            input_string (str, optional): Custom input string.
            **kwargs: Additional keyword arguments.

        Raises:
            ValueError: If incompatible options are specified (freq + forces).
        """
        super().__init__(
            ab_initio=ab_initio,
            functional=functional,
            basis=basis,
            semiempirical=semiempirical,
            charge=charge,
            multiplicity=multiplicity,
            freq=freq,
            numfreq=numfreq,
            jobtype=jobtype,
            title=title,
            solvent_model=solvent_model,
            solvent_id=solvent_id,
            additional_route_parameters=additional_route_parameters,
            route_to_be_written=route_to_be_written,
            modred=modred,
            gen_genecp_file=gen_genecp_file,
            heavy_elements=heavy_elements,
            heavy_elements_basis=heavy_elements_basis,
            light_elements_basis=light_elements_basis,
            custom_solvent=custom_solvent,
            forces=forces,
            input_string=input_string,
            **kwargs,
        )
        self.chk = chk
        self.dieze_tag = dieze_tag
        self.additional_solvent_options = additional_solvent_options
        self.additional_opt_options_in_route = additional_opt_options_in_route
        self.append_additional_info = append_additional_info
        self._route_string = None

        if gen_genecp_file is not None and "~" in gen_genecp_file:
            gen_genecp_file = os.path.expanduser(gen_genecp_file)
            logger.debug(f"Expanded GenECP file path: {gen_genecp_file}")
        self.gen_genecp_file = gen_genecp_file

        # Validate that frequency and force calculations are not both enabled
        if forces is True and freq is True:
            logger.error(
                "Attempted to enable both frequency and force calculations"
            )
            raise ValueError(
                "Frequency and Force calculations cannot be performed by "
                "Gaussian at the same time!\n"
                'Such an input file will give "Illegal IType or MSType '
                'generated by parse." error.'
            )

    @property
    def genecp(self):
        """
        Check if GenECP pseudopotentials are configured.

        Determines whether the job uses GenECP pseudopotential
        definitions either from a file or heavy element specifications.

        Returns:
            bool: True if GenECP is configured, False otherwise.
        """
        return (
            self.gen_genecp_file is not None or self.heavy_elements is not None
        )

    def merge(
        self,
        other,
        keywords=("charge", "multiplicity", "title"),
        merge_all=False,
    ):
        """
        Merge current settings with another settings object.

        Combines settings from two sources, with the other settings
        taking precedence. Supports selective merging of specific
        keywords or complete merging of all parameters.

        Args:
            keywords (list): Specific list of keywords to merge.
                Defaults to charge and multiplicity.
                If None, all settings will be merged
                (Caution: may cause issue if e.g.,
                genecp log file used to prepare input without genecp).
            other (JobSettings, dict): Settings to
            merge. Can also take the form of a dictionary
            merge_all (bool): If True, merge all settings.
            If False, only merge the settings specified in keywords.
        """
        other_dict = other if isinstance(other, dict) else other.__dict__

        if merge_all:
            # Update self with other for all
            logger.debug(
                f"Merging all settings from {type(other).__name__} "
                f"into {type(self).__name__}"
            )
            merged_dict = self.__dict__.copy()
            merged_dict.update(other_dict)
            return type(self)(**merged_dict)

        if keywords is not None:
            logger.debug(f"Merging specific keywords: {keywords}")
            other_dict = {
                k: other_dict[k] for k in keywords if k in other_dict
            }
        # Update self with other
        merged_dict = self.__dict__.copy()
        merged_dict.update(other_dict)
        return type(self)(**merged_dict)

    def copy(self):
        """
        Create a deep copy of the settings object.

        Returns a completely independent copy of the settings
        that can be modified without affecting the original.

        Returns:
            GaussianJobSettings: Deep copy of this settings object.
        """
        return copy.deepcopy(self)

    def __getitem__(self, key):
        """
        Get setting value by key using dictionary-style access.

        Allows settings to be accessed like a dictionary for
        compatibility with existing code patterns.

        Args:
            key (str): Setting name to retrieve.

        Returns:
            Any: Value of the requested setting.
        """
        return self.__dict__[key]

    def __eq__(self, other):
        """
        Compare two settings objects for equality.

        Checks if all attributes are equal between two settings objects,
        excluding the append_additional_info field from comparison.

        Args:
            other (GaussianJobSettings): Settings object to compare with.

        Returns:
            bool or NotImplemented: True if equal, False if different,
                NotImplemented if types don't match.
        """
        if type(self) is not type(other):
            return NotImplemented

        # Exclude append_additional_info from the comparison
        self_dict = self.__dict__
        self_dict.pop("append_additional_info")

        other_dict = other.__dict__
        other_dict.pop("append_additional_info")

        is_equal = self_dict == other_dict
        if not is_equal:
            import dictdiffer

            logger.info("Gaussian job settings are not equal.")
            for diff in list(dictdiffer.diff(self_dict, other_dict)):
                logger.info(f"Difference: {diff}")
        return self_dict == other_dict

    @classmethod
    def from_comfile(cls, filename):
        """
        Create Gaussian settings from a Gaussian .com input file.

        Parses a Gaussian input file to extract calculation settings
        and returns a configured GaussianJobSettings object.

        Args:
            filename (str): Path to the .com file to be parsed.

        Returns:
            GaussianJobSettings: Settings object extracted from input file.
        """
        from chemsmart.io.gaussian.input import Gaussian16Input

        com_path = os.path.abspath(filename)
        gaussian_settings_from_comfile = Gaussian16Input(
            filename=com_path
        ).read_settings()
        return gaussian_settings_from_comfile

    @classmethod
    def from_inpfile(cls, filename):
        """
        Create Gaussian settings from an ORCA .inp input file.

        Converts ORCA input file settings to equivalent Gaussian
        settings by parsing the file and merging with default values.

        Args:
            filename (str): Path to the .inp file to be parsed.

        Returns:
            GaussianJobSettings: Gaussian-compatible settings object.
        """
        from chemsmart.io.orca.input import ORCAInput

        inp_path = os.path.abspath(filename)
        logger.info(f"Return Settings object from inp file: {inp_path}")
        orca_settings_from_inpfile = ORCAInput(
            filename=inp_path
        ).read_settings()
        gaussian_default_settings = cls.default()
        gaussian_settings_from_inpfile = gaussian_default_settings.merge(
            orca_settings_from_inpfile, merge_all=True
        )
        logger.info(
            f"with settings: {gaussian_settings_from_inpfile.__dict__}"
        )
        return gaussian_settings_from_inpfile

    @classmethod
    def from_logfile(cls, filename):
        """
        Create Gaussian settings from a Gaussian .log output file.

        Extracts calculation settings from a completed Gaussian
        log file, including support for periodic boundary conditions.

        Args:
            filename (str): Path to the .log file to be parsed.

        Returns:
            GaussianJobSettings: Settings object extracted from log file.
        """
        log_path = os.path.abspath(filename)
        from chemsmart.io.gaussian.output import (
            Gaussian16Output,
            Gaussian16OutputWithPBC,
        )

        logger.info(f"Return Settings object from logfile: {log_path}")
        try:
            settings = Gaussian16Output(filename=log_path).read_settings()
        except ValueError:
            settings = Gaussian16OutputWithPBC(
                filename=log_path
            ).read_settings()

        return settings

    @classmethod
    def from_outfile(cls, filename):
        """
        Create Gaussian settings from an ORCA .out output file.

        Converts ORCA output file settings to equivalent Gaussian
        settings by parsing the file and merging with defaults.

        Args:
            filename (str): Path to the ORCA .out file to be parsed.

        Returns:
            GaussianJobSettings: Gaussian-compatible settings object.
        """
        from chemsmart.io.orca.output import ORCAOutput

        out_path = os.path.abspath(filename)
        logger.info(
            f"Return Settings object from ORCA .out filename: {out_path}"
        )
        orca_settings_from_outfile = ORCAOutput(
            filename=out_path
        ).read_settings()
        gaussian_default_settings = cls.default()
        gaussian_settings_from_outfile = gaussian_default_settings.merge(
            orca_settings_from_outfile, merge_all=True
        )
        logger.info(
            f"with settings: {gaussian_settings_from_outfile.__dict__}"
        )
        return gaussian_settings_from_outfile

    @classmethod
    def default(cls):
        """
        Create default Gaussian job settings.

        Returns a GaussianJobSettings object with sensible default
        values for common calculations including frequency analysis.

        Returns:
            GaussianJobSettings: Default settings configuration.
        """
        return cls(
            ab_initio=None,
            functional=None,
            basis=None,
            semiempirical=None,
            charge=None,
            multiplicity=None,
            chk=True,
            jobtype=None,
            title="Gaussian job with default settings",
            freq=True,
            numfreq=False,
            dieze_tag=None,
            solvent_model=None,
            solvent_id=None,
            additional_solvent_options=None,
            additional_opt_options_in_route=None,
            additional_route_parameters=None,
            route_to_be_written=None,
            modred=None,
            gen_genecp_file=None,
            heavy_elements=None,
            heavy_elements_basis=None,
            light_elements_basis=None,
            custom_solvent=None,
            append_additional_info=None,
            forces=False,
            input_string=None,
        )

    @classmethod
    def from_filepath(cls, filepath, **kwargs):
        """
        Create settings from a file based on its extension.

        Automatically determines the file type and uses the appropriate
        parser to create settings from the input file.

        Args:
            filepath (str): Path to input file (.com, .gjf, .inp, .log).
            **kwargs: Additional keyword arguments for file parsing.

        Returns:
            GaussianJobSettings: Settings object from the input file.

        Raises:
            ValueError: If file extension is not supported.
        """
        if filepath.endswith((".com", ".gjf")):
            logger.debug(
                f"Loading settings from Gaussian input file: {filepath}"
            )
            return cls.from_comfile(filepath)
        if filepath.endswith(".inp"):
            logger.debug(f"Loading settings from ORCA input file: {filepath}")
            return cls.from_inpfile(filepath)
        if filepath.endswith(".log"):
            logger.debug(
                f"Loading settings from Gaussian log file: {filepath}"
            )
            return cls.from_logfile(filepath)
        raise ValueError(f"Could not create {cls} from {filepath}")

    @property
    def route_string(self):
        """
        Generate the Gaussian route string for the calculation.

        Constructs the complete route line (#-line) for the Gaussian
        input file based on the configured job settings.

        Returns:
            str: Complete route string for Gaussian input file.
        """
        if self.route_to_be_written is not None:
            route_string = self._get_route_string_from_user_input()
        else:
            route_string = self._get_route_string_from_jobtype()
        logger.debug(f"Route for settings {self}: {route_string}")
        return route_string

    @route_string.setter
    def route_string(self, value):
        self._route_string = value

    def _get_route_string_from_user_input(self):
        """
        Generate route string from user-provided route specification.

        Processes user-defined route string and adds appropriate
        prefix (#-tag) if not already present.

        Returns:
            str: Formatted route string with proper prefix.
        """
        route_string = self.route_to_be_written
        if not route_string.startswith("#"):
            route_string = (
                f"#{self.dieze_tag} {route_string}"
                if self.dieze_tag is not None
                else f"# {route_string}"
            )
        return route_string

    def get_light_elements(self, molecule):
        """
        Identify light elements in molecule for GenECP calculations.

        Determines which elements in the molecular structure are
        considered light elements (not in heavy_elements list) for
        mixed basis set calculations with pseudopotentials.

        Args:
            molecule: Molecule object containing atomic information.

        Returns:
            list or None: Sorted list of light element symbols,
                or None if no heavy elements are specified.
        """
        if self.heavy_elements is None:
            return None

        unique_atoms = set(molecule.chemical_symbols)
        light_elements_set = unique_atoms - set(self.heavy_elements)
        light_elements_list = list(light_elements_set)

        sorted_light_elements_list = pt.sorted_periodic_table_list(
            light_elements_list
        )
        logger.info(
            f"Light elements in structure: {sorted_light_elements_list}"
        )
        return sorted_light_elements_list

    def _get_route_string_from_jobtype(self):
        """
        Generate route string based on job type and settings.

        Constructs the Gaussian route string by analyzing the job type
        and incorporating all relevant calculation parameters including
        optimization options, frequency calculations, and solvation.

        Returns:
            str: Complete route string for the specified job type.

        Raises:
            ValueError: If required parameters are missing or incompatible
                options are specified.
        """
        route_string = ""

        # Get dieze tag with job route and freq string
        dieze_tag = self._get_dieze_tag()
        route_string += dieze_tag

        level_of_theory_string = self._get_level_of_theory_string()
        route_string += level_of_theory_string

        return route_string

    def _get_dieze_tag(self):
        """Get dieze tag from job type."""
        route_string = ""
        # Add #-tag prefix for calculation level specification
        if self.dieze_tag is not None:
            route_string += (
                f"#{self.dieze_tag}"  # e.g. dieze_tag='p' to get '#p'
            )
            logger.debug(f"Added dieze tag: {self.dieze_tag}")
        else:
            route_string += "#"

        # Write optimization keywords with additional options
        # e.g., maxstep, calcall etc
        if self.additional_opt_options_in_route is not None:
            logger.debug(
                f"Adding additional opt options: "
                f"{self.additional_opt_options_in_route}"
            )
            if self.jobtype == "opt":
                route_string += (
                    f" opt=({self.additional_opt_options_in_route})"
                )
            elif self.jobtype == "ts":
                if "calcall" not in self.additional_opt_options_in_route:
                    route_string += f" opt=(ts,calcfc,noeigentest,{self.additional_opt_options_in_route})"
                else:
                    route_string += f" opt=(ts,noeigentest,{self.additional_opt_options_in_route})"
            elif self.jobtype == "modred":
                route_string += f" opt=(modredundant,{self.additional_opt_options_in_route})"
                self.freq = True
            elif self.jobtype == "scan":
                route_string += f" opt=(modredundant,{self.additional_opt_options_in_route})"
                self.freq = False
            elif self.jobtype == "sp":
                route_string += ""
                self.freq = False  # turn off freq calculation for sp job
        elif self.additional_opt_options_in_route is None:
            if self.jobtype == "opt":
                route_string += " opt"
            elif self.jobtype == "ts":
                route_string += " opt=(ts,calcfc,noeigentest)"
            elif self.jobtype == "modred":
                route_string += " opt=modredundant"
                self.freq = True
            elif self.jobtype == "scan":
                route_string += " opt=modredundant"
                self.freq = False
            elif self.jobtype == "sp":
                route_string += ""
                self.freq = False  # turn off freq calculation for sp job

        # Write frequency calculation keywords
        if self.freq and not self.numfreq:
            route_string += " freq"
            logger.debug("Added frequency calculation")
        elif not self.freq and self.numfreq:
            route_string += " freq=numer"
            logger.debug("Added numerical frequency calculation")
        elif self.freq and self.numfreq:
            raise ValueError(
                "Both freq and numfreq cannot be True at the same time!"
            )
        return route_string

    def _get_level_of_theory_string(self):
        """Get level of theory string for route."""
        route_string = ""

        # Determine computational method and add to route string
        if self.semiempirical is not None:
            # Semiempirical methods do not require a basis set
            if self.basis is not None:
                logger.warning(
                    "Basis set provided but not required for semiempirical "
                    "methods."
                )
            route_string += f" {self.semiempirical}"
            logger.debug(f"Added semiempirical method: {self.semiempirical}")

        elif self.ab_initio is not None and self.functional is None:
            # Ab initio method requires a basis set
            if self.basis is None:
                logger.error("Basis set required for ab initio methods")
                raise ValueError(
                    "Error: Basis set is required for ab initio methods."
                )
            route_string += f" {self.ab_initio} {self.basis}"
            logger.debug(
                f"Added ab initio method: {self.ab_initio} with basis: "
                f"{self.basis}"
            )

        elif self.functional is not None and self.ab_initio is None:
            # DFT method requires a basis set
            if self.basis is None:
                logger.error("Basis set required for DFT methods")
                raise ValueError(
                    "Error: Basis set is required for DFT methods."
                )
            route_string += f" {self.functional} {self.basis}"
            logger.debug(
                f"Added DFT functional: {self.functional} with basis: "
                f"{self.basis}"
            )

        elif self.ab_initio is not None and self.functional is not None:
            logger.error(
                "Both ab initio and DFT functional provided - invalid"
            )
            raise ValueError(
                "Error: Both ab initio and DFT functional provided.\n"
                "Specify only one."
            )

        else:
            raise ValueError("Error: No computational method provided.")

        # Write forces calculation keyword
        if self.forces:
            route_string += " force"
            logger.debug("Added force calculation")

        # Handle solvation model configuration
        if self.custom_solvent is not None:
            logger.debug("Using custom solvent parameters")
            if self.solvent_model is None and self.solvent_id is None:
                route_string += " scrf=(pcm,read"  # using pcm model as default
                logger.debug("Using default PCM model for custom solvent")
            else:
                # Set default values if any of solvent_model or solvent_id
                # are None
                solvent_model = self.solvent_model or "pcm"
                solvent_id = self.solvent_id or "generic,read"
                route_string += f" scrf=({solvent_model},solvent={solvent_id}"
                logger.debug(
                    f"Using custom solvent with model: {solvent_model}, "
                    f"ID: {solvent_id}"
                )
        elif (
            self.solvent_model is not None and self.solvent_id is not None
        ):  # solvation is turned on
            route_string += (
                f" scrf=({self.solvent_model},solvent={self.solvent_id}"
            )
            logger.debug(
                f"Added solvation: {self.solvent_model} with solvent "
                f"{self.solvent_id}"
            )
        elif (self.solvent_model is not None and self.solvent_id is None) or (
            self.solvent_model is None and self.solvent_id is not None
        ):  # if one is provided but the other not
            logger.error(
                f"Incomplete solvation specification - model: "
                f"{self.solvent_model}, ID: {self.solvent_id}"
            )
            raise ValueError(
                f"Both solvent model and solvent ID need to be specified.\n"
                f"Currently, solvent model is {self.solvent_model} and "
                f"solvent id is {self.solvent_id}!"
            )

        # Add additional solvation options if present
        if "scrf" in route_string:
            if self.additional_solvent_options is not None:
                route_string += f",{self.additional_solvent_options})"
                logger.debug(
                    f"Added additional solvent options: "
                    f"{self.additional_solvent_options}"
                )
            else:
                route_string += ")"

        # Write additional parameters for route
        if self.additional_route_parameters is not None:
            route_string += f" {self.additional_route_parameters}"
            logger.debug(
                f"Added additional route parameters: "
                f"{self.additional_route_parameters}"
            )

        # Write job type specific route keywords
        if self.jobtype == "nci":
            route_string += " output=wfn"  # output wavefunction file for NCI
            logger.debug("Added NCI-specific output=wfn keyword")
        elif self.jobtype == "wbi":
            route_string += " pop=nboread"  # write bond order matrix
            logger.debug("Added WBI-specific pop=nboread keyword")
        return route_string

    @property
    def _genecp_elements_specified(self):
        """
        Check if GenECP elements are properly specified.

        Verifies that both heavy elements and their basis set are
        defined for GenECP pseudopotential calculations.

        Returns:
            bool: True if GenECP elements are properly specified.
        """
        return (
            self.heavy_elements is not None
            and self.heavy_elements_basis is not None
        )

    @property
    def _genecp_file_specified(self):
        """
        Check if GenECP file is properly specified and exists.

        Verifies that a GenECP file path is provided and the file
        exists on the filesystem for pseudopotential definitions.

        Returns:
            bool: True if GenECP file is specified and exists.
        """
        return self.gen_genecp_file is not None and os.path.exists(
            self.gen_genecp_file
        )

    def get_genecp_section(self, molecule):
        """
        Generate GenECP section for mixed basis set calculations.

        Creates the GenECP pseudopotential section either from
        explicitly specified elements and basis sets or from a
        pre-defined GenECP file.

        Args:
            molecule: Molecule object for element analysis.

        Returns:
            GenGenECPSection: GenECP section for Gaussian input.

        Raises:
            ValueError: If neither GenECP elements nor file are specified.
        """
        if self._genecp_elements_specified:
            logger.info(
                f"GENECP elements specified:\n"
                f"Heavy elements: {self.heavy_elements}\n"
                f"Heavy elements basis: {self.heavy_elements_basis}\n"
                f"Light elements basis: {self.light_elements_basis}\n"
            )
            # Method 1 for getting genecp
            # Need to supply self.heavy_elements, self.heavy_elements_basis
            # and self.light_elements_basis
            heavy_elements_in_structure = self.prune_heavy_elements(molecule)

            genecp_section = GenGenECPSection.from_bse_api(
                light_elements=self.get_light_elements(molecule),
                light_elements_basis=self.light_elements_basis,
                heavy_elements=heavy_elements_in_structure,
                heavy_elements_basis=self.heavy_elements_basis,
            )

        elif self._genecp_file_specified:
            logger.info(f"GENECP file specified: {self.gen_genecp_file}")
            # Method 2 for getting genecp:
            # Supplied path to genecp file
            genecp_section = GenGenECPSection.from_genecp_path(
                genecp_path=self.gen_genecp_file
            )
        else:
            raise ValueError("Could not get GenECPSection")
        return genecp_section

    def prune_heavy_elements(self, molecule):
        """
        Filter heavy elements list to those present in the molecule.

        Removes heavy elements from the settings list that are not
        actually present in the current molecular structure, ensuring
        only relevant pseudopotentials are included.

        Args:
            molecule: Molecule object containing atomic information.

        Returns:
            list or None: Heavy elements present in the molecule,
                or None if no heavy elements are specified.
        """
        # heavy atoms list supplied from settings contains all heavy atoms
        # needed for heavy_atom_basis but in each structure, some heavy atoms
        # supplied from settings may not appear in the structure
        if self.heavy_elements is None:
            return None
        return list(
            set(molecule.chemical_symbols).intersection(self.heavy_elements)
        )

    def determine_basis_keyword(self, molecule):
        """
        Determine the appropriate basis keyword
        (gen or genecp) for the molecule.

        Based on the heavy elements present in the molecule, determines whether
        to use 'gen' (for elements that don't require ECPs) or 'genecp' (for
        elements that require ECPs). Elements with atomic number > 36 require
        ECPs and thus need 'genecp', while elements <= 36 only need 'gen'.

        If no heavy elements are present in the molecule, returns the light
        elements basis keyword (with hyphens removed).

        Args:
            molecule: Molecule object containing atomic information.

        Returns:
            str: 'gen' if all heavy elements have atomic number <= 36,
                 'genecp' if any heavy element has atomic number > 36,
                 light_elements_basis (formatted) if no heavy elements present,
                 or the original basis keyword if not using gen/genecp.
        """
        # Only applies when basis is gen or genecp
        if self.basis not in ["gen", "genecp"]:
            return self.basis

        # Get heavy elements actually present in the molecule
        heavy_elements_in_structure = self.prune_heavy_elements(molecule)

        # If no heavy elements specified or present, use light elements basis
        if (
            heavy_elements_in_structure is None
            or len(heavy_elements_in_structure) == 0
        ):
            # Return light elements basis if available,
            # otherwise return original basis
            if self.light_elements_basis is not None:
                # Remove hyphens for Gaussian compatibility
                # (def2-SVP -> def2svp)
                return self.light_elements_basis.replace("-", "").lower()
            return self.basis

        # Check if any heavy element requires ECP (atomic number > 36)
        for element in heavy_elements_in_structure:
            if pt.requires_ecp(element):
                logger.debug(
                    f"Element {element} requires ECP (Z > 36), using 'genecp'"
                )
                return "genecp"

        # All heavy elements have atomic number <= 36, use 'gen'
        logger.debug(
            f"All heavy elements {heavy_elements_in_structure} have Z <= 36, "
            f"using 'gen'"
        )
        return "gen"

    def _check_solvent(self, solvent_model):
        """
        Validate that the specified solvent model is supported.

        Checks if the provided solvent model is in the list of
        supported Gaussian solvation models.

        Args:
            solvent_model (str): Solvent model to validate.

        Raises:
            ValueError: If solvent model is not supported.
        """
        if solvent_model.lower() not in GAUSSIAN_SOLVATION_MODELS:
            raise ValueError(
                f"The specified solvent model {solvent_model} is not in \n"
                f"the available solvent models: {GAUSSIAN_SOLVATION_MODELS}"
            )


class GaussianIRCJobSettings(GaussianJobSettings):
    """
    Specialized settings for Gaussian IRC (Intrinsic Reaction Coordinate) jobs.

    Extends GaussianJobSettings with IRC-specific parameters for tracking
    reaction pathways from transition states to reactants and products.
    Includes options for predictors, recorrection, and flat IRC calculations.

    Attributes:
        predictor (str): IRC predictor method (e.g., 'LQA', 'Euler').
        recorrect (str): Recorrection strategy (e.g., 'never', 'always').
        direction (str): IRC direction ('forward' or 'reverse').
        recalc_step (int): Interval between energy recalculations.
        maxpoints (int): Maximum number of IRC points to follow.
        maxcycles (int): Max optimization cycles per IRC point.
        stepsize (int): IRC integration step size.
    """

    def __init__(
        self,
        predictor=None,
        recorrect=None,
        recalc_step=6,
        direction=None,
        maxpoints=512,
        maxcycles=128,
        stepsize=20,
        flat_irc=False,
        **kwargs,
    ):
        """
        Initialize IRC-specific Gaussian job settings.

        Configures parameters for IRC calculations including predictor
        methods, recorrection strategies, and integration limits.

        Args:
            predictor (str, optional): Predictor method for IRC integration.
            recorrect (str, optional): Recorrection strategy.
            recalc_step (int): Steps between energy recalculations.
            direction (str, optional): IRC direction
            ('forward'/'reverse'). If None, run both directions.
            maxpoints (int): Maximum number of IRC points.
            maxcycles (int): Maximum optimization cycles per point.
            stepsize (int): IRC integration step size.
            flat_irc (bool): Enable flat IRC calculations.
            **kwargs: Additional arguments for parent class.
        """
        super().__init__(**kwargs)
        self.predictor = predictor
        self.recorrect = recorrect
        self.recalc_step = recalc_step
        self.direction = direction
        self.maxpoints = maxpoints
        self.maxcycles = maxcycles
        self.stepsize = stepsize
        self.flat_irc = flat_irc
        self.freq = False  # turn off freq calc for IRC jobs
        self.forces = False  # turn off forces calculations
        self.route_to_be_written = None

    def _get_route_string_from_jobtype(self):
        """
        Generate IRC-specific route string for Gaussian calculations.

        Constructs the route string with IRC-specific keywords including
        predictor methods, recorrection settings, and integration
        parameters. Handles both regular and flat IRC calculations.

        Returns:
            str: Complete IRC route string.

        Raises:
            ValueError: If predictor and recorrect are inconsistently
                specified.
        """
        route_string = super()._get_route_string_from_jobtype()

        # Configure flat IRC parameters with appropriate defaults
        if self.flat_irc:
            logger.debug("Configuring flat IRC calculation parameters")
            # default route for flat IRC run if no options are specified
            self.predictor = (
                "LQA" if self.predictor is None else self.predictor
            )
            self.recorrect = (
                "never" if self.recorrect is None else self.recorrect
            )
            self.recalc_step = (
                -5 if self.recalc_step == 6 else self.recalc_step
            )
            logger.debug(
                f"Flat IRC settings - predictor: {self.predictor}, "
                f"recorrect: {self.recorrect}, recalc_step: {self.recalc_step}"
            )

        # Write job type specific route for IRC direction
        if self.jobtype == "ircf":
            self.direction = "forward"
            logger.debug("Set IRC direction to forward")
        elif self.jobtype == "ircr":
            self.direction = "reverse"
            logger.debug("Set IRC direction to reverse")

        if self.predictor is not None and self.recorrect is not None:
            route_string += (
                f" irc({self.predictor},calcfc,recorrect={self.recorrect},"
                f"recalc={self.recalc_step},"
                f"stepsize={self.stepsize},{self.direction},"
                f"maxpoints={self.maxpoints},maxcycle={self.maxcycles})"
            )
            logger.debug(
                f"Added IRC route with predictor {self.predictor} and "
                f"recorrect {self.recorrect}"
            )
        elif self.predictor is None and self.recorrect is None:
            route_string += (
                f" irc(calcfc,recalc={self.recalc_step},{self.direction},"
                f"maxpoints={self.maxpoints},maxcycle={self.maxcycles})"
            )
            logger.debug("Added basic IRC route without predictor/recorrect")
        else:
            logger.error(
                f"Inconsistent IRC predictor/recorrect specification - "
                f"predictor: {self.predictor}, recorrect: {self.recorrect}"
            )
            raise ValueError(
                f"Only one of predictor type and recorrect is specified, "
                f"please check!\n"
                f"Predictor: {self.predictor}; Recorrect: {self.recorrect}"
            )

        if self.additional_route_parameters is not None:
            # Check if the additional parameters
            # are already in the route string
            # to avoid duplication (e.g., scf=qc appearing twice)
            additional_params = self.additional_route_parameters.strip()
            if additional_params not in route_string:
                route_string += f" {additional_params}"
            else:
                logger.debug(
                    f"Additional route parameters '{additional_params}' already present in route string"
                )

        return route_string


class GaussianLinkJobSettings(GaussianJobSettings):
    """
    Specialized settings for Gaussian multi-step link calculations.

    Extends GaussianJobSettings for calculations that require multiple
    linked steps, such as stability analysis followed by optimization.
    Manages checkpoint file usage and guess orbital specifications.

    Attributes:
        link (bool): Whether to use link job functionality.
        link_route (str): Custom route string for the link step.
        stable (str): Stability analysis type ('opt', 'qrhf', etc.).
        guess (str): Initial guess method ('mix', 'read', etc.).
    """

    def __init__(
        self,
        link=True,
        link_route=None,
        stable="opt",
        guess="mix",
        # IRC-specific parameters for IRC link jobs
        predictor=None,
        recorrect=None,
        recalc_step=6,
        direction=None,
        maxpoints=512,
        maxcycles=128,
        stepsize=20,
        flat_irc=False,
        **kwargs,
    ):
        """
        Initialize link job specific Gaussian settings.

        Configures parameters for multi-step calculations that use
        checkpoint files to pass information between calculation steps.

        Args:
            link (bool): Enable link job functionality.
            link_route (str, optional): Custom route for link step.
            stable (str): Stability analysis method.
            guess (str): Initial orbital guess method.
            predictor (str, optional): IRC predictor method for IRC link jobs.
            recorrect (str, optional): IRC
            recorrection strategy for IRC link jobs.
            recalc_step (int): Steps between energy recalculations for IRC.
            direction (str, optional): IRC direction ('forward'/'reverse').
            maxpoints (int): Maximum number of IRC points.
            maxcycles (int): Maximum optimization cycles per IRC point.
            stepsize (int): IRC integration step size.
            flat_irc (bool): Enable flat IRC calculations.
            **kwargs: Additional arguments for parent class.
        """
        super().__init__(**kwargs)
        self.link = link
        self.link_route = link_route
        self.stable = stable
        self.guess = guess

        # IRC-specific parameters (will be None for non-IRC link jobs)
        self.predictor = predictor
        self.recorrect = recorrect
        self.recalc_step = recalc_step
        self.direction = direction
        self.maxpoints = maxpoints
        self.maxcycles = maxcycles
        self.stepsize = stepsize
        self.flat_irc = flat_irc

    @property
    def link_route_string(self):
        """
        Generate the route string for the link calculation step.

        Creates the route string for the second step in a link job,
        ensuring proper geometry and orbital guess specifications
        for continuation from the previous step.

        Returns:
            str: Route string for the link calculation step.
        """
        if self.link_route is not None:
            link_route_string = self.link_route
            if self.functional not in self.link_route:
                link_route_string += f" {self.functional}"
            if self.basis not in self.link_route:
                link_route_string += f" {self.basis}"
            if "geom=check" not in self.link_route:
                link_route_string += " geom=check"
            if "guess=read" not in self.link_route:
                link_route_string += " guess=read"
            logger.debug(
                f"Link route for settings {self}: {link_route_string}"
            )
            return link_route_string

        link_route_string = self._get_link_route_string_from_jobtype()
        logger.debug(f"Link route for settings {self}: {link_route_string}")
        return link_route_string

    def _get_route_string_from_jobtype(self):
        """
        Generate route string for the initial stability analysis step.

        Creates the first route string in a link job by removing
        optimization and frequency keywords and adding stability
        analysis and guess method specifications.

        Returns:
            str: Route string for stability analysis step.
        """
        route_string = super()._get_route_string_from_jobtype()
        # Remove opt keywords
        route_string_final = re.sub(
            gaussian_opt_keywords_pattern,
            " ",
            route_string,
            flags=re.IGNORECASE,
        )
        # Remove freq keywords
        route_string_final = re.sub(
            gaussian_freq_keywords_pattern,
            " ",
            route_string_final,
            flags=re.IGNORECASE,
        )
        # Clean up multiple spaces
        route_string_final = re.sub(
            multiple_spaces_pattern, " ", route_string_final
        ).strip()

        if self.stable:
            logger.debug(f"Stable: {self.stable}")
            route_string_final += f" stable={self.stable}"
        if self.guess:
            logger.debug(f"Guess: {self.guess}")
            # Normalize: strip any surrounding parentheses and whitespace so
            # that user input like '(mix,always)' is treated the same as
            # 'mix,always' and never produces double parentheses.
            guess_normalized = self.guess.strip()
            if guess_normalized.startswith("(") and guess_normalized.endswith(
                ")"
            ):
                guess_normalized = guess_normalized[1:-1].strip()
            if "," in guess_normalized:
                route_string_final += f" guess=({guess_normalized})"
            else:
                route_string_final += f" guess={guess_normalized}"

        return route_string_final

    def _get_link_route_string_from_jobtype(self):
        """
        Generate route string for the optimization/IRC step in link jobs.

        Creates the second route string that continues from the
        stability analysis by adding checkpoint geometry and orbital
        reading specifications. Handles IRC
        jobs by using GaussianIRCJobSettings.

        Returns:
            str: Route string for the optimization/IRC step.
        """
        # Special handling for IRC jobs - use GaussianIRCJobSettings
        if self.jobtype in ["ircf", "ircr"]:
            # Create a temporary GaussianIRCJobSettings
            # instance with current settings
            irc_settings = GaussianIRCJobSettings(**self.__dict__)
            # Get the IRC route string from the specialized class
            route_string = irc_settings._get_route_string_from_jobtype()
        else:
            # For non-IRC jobs, use the existing logic
            route_string = super()._get_route_string_from_jobtype()

        if "geom=check" not in route_string:
            route_string += " geom=check"
        if "guess=read" not in route_string:
            route_string += " guess=read"
        return route_string


class GaussianTDDFTJobSettings(GaussianJobSettings):
    """
    Specialized settings for Gaussian TD-DFT excited state calculations.

    Extends GaussianJobSettings with parameters specific to time-dependent
    density functional theory calculations for excited states, including
    state selection, solvation effects, and transition analysis.

    Attributes:
        states (str): Type of excited states ('singlets', 'triplets').
        root (int): Specific excited state root to analyze.
        nstates (int): Number of excited states to calculate.
        eqsolv (str): Equilibrium solvation treatment.
    """

    def __init__(
        self, states="singlets", root=1, nstates=3, eqsolv=None, **kwargs
    ):
        """
        Initialize TD-DFT specific Gaussian job settings.

        Configures parameters for excited state calculations including
        the number of states, state types, and solvation effects.

        Args:
            states (str): Type of excited states to calculate.
            root (int): Specific excited state root for analysis.
            nstates (int): Number of excited states to compute.
            eqsolv (str, optional): Equilibrium solvation option
                ('eqsolv' or 'noneqsolv').
            **kwargs: Additional arguments for parent class.
        """
        super().__init__(**kwargs)
        self.states = states
        self.root = root
        self.nstates = nstates
        self.eqsolv = eqsolv

    def _get_route_string_from_jobtype(self):
        """
        Generate TD-DFT specific route string for excited state calculations.

        Constructs the route string with TD-DFT keywords including
        state types, number of states, root selection, and solvation
        equilibrium options.

        Returns:
            str: Complete TD-DFT route string.

        Raises:
            AssertionError: If eqsolv option is not valid.
        """
        route_string = super()._get_route_string_from_jobtype()

        if self.eqsolv is None:
            eqsolv = ""
        else:
            eqsolv_options = ["eqsolv", "noneqsolv"]
            assert (
                self.eqsolv.lower() in eqsolv_options
            ), f"Possible equilibrium solvation options are: {eqsolv_options}!"
            eqsolv = f",{self.eqsolv}"

        route_string += f" TD({self.states},nstates={self.nstates},root={self.root}{eqsolv})"

        return route_string


[docs] class GaussianQMMMJobSettings(GaussianJobSettings): """ Configuration settings for Gaussian QM/MM calculations using ONIOM methodology. This class manages all parameters needed to set up multi-layer ONIOM calculations in Gaussian, which partition molecular systems into different regions treated with varying levels of theory. The ONIOM approach enables accurate quantum mechanical treatment of chemically active regions while efficiently handling large molecular environments with molecular mechanics. The class supports 2-layer and 3-layer ONIOM calculations: **2-Layer ONIOM**: High(QM):Low(MM) - High level: Quantum mechanics (DFT, ab initio, etc.) - Low level: Molecular mechanics force fields **3-Layer ONIOM**: High(QM):Medium(QM):Low(MM) - High level: High-accuracy quantum mechanics - Medium level: Lower-cost quantum mechanics - Low level: Molecular mechanics force fields Key Features: - Flexible layer definition with atom selection - Support for mixed QM/MM and QM/QM/MM schemes - Automatic link atom handling for covalent boundaries - Customizable scale factors for link atom placement - Integration with popular force fields (AMBER, UFF, etc.) - Multiple charge/multiplicity specifications per layer Attributes: jobtype (str, optional): Type of ONIOM calculation ('sp', 'opt', 'freq', 'ts', 'irc', 'modred', 'scan'). When using CLI commands, this is automatically inferred from the parent command (e.g., 'chemsmart sub gaussian opt qmmm' sets jobtype='opt'). Level-specific theory parameters: high_level_functional (str): DFT functional for high layer (e.g., 'B3LYP', 'M06-2X') high_level_basis (str): Basis set for high layer (e.g., '6-31G*', 'def2-TZVP') high_level_force_field (str): Force field for high layer (if MM) medium_level_functional (str): DFT functional for medium layer medium_level_basis (str): Basis set for medium layer medium_level_force_field (str): Force field for medium layer low_level_functional (str): DFT functional for low layer (if QM) low_level_basis (str): Basis set for low layer (if QM) low_level_force_field (str): Force field for low layer (usually MM) Charge and multiplicity specifications: charge_total/mult_total (int): Full system properties (legacy: charge_total/real_multiplicity) charge_intermediate/mult_intermediate (int): Intermediate system properties (legacy: int_charge/int_multiplicity) charge_high/mult_high (int): Model/high-layer properties (legacy: model_charge/model_multiplicity) Atom partitioning: high_level_atoms (list/str): Atoms in high layer (1-indexed) medium_level_atoms (list/str): Atoms in medium layer (1-indexed) low_level_atoms (list/str): Atoms in low layer (1-indexed) bonded_atoms (list): Covalent bonds crossing layer boundaries scale_factors (dict): Custom scale factors for link atom placement Examples: 2-layer enzyme active site calculation: >>> settings = GaussianQMMMJobSettings( ... jobtype='opt', ... high_level_functional='B3LYP', ... high_level_basis='6-31G*', ... low_level_force_field='AMBER=HardFirst', ... charge_total=0, ... real_multiplicity=1, ... high_level_atoms=[1, 2, 3, 4, 5], # Active site residues ... bonded_atoms=[(5, 6)], # QM-MM boundary bond ... ) 3-layer organometallic catalyst: >>> settings = GaussianQMMMJobSettings( ... jobtype='freq', ... high_level_functional='M06-2X', ... high_level_basis='def2-TZVP', ... medium_level_functional='B3LYP', ... medium_level_basis='6-31G*', ... low_level_force_field='UFF', ... charge_total=-1, ... real_multiplicity=2, ... high_level_atoms='1-10', # Metal center and ligands ... medium_level_atoms='11-50', # Extended coordination sphere ... bonded_atoms=[(10,11), (50,51)], ... scale_factors={(10,11): [0.709, 0.709, 0.709]} ... ) Note: The real/intermediate/model charge and multiplicity specifications follow ONIOM conventions where: - Real system: Complete molecular system - Intermediate system: High+Medium layers (3-layer only) - Model system: High layer only Scale factors control link atom placement and default to covalent radii ratios if not specified. The format is {(atom1,atom2): [low,medium,high]}. See Also: - GaussianQMMMJob: Job execution class for QM/MM calculations - GaussianJobSettings: Base class for Gaussian job configuration - Molecule: Molecular structure with QM/MM partitioning information """ def __init__( self, jobtype=None, parent_jobtype=None, high_level_functional=None, high_level_basis=None, high_level_force_field=None, medium_level_functional=None, medium_level_basis=None, medium_level_force_field=None, low_level_functional=None, low_level_basis=None, low_level_force_field=None, charge_total=None, mult_total=None, charge_intermediate=None, mult_intermediate=None, charge_high=None, mult_high=None, real_charge=None, real_multiplicity=None, int_charge=None, int_multiplicity=None, model_charge=None, model_multiplicity=None, high_level_atoms=None, medium_level_atoms=None, low_level_atoms=None, bonded_atoms=None, scale_factors=None, **kwargs, ): """ Initialize Gaussian QM/MM job settings for ONIOM calculations. Args: jobtype (str, optional): Type of ONIOM calculation to perform. Options: 'sp' (single-point), 'opt' (optimization), 'freq' (frequency), 'ts' (transition state), 'irc' (intrinsic reaction coordinate), 'modred' (redundant coordinates), 'scan' (coordinate scan) Note: When using the CLI with `chemsmart sub gaussian <jobtype> qmmm`, the jobtype is automatically inferred from the parent command and does not need to be specified manually. Theory level parameters: high_level_functional (str): DFT functional for high QM layer (e.g., 'B3LYP', 'M06-2X', 'wB97X-D', 'PBE0') high_level_basis (str): Basis set for high QM layer (e.g., '6-31G*', '6-311++G(d,p)', 'def2-TZVP', 'cc-pVTZ') high_level_force_field (str): Force field for high MM layer (rare) medium_level_functional (str): DFT functional for medium QM layer medium_level_basis (str): Basis set for medium QM layer medium_level_force_field (str): Force field for medium MM layer low_level_functional (str): DFT functional for low QM layer (uncommon) low_level_basis (str): Basis set for low QM layer (uncommon) low_level_force_field (str): Force field for low MM layer (e.g., 'AMBER=HardFirst', 'UFF', 'DREIDING', 'MM3') Charge and multiplicity: charge_total (int): Total charge of complete molecular system (legacy: charge_total) mult_total (int): Spin multiplicity of complete system (legacy: real_multiplicity) charge_intermediate (int): Charge of high+medium layers (legacy: int_charge) mult_intermediate (int): Multiplicity of high+medium layers (legacy: int_multiplicity) charge_high (int): Charge of high layer only (legacy: model_charge) mult_high (int): Multiplicity of high layer only (legacy: model_multiplicity) Atom partitioning: high_level_atoms (list/str): Atoms in high layer. Can be: - List of integers: [1, 2, 3, 5, 8] - Range string: "1-10,15,20-25" - List of ranges: ["1-10", "15", "20-25"] medium_level_atoms (list/str): Atoms in medium layer (3-layer only) low_level_atoms (list/str): Atoms in low layer (usually auto-assigned) bonded_atoms (list): Covalent bonds crossing layer boundaries. Format: [(atom1, atom2), (atom3, atom4)] where atoms are 1-indexed scale_factors (dict): Custom link atom scale factors. Format: {(atom1, atom2): [scale_low, scale_medium, scale_high]} Values typically range from 0.5-1.0, common value is 0.709 **kwargs: Additional keyword arguments passed to parent GaussianJobSettings Note: - All atom indices are 1-based following Gaussian conventions - If only 2 layers specified, use high_level_* and low_level_* parameters - Force fields must be available in your Gaussian installation - Link atoms are automatically placed for bonded_atoms specifications - The parent class 'functional' and 'basis' attributes are set to high_level values """ super().__init__(**kwargs) self.jobtype = jobtype self.parent_jobtype = parent_jobtype self.high_level_functional = high_level_functional self.high_level_basis = high_level_basis self.high_level_force_field = high_level_force_field self.medium_level_functional = medium_level_functional self.medium_level_basis = medium_level_basis self.medium_level_force_field = medium_level_force_field self.low_level_functional = low_level_functional self.low_level_basis = low_level_basis self.low_level_force_field = low_level_force_field # Canonical charge/multiplicity names # (aligned with ORCA) with legacy fallbacks self.charge_total = charge_total if self.charge_total is None: self.charge_total = real_charge self.mult_total = mult_total if self.mult_total is None: self.mult_total = real_multiplicity self.charge_intermediate = charge_intermediate if self.charge_intermediate is None: self.charge_intermediate = int_charge self.mult_intermediate = mult_intermediate if self.mult_intermediate is None: self.mult_intermediate = int_multiplicity self.charge_high = charge_high if self.charge_high is None: self.charge_high = model_charge self.mult_high = mult_high if self.mult_high is None: self.mult_high = model_multiplicity # Maintain legacy attribute names for backward compatibility self.real_charge = self.charge_total self.real_multiplicity = self.mult_total self.int_charge = self.charge_intermediate self.int_multiplicity = self.mult_intermediate self.model_charge = self.charge_high self.model_multiplicity = self.mult_high self.high_level_atoms = high_level_atoms self.medium_level_atoms = medium_level_atoms self.low_level_atoms = low_level_atoms self.bonded_atoms = bonded_atoms self.scale_factors = scale_factors self.title = "Gaussian QM/MM job" if self.charge_total is not None and self.mult_total is not None: # the charge and multiplicity of the real system equal to # that of the low_level_charge and low_level_multiplicity self.charge = self.charge_total self.multiplicity = self.mult_total @property def charge_and_multiplicity_string(self): """Obtain charge and multiplicity string.""" return self._get_charge_and_multiplicity()
[docs] def validate_and_assign_level( self, functional, basis, force_field, level_name ): """Validates functional and basis set for a given level and returns formatted theory string. Return level of theory if both functional and basis are specified, or force field if both are not specified. """ if functional and basis and force_field: raise ValueError( f"For {level_name} level of theory, one should specify only functional/basis or force field!" ) if force_field: assert functional is None and basis is None, ( f"Force field is given for {level_name} level of theory, " f"thus no functional and basis should be given!" ) level_of_theory = force_field else: # if force field is not given, then # functional and basis can be given, # so that level of theory takes functional and basis set if functional and basis: level_of_theory = f"{functional}/{basis}" else: # but functional and basis set can # also not be given, in which case, # all 3 are None and overall level # of theory for that layer is None. level_of_theory = None logger.debug( f"Obtained level of theory {level_of_theory} for {level_name} level." ) return level_of_theory
def _get_route_string_from_jobtype(self): """Generate QM/MM route string with job type, freq, and ONIOM specification.""" route_string = "#" if self.dieze_tag: route_string += self.dieze_tag # Add job type parent_jobtype = ( self.parent_jobtype.lower() if self.parent_jobtype else None ) # Normalise: treat empty / whitespace-only values the same as None so # we never emit invalid keywords like opt=() or opt=(modredundant,). _raw = self.additional_opt_options_in_route extra_opt = _raw.strip() if isinstance(_raw, str) else _raw if not extra_opt: # catches None, "", " " extra_opt = None if parent_jobtype == "opt": if extra_opt is not None: route_string += f" opt=({extra_opt})" else: route_string += " opt" elif parent_jobtype == "scan" or parent_jobtype == "modred": if extra_opt is not None: route_string += f" opt=(modredundant,{extra_opt})" else: route_string += " opt=modredundant" elif parent_jobtype == "ts": if extra_opt is not None: if "calcall" in extra_opt: route_string += f" opt=(ts,noeigentest,{extra_opt})" else: route_string += f" opt=(ts,calcfc,noeigentest,{extra_opt})" else: route_string += " opt=(ts,calcfc,noeigentest)" elif parent_jobtype == "irc": route_string += " irc" # sp doesn't add any keyword # Add freq if enabled (and not already a freq job) if (self.freq or self.numfreq) and parent_jobtype != "freq": route_string += " freq" # Add ONIOM level of theory string oniom_string = self._get_oniom_string() route_string += oniom_string # Add solvation if specified if self.solvent_model is not None and self.solvent_id is not None: route_string += ( f" scrf=({self.solvent_model},solvent={self.solvent_id})" ) # Append additional route parameters (e.g., from -r CLI flag) if self.additional_route_parameters is not None: additional_params = self.additional_route_parameters.strip() if additional_params not in route_string: route_string += f" {additional_params}" logger.debug( f"Added additional route parameters: {additional_params}" ) else: logger.debug( f"Additional route parameters '{additional_params}' " "already present in route string" ) return route_string def _get_oniom_string(self): """Get ONIOM level of theory string.""" high_level_of_theory = self.validate_and_assign_level( self.high_level_functional, self.high_level_basis, self.high_level_force_field, level_name="high", ) medium_level_of_theory = self.validate_and_assign_level( self.medium_level_functional, self.medium_level_basis, self.medium_level_force_field, level_name="medium", ) low_level_of_theory = self.validate_and_assign_level( self.low_level_functional, self.low_level_basis, self.low_level_force_field, level_name="low", ) # Build ONIOM string with proper parentheses handling levels = [] if high_level_of_theory is not None: levels.append(high_level_of_theory) if medium_level_of_theory is not None: levels.append(medium_level_of_theory) if low_level_of_theory is not None: levels.append(low_level_of_theory) if levels: oniom_string = f" oniom({':'.join(levels)})" else: oniom_string = " oniom" return oniom_string
[docs] def get_qmmm_level_of_theory_string(self): """Get ONIOM level of theory for route string. Deprecated: Use _get_route_string_from_jobtype instead. Note: jobtype is now inferred from the parent command when using CLI, so it should always be set. If not set, defaults to basic ONIOM route. """ if self.jobtype is None: logger.warning( "Job type not specified for ONIOM job. " "Using basic route string without job-specific keywords." ) return self._get_route_string_from_jobtype()
def _get_charge_and_multiplicity(self): """Obtain charge and multiplicity string. For two-layer ONIOM jobs, the format for this input line is: chrg_real-low spin_real-low [chrg_model-high spin_model-high [chrg_model-low spin_model-low [chrg_real-high spin_real-high]]] Fourth pair applies only to ONIOM=SValue calculations. When only a single value pair is specified, all levels will use those values. If two pairs of values are included, then third pair defaults to same values as second pair. If final pair is omitted for an S-value job, it defaults to values for the real system at low level. For such two-layer ONIOM jobs, users are required to specify the charge and multiplicity of high-level layer and low-level layer, instead high and medium level. For 3-layers ONIOM, the format is: cRealL sRealL [cIntM sIntM [cIntL sIntL [cModH sModH [cModM sModM [cModL sModL]]]]] Real, Int=Intermediate system, and Mod=Model system, and second character is one of: H, M and L for the High, Medium and Low levels). """ assert ( self.charge_total is not None and self.mult_total is not None ), "Charge and multiplicity for the real system must be specified!" real_low_charge = self.charge_total real_low_multiplicity = self.mult_total int_med_charge = self.charge_intermediate int_med_multiplicity = self.mult_intermediate int_low_charge = self.charge_intermediate int_low_multiplicity = self.mult_intermediate model_high_charge = self.charge_high model_high_multiplicity = self.mult_high model_med_charge = self.charge_high model_med_multiplicity = self.mult_high model_low_charge = self.charge_high model_low_multiplicity = self.mult_high # two-layer ONIOM model if ( self.validate_and_assign_level( self.medium_level_functional, self.medium_level_basis, self.medium_level_force_field, level_name="medium", ) is None or self.validate_and_assign_level( self.low_level_functional, self.low_level_basis, self.low_level_force_field, level_name="low", ) is None ): charge_and_multiplicity_list = [ real_low_charge, real_low_multiplicity, model_high_charge, model_high_multiplicity, model_low_charge, model_low_multiplicity, ] if all(var is None for var in charge_and_multiplicity_list[2:]): for i in range(2, len(charge_and_multiplicity_list), 2): charge_and_multiplicity_list[i] = real_low_charge charge_and_multiplicity_list[i + 1] = real_low_multiplicity elif all(var is None for var in charge_and_multiplicity_list[4:]): for i in range(4, len(charge_and_multiplicity_list), 2): charge_and_multiplicity_list[i] = model_high_charge charge_and_multiplicity_list[i + 1] = ( model_high_multiplicity ) elif all(var is not None for var in charge_and_multiplicity_list): pass else: raise ValueError( "The charge and multiplicity of lower level-of-theory cannot override the higher ones!" ) updated_list = [] for charge_and_multiplicity in charge_and_multiplicity_list: updated_list.append(str(charge_and_multiplicity)) charge_and_multiplicity = " ".join(updated_list) else: # three-layer ONIOM model charge_and_multiplicity_list = [ real_low_charge, real_low_multiplicity, int_med_charge, int_med_multiplicity, int_low_charge, int_low_multiplicity, model_high_charge, model_high_multiplicity, model_med_charge, model_med_multiplicity, model_low_charge, model_low_multiplicity, ] # Defaults for missing charge / spin multiplicity # pairs are taken from the next highest # calculation level and / or system size. if all(var is None for var in charge_and_multiplicity_list[2:]): # only charge and multiplicity of real system is specified, # so the charge and multiplicity of other # systems will be the same as the real system for i in range(2, len(charge_and_multiplicity_list), 2): charge_and_multiplicity_list[i] = real_low_charge charge_and_multiplicity_list[i + 1] = real_low_multiplicity elif all(var is None for var in charge_and_multiplicity_list[4:]): # only charge and multiplicity of real # system and that of intermediate layer, # medium level-of-theory are specified, # the charge and multiplicity of other # systems will be the same as the intermediate layer for i in range(4, len(charge_and_multiplicity_list), 2): charge_and_multiplicity_list[i] = int_med_charge charge_and_multiplicity_list[i + 1] = int_med_multiplicity elif all(var is None for var in charge_and_multiplicity_list[6:]): # only charge and multiplicity of real system, # intermediate layer, medium level-of-theory # and intermediate layer, medium level-of-theory are # specified, the charge and multiplicity of other # systems will be the same as intermediate # layer, medium level-of-theory,... for i in range(6, len(charge_and_multiplicity_list), 2): charge_and_multiplicity_list[i] = int_med_charge charge_and_multiplicity_list[i + 1] = int_med_multiplicity elif all(var is None for var in charge_and_multiplicity_list[8:]): # the rest systems will follow the # model system, high level-of-theory for i in range(8, len(charge_and_multiplicity_list), 2): charge_and_multiplicity_list[i] = model_high_charge charge_and_multiplicity_list[i + 1] = ( model_high_multiplicity ) elif all(var is None for var in charge_and_multiplicity_list[10:]): charge_and_multiplicity_list[-2] = model_med_charge charge_and_multiplicity_list[-1] = model_med_multiplicity elif all(var is not None for var in charge_and_multiplicity_list): pass else: raise ValueError( "The charge and multiplicity of lower level-of-theory cannot override the higher ones!" ) updated_list = [] for charge_and_multiplicity in charge_and_multiplicity_list: updated_list.append(str(charge_and_multiplicity)) charge_and_multiplicity = " ".join(updated_list) return charge_and_multiplicity def __eq__(self, other): """ Compare two GaussianQMMMJobSettings objects for equality. Compares all attributes between two QMMM settings objects, including the QMMM-specific attributes that are not present in the parent class. Args: other (GaussianQMMMJobSettings): Settings object to compare with. Returns: bool or NotImplemented: True if equal, False if different, NotImplemented if types don't match. """ if type(self) is not type(other): return NotImplemented # Get dictionaries of both objects self_dict = self.__dict__.copy() other_dict = other.__dict__.copy() # Exclude append_additional_info from # the comparison (inherited behavior) self_dict.pop("append_additional_info", None) other_dict.pop("append_additional_info", None) is_equal = self_dict == other_dict if not is_equal: import dictdiffer logger.info("Gaussian QMMM job settings are not equal.") for diff in list(dictdiffer.diff(self_dict, other_dict)): logger.info(f"Difference: {diff}") return self_dict == other_dict