from ase import Atoms
from chemsmart.io.gaussian.gengenecp import GenGenECPSection
from chemsmart.io.molecules.structure import Molecule
from chemsmart.jobs.gaussian.settings import GaussianJobSettings
from chemsmart.utils.periodictable import PeriodicTable
from chemsmart.utils.utils import two_files_have_similar_contents
[docs]
class TestGaussianGenGenECP:
[docs]
def test_genecp_fallback_when_basis_missing_from_bse(self, caplog):
genecp_section = GenGenECPSection.from_bse_api(
light_elements=["H", "C", "N", "O", "P", "Cl"],
light_elements_basis="6-31G*",
heavy_elements=["Pd"],
heavy_elements_basis="nonexistent-basis-for-test",
)
assert genecp_section.string_list[0] == "H C N O P Cl 0"
assert genecp_section.string_list[1] == "6-31g*"
assert genecp_section.string_list[2] == "****"
assert genecp_section.string_list[3] == "Pd 0"
assert genecp_section.string_list[4] == "nonexistent-basis-for-test"
assert genecp_section.string_list[5] == "****"
assert genecp_section.string_list[6] == ""
assert genecp_section.string_list[7] == "Pd 0"
assert genecp_section.string_list[8] == "nonexistent-basis-for-test"
assert "Falling back to Gaussian basis keyword section format" in str(
caplog.messages
)
[docs]
def test_gen_fallback_when_heavy_element_does_not_require_ecp(
self, caplog
):
genecp_section = GenGenECPSection.from_bse_api(
light_elements=["H", "C", "N", "O"],
light_elements_basis="6-311+G**",
heavy_elements=["Ni"],
heavy_elements_basis="nonexistent-basis-for-test",
)
assert genecp_section.string_list[0] == "H C N O 0"
assert genecp_section.string_list[1] == "6-311+g**"
assert genecp_section.string_list[2] == "****"
assert genecp_section.string_list[3] == "Ni 0"
assert genecp_section.string_list[4] == "nonexistent-basis-for-test"
assert genecp_section.string_list[5] == "****"
assert "Falling back to Gaussian basis keyword section format" in str(
caplog.messages
)
[docs]
def test_genecp_from_base_api(
self, tmpdir, reference_genecp_txt_file_from_api
):
genecp_section = GenGenECPSection.from_bse_api(
light_elements=["C", "H", "O"],
light_elements_basis="def2-SVP",
heavy_elements=["Pd"],
heavy_elements_basis="def2-TZVPPD",
)
assert genecp_section.heavy_elements == ["Pd"]
assert genecp_section.heavy_elements_basis == "def2-TZVPPD".lower()
assert genecp_section.light_elements == ["H", "C", "O"]
assert (
genecp_section.light_elements_basis == "def2svp"
) # no hyphen in this one, since this is the required format for Gausian input
# temp file for writing genecp section
genecp_txt_file_from_api_strings = tmpdir.join("genecp_from_api.txt")
with open(genecp_txt_file_from_api_strings, "w") as f:
for line in genecp_section.string_list:
f.write(f"{line}\n")
assert two_files_have_similar_contents(
reference_file=reference_genecp_txt_file_from_api,
output_file=genecp_txt_file_from_api_strings,
ignored_string="Version",
)
[docs]
def test_genecp_from_base_api_accepts_hyphenless_def2_heavy_basis(self):
with_hyphen = GenGenECPSection.from_bse_api(
light_elements=["C", "H", "O"],
light_elements_basis="def2-SVP",
heavy_elements=["Pd"],
heavy_elements_basis="def2-SVPD",
)
without_hyphen = GenGenECPSection.from_bse_api(
light_elements=["C", "H", "O"],
light_elements_basis="def2-SVP",
heavy_elements=["Pd"],
heavy_elements_basis="def2SVPD",
)
assert with_hyphen.heavy_elements_basis == "def2-svpd"
assert with_hyphen.light_elements_basis == "def2svp"
assert without_hyphen.heavy_elements_basis == "def2-svpd"
assert without_hyphen.light_elements_basis == "def2svp"
[docs]
def test_genecp_from_comfile(
self, tmpdir, gaussian_opt_genecp_inputfile, genecp_txt_file_from_web
):
genecp_section = GenGenECPSection.from_comfile(
gaussian_opt_genecp_inputfile
)
assert genecp_section.heavy_elements == ["Pd"]
assert genecp_section.heavy_elements_basis == "Def2-TZVPPD".lower()
assert set(genecp_section.light_elements) == {"C", "H", "O"}
assert genecp_section.light_elements_basis == "def2svp"
# temp file for writing genecp section
genecp_txt_file_from_web_strings = tmpdir.join("genecp_from_api.txt")
with open(genecp_txt_file_from_web_strings, "w") as f:
for line in genecp_section.string_list:
f.write(f"{line}\n")
assert two_files_have_similar_contents(
reference_file=genecp_txt_file_from_web,
output_file=genecp_txt_file_from_web_strings,
ignored_string="Version",
)
[docs]
def test_genecp_from_modred_gen_comfile(
self, tmpdir, modred_gen_inputfile, gen_txt_file_from_web
):
genecp_section = GenGenECPSection.from_comfile(modred_gen_inputfile)
assert genecp_section.heavy_elements == ["Br"]
assert genecp_section.heavy_elements_basis == "Def2-TZVPPD".lower()
assert set(genecp_section.light_elements) == {"C", "H", "O"}
assert genecp_section.light_elements_basis == "def2svp"
# temp file for writing genecp section
genecp_txt_file_from_web_strings = tmpdir.join("genecp_from_api.txt")
with open(genecp_txt_file_from_web_strings, "w") as f:
for line in genecp_section.string_list:
f.write(f"{line}\n")
assert two_files_have_similar_contents(
reference_file=gen_txt_file_from_web,
output_file=genecp_txt_file_from_web_strings,
ignored_string="Version",
)
[docs]
def test_genecp_from_modred_genecp_comfile(self, modred_genecp_inputfile):
genecp_section = GenGenECPSection.from_comfile(modred_genecp_inputfile)
assert genecp_section.heavy_elements == ["Pd"]
assert genecp_section.heavy_elements_basis == "Def2-TZVPPD".lower()
assert set(genecp_section.light_elements) == {"C", "H", "O"}
assert genecp_section.light_elements_basis == "def2svp"
[docs]
def test_genecp_from_modred_genecp_solvent_comfile(
self, modred_genecp_custom_solvent_inputfile
):
genecp_section = GenGenECPSection.from_comfile(
modred_genecp_custom_solvent_inputfile
)
assert genecp_section.heavy_elements == ["Pd"]
assert genecp_section.heavy_elements_basis == "Def2-TZVPPD".lower()
assert set(genecp_section.light_elements) == {
"C",
"H",
"Cl",
"N",
"O",
"S",
}
assert genecp_section.light_elements_basis == "def2svp"
[docs]
class TestGenGenECPBasisDetermination:
"""Test automatic determination of gen vs genecp basis keywords
based on elements present."""
[docs]
def test_periodic_table_requires_ecp(self):
"""Test that requires_ecp correctly
identifies elements needing ECPs."""
pt = PeriodicTable()
# Elements with Z <= 36 should not require ECP
assert not pt.requires_ecp("H") # Z=1
assert not pt.requires_ecp("C") # Z=6
assert not pt.requires_ecp("Br") # Z=35
assert not pt.requires_ecp("Kr") # Z=36 (last element in period 4)
# Elements with Z > 36 should require ECP
assert pt.requires_ecp("Rb") # Z=37 (first element in period 5)
assert pt.requires_ecp("I") # Z=53
assert pt.requires_ecp("Pd") # Z=46
assert pt.requires_ecp("Ir") # Z=77
[docs]
def test_determine_basis_keyword_with_br(self):
"""Test that molecules with only Br use 'gen' instead of 'genecp'."""
# Create molecule with Br
br_molecule = Atoms(
"BrC2H4O2",
positions=[
[0, 0, 0],
[1, 0, 0],
[2, 0, 0],
[3, 0, 0],
[4, 0, 0],
[5, 0, 0],
[6, 0, 0],
[7, 0, 0],
[8, 0, 0],
],
)
symbols = br_molecule.get_chemical_symbols()
positions = br_molecule.get_positions()
br_mol = Molecule(
symbols=symbols, positions=positions, charge=0, multiplicity=1
)
# Create settings with genecp basis
settings = GaussianJobSettings(
functional="mn15",
basis="genecp",
heavy_elements=["Ir", "Br"],
heavy_elements_basis="def2-SVPD",
light_elements_basis="def2SVP",
charge=0,
multiplicity=1,
)
# Determine basis should return 'gen' since Br (Z=35) <= 36
determined_basis = settings.determine_basis_keyword(br_mol)
assert determined_basis == "gen"
[docs]
def test_determine_basis_keyword_with_i(self):
"""Test that molecules with I use 'genecp'."""
# Create molecule with I
i_molecule = Atoms(
"IC2H4O2",
positions=[
[0, 0, 0],
[1, 0, 0],
[2, 0, 0],
[3, 0, 0],
[4, 0, 0],
[5, 0, 0],
[6, 0, 0],
[7, 0, 0],
[8, 0, 0],
],
)
symbols = i_molecule.get_chemical_symbols()
positions = i_molecule.get_positions()
i_mol = Molecule(
symbols=symbols, positions=positions, charge=0, multiplicity=1
)
# Create settings with genecp basis
settings = GaussianJobSettings(
functional="mn15",
basis="genecp",
heavy_elements=["Ir", "I"],
heavy_elements_basis="def2-SVPD",
light_elements_basis="def2SVP",
charge=0,
multiplicity=1,
)
# Determine basis should return 'genecp' since I (Z=53) > 36
determined_basis = settings.determine_basis_keyword(i_mol)
assert determined_basis == "genecp"
[docs]
def test_determine_basis_keyword_with_pd(self):
"""Test that molecules with Pd use 'genecp'."""
# Create molecule with Pd
pd_molecule = Atoms(
"PdC2H4O2",
positions=[
[0, 0, 0],
[1, 0, 0],
[2, 0, 0],
[3, 0, 0],
[4, 0, 0],
[5, 0, 0],
[6, 0, 0],
[7, 0, 0],
[8, 0, 0],
],
)
symbols = pd_molecule.get_chemical_symbols()
positions = pd_molecule.get_positions()
pd_mol = Molecule(
symbols=symbols, positions=positions, charge=0, multiplicity=1
)
# Create settings with genecp basis
settings = GaussianJobSettings(
functional="mn15",
basis="genecp",
heavy_elements=["Pd"],
heavy_elements_basis="def2-TZVPPD",
light_elements_basis="def2SVP",
charge=0,
multiplicity=1,
)
# Determine basis should return 'genecp' since Pd (Z=46) > 36
determined_basis = settings.determine_basis_keyword(pd_mol)
assert determined_basis == "genecp"
[docs]
def test_determine_basis_keyword_no_heavy_elements(self):
"""Test that molecules with no heavy
elements use light elements basis."""
# Create molecule with only light elements
h2o_molecule = Atoms(
"H2O", positions=[[0, 0, 0], [1, 0, 0], [0, 1, 0]]
)
symbols = h2o_molecule.get_chemical_symbols()
positions = h2o_molecule.get_positions()
h2o_mol = Molecule(
symbols=symbols, positions=positions, charge=0, multiplicity=1
)
# Create settings with genecp basis
settings = GaussianJobSettings(
functional="mn15",
basis="genecp",
heavy_elements=["Ir", "Br"],
heavy_elements_basis="def2-SVPD",
light_elements_basis="def2SVP",
charge=0,
multiplicity=1,
)
# Determine basis should use light
# elements since no heavy elements present
determined_basis = settings.determine_basis_keyword(h2o_mol)
assert determined_basis == "def2svp"
[docs]
def test_determine_basis_keyword_non_gen_basis(self):
"""Test that non-gen/genecp basis keywords are returned unchanged."""
# Create molecule
br_molecule = Atoms(
"BrC2H4O2",
positions=[
[0, 0, 0],
[1, 0, 0],
[2, 0, 0],
[3, 0, 0],
[4, 0, 0],
[5, 0, 0],
[6, 0, 0],
[7, 0, 0],
[8, 0, 0],
],
)
symbols = br_molecule.get_chemical_symbols()
positions = br_molecule.get_positions()
br_mol = Molecule(
symbols=symbols, positions=positions, charge=0, multiplicity=1
)
# Create settings with regular basis
settings = GaussianJobSettings(
functional="b3lyp",
basis="6-31g(d)",
charge=0,
multiplicity=1,
)
# Determine basis should return original
# basis since it's not gen/genecp
determined_basis = settings.determine_basis_keyword(br_mol)
assert determined_basis == "6-31g(d)"
[docs]
def test_determine_basis_keyword_mixed_elements(self):
"""Test molecule with both gen and genecp elements uses genecp."""
# Create molecule with both Br (gen) and I (genecp)
mixed_molecule = Atoms(
"BrIC2H4O2",
positions=[
[0, 0, 0],
[1, 0, 0],
[2, 0, 0],
[3, 0, 0],
[4, 0, 0],
[5, 0, 0],
[6, 0, 0],
[7, 0, 0],
[8, 0, 0],
[9, 0, 0],
],
)
symbols = mixed_molecule.get_chemical_symbols()
positions = mixed_molecule.get_positions()
mixed_mol = Molecule(
symbols=symbols, positions=positions, charge=0, multiplicity=1
)
# Create settings with both Br and I as heavy elements
settings = GaussianJobSettings(
functional="mn15",
basis="genecp",
heavy_elements=["Br", "I"],
heavy_elements_basis="def2-SVPD",
light_elements_basis="def2SVP",
charge=0,
multiplicity=1,
)
# Should use 'genecp' since I requires ECP (even though Br doesn't)
determined_basis = settings.determine_basis_keyword(mixed_mol)
assert determined_basis == "genecp"
[docs]
class TestGenECPReplacementInRoute:
"""Test that gen/genecp replacement in route
strings doesn't affect other keywords."""
[docs]
def test_gen_replacement_does_not_affect_noeigentest(self):
"""Test that 'gen' in 'noeigentest' is not
replaced, and case mode behaves correctly."""
import re
from chemsmart.utils.io import replace_word
# Route strings with lowercase 'gen'
# (replacement should work in BOTH modes)
route_strings = [
"# opt=(ts,calcfc,noeigentest) freq mn15 gen",
"opt=(calcfc,ts,noeigentest,maxstep=5) freq gen m06",
"# opt=(ts,calcfc,noeigentest) freq b3lyp gen",
"# opt=(ts,calcfc,noeigentest) freq b3lyp gen",
]
for case_sensitive in (True, False):
for route_string in route_strings:
result = replace_word(
route_string,
"gen",
"def2svp",
case_sensitive=case_sensitive,
)
# The word 'noeigentest' should remain unchanged
assert "noeigentest" in result
# If 'gen' appeared as a standalone word, it should be replaced
assert re.search(r"\bgen\b", result) is None
assert "def2svp" in result
# Make sure we didn't create 'noeidef2svptest'
assert "noeidef2svptest" not in result
# Extra: include a case-variant route to test the difference explicitly
route_string_case = "# opt=(ts,calcfc,noeigentest) freq mn15 Gen"
# case-insensitive: should replace "Gen"
result_ci = replace_word(
route_string_case, "gen", "def2svp", case_sensitive=False
)
assert re.search(r"\bGen\b", result_ci) is None
assert "def2svp" in result_ci
assert "noeigentest" in result_ci
# case-sensitive: should NOT replace "Gen" when old_word is "gen"
result_cs = replace_word(
route_string_case, "gen", "def2svp", case_sensitive=True
)
assert re.search(r"\bGen\b", result_cs) is not None
assert "def2svp" not in result_cs
assert "noeigentest" in result_cs
[docs]
def test_genecp_replacement_does_not_affect_other_keywords(self):
"""Test that 'genecp' replacement
works correctly in both case modes."""
from chemsmart.utils.io import replace_word
route_string = "# opt=(ts,calcfc,noeigentest) freq mn15 genecp"
# lowercase genecp -> replacement should work in BOTH modes
for case_sensitive in (True, False):
result = replace_word(
route_string,
"genecp",
"def2svp",
case_sensitive=case_sensitive,
)
assert "noeigentest" in result
assert "genecp" not in result
assert "def2svp" in result
assert "noeidef2svptest" not in result
# Extra: case-variant explicitly tests the difference
route_string_case = "# opt=(ts,calcfc,noeigentest) freq mn15 GenECP"
# case-insensitive: should replace
result_ci = replace_word(
route_string_case, "genecp", "def2svp", case_sensitive=False
)
assert "GenECP" not in result_ci
assert "def2svp" in result_ci
# case-sensitive: should NOT replace when old_word is "genecp"
result_cs = replace_word(
route_string_case, "genecp", "def2svp", case_sensitive=True
)
assert "GenECP" in result_cs
assert "def2svp" not in result_cs
[docs]
def test_gen_to_genecp_replacement(self):
"""Test replacing 'gen' with 'genecp'
in route strings for both case modes."""
import re
from chemsmart.utils.io import replace_word
route_string = "# opt=(ts,calcfc,noeigentest) freq mn15 gen"
# lowercase 'gen' -> replacement should work in BOTH modes
for case_sensitive in (True, False):
result = replace_word(
route_string, "gen", "genecp", case_sensitive=case_sensitive
)
assert "noeigentest" in result
assert re.search(r"\bgen\b", result) is None
assert "genecp" in result
# Extra: case variant shows mode difference
route_string_case = "# opt=(ts,calcfc,noeigentest) freq mn15 Gen"
# case-insensitive: should replace "Gen"
result_ci = replace_word(
route_string_case, "gen", "genecp", case_sensitive=False
)
assert re.search(r"\bGen\b", result_ci) is None
assert "genecp" in result_ci
# case-sensitive: should NOT replace "Gen" when old_word is "gen"
result_cs = replace_word(
route_string_case, "gen", "genecp", case_sensitive=True
)
assert re.search(r"\bGen\b", result_cs) is not None
assert "genecp" not in result_cs
[docs]
def test_replacement_with_various_delimiters(self):
"""Test that replacement works correctly with
various delimiters, in both case modes."""
from chemsmart.utils.io import replace_word
# These are all lowercase 'gen' -> should replace in BOTH modes
test_cases = [
("gen def2svp", "gen", "6-31g", "6-31g def2svp"),
("def2svp gen", "gen", "6-31g", "def2svp 6-31g"),
("opt gen freq", "gen", "6-31g", "opt 6-31g freq"),
("opt=(gen) freq", "gen", "6-31g", "opt=(6-31g) freq"),
]
for case_sensitive in (True, False):
for route_string, old_basis, new_basis, expected in test_cases:
result = replace_word(
route_string,
old_basis,
new_basis,
case_sensitive=case_sensitive,
)
assert (
result == expected
), f"[case_sensitive={case_sensitive}] Expected '{expected}' but got '{result}'"
# Should not replace "gen" when it is
# part of another word (boundary test)
# NOTE: this string actually *contains* a standalone
# "gen" at the end, so it SHOULD replace that one.
route_string = "regenerate gen"
expected = "regenerate 6-31g"
for case_sensitive in (True, False):
result = replace_word(
route_string, "gen", "6-31g", case_sensitive=case_sensitive
)
assert (
result == expected
), f"[case_sensitive={case_sensitive}] Expected '{expected}' but got '{result}'"
# Extra: ensure case-sensitive does NOT replace "Gen" if old_word="gen"
route_string_case = "opt=(Gen) freq"
result_cs = replace_word(
route_string_case, "gen", "6-31g", case_sensitive=True
)
assert result_cs == route_string_case # unchanged
result_ci = replace_word(
route_string_case, "gen", "6-31g", case_sensitive=False
)
assert result_ci == "opt=(6-31g) freq"