# Source code for tests.test_converter

import os.path
from shutil import copy, copytree, rmtree

import numpy as np
import pytest

from chemsmart.io.converter import FileConverter
from chemsmart.io.gaussian.folder import (
    GaussianInputFolder,
    GaussianOutputFolder,
)
from chemsmart.io.molecules.structure import Molecule
from chemsmart.io.pdb.pdbfile import PDBFile
from chemsmart.io.xyz.folder import XYZFolder



[docs]
class TestConverter:


[docs]
    def test_convert_log_folder_to_xyz(
        self, tmpdir, gaussian_outputs_test_directory
    ):
        """Convert every ``.log`` file of a folder to ``.xyz``.

        The fixture directory is copied under ``tmpdir`` first, so the
        converter writes next to the copies. Afterwards each log file must
        have an ``.xyz`` sibling, and the ozone output is inspected.
        """
        work_dir = os.path.join(tmpdir, "gaussian_outputs_test_directory")
        copytree(gaussian_outputs_test_directory, work_dir)

        # TODO: drop this removal once the tests for link jobs are fixed;
        # the conversion should then pass for link jobs too.
        link_dir = os.path.join(work_dir, "link")
        if os.path.exists(link_dir):
            rmtree(link_dir)

        converter = FileConverter(
            directory=work_dir, type="log", output_filetype="xyz"
        )
        converter.convert_files()

        # Every .log file found in the folder must now have an .xyz sibling.
        for log_path in GaussianOutputFolder(folder=work_dir).all_log_files:
            assert os.path.exists(log_path.replace(".log", ".xyz"))

        ozone_xyz = os.path.join(work_dir, "ozone.xyz")
        assert os.path.exists(ozone_xyz)

        with open(ozone_xyz, "r") as handle:
            xyz_lines = handle.readlines()
        assert len(xyz_lines) == 5  # line count of the written .xyz file
        assert xyz_lines[0] == "3\n"  # first line is the number of atoms



[docs]
    def test_convert_log_folder_to_com(
        self, tmpdir, gaussian_outputs_test_directory
    ):
        """Convert every ``.log`` file of a folder to ``.com``.

        Operates on a copy of the fixture directory under ``tmpdir``, then
        checks that each log file has a ``.com`` sibling and inspects the
        ozone input that was written.
        """
        work_dir = os.path.join(tmpdir, "gaussian_outputs_test_directory")
        copytree(gaussian_outputs_test_directory, work_dir)

        # TODO: drop this removal once the tests for link jobs are fixed;
        # the conversion should then pass for link jobs too.
        link_dir = os.path.join(work_dir, "link")
        if os.path.exists(link_dir):
            rmtree(link_dir)

        FileConverter(
            directory=work_dir, type="log", output_filetype="com"
        ).convert_files()

        # Every .log file found in the folder must now have a .com sibling.
        for log_path in GaussianOutputFolder(folder=work_dir).all_log_files:
            assert os.path.exists(log_path.replace(".log", ".com"))

        ozone_com = os.path.join(work_dir, "ozone.com")
        assert os.path.exists(ozone_com)

        with open(ozone_com, "r") as handle:
            com_lines = handle.readlines()
        assert len(com_lines) == 12
        assert com_lines[5].startswith("Generated from")



[docs]
    def test_convert_com_folder_to_xyz(
        self, tmpdir, gaussian_inputs_test_directory
    ):
        """Convert every ``.com`` file of a folder (and subfolders) to ``.xyz``.

        The fixture directory is copied under ``tmpdir`` before conversion.
        A few outputs, including files inside subfolders, are checked for
        their line count and atom-count header.
        """
        work_dir = os.path.join(tmpdir, "gaussian_inputs_test_directory")
        copytree(gaussian_inputs_test_directory, work_dir)

        FileConverter(
            directory=work_dir, type="com", output_filetype="xyz"
        ).convert_files()

        # Every .com file found in the folder must now have an .xyz sibling.
        for com_path in GaussianInputFolder(folder=work_dir).all_com_files:
            assert os.path.exists(com_path.replace(".com", ".xyz"))

        # (path parts below work_dir, expected line count, expected header)
        spot_checks = (
            (("hf.xyz",), 16, "14\n"),
            # files in subfolders
            (("genecp", "opt_genecp.xyz"), 17, "15\n"),
            (("additional", "model_sp_input.xyz"), 16, "14\n"),
        )
        for parts, n_lines, header in spot_checks:
            xyz_path = os.path.join(work_dir, *parts)
            assert os.path.exists(xyz_path)
            with open(xyz_path, "r") as handle:
                xyz_lines = handle.readlines()
            assert len(xyz_lines) == n_lines
            assert xyz_lines[0] == header



[docs]
    def test_convert_xyz_folder_to_com(self, tmpdir, xyz_directory):
        """Convert every ``.xyz`` file of a folder to ``.com``."""
        work_dir = os.path.join(tmpdir, "xyz_directory")
        copytree(xyz_directory, work_dir)

        converter = FileConverter(
            directory=work_dir, type="xyz", output_filetype="com"
        )
        converter.convert_files()

        # Every .xyz file found in the folder must now have a .com sibling.
        for xyz_path in XYZFolder(folder=work_dir).all_xyzfiles:
            assert os.path.exists(xyz_path.replace(".xyz", ".com"))



[docs]
    def test_convert_single_logfile_to_com(
        self, tmpdir, gaussian_singlet_opt_outfile
    ):
        """Convert one Gaussian ``.log`` file to ``.com`` and reload it."""
        log_path = os.path.join(tmpdir, "gaussian_singlet_opt.log")
        copy(gaussian_singlet_opt_outfile, log_path)
        assert os.path.exists(log_path)

        FileConverter(filename=log_path, output_filetype="com").convert_files()

        com_path = log_path.replace(".log", ".com")
        assert os.path.exists(com_path)

        loaded = Molecule.from_filepath(com_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 40
        assert loaded.chemical_formula == "C19H12F3I2N3O"
        assert np.isclose(loaded.mass, 609.128, rtol=1e-4)  # in thermo branch



[docs]
    def test_convert_single_link_opt_logfile_to_com(
        self, tmpdir, gaussian_link_opt_outputfile
    ):
        """Convert one link opt ``.log`` file to ``.com`` and reload it."""
        # NOTE(review): this temporary name is the same one used by
        # test_convert_single_logfile_to_com although the fixture here is a
        # link opt log -- confirm the name is intended.
        log_path = os.path.join(tmpdir, "gaussian_singlet_opt.log")
        copy(gaussian_link_opt_outputfile, log_path)
        assert os.path.exists(log_path)

        FileConverter(filename=log_path, output_filetype="com").convert_files()

        com_path = log_path.replace(".log", ".com")
        assert os.path.exists(com_path)

        loaded = Molecule.from_filepath(com_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 2
        assert loaded.chemical_formula == "O2"



[docs]
    def test_convert_single_link_sp_logfile_to_xyz(
        self, tmpdir, gaussian_dna_link_sp_outputfile
    ):
        """Convert the DNA link sp ``.log`` file to ``.xyz`` and reload it."""
        log_path = os.path.join(tmpdir, "dna_link_sp.log")
        copy(gaussian_dna_link_sp_outputfile, log_path)
        assert os.path.exists(log_path)

        FileConverter(filename=log_path, output_filetype="xyz").convert_files()

        xyz_path = log_path.replace(".log", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 603
        assert loaded.chemical_formula == "C191H241Cu2N59O96P14"
        assert loaded.energy == -25900.214629



[docs]
    def test_convert_single_link_opt_logfile_to_xyz(
        self,
        tmpdir,
        gaussian_dppeFeCl2_link_opt_outputfile,
        gaussian_dppeFeCl2_link_opt_failed_outputfile,
    ):
        """Convert two link opt ``.log`` files to ``.xyz`` and reload them.

        The first fixture is copied under a "normal termination" name and the
        second under an "error termination" name; each is converted on its
        own and the resulting molecule is checked.
        """

        def convert_and_load(fixture_path, log_name):
            """Copy the log into ``tmpdir``, convert it, return the molecule."""
            log_path = os.path.join(tmpdir, log_name)
            copy(fixture_path, log_path)
            assert os.path.exists(log_path)

            FileConverter(
                filename=log_path, output_filetype="xyz"
            ).convert_files()

            xyz_path = log_path.replace(".log", ".xyz")
            assert os.path.exists(xyz_path)
            return Molecule.from_filepath(xyz_path)

        normal_mol = convert_and_load(
            gaussian_dppeFeCl2_link_opt_outputfile,
            "dppeFeCl2_opt_quintet_link_opt_link.log",
        )
        assert isinstance(normal_mol, Molecule)
        assert normal_mol.num_atoms == 55
        assert normal_mol.chemical_formula == "C26H24Cl2FeP2"
        assert normal_mol.energy == -3869.013518

        failed_mol = convert_and_load(
            gaussian_dppeFeCl2_link_opt_failed_outputfile,
            "dppeFeCl2_phenyldioxazolone_opt_triplet_opt_error_termination_link.log",
        )
        assert isinstance(failed_mol, Molecule)
        assert failed_mol.num_atoms == 72
        assert failed_mol.chemical_formula == "C34H29Cl2FeNO3P2"
        assert failed_mol.energy == -4456.134472



[docs]
    def test_convert_single_link_ts_logfile_to_xyz(
        self, tmpdir, gaussian_link_ts_outputfile
    ):
        """Convert one link ts ``.log`` file to ``.xyz`` and reload it."""
        # NOTE(review): the temporary name mentions dppeFeCl2 while the
        # assertions below expect O2 -- presumably copied from another test;
        # confirm the name is intended.
        log_path = os.path.join(
            tmpdir, "dppeFeCl2_opt_quintet_link_opt_link.log"
        )
        copy(gaussian_link_ts_outputfile, log_path)
        assert os.path.exists(log_path)

        FileConverter(filename=log_path, output_filetype="xyz").convert_files()

        xyz_path = log_path.replace(".log", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 2
        assert loaded.chemical_formula == "O2"
        assert loaded.energy == -150.116584



[docs]
    def test_convert_single_link_logfile_to_xyz(
        self, tmpdir, gaussian_link_sp_outfile
    ):
        """Convert one link sp ``.log`` file to ``.xyz`` and reload it."""
        log_path = os.path.join(tmpdir, "intervening_n_Ap_A.log")
        copy(gaussian_link_sp_outfile, log_path)
        assert os.path.exists(log_path)

        FileConverter(filename=log_path, output_filetype="xyz").convert_files()

        xyz_path = log_path.replace(".log", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 603
        assert loaded.chemical_formula == "C191H241Cu2N59O96P14"
        assert loaded.energy == -25900.214629



[docs]
    def test_convert_single_comfile_to_xyz(
        self, tmpdir, gaussian_opt_inputfile
    ):
        """Convert one Gaussian ``.com`` input to ``.xyz`` and inspect it.

        Checks both the reloaded molecule and the raw layout of the written
        file (line count and atom-count header).
        """
        com_path = os.path.join(tmpdir, "gaussian_opt.com")
        copy(gaussian_opt_inputfile, com_path)
        assert os.path.exists(com_path)

        FileConverter(filename=com_path, output_filetype="xyz").convert_files()

        xyz_path = com_path.replace(".com", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 14
        assert loaded.chemical_formula == "C7H5ClO"

        with open(xyz_path, "r") as handle:
            xyz_lines = handle.readlines()
        assert len(xyz_lines) == 16
        assert xyz_lines[0] == "14\n"

        # TODO: a mass assertion was left disabled here (marked "in thermo
        # branch"). Its value, 609.128, is the one asserted for
        # C19H12F3I2N3O elsewhere in this file -- confirm the right mass for
        # C7H5ClO before enabling it.


[docs]
    def test_convert_single_sp_log_file_to_xyz(
        self, gaussian_benzene_opt_outfile, tmpdir
    ):
        """Convert the benzene ``.log`` file to ``.xyz`` and reload it."""
        # NOTE(review): the test and temp file are named "sp" while the
        # fixture is named "opt" -- confirm which one is intended.
        log_path = os.path.join(tmpdir, "benzene_sp.log")
        copy(gaussian_benzene_opt_outfile, log_path)
        assert os.path.exists(log_path)

        FileConverter(filename=log_path, output_filetype="xyz").convert_files()

        xyz_path = log_path.replace(".log", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 12
        assert loaded.chemical_formula == "C6H6"
        assert loaded.energy == -231.977725



[docs]
    def test_convert_single_opt_log_file_to_xyz(
        self, gaussian_acetone_opt_outfile, tmpdir
    ):
        """Convert the acetone opt ``.log`` file to ``.xyz`` and reload it."""
        log_path = os.path.join(tmpdir, "acetone_opt.log")
        copy(gaussian_acetone_opt_outfile, log_path)
        assert os.path.exists(log_path)

        FileConverter(filename=log_path, output_filetype="xyz").convert_files()

        xyz_path = log_path.replace(".log", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 10
        assert loaded.chemical_formula == "C3H6O"
        assert loaded.energy == -192.919416



[docs]
    def test_convert_single_wbi_log_file_to_xyz(self, wbi_outputfile, tmpdir):
        """Convert the wbi ``.log`` file to ``.xyz`` and reload it."""
        log_path = os.path.join(tmpdir, "TS_5coord_XIII_wbi.log")
        copy(wbi_outputfile, log_path)
        assert os.path.exists(log_path)

        FileConverter(filename=log_path, output_filetype="xyz").convert_files()

        xyz_path = log_path.replace(".log", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 128
        assert loaded.chemical_formula == "C51H63NNiO9P2Si"
        assert loaded.energy == -5189.249707



[docs]
    def test_convert_single_failed_modred_log_file_to_xyz(
        self, gaussian_failed_modred_outfile, tmpdir
    ):
        """Convert the failed modred ``.log`` file to ``.xyz`` and reload it."""
        log_path = os.path.join(tmpdir, "cage_free_failed_modred.log")
        copy(gaussian_failed_modred_outfile, log_path)
        assert os.path.exists(log_path)

        FileConverter(filename=log_path, output_filetype="xyz").convert_files()

        xyz_path = log_path.replace(".log", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 10
        assert loaded.chemical_formula == "C3H4O3"
        assert loaded.energy == -341.883317



[docs]
    def test_convert_single_failed_oniom_log_file_to_xyz(
        self, gaussian_oniom_outputfile, tmpdir
    ):
        """Convert the ONIOM ``.log`` fixture to ``.xyz`` and reload it."""
        log_path = os.path.join(tmpdir, "cation_failed_scan.log")
        copy(gaussian_oniom_outputfile, log_path)
        assert os.path.exists(log_path)

        FileConverter(filename=log_path, output_filetype="xyz").convert_files()

        xyz_path = log_path.replace(".log", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 483
        assert loaded.chemical_formula == "C155H180CuN53O82P12"
        assert loaded.energy == -5300.535128





[docs]
class TestPDBFile:

    # -------------------------------------------------------------------
    # Initialisation and representation
    # -------------------------------------------------------------------


[docs]
    def test_init_stores_filename(self, single_model_pdb_file):
        """The constructor keeps the given path on ``filename``."""
        reader = PDBFile(filename=single_model_pdb_file)
        assert reader.filename == single_model_pdb_file



[docs]
    def test_repr(self, single_model_pdb_file):
        """``repr`` has the form ``PDBFile(<filename>)``."""
        expected = f"PDBFile({single_model_pdb_file})"
        assert repr(PDBFile(filename=single_model_pdb_file)) == expected



[docs]
    def test_str(self, single_model_pdb_file):
        """``str`` mentions both "PDBFile object" and the filename."""
        text = str(PDBFile(filename=single_model_pdb_file))
        assert "PDBFile object" in text
        assert single_model_pdb_file in text



[docs]
    def test_filepath_resolves_absolute(self, single_model_pdb_file):
        """``filepath`` is an absolute path."""
        reader = PDBFile(filename=single_model_pdb_file)
        resolved = reader.filepath
        assert os.path.isabs(resolved)


    # -------------------------------------------------------------------
    # Raw line access
    # -------------------------------------------------------------------


[docs]
    def test_raw_lines_preserves_column_whitespace(
        self, single_model_pdb_file
    ):
        """Coordinate records in ``raw_lines`` keep their fixed-width length.

        Every line starting with ``ATOM`` or ``HETATM`` must be at least 54
        characters long.
        """
        reader = PDBFile(filename=single_model_pdb_file)
        coordinate_records = [
            record
            for record in reader.raw_lines
            if record.startswith(("HETATM", "ATOM"))
        ]
        for record in coordinate_records:
            assert len(record) >= 54  # at least through z-coordinate



[docs]
    def test_raw_lines_strips_trailing_newlines(self, single_model_pdb_file):
        """No entry of ``raw_lines`` ends with a newline character."""
        reader = PDBFile(filename=single_model_pdb_file)
        assert not any(entry.endswith("\n") for entry in reader.raw_lines)


    # -------------------------------------------------------------------
    # Single-model parsing
    # -------------------------------------------------------------------


[docs]
    def test_molecule_returns_molecule_object(self, single_model_pdb_file):
        """The ``molecule`` property yields a ``Molecule`` instance."""
        parsed = PDBFile(filename=single_model_pdb_file).molecule
        assert isinstance(parsed, Molecule)



[docs]
    def test_num_atoms(self, single_model_pdb_file):
        """The single-model fixture reports three atoms."""
        reader = PDBFile(filename=single_model_pdb_file)
        atom_count = reader.num_atoms
        assert atom_count == 3



[docs]
    def test_symbols(self, single_model_pdb_file):
        """Element symbols of the single-model fixture are O, H, H."""
        reader = PDBFile(filename=single_model_pdb_file)
        symbols = reader.molecule.symbols
        assert symbols == ["O", "H", "H"]



[docs]
    def test_positions(self, single_model_pdb_file):
        """Parsed coordinates match the values expected for the fixture."""
        reference = np.array(
            [
                [0.0, 0.0, 0.0],
                [0.96, 0.0, 0.0],
                [-0.24, 0.93, 0.0],
            ]
        )
        reader = PDBFile(filename=single_model_pdb_file)
        assert np.allclose(reader.molecule.positions, reference)



[docs]
    def test_atom_names(self, single_model_pdb_file):
        """Atom names of the single-model fixture are O, H1, H2."""
        reader = PDBFile(filename=single_model_pdb_file)
        names = reader.molecule.atom_names
        assert names == ["O", "H1", "H2"]



[docs]
    def test_residue_names(self, single_model_pdb_file):
        """Every atom of the single-model fixture is in residue ``HOH``."""
        reader = PDBFile(filename=single_model_pdb_file)
        residues = reader.molecule.residue_names
        assert residues == ["HOH", "HOH", "HOH"]



[docs]
    def test_residue_numbers(self, single_model_pdb_file):
        """Every atom of the single-model fixture has residue number 7."""
        reader = PDBFile(filename=single_model_pdb_file)
        numbers = reader.molecule.residue_numbers
        assert numbers == [7, 7, 7]



[docs]
    def test_chain_ids(self, single_model_pdb_file):
        """Every atom of the single-model fixture belongs to chain ``A``."""
        reader = PDBFile(filename=single_model_pdb_file)
        chains = reader.molecule.chain_ids
        assert chains == ["A", "A", "A"]



[docs]
    def test_record_types(self, single_model_pdb_file):
        """Every atom of the single-model fixture is a ``HETATM`` record."""
        reader = PDBFile(filename=single_model_pdb_file)
        records = reader.molecule.record_type
        assert records == ["HETATM", "HETATM", "HETATM"]



[docs]
    def test_info_dict_populated(self, single_model_pdb_file):
        """``molecule.info`` carries all PDB-specific per-atom keys."""
        metadata = PDBFile(filename=single_model_pdb_file).molecule.info
        required_keys = (
            "atom_name",
            "residue_name",
            "residue_number",
            "chain_id",
            "record_type",
        )
        for key in required_keys:
            assert key in metadata


    # -------------------------------------------------------------------
    # Multi-model parsing
    # -------------------------------------------------------------------


[docs]
    def test_get_molecules_all(self, multi_model_pdb_file):
        """Index ``":"`` with ``return_list=True`` gives a list of 2 models."""
        reader = PDBFile(filename=multi_model_pdb_file)
        every_model = reader.get_molecules(index=":", return_list=True)
        assert isinstance(every_model, list)
        assert len(every_model) == 2



[docs]
    def test_get_molecules_first(self, multi_model_pdb_file):
        """Index ``"1"`` selects the model whose atoms are on chain ``A``."""
        reader = PDBFile(filename=multi_model_pdb_file)
        first_model = reader.get_molecules(index="1")
        assert first_model.chain_ids == ["A", "A"]



[docs]
    def test_get_molecules_last(self, multi_model_pdb_file):
        """Index ``"-1"`` selects the chain ``B`` model."""
        reader = PDBFile(filename=multi_model_pdb_file)
        last_model = reader.get_molecules(index="-1")
        assert last_model.chain_ids == ["B", "B"]

        first_atom_xyz = np.array([1.5, 2.5, 3.5])
        assert np.allclose(last_model.positions[0], first_atom_xyz)



[docs]
    def test_molecule_property_returns_last_model(self, multi_model_pdb_file):
        """For a multi-model file ``molecule`` is the chain ``B`` model."""
        reader = PDBFile(filename=multi_model_pdb_file)
        chains = reader.molecule.chain_ids
        assert chains == ["B", "B"]



[docs]
    def test_return_list_wraps_single(self, multi_model_pdb_file):
        """``return_list=True`` wraps a single selected model in a list."""
        reader = PDBFile(filename=multi_model_pdb_file)
        wrapped = reader.get_molecules(index="-1", return_list=True)
        assert isinstance(wrapped, list)
        assert len(wrapped) == 1


    # -------------------------------------------------------------------
    # Element inference
    # -------------------------------------------------------------------


[docs]
    def test_blank_element_columns_infer_two_letter_elements(
        self, blank_element_pdb_file
    ):
        """The blank-element fixture parses to symbols Fe, Zn, Cl, C."""
        parsed = PDBFile(filename=blank_element_pdb_file).molecule
        inferred = list(parsed.symbols)
        assert inferred == ["Fe", "Zn", "Cl", "C"]



[docs]
    def test_infer_element_fe(self):
        """Atom name ``FE`` is inferred as iron (``Fe``)."""
        inferred = PDBFile._infer_element_from_atom_name("FE")
        assert inferred == "Fe"



[docs]
    def test_infer_element_zn(self):
        """Atom name ``ZN`` is inferred as zinc (``Zn``)."""
        inferred = PDBFile._infer_element_from_atom_name("ZN")
        assert inferred == "Zn"



[docs]
    def test_infer_element_cl(self):
        """Atom name ``CL`` is inferred as chlorine (``Cl``)."""
        inferred = PDBFile._infer_element_from_atom_name("CL")
        assert inferred == "Cl"



[docs]
    def test_infer_element_ca_is_carbon(self):
        """``CA`` is treated as the C-alpha atom label and resolves to ``C``."""
        inferred = PDBFile._infer_element_from_atom_name("CA")
        assert inferred == "C"



[docs]
    def test_infer_element_leading_digit(self):
        """A leading digit is ignored, so ``1H`` resolves to ``H``."""
        inferred = PDBFile._infer_element_from_atom_name("1H")
        assert inferred == "H"



[docs]
    def test_infer_element_empty_raises(self):
        """An empty atom name raises ``ValueError`` ("Unable to infer")."""
        empty_name = ""
        with pytest.raises(ValueError, match="Unable to infer"):
            PDBFile._infer_element_from_atom_name(empty_name)



[docs]
    def test_infer_element_digits_only_raises(self):
        """A digits-only atom name raises ``ValueError`` ("Unable to infer")."""
        digits_only = "123"
        with pytest.raises(ValueError, match="Unable to infer"):
            PDBFile._infer_element_from_atom_name(digits_only)


    # -------------------------------------------------------------------
    # Error handling
    # -------------------------------------------------------------------


[docs]
    def test_empty_file_raises_value_error(self, empty_pdb_file):
        """``get_molecules`` on the empty fixture raises ``ValueError``."""
        reader = PDBFile(filename=empty_pdb_file)
        expected_message = "No ATOM/HETATM records"
        with pytest.raises(ValueError, match=expected_message):
            reader.get_molecules()



[docs]
    def test_molecule_property_raises_on_empty(self, empty_pdb_file):
        """Reading ``molecule`` on the empty fixture raises ``ValueError``."""
        reader = PDBFile(filename=empty_pdb_file)
        expected_message = "No ATOM/HETATM records"
        with pytest.raises(ValueError, match=expected_message):
            # The attribute access itself is what must raise.
            _ = reader.molecule


    # -------------------------------------------------------------------
    # Writing
    # -------------------------------------------------------------------


[docs]
    def test_write_creates_file(self, single_model_pdb_file, tmpdir):
        """``PDBFile.write`` produces a non-empty file at the target path."""
        source_mol = PDBFile(filename=single_model_pdb_file).molecule
        target = os.path.join(str(tmpdir), "output.pdb")

        PDBFile.write(source_mol, target)

        assert os.path.exists(target)
        assert os.path.getsize(target) > 0



[docs]
    def test_write_round_trip_preserves_atom_count(
        self, single_model_pdb_file, tmpdir
    ):
        """Writing a molecule and re-reading it keeps the atom count."""
        original = PDBFile(filename=single_model_pdb_file)
        target = os.path.join(str(tmpdir), "round_trip.pdb")

        PDBFile.write(original.molecule, target)

        reread = PDBFile(filename=target)
        assert reread.num_atoms == original.num_atoms



[docs]
    def test_write_round_trip_preserves_symbols(
        self, single_model_pdb_file, tmpdir
    ):
        """Writing a molecule and re-reading it keeps the element symbols."""
        source_mol = PDBFile(filename=single_model_pdb_file).molecule
        target = os.path.join(str(tmpdir), "round_trip.pdb")

        PDBFile.write(source_mol, target)

        reread_mol = PDBFile(filename=target).molecule
        assert reread_mol.symbols == source_mol.symbols



[docs]
    def test_write_round_trip_preserves_positions(
        self, single_model_pdb_file, tmpdir
    ):
        """Writing and re-reading keeps positions within ``atol=1e-3``."""
        source_mol = PDBFile(filename=single_model_pdb_file).molecule
        target = os.path.join(str(tmpdir), "round_trip.pdb")

        PDBFile.write(source_mol, target)

        reread_mol = PDBFile(filename=target).molecule
        assert np.allclose(
            reread_mol.positions, source_mol.positions, atol=1e-3
        )



[docs]
    def test_write_output_contains_atom_records(
        self, single_model_pdb_file, tmpdir
    ):
        """The written file contains an atom record keyword and ``END``."""
        source_mol = PDBFile(filename=single_model_pdb_file).molecule
        target = os.path.join(str(tmpdir), "records.pdb")

        PDBFile.write(source_mol, target)

        with open(target, "r") as handle:
            written = handle.read()

        has_atom_record = "HETATM" in written or "ATOM" in written
        assert has_atom_record
        assert "END" in written


    # -------------------------------------------------------------------
    # Backward compatibility (Molecule delegates to PDBFile)
    # -------------------------------------------------------------------


[docs]
    def test_molecule_from_filepath_uses_pdbfile(self, single_model_pdb_file):
        """``Molecule.from_filepath`` and ``PDBFile.molecule`` agree.

        Compares symbols, positions and the PDB-specific per-atom attributes
        of the two molecules built from the same ``.pdb`` file.
        """
        via_molecule = Molecule.from_filepath(single_model_pdb_file)
        via_pdbfile = PDBFile(filename=single_model_pdb_file).molecule

        assert via_molecule.symbols == via_pdbfile.symbols
        assert np.allclose(via_molecule.positions, via_pdbfile.positions)

        for attribute in (
            "atom_names",
            "residue_names",
            "residue_numbers",
            "chain_ids",
        ):
            left = getattr(via_molecule, attribute)
            right = getattr(via_pdbfile, attribute)
            assert left == right



[docs]
    def test_pdb_infer_pdb_element(self):
        """Element inference maps ``FE`` to ``Fe`` and ``CA`` to ``C``."""
        for atom_name, element in (("FE", "Fe"), ("CA", "C")):
            assert PDBFile._infer_element_from_atom_name(atom_name) == element



[docs]
    def test_pdb_parse_pdb_models(self, multi_model_pdb_file):
        """``_parse_models`` finds two models in the multi-model fixture."""
        reader = PDBFile(multi_model_pdb_file)
        parsed_models = reader._parse_models()
        assert len(parsed_models) == 2



[docs]
    def test_pdb_molecule_from_pdb_atom_lines(self):
        """A single ``HETATM`` line parses to a one-atom oxygen molecule."""
        # The record is assembled from pieces only to keep lines short.
        record = "".join(
            (
                "HETATM    1  O   HOH A   7",
                "       0.000   0.000   0.000",
                "  1.00  0.00           O",
            )
        )
        parsed = PDBFile._get_molecule_from_atom_lines([record])
        assert parsed.symbols == ["O"]


    # ------------------------------------------------------------------
    # CDXML / CDX conversion tests
    # ------------------------------------------------------------------


[docs]
    def test_convert_single_cdxml_to_xyz(
        self, tmpdir, single_molecule_cdxml_file_methane
    ):
        """Convert the methane ``.cdxml`` file to ``.xyz`` and reload it."""
        cdxml_path = os.path.join(tmpdir, "methane.cdxml")
        copy(single_molecule_cdxml_file_methane, cdxml_path)

        converter = FileConverter(filename=cdxml_path, output_filetype="xyz")
        converter.convert_files()

        xyz_path = cdxml_path.replace(".cdxml", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 5
        assert loaded.chemical_formula == "CH4"



[docs]
    def test_convert_single_cdxml_to_com(
        self, tmpdir, single_molecule_cdxml_file_benzene
    ):
        """Convert the benzene ``.cdxml`` file to ``.com`` and reload it."""
        cdxml_path = os.path.join(tmpdir, "benzene.cdxml")
        copy(single_molecule_cdxml_file_benzene, cdxml_path)

        converter = FileConverter(filename=cdxml_path, output_filetype="com")
        converter.convert_files()

        com_path = cdxml_path.replace(".cdxml", ".com")
        assert os.path.exists(com_path)

        loaded = Molecule.from_filepath(com_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 12
        assert loaded.chemical_formula == "C6H6"



[docs]
    def test_convert_single_cdx_to_xyz(
        self, tmpdir, single_molecule_cdx_file_imidazole
    ):
        """Convert the imidazole ``.cdx`` file to ``.xyz`` and reload it."""
        cdx_path = os.path.join(tmpdir, "imidazole.cdx")
        copy(single_molecule_cdx_file_imidazole, cdx_path)

        converter = FileConverter(filename=cdx_path, output_filetype="xyz")
        converter.convert_files()

        xyz_path = cdx_path.replace(".cdx", ".xyz")
        assert os.path.exists(xyz_path)

        loaded = Molecule.from_filepath(xyz_path)
        assert isinstance(loaded, Molecule)
        assert loaded.num_atoms == 21
        assert loaded.chemical_formula == "C8H10N2O"



[docs]
    def test_convert_multi_molecule_cdxml_to_xyz_splits_files(
        self, tmpdir, multi_molecule_cdxml_file
    ):
        """A two-molecule ``.cdxml`` yields ``<basename>_1/_2.xyz`` files."""
        cdxml_path = os.path.join(tmpdir, "two_molecules.cdxml")
        copy(multi_molecule_cdxml_file, cdxml_path)

        converter = FileConverter(filename=cdxml_path, output_filetype="xyz")
        converter.convert_files()

        # One numbered output per molecule is expected.
        first_xyz = os.path.join(tmpdir, "two_molecules_1.xyz")
        second_xyz = os.path.join(tmpdir, "two_molecules_2.xyz")
        assert os.path.exists(first_xyz)
        assert os.path.exists(second_xyz)

        first_mol = Molecule.from_filepath(first_xyz)
        assert isinstance(first_mol, Molecule)
        assert first_mol.chemical_formula == "CH2O"

        second_mol = Molecule.from_filepath(second_xyz)
        assert isinstance(second_mol, Molecule)
        assert second_mol.chemical_formula == "N2"
        assert second_mol.num_atoms == 2



[docs]
    def test_convert_cdxml_folder_to_xyz(self, tmpdir, chemdraw_directory):
        """Convert a folder of ``.cdxml`` files to ``.xyz``.

        The fixture folder is copied under ``tmpdir`` before conversion so
        outputs land next to the copies. Single-molecule inputs must yield
        ``<basename>.xyz``; ``two_molecules.cdxml`` must be split into
        ``_1`` and ``_2`` outputs. All missing outputs are reported in one
        assertion message.
        """
        work_dir = os.path.join(tmpdir, "chemdraw")
        copytree(chemdraw_directory, work_dir)

        FileConverter(
            directory=work_dir, type="cdxml", output_filetype="xyz"
        ).convert_files()

        expected_outputs = (
            # single-molecule inputs -> <basename>.xyz
            "benzene.xyz",
            "methane.xyz",
            "complex_molecule.xyz",
            # two_molecules.cdxml holds 2 molecules -> one file per molecule
            "two_molecules_1.xyz",
            "two_molecules_2.xyz",
        )
        missing = [
            name
            for name in expected_outputs
            if not os.path.exists(os.path.join(work_dir, name))
        ]
        assert not missing, f"missing converted files: {missing}"



[docs]
    def test_convert_cdxml_folder_to_com(self, tmpdir, chemdraw_directory):
        """Convert a folder of ``.cdxml`` files to ``.com``.

        The fixture folder is copied under ``tmpdir`` before conversion so
        outputs land next to the copies. Single-molecule inputs must yield
        ``<basename>.com``; ``two_molecules.cdxml`` must be split into
        ``_1`` and ``_2`` outputs. All missing outputs are reported in one
        assertion message.
        """
        work_dir = os.path.join(tmpdir, "chemdraw")
        copytree(chemdraw_directory, work_dir)

        FileConverter(
            directory=work_dir, type="cdxml", output_filetype="com"
        ).convert_files()

        expected_outputs = (
            # single-molecule inputs -> <basename>.com
            "benzene.com",
            "methane.com",
            "complex_molecule.com",
            # two_molecules.cdxml holds 2 molecules -> one file per molecule
            "two_molecules_1.com",
            "two_molecules_2.com",
        )
        missing = [
            name
            for name in expected_outputs
            if not os.path.exists(os.path.join(work_dir, name))
        ]
        assert not missing, f"missing converted files: {missing}"