"""Tests for chemsmart file conversion and PDB parsing (tests.test_converter)."""

import os.path
from shutil import copy, copytree, rmtree

import numpy as np
import pytest

from chemsmart.io.converter import FileConverter
from chemsmart.io.gaussian.folder import (
    GaussianInputFolder,
    GaussianOutputFolder,
)
from chemsmart.io.molecules.structure import Molecule
from chemsmart.io.pdb.pdbfile import PDBFile
from chemsmart.io.xyz.folder import XYZFolder


class TestConverter:
    """Exercise ``FileConverter`` on whole folders and on single files.

    Every test copies its fixture data into ``tmpdir`` first and runs the
    conversion on that copy.
    """

    # ------------------------------------------------------------------
    # Helpers (not collected by pytest: names do not start with "test")
    # ------------------------------------------------------------------
    @staticmethod
    def _swap_extension(path, new_ext):
        """Return ``path`` with its final extension replaced by ``new_ext``.

        ``os.path.splitext`` only touches the extension of the last path
        component, whereas ``str.replace`` would also rewrite a matching
        substring that happens to occur in a parent directory name.
        """
        return os.path.splitext(path)[0] + new_ext

    @staticmethod
    def _copy_log_folder_without_links(source_directory, tmpdir):
        """Copy the Gaussian output fixtures into ``tmpdir``, minus ``link``.

        Returns the path of the copied folder.
        """
        tmp_log_folder = os.path.join(
            tmpdir, "gaussian_outputs_test_directory"
        )
        copytree(source_directory, tmp_log_folder)

        # TODO: when the tests for link jobs are fixed, stop removing this
        # folder so that link jobs are converted and checked too.
        link_folder = os.path.join(tmp_log_folder, "link")
        if os.path.exists(link_folder):
            rmtree(link_folder)
        return tmp_log_folder

    def _convert_copy(self, source_file, tmpdir, copy_name, output_filetype):
        """Copy ``source_file`` to ``tmpdir/copy_name`` and convert the copy.

        Asserts that both the copy and the converted file exist, and
        returns the path of the converted file.
        """
        tmp_path = os.path.join(tmpdir, copy_name)
        copy(source_file, tmp_path)
        assert os.path.exists(tmp_path)

        file_converter = FileConverter(
            filename=tmp_path, output_filetype=output_filetype
        )
        file_converter.convert_files()

        converted = self._swap_extension(tmp_path, "." + output_filetype)
        assert os.path.exists(converted)
        return converted

    # ------------------------------------------------------------------
    # Folder conversions
    # ------------------------------------------------------------------
    def test_convert_log_folder_to_xyz(
        self, tmpdir, gaussian_outputs_test_directory
    ):
        """Every ``.log`` file in a folder gets a sibling ``.xyz`` file."""
        tmp_log_folder = self._copy_log_folder_without_links(
            gaussian_outputs_test_directory, tmpdir
        )

        file_converter = FileConverter(
            directory=tmp_log_folder, type="log", output_filetype="xyz"
        )
        file_converter.convert_files()

        # check all .log files have been converted to .xyz files
        g16_folder = GaussianOutputFolder(folder=tmp_log_folder)
        for logfile in g16_folder.all_log_files:
            assert os.path.exists(self._swap_extension(logfile, ".xyz"))

        ozone_xyz = os.path.join(tmp_log_folder, "ozone.xyz")
        assert os.path.exists(ozone_xyz)
        with open(ozone_xyz, "r") as f:
            lines = f.readlines()
        assert len(lines) == 5  # 5 lines in the xyz file
        assert lines[0] == "3\n"  # first line is number of atoms

    def test_convert_log_folder_to_com(
        self, tmpdir, gaussian_outputs_test_directory
    ):
        """Every ``.log`` file in a folder gets a sibling ``.com`` file."""
        tmp_log_folder = self._copy_log_folder_without_links(
            gaussian_outputs_test_directory, tmpdir
        )

        file_converter = FileConverter(
            directory=tmp_log_folder, type="log", output_filetype="com"
        )
        file_converter.convert_files()

        # check all .log files have been converted to .com files
        g16_folder = GaussianOutputFolder(folder=tmp_log_folder)
        for logfile in g16_folder.all_log_files:
            assert os.path.exists(self._swap_extension(logfile, ".com"))

        ozone_com = os.path.join(tmp_log_folder, "ozone.com")
        assert os.path.exists(ozone_com)
        with open(ozone_com, "r") as f:
            lines = f.readlines()
        assert len(lines) == 12
        assert lines[5].startswith("Generated from")

    def test_convert_com_folder_to_xyz(
        self, tmpdir, gaussian_inputs_test_directory
    ):
        """``.com`` files are converted, including those in subfolders."""
        # copy whole directory gaussian_inputs_test_directory to tmpdir
        tmp_com_folder = os.path.join(tmpdir, "gaussian_inputs_test_directory")
        copytree(gaussian_inputs_test_directory, tmp_com_folder)

        file_converter = FileConverter(
            directory=tmp_com_folder, type="com", output_filetype="xyz"
        )
        file_converter.convert_files()

        # check all .com files have been converted to .xyz files
        g16_folder = GaussianInputFolder(folder=tmp_com_folder)
        for comfile in g16_folder.all_com_files:
            assert os.path.exists(self._swap_extension(comfile, ".xyz"))

        hf_xyz = os.path.join(tmp_com_folder, "hf.xyz")
        assert os.path.exists(hf_xyz)
        with open(hf_xyz, "r") as f:
            lines = f.readlines()
        assert len(lines) == 16
        assert lines[0] == "14\n"

        # files in subfolders
        genecp_xyz = os.path.join(tmp_com_folder, "genecp", "opt_genecp.xyz")
        assert os.path.exists(genecp_xyz)
        with open(genecp_xyz, "r") as f:
            lines = f.readlines()
        assert len(lines) == 17
        assert lines[0] == "15\n"

        additional_xyz = os.path.join(
            tmp_com_folder, "additional", "model_sp_input.xyz"
        )
        assert os.path.exists(additional_xyz)
        with open(additional_xyz, "r") as f:
            lines = f.readlines()
        assert len(lines) == 16
        assert lines[0] == "14\n"

    def test_convert_xyz_folder_to_com(self, tmpdir, xyz_directory):
        """Every ``.xyz`` file in a folder gets a sibling ``.com`` file."""
        # copy whole directory xyz_directory to tmpdir
        tmp_xyz_folder = os.path.join(tmpdir, "xyz_directory")
        copytree(xyz_directory, tmp_xyz_folder)

        file_converter = FileConverter(
            directory=tmp_xyz_folder, type="xyz", output_filetype="com"
        )
        file_converter.convert_files()

        # check all .xyz files have been converted to .com files
        xyz_folder = XYZFolder(folder=tmp_xyz_folder)
        for xyzfile in xyz_folder.all_xyzfiles:
            assert os.path.exists(self._swap_extension(xyzfile, ".com"))

    # ------------------------------------------------------------------
    # Single-file conversions
    # ------------------------------------------------------------------
    def test_convert_single_logfile_to_com(
        self, tmpdir, gaussian_singlet_opt_outfile
    ):
        """A single ``.log`` file converts to a readable ``.com`` file."""
        com_path = self._convert_copy(
            gaussian_singlet_opt_outfile,
            tmpdir,
            "gaussian_singlet_opt.log",
            "com",
        )
        mol = Molecule.from_filepath(com_path)
        assert isinstance(mol, Molecule)
        assert mol.num_atoms == 40
        assert mol.chemical_formula == "C19H12F3I2N3O"
        assert np.isclose(mol.mass, 609.128, rtol=1e-4)  # in thermo branch

    def test_convert_single_comfile_to_xyz(
        self, tmpdir, gaussian_opt_inputfile
    ):
        """A single ``.com`` file converts to a readable ``.xyz`` file."""
        xyz_path = self._convert_copy(
            gaussian_opt_inputfile, tmpdir, "gaussian_opt.com", "xyz"
        )
        mol = Molecule.from_filepath(xyz_path)
        assert isinstance(mol, Molecule)
        assert mol.num_atoms == 14
        assert mol.chemical_formula == "C7H5ClO"
        with open(xyz_path, "r") as f:
            lines = f.readlines()
        assert len(lines) == 16
        assert lines[0] == "14\n"
        # NOTE(review): the disabled check below reuses 609.128, the mass
        # asserted for C19H12F3I2N3O in test_convert_single_logfile_to_com;
        # it is not the mass of C7H5ClO. Correct the value before enabling.
        # assert np.isclose(mol.mass, 609.128, rtol=1e-4) # in thermo branch

    def test_convert_single_sp_log_file_to_xyz(
        self, gaussian_benzene_opt_outfile, tmpdir
    ):
        """Benzene output converts to ``.xyz`` with the energy preserved."""
        xyz_path = self._convert_copy(
            gaussian_benzene_opt_outfile, tmpdir, "benzene_sp.log", "xyz"
        )
        mol = Molecule.from_filepath(xyz_path)
        assert isinstance(mol, Molecule)
        assert mol.num_atoms == 12
        assert mol.chemical_formula == "C6H6"
        assert mol.energy == -231.977725

    def test_convert_single_opt_log_file_to_xyz(
        self, gaussian_acetone_opt_outfile, tmpdir
    ):
        """Acetone output converts to ``.xyz`` with the energy preserved."""
        xyz_path = self._convert_copy(
            gaussian_acetone_opt_outfile, tmpdir, "acetone_opt.log", "xyz"
        )
        mol = Molecule.from_filepath(xyz_path)
        assert isinstance(mol, Molecule)
        assert mol.num_atoms == 10
        assert mol.chemical_formula == "C3H6O"
        assert mol.energy == -192.919416

    def test_convert_single_wbi_log_file_to_xyz(self, wbi_outputfile, tmpdir):
        """WBI output converts to ``.xyz`` with the energy preserved."""
        xyz_path = self._convert_copy(
            wbi_outputfile, tmpdir, "TS_5coord_XIII_wbi.log", "xyz"
        )
        mol = Molecule.from_filepath(xyz_path)
        assert isinstance(mol, Molecule)
        assert mol.num_atoms == 128
        assert mol.chemical_formula == "C51H63NNiO9P2Si"
        assert mol.energy == -5189.249707

    def test_convert_single_failed_modred_log_file_to_xyz(
        self, gaussian_failed_modred_outfile, tmpdir
    ):
        """A failed modred output still converts to a usable ``.xyz``."""
        xyz_path = self._convert_copy(
            gaussian_failed_modred_outfile,
            tmpdir,
            "cage_free_failed_modred.log",
            "xyz",
        )
        mol = Molecule.from_filepath(xyz_path)
        assert isinstance(mol, Molecule)
        assert mol.num_atoms == 10
        assert mol.chemical_formula == "C3H4O3"
        assert mol.energy == -341.883317

    def test_convert_single_failed_oniom_log_file_to_xyz(
        self, gaussian_oniom_outputfile, tmpdir
    ):
        """A failed ONIOM output still converts to a usable ``.xyz``."""
        xyz_path = self._convert_copy(
            gaussian_oniom_outputfile, tmpdir, "cation_failed_scan.log", "xyz"
        )
        mol = Molecule.from_filepath(xyz_path)
        assert isinstance(mol, Molecule)
        assert mol.num_atoms == 483
        assert mol.chemical_formula == "C155H180CuN53O82P12"
        assert mol.energy == -5300.535128
class TestPDBFile:
    """Tests for ``PDBFile`` parsing and writing, plus CDXML/CDX conversion."""

    # -------------------------------------------------------------------
    # Initialisation and representation
    # -------------------------------------------------------------------
    def test_init_stores_filename(self, single_model_pdb_file):
        """The filename passed to the constructor is kept unchanged."""
        pdb = PDBFile(filename=single_model_pdb_file)
        assert pdb.filename == single_model_pdb_file

    def test_repr(self, single_model_pdb_file):
        """``repr`` has the form ``PDBFile(<filename>)``."""
        pdb = PDBFile(filename=single_model_pdb_file)
        assert repr(pdb) == f"PDBFile({single_model_pdb_file})"

    def test_str(self, single_model_pdb_file):
        """``str`` mentions both the object kind and the filename."""
        pdb = PDBFile(filename=single_model_pdb_file)
        text = str(pdb)
        assert "PDBFile object" in text
        assert single_model_pdb_file in text

    def test_filepath_resolves_absolute(self, single_model_pdb_file):
        """``filepath`` is an absolute path."""
        pdb = PDBFile(filename=single_model_pdb_file)
        assert os.path.isabs(pdb.filepath)

    # -------------------------------------------------------------------
    # Raw line access
    # -------------------------------------------------------------------
    def test_raw_lines_preserves_column_whitespace(
        self, single_model_pdb_file
    ):
        """raw_lines must not strip leading spaces (fixed-width PDB format)."""
        pdb = PDBFile(filename=single_model_pdb_file)
        for line in pdb.raw_lines:
            if line.startswith(("HETATM", "ATOM")):
                assert len(line) >= 54  # at least through z-coordinate

    def test_raw_lines_strips_trailing_newlines(self, single_model_pdb_file):
        """No raw line keeps its trailing newline."""
        pdb = PDBFile(filename=single_model_pdb_file)
        for line in pdb.raw_lines:
            assert not line.endswith("\n")

    # -------------------------------------------------------------------
    # Single-model parsing
    # -------------------------------------------------------------------
    def test_molecule_returns_molecule_object(self, single_model_pdb_file):
        """``molecule`` yields a ``Molecule`` instance."""
        pdb = PDBFile(filename=single_model_pdb_file)
        mol = pdb.molecule
        assert isinstance(mol, Molecule)

    def test_num_atoms(self, single_model_pdb_file):
        """The single-model fixture contains three atoms."""
        pdb = PDBFile(filename=single_model_pdb_file)
        assert pdb.num_atoms == 3

    def test_symbols(self, single_model_pdb_file):
        """Element symbols are read in file order."""
        pdb = PDBFile(filename=single_model_pdb_file)
        assert pdb.molecule.symbols == ["O", "H", "H"]

    def test_positions(self, single_model_pdb_file):
        """Cartesian coordinates match the fixture values."""
        pdb = PDBFile(filename=single_model_pdb_file)
        expected = np.array(
            [[0.0, 0.0, 0.0], [0.96, 0.0, 0.0], [-0.24, 0.93, 0.0]]
        )
        assert np.allclose(pdb.molecule.positions, expected)

    def test_atom_names(self, single_model_pdb_file):
        """Atom names are exposed on the molecule."""
        pdb = PDBFile(filename=single_model_pdb_file)
        assert pdb.molecule.atom_names == ["O", "H1", "H2"]

    def test_residue_names(self, single_model_pdb_file):
        """Residue names are exposed on the molecule."""
        pdb = PDBFile(filename=single_model_pdb_file)
        assert pdb.molecule.residue_names == ["HOH", "HOH", "HOH"]

    def test_residue_numbers(self, single_model_pdb_file):
        """Residue numbers are exposed on the molecule as integers."""
        pdb = PDBFile(filename=single_model_pdb_file)
        assert pdb.molecule.residue_numbers == [7, 7, 7]

    def test_chain_ids(self, single_model_pdb_file):
        """Chain identifiers are exposed on the molecule."""
        pdb = PDBFile(filename=single_model_pdb_file)
        assert pdb.molecule.chain_ids == ["A", "A", "A"]

    def test_record_types(self, single_model_pdb_file):
        """Record types are exposed on the molecule."""
        pdb = PDBFile(filename=single_model_pdb_file)
        assert pdb.molecule.record_type == ["HETATM", "HETATM", "HETATM"]

    def test_info_dict_populated(self, single_model_pdb_file):
        """``Molecule.info`` carries every PDB-specific key."""
        pdb = PDBFile(filename=single_model_pdb_file)
        info = pdb.molecule.info
        assert "atom_name" in info
        assert "residue_name" in info
        assert "residue_number" in info
        assert "chain_id" in info
        assert "record_type" in info

    # -------------------------------------------------------------------
    # Multi-model parsing
    # -------------------------------------------------------------------
    def test_get_molecules_all(self, multi_model_pdb_file):
        """Index ``":"`` with ``return_list=True`` returns both models."""
        pdb = PDBFile(filename=multi_model_pdb_file)
        models = pdb.get_molecules(index=":", return_list=True)
        assert isinstance(models, list)
        assert len(models) == 2

    def test_get_molecules_first(self, multi_model_pdb_file):
        """Index ``"1"`` selects the chain-A model."""
        pdb = PDBFile(filename=multi_model_pdb_file)
        mol = pdb.get_molecules(index="1")
        assert mol.chain_ids == ["A", "A"]

    def test_get_molecules_last(self, multi_model_pdb_file):
        """Index ``"-1"`` selects the chain-B model."""
        pdb = PDBFile(filename=multi_model_pdb_file)
        mol = pdb.get_molecules(index="-1")
        assert mol.chain_ids == ["B", "B"]
        assert np.allclose(mol.positions[0], np.array([1.5, 2.5, 3.5]))

    def test_molecule_property_returns_last_model(self, multi_model_pdb_file):
        """``molecule`` on a multi-model file gives the chain-B model."""
        pdb = PDBFile(filename=multi_model_pdb_file)
        assert pdb.molecule.chain_ids == ["B", "B"]

    def test_return_list_wraps_single(self, multi_model_pdb_file):
        """``return_list=True`` wraps a single selected model in a list."""
        pdb = PDBFile(filename=multi_model_pdb_file)
        result = pdb.get_molecules(index="-1", return_list=True)
        assert isinstance(result, list)
        assert len(result) == 1

    # -------------------------------------------------------------------
    # Element inference
    # -------------------------------------------------------------------
    def test_blank_element_columns_infer_two_letter_elements(
        self, blank_element_pdb_file
    ):
        """Symbols are inferred when the fixture's element columns are blank."""
        pdb = PDBFile(filename=blank_element_pdb_file)
        mol = pdb.molecule
        assert list(mol.symbols) == ["Fe", "Zn", "Cl", "C"]

    def test_infer_element_fe(self):
        """``FE`` resolves to iron."""
        assert PDBFile._infer_element_from_atom_name("FE") == "Fe"

    def test_infer_element_zn(self):
        """``ZN`` resolves to zinc."""
        assert PDBFile._infer_element_from_atom_name("ZN") == "Zn"

    def test_infer_element_cl(self):
        """``CL`` resolves to chlorine."""
        assert PDBFile._infer_element_from_atom_name("CL") == "Cl"

    def test_infer_element_ca_is_carbon(self):
        """CA is a biomolecular atom label (C-alpha), should resolve to C."""
        assert PDBFile._infer_element_from_atom_name("CA") == "C"

    def test_infer_element_leading_digit(self):
        """Leading digits should be stripped: 1H -> H."""
        assert PDBFile._infer_element_from_atom_name("1H") == "H"

    def test_infer_element_empty_raises(self):
        """An empty atom name raises ``ValueError``."""
        with pytest.raises(ValueError, match="Unable to infer"):
            PDBFile._infer_element_from_atom_name("")

    def test_infer_element_digits_only_raises(self):
        """An all-digit atom name raises ``ValueError``."""
        with pytest.raises(ValueError, match="Unable to infer"):
            PDBFile._infer_element_from_atom_name("123")

    # -------------------------------------------------------------------
    # Error handling
    # -------------------------------------------------------------------
    def test_empty_file_raises_value_error(self, empty_pdb_file):
        """``get_molecules`` raises when there are no atom records."""
        pdb = PDBFile(filename=empty_pdb_file)
        with pytest.raises(ValueError, match="No ATOM/HETATM records"):
            pdb.get_molecules()

    def test_molecule_property_raises_on_empty(self, empty_pdb_file):
        """``molecule`` raises when there are no atom records."""
        pdb = PDBFile(filename=empty_pdb_file)
        with pytest.raises(ValueError, match="No ATOM/HETATM records"):
            _ = pdb.molecule

    # -------------------------------------------------------------------
    # Writing
    # -------------------------------------------------------------------
    def test_write_creates_file(self, single_model_pdb_file, tmpdir):
        """``PDBFile.write`` produces a non-empty file."""
        pdb = PDBFile(filename=single_model_pdb_file)
        mol = pdb.molecule
        output_path = os.path.join(str(tmpdir), "output.pdb")
        PDBFile.write(mol, output_path)
        assert os.path.exists(output_path)
        assert os.path.getsize(output_path) > 0

    def test_write_round_trip_preserves_atom_count(
        self, single_model_pdb_file, tmpdir
    ):
        """Write then re-read should give the same number of atoms."""
        pdb = PDBFile(filename=single_model_pdb_file)
        mol = pdb.molecule
        output_path = os.path.join(str(tmpdir), "round_trip.pdb")
        PDBFile.write(mol, output_path)
        pdb2 = PDBFile(filename=output_path)
        assert pdb2.num_atoms == pdb.num_atoms

    def test_write_round_trip_preserves_symbols(
        self, single_model_pdb_file, tmpdir
    ):
        """Write then re-read should give the same element symbols."""
        pdb = PDBFile(filename=single_model_pdb_file)
        mol = pdb.molecule
        output_path = os.path.join(str(tmpdir), "round_trip.pdb")
        PDBFile.write(mol, output_path)
        pdb2 = PDBFile(filename=output_path)
        assert pdb2.molecule.symbols == mol.symbols

    def test_write_round_trip_preserves_positions(
        self, single_model_pdb_file, tmpdir
    ):
        """Write then re-read should give the same positions to 1e-3."""
        pdb = PDBFile(filename=single_model_pdb_file)
        mol = pdb.molecule
        output_path = os.path.join(str(tmpdir), "round_trip.pdb")
        PDBFile.write(mol, output_path)
        pdb2 = PDBFile(filename=output_path)
        assert np.allclose(pdb2.molecule.positions, mol.positions, atol=1e-3)

    def test_write_output_contains_atom_records(
        self, single_model_pdb_file, tmpdir
    ):
        """The written file has atom records and an ``END`` marker."""
        pdb = PDBFile(filename=single_model_pdb_file)
        mol = pdb.molecule
        output_path = os.path.join(str(tmpdir), "records.pdb")
        PDBFile.write(mol, output_path)
        with open(output_path, "r") as f:
            content = f.read()
        assert "HETATM" in content or "ATOM" in content
        assert "END" in content

    # -------------------------------------------------------------------
    # Backward compatibility (Molecule delegates to PDBFile)
    # -------------------------------------------------------------------
    def test_molecule_from_filepath_uses_pdbfile(self, single_model_pdb_file):
        """Molecule.from_filepath for .pdb should produce identical results."""
        mol_via_molecule = Molecule.from_filepath(single_model_pdb_file)
        pdb = PDBFile(filename=single_model_pdb_file)
        mol_via_pdbfile = pdb.molecule
        assert mol_via_molecule.symbols == mol_via_pdbfile.symbols
        assert np.allclose(
            mol_via_molecule.positions, mol_via_pdbfile.positions
        )
        assert mol_via_molecule.atom_names == mol_via_pdbfile.atom_names
        assert mol_via_molecule.residue_names == mol_via_pdbfile.residue_names
        assert (
            mol_via_molecule.residue_numbers == mol_via_pdbfile.residue_numbers
        )
        assert mol_via_molecule.chain_ids == mol_via_pdbfile.chain_ids

    def test_pdb_infer_pdb_element(self):
        """Spot-check element inference for a metal and a C-alpha label."""
        assert PDBFile._infer_element_from_atom_name("FE") == "Fe"
        assert PDBFile._infer_element_from_atom_name("CA") == "C"

    def test_pdb_parse_pdb_models(self, multi_model_pdb_file):
        """``_parse_models`` finds both models of the multi-model file."""
        models = PDBFile(multi_model_pdb_file)._parse_models()
        assert len(models) == 2

    def test_pdb_molecule_from_pdb_atom_lines(self):
        """A single HETATM record parses into a one-atom molecule."""
        # PDB records are fixed-width, so the line is built field by field
        # with explicit widths: record name 1-6, serial 7-11, atom name
        # 13-16, residue 18-20, chain 22, residue number 23-26, x/y/z
        # 31-54, occupancy 55-60, B-factor 61-66, element 77-78.
        atom_line = (
            f"{'HETATM':<6}{1:>5} {' O':<4} {'HOH'} {'A'}{7:>4}{'':4}"
            f"{0.0:8.3f}{0.0:8.3f}{0.0:8.3f}"
            f"{1.0:6.2f}{0.0:6.2f}{'O':>12}"
        )
        mol = PDBFile._get_molecule_from_atom_lines([atom_line])
        assert mol.symbols == ["O"]

    # ------------------------------------------------------------------
    # CDXML / CDX conversion tests
    # ------------------------------------------------------------------
    # NOTE(review): the tests below exercise FileConverter rather than
    # PDBFile; they may belong in TestConverter - confirm and move.
    def test_convert_single_cdxml_to_xyz(
        self, tmpdir, single_molecule_cdxml_file_methane
    ):
        """A single-molecule ``.cdxml`` converts to one ``.xyz`` file."""
        tmp_path = os.path.join(tmpdir, "methane.cdxml")
        copy(single_molecule_cdxml_file_methane, tmp_path)
        FileConverter(filename=tmp_path, output_filetype="xyz").convert_files()

        # splitext swaps only the file's own extension, never a matching
        # substring elsewhere in the temporary path
        output = os.path.splitext(tmp_path)[0] + ".xyz"
        assert os.path.exists(output)
        mol = Molecule.from_filepath(output)
        assert isinstance(mol, Molecule)
        assert mol.num_atoms == 5
        assert mol.chemical_formula == "CH4"

    def test_convert_single_cdxml_to_com(
        self, tmpdir, single_molecule_cdxml_file_benzene
    ):
        """A single-molecule ``.cdxml`` converts to one ``.com`` file."""
        tmp_path = os.path.join(tmpdir, "benzene.cdxml")
        copy(single_molecule_cdxml_file_benzene, tmp_path)
        FileConverter(filename=tmp_path, output_filetype="com").convert_files()

        output = os.path.splitext(tmp_path)[0] + ".com"
        assert os.path.exists(output)
        mol = Molecule.from_filepath(output)
        assert isinstance(mol, Molecule)
        assert mol.num_atoms == 12
        assert mol.chemical_formula == "C6H6"

    def test_convert_single_cdx_to_xyz(
        self, tmpdir, single_molecule_cdx_file_imidazole
    ):
        """A single-molecule binary ``.cdx`` converts to one ``.xyz`` file."""
        tmp_path = os.path.join(tmpdir, "imidazole.cdx")
        copy(single_molecule_cdx_file_imidazole, tmp_path)
        FileConverter(filename=tmp_path, output_filetype="xyz").convert_files()

        output = os.path.splitext(tmp_path)[0] + ".xyz"
        assert os.path.exists(output)
        mol = Molecule.from_filepath(output)
        assert isinstance(mol, Molecule)
        assert mol.num_atoms == 21
        assert mol.chemical_formula == "C8H10N2O"

    def test_convert_multi_molecule_cdxml_to_xyz_splits_files(
        self, tmpdir, multi_molecule_cdxml_file
    ):
        """A two-molecule ``.cdxml`` is split into numbered ``.xyz`` files."""
        # Multi-molecule cdxml should produce basename_1.xyz, basename_2.xyz
        tmp_path = os.path.join(tmpdir, "two_molecules.cdxml")
        copy(multi_molecule_cdxml_file, tmp_path)
        FileConverter(filename=tmp_path, output_filetype="xyz").convert_files()

        output_1 = os.path.join(tmpdir, "two_molecules_1.xyz")
        output_2 = os.path.join(tmpdir, "two_molecules_2.xyz")
        assert os.path.exists(output_1)
        assert os.path.exists(output_2)

        mol1 = Molecule.from_filepath(output_1)
        assert isinstance(mol1, Molecule)
        assert mol1.chemical_formula == "CH2O"

        mol2 = Molecule.from_filepath(output_2)
        assert isinstance(mol2, Molecule)
        assert mol2.chemical_formula == "N2"
        assert mol2.num_atoms == 2

    def test_convert_cdxml_folder_to_xyz(self, tmpdir, chemdraw_directory):
        """Folder conversion writes one ``.xyz`` per molecule."""
        tmp_cdxml_folder = os.path.join(tmpdir, "chemdraw")
        copytree(chemdraw_directory, tmp_cdxml_folder)
        FileConverter(
            directory=tmp_cdxml_folder, type="cdxml", output_filetype="xyz"
        ).convert_files()

        # Single-molecule cdxml files produce basename.xyz
        for stem in ("benzene", "methane", "complex_molecule"):
            assert os.path.exists(
                os.path.join(tmp_cdxml_folder, stem + ".xyz")
            )

        # two_molecules.cdxml contains 2 molecules -> split into _1.xyz
        # and _2.xyz
        assert os.path.exists(
            os.path.join(tmp_cdxml_folder, "two_molecules_1.xyz")
        )
        assert os.path.exists(
            os.path.join(tmp_cdxml_folder, "two_molecules_2.xyz")
        )

    def test_convert_cdxml_folder_to_com(self, tmpdir, chemdraw_directory):
        """Folder conversion writes one ``.com`` per molecule."""
        tmp_cdxml_folder = os.path.join(tmpdir, "chemdraw")
        copytree(chemdraw_directory, tmp_cdxml_folder)
        FileConverter(
            directory=tmp_cdxml_folder, type="cdxml", output_filetype="com"
        ).convert_files()

        # Single-molecule cdxml files produce basename.com
        for stem in ("benzene", "methane", "complex_molecule"):
            assert os.path.exists(
                os.path.join(tmp_cdxml_folder, stem + ".com")
            )

        # two_molecules.cdxml contains 2 molecules -> split into _1.com
        # and _2.com
        assert os.path.exists(
            os.path.join(tmp_cdxml_folder, "two_molecules_1.com")
        )
        assert os.path.exists(
            os.path.join(tmp_cdxml_folder, "two_molecules_2.com")
        )