Source code for tests.test_utils

import subprocess

import numpy as np
import pytest

from chemsmart.io.gaussian.input import Gaussian16Input
from chemsmart.io.molecules.structure import CoordinateBlock, Molecule
from chemsmart.utils.io import (
    clean_duplicate_structure,
    clean_label,
    convert_string_indices_to_pymol_id_indices,
    create_molecule_list,
    line_of_all_integers,
    line_of_integer_followed_by_floats,
)
from chemsmart.utils.utils import (
    cmp_with_ignore,
    content_blocks_by_paragraph,
    convert_string_index_from_1_based_to_0_based,
    deduplicate_string_keywords,
    get_list_from_string_range,
    get_range_from_list,
    is_float,
    iterative_compare,
    naturally_sorted,
    return_objects_and_indices_from_string_index,
    run_command,
    str_indices_range_to_list,
    string2index_1based,
)


class TestUtils:
    """Tests for general-purpose helpers imported from ``chemsmart.utils.utils``."""

    def test_is_float(self):
        """Decimal strings are accepted; integer strings and words are not."""
        for text in ("-1.0", "1.9", "-0.1"):
            assert is_float(text)
        for text in ("-1", "1", "abc"):
            assert not is_float(text)

    def test_content_blocking(self, gaussian_opt_inputfile):
        """Split a Gaussian input into paragraphs and parse the third one.

        ``gaussian_opt_inputfile`` is a fixture defined outside this file.
        """
        parsed_input = Gaussian16Input(filename=gaussian_opt_inputfile)
        paragraphs = content_blocks_by_paragraph(parsed_input.contents)
        assert len(paragraphs) == 3

        # The third paragraph is handed to CoordinateBlock as one string.
        block = CoordinateBlock(coordinate_block="\n".join(paragraphs[2]))
        assert block.molecule.empirical_formula == "C7H5ClO"
        assert block.molecule.translation_vectors is None
        first_atom_matches = np.isclose(
            block.molecule.positions[0],
            [-0.544821, -1.169457, 0.000127],
            atol=1e-4,
        )
        assert all(first_atom_matches)

    def test_cmp_with_ignore_string(
        self,
        gaussian_written_sp_from_nhc_singlet_log_with_custom_basis_from_api_file,
        gaussian_written_sp_from_nhc_singlet_log_with_custom_basis_from_api_file_v2,
    ):
        """``cmp_with_ignore`` accepts a single string as ``ignore_string``."""
        first_file = gaussian_written_sp_from_nhc_singlet_log_with_custom_basis_from_api_file
        second_file = gaussian_written_sp_from_nhc_singlet_log_with_custom_basis_from_api_file_v2
        assert cmp_with_ignore(
            first_file, second_file, ignore_string="Version"
        )

    def test_cmp_with_ignore_list(
        self,
        gaussian_written_opt_file,
        gaussian_written_opt_file_with_route,
    ):
        """``cmp_with_ignore`` accepts a list of strings as ``ignore_string``."""
        ignored = ["#", "job"]
        assert cmp_with_ignore(
            gaussian_written_opt_file,
            gaussian_written_opt_file_with_route,
            ignore_string=ignored,
        )

    def test_get_list_from_string_range(self):
        """Comma/hyphen range strings expand to explicit integer lists."""
        grouped = [1, 2, 3, *range(28, 32), *range(34, 42)]
        cases = (
            ("1-3", [1, 2, 3]),
            ("1,3", [1, 3]),
            ("1,2,3", [1, 2, 3]),
            ("1-3,5", [1, 2, 3, 5]),
            ("1-3,5-7", [1, 2, 3, 5, 6, 7]),
            ("1-3,5-7,10", [1, 2, 3, 5, 6, 7, 10]),
            ("1,2,3,5-7,10", [1, 2, 3, 5, 6, 7, 10]),
            # Same specification with and without surrounding brackets.
            ("[1-3,28-31,34-41]", grouped),
            ("1-3,28-31,34-41", grouped),
        )
        for text, expected in cases:
            assert get_list_from_string_range(text) == expected

    def test_get_indices_from_string(self):
        """Test the conversion of 1-based string indices to 0-based indices."""
        convert = convert_string_index_from_1_based_to_0_based
        objects = ["a", "b", "c", "d", "e", "f", "g", "h"]

        # Standard Python slicing syntax: the result indexes the list directly.
        assert objects[convert("1:3")] == ["a", "b"]

        # Comma lists and user-defined inclusive ranges give iterables of
        # positions.
        for spec, expected in (
            ("1,2,4", ["a", "b", "d"]),
            ("1-3", ["a", "b", "c"]),
            ("[1-3]", ["a", "b", "c"]),
        ):
            assert [objects[i] for i in convert(spec)] == expected

        assert objects[convert("2:3")] == ["b"]

        # A lone index (including a negative one) selects a single element.
        assert [objects[convert("1")]] == ["a"]
        assert [objects[convert("-1")]] == ["h"]

        # "0" is rejected because 1-based indices are expected.
        with pytest.raises(ValueError):
            convert("0")

    def test_iterative_compare_list_of_elements(self):
        """Repeated integers collapse to the first occurrence of each."""
        base = [1, 2, 3, 4, 5]
        assert iterative_compare(base) == base
        for repeats in (2, 3):
            assert iterative_compare(base * repeats) == base

    def test_iterative_compare_list_of_lists(self):
        """Repeated inner lists collapse to the first occurrence of each."""
        base = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
        assert iterative_compare(base) == base
        for repeats in (2, 3):
            # Fresh inner lists each time, so duplicates are equal but distinct.
            repeated = [list(row) for _ in range(repeats) for row in base]
            assert iterative_compare(repeated) == base

    def test_iterative_compare_list_of_tuples(self):
        """Repeated tuples collapse to the first occurrence of each."""
        base = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
        assert iterative_compare(base) == base
        for repeats in (2, 3):
            assert iterative_compare(base * repeats) == base

    def test_iterative_compare_list_of_string(self):
        """Repeated strings collapse to the first occurrence of each."""
        base = ["a", "b", "c", "d", "e"]
        assert iterative_compare(base) == base
        for repeats in (2, 3):
            assert iterative_compare(base * repeats) == base

    def test_iterative_compare_list_of_dicts(self):
        """Equal dicts are treated as duplicates; differing dicts are kept."""
        originals = [
            {"a": 1, "b": 2, "c": 3},
            {"d": 4, "e": 5, "f": 6},
            {"g": 7, "h": 8, "i": 9},
        ]
        assert iterative_compare(originals) == originals

        # Equal-valued but distinct dict objects placed ahead of the originals.
        first_copies = [dict(entry) for entry in originals]
        assert iterative_compare(first_copies + originals) == originals

        second_copies = [dict(entry) for entry in originals]
        assert (
            iterative_compare(second_copies + originals + first_copies)
            == originals
        )

        # Same keys, different values: nothing here duplicates the originals.
        shifted = [
            {"a": 11, "b": 12, "c": 13},
            {"d": 14, "e": 15, "f": 16},
            {"g": 17, "h": 18, "i": 19},
        ]
        assert len(iterative_compare(shifted + originals)) == 6
        assert len(iterative_compare([originals[0], shifted[0]])) == 2
        assert (
            len(iterative_compare([originals[0], shifted[0], originals[0]]))
            == 2
        )

    def test_get_range_from_list(self):
        """Runs of consecutive integers are reported as ``"start-end"`` strings."""
        cases = (
            ([1, 2, 3, 5, 6, 7], ["1-3", "5-7"]),
            ([1, 34, 45, 46, 48, 50], ["1", "34", "45-46", "48", "50"]),
            ([28, 45, 60, 89], ["28", "45", "60", "89"]),
            (
                [
                    18, 19, 20, 21, 23, 25, 27, 29, 30, 31, 33,
                    35, 37, 39, 41, 43, 46, 47, 48, 49, 51, 53,
                    55, 57, 61, 62, 63, 64, 66, 68, 70, 72,
                ],
                [
                    "18-21", "23", "25", "27", "29-31", "33", "35",
                    "37", "39", "41", "43", "46-49", "51", "53",
                    "55", "57", "61-64", "66", "68", "70", "72",
                ],
            ),
        )
        for numbers, expected in cases:
            # Named ``ranges`` so the builtin ``range`` is not shadowed.
            ranges = get_range_from_list(numbers)
            assert ranges == expected
class TestGetListFromStringRange:
    """Keyword-argument checks for the two string-to-list range helpers."""

    def test_get_list_from_string_range(self):
        """Bracketed, unbracketed and comma-only specifications expand fully."""
        grouped = [1, 2, 3, *range(28, 32), *range(34, 42)]
        cases = (
            ("[1-3,28-31,34-41]", grouped),
            ("1-3,28-31,34-41", grouped),
            ("1,3,33,37,42,43,44,45", [1, 3, 33, 37, 42, 43, 44, 45]),
        )
        for text, expected in cases:
            expanded = get_list_from_string_range(string_of_range=text)
            assert expanded == expected

    def test_get_list_from_string(self):
        """Colon ranges leave out the stop value; hyphen ranges include it."""
        cases = (
            ("1:9", list(range(1, 9))),
            ("1,2,4", [1, 2, 4]),
            ("1-9", list(range(1, 10))),
            ("[1-9]", list(range(1, 10))),
            ("2:3", [2]),
        )
        for text, expected in cases:
            expanded = str_indices_range_to_list(str_indices=text)
            assert expanded == expected
class TestString2Index1Based:
    """Checks for ``string2index_1based`` on integers, slices and bad input."""

    def test_single_integer(self):
        """A lone positive integer string comes back one lower."""
        for text, expected in (("1", 0), ("5", 4), ("10", 9)):
            assert string2index_1based(text) == expected

    def test_slice(self):
        """Both start and stop of ``a:b`` come back one lower than written."""
        converted = string2index_1based("1:5")
        assert isinstance(converted, slice)
        # Applied to [0, 1, 2, 3, 4] the slice keeps the first four items.
        assert list(range(5))[converted] == [0, 1, 2, 3]
        assert (converted.start, converted.stop, converted.step) == (
            0,
            4,
            None,
        )

        converted = string2index_1based("3:10")
        assert isinstance(converted, slice)
        assert (converted.start, converted.stop, converted.step) == (
            2,
            9,
            None,
        )

    def test_slice_with_step(self):
        """The step is passed through while start and stop are shifted."""
        for text, expected in (
            ("1:10:2", (0, 9, 2)),
            ("2:8:3", (1, 7, 3)),
        ):
            converted = string2index_1based(text)
            assert isinstance(converted, slice)
            assert (
                converted.start,
                converted.stop,
                converted.step,
            ) == expected

    def test_open_ended_slice(self):
        """Omitted slice fields are returned as ``None``."""
        for text, expected in (
            ("5:", (4, None, None)),
            (":5", (None, 4, None)),
            (":", (None, None, None)),
        ):
            converted = string2index_1based(text)
            assert isinstance(converted, slice)
            assert (
                converted.start,
                converted.stop,
                converted.step,
            ) == expected

    def test_invalid_inputs(self):
        """Non-numeric text in any position raises ``ValueError``."""
        # A bare word, a slice of letters, and a slice with one bad field.
        for text in ("invalid", "a:b", "1:x:2"):
            with pytest.raises(ValueError):
                string2index_1based(text)
class TestParseIndexSpecification:
    """Tests for the new unified parse_index_specification function."""

    def test_ase_style_single_indices(self):
        """Positive indices are shifted down by one; negatives are unchanged."""
        from chemsmart.utils.utils import parse_index_specification

        for spec, expected in (("1", 0), ("5", 4), ("-1", -1), ("-2", -2)):
            assert parse_index_specification(spec) == expected

    def test_ase_style_slices(self):
        """Colon specifications are returned as ``slice`` objects."""
        from chemsmart.utils.utils import parse_index_specification

        # Basic slice and the select-everything slice: type is checked too.
        for spec, expected in (
            ("1:5", slice(0, 4)),
            (":", slice(None, None)),
        ):
            parsed = parse_index_specification(spec)
            assert isinstance(parsed, slice)
            assert parsed == expected

        # Open-ended slices, then slices carrying a step.
        for spec, expected in (
            ("5:", slice(4, None)),
            (":5", slice(None, 4)),
            ("::2", slice(None, None, 2)),
            ("1:10:2", slice(0, 9, 2)),
        ):
            assert parse_index_specification(spec) == expected

    def test_free_format_comma_separated(self):
        """Comma-separated indices become a list of 0-based positions."""
        from chemsmart.utils.utils import parse_index_specification

        for spec, expected in (
            ("1,3,5", [0, 2, 4]),
            ("1,2,4", [0, 1, 3]),
        ):
            assert parse_index_specification(spec) == expected

    def test_free_format_with_negative_indices(self):
        """Negative entries in a comma list are returned as written."""
        from chemsmart.utils.utils import parse_index_specification

        for spec, expected in (
            ("1,-1", [0, -1]),
            ("1,3,-1", [0, 2, -1]),
            ("2,-2", [1, -2]),
            ("-1,-2", [-1, -2]),
        ):
            assert parse_index_specification(spec) == expected

    def test_free_format_hyphen_ranges(self):
        """Hyphen ranges are inclusive, with or without brackets."""
        from chemsmart.utils.utils import parse_index_specification

        for spec, expected in (
            ("1-5", [0, 1, 2, 3, 4]),
            ("2-4", [1, 2, 3]),
            ("[1-5]", [0, 1, 2, 3, 4]),
        ):
            assert parse_index_specification(spec) == expected

    def test_free_format_mixed(self):
        """Ranges, single indices and negatives can share one specification."""
        from chemsmart.utils.utils import parse_index_specification

        for spec, expected in (
            # Ranges combined with individual indices.
            ("1-3,5", [0, 1, 2, 4]),
            ("1-3,5,7-9", [0, 1, 2, 4, 6, 7, 8]),
            # The same, with a trailing negative index.
            ("1-2,-1", [0, 1, -1]),
            ("1,3-5,-1", [0, 2, 3, 4, -1]),
        ):
            assert parse_index_specification(spec) == expected

    def test_invalid_inputs(self):
        """Index 0 raises ``ValueError``, alone or inside a comma list."""
        from chemsmart.utils.utils import parse_index_specification

        for spec in ("0", "1,0,3"):
            with pytest.raises(ValueError):
                parse_index_specification(spec)

    def test_with_actual_lists(self):
        """Apply parsed specifications to a real list of eight letters."""
        from chemsmart.utils.utils import parse_index_specification

        letters = ["a", "b", "c", "d", "e", "f", "g", "h"]

        # Single indices and slices can index the list directly.
        for spec, expected in (
            ("1", "a"),
            ("-1", "h"),
            ("1:4", ["a", "b", "c"]),
            ("1:7:2", ["a", "c", "e"]),
            ("1:8:2", ["a", "c", "e", "g"]),
            ("::2", ["a", "c", "e", "g"]),
            (":", letters),
        ):
            assert letters[parse_index_specification(spec)] == expected

        # Free-format specifications yield positions to look up one by one.
        for spec, expected in (
            ("1,3,5", ["a", "c", "e"]),
            ("1-3", ["a", "b", "c"]),
            ("1,-1", ["a", "h"]),
            ("1,3,-1", ["a", "c", "h"]),
        ):
            positions = parse_index_specification(spec)
            assert [letters[i] for i in positions] == expected

    def test_duplicate_detection_enabled(self):
        """With ``allow_duplicates=False``, "5,-1" of 5 items is an overlap."""
        from chemsmart.utils.utils import parse_index_specification

        with pytest.raises(ValueError, match="Index overlap detected"):
            parse_index_specification(
                "5,-1", total_count=5, allow_duplicates=False
            )

    def test_boundary_checking_enabled(self):
        """With ``allow_out_of_range=False``, indices beyond 10 items raise."""
        from chemsmart.utils.utils import parse_index_specification

        cases = (
            # Positive index past the end.
            ("11", "Index 11 is out of range.*10 structures"),
            # Negative index past the start.
            ("-11", "Negative index -11 is out of range.*10 structures"),
            # Range whose upper end runs past the end.
            ("8-11", "Index 11 is out of range.*10 structures"),
        )
        for spec, pattern in cases:
            with pytest.raises(ValueError, match=pattern):
                parse_index_specification(
                    spec, total_count=10, allow_out_of_range=False
                )

    def test_parse_index_duplicate_detection_disabled(self):
        """With ``allow_duplicates=False``, overlapping indices raise."""
        from chemsmart.utils.utils import parse_index_specification

        # "4" and "-2" overlap in a 5-item list, as do "1" and "-5".
        for spec in ("1,4,-2", "1,-5"):
            with pytest.raises(ValueError, match="Index overlap detected"):
                parse_index_specification(
                    spec, total_count=5, allow_duplicates=False
                )

    def test_parse_index_duplicate_detection_enabled(self):
        """With ``allow_duplicates=True``, overlapping indices are all kept."""
        from chemsmart.utils.utils import parse_index_specification

        for spec, expected in (
            # After normalization: [1-1=0, 4-1=3, 5+(-2)=3]
            ("1,4,-2", [0, 3, 3]),
            # After normalization: [1-1=0, 5+(-5)=0]
            ("1,-5", [0, 0]),
        ):
            normalized = parse_index_specification(
                spec, total_count=5, allow_duplicates=True
            )
            assert normalized == expected

    def test_parse_index_boundary_detection_disabled(self):
        """With ``allow_out_of_range=False``, out-of-range input raises."""
        from chemsmart.utils.utils import parse_index_specification

        # Too-large positive, too-small negative, and an overrunning range.
        for spec in ("8", "-6", "3-8"):
            with pytest.raises(ValueError, match="out of range"):
                parse_index_specification(
                    spec, total_count=5, allow_out_of_range=False
                )

    def test_parse_index_boundary_detection_enabled(self):
        """With ``allow_out_of_range=True``, out-of-range entries are dropped."""
        from chemsmart.utils.utils import parse_index_specification

        # 8 is beyond the 5 items and is filtered; 3 and 2 stay, as 2 and 1.
        kept = parse_index_specification(
            "3,8,2", total_count=5, allow_out_of_range=True
        )
        assert kept == [2, 1]

        # When nothing survives the filtering, an error is raised instead.
        with pytest.raises(
            ValueError, match="All specified indices are out of range"
        ):
            parse_index_specification(
                "8,9,10", total_count=5, allow_out_of_range=True
            )
class TestIOUtilities:
    """Tests for the helpers imported from ``chemsmart.utils.io``."""

    def test_clean_duplicate_structure(self):
        """The input list is modified in place and loses the repeated array."""
        rows = ([1, 2, 3], [4, 5, 6], [4, 5, 6])
        structures = [np.array(row) for row in rows]
        clean_duplicate_structure(structures)
        assert len(structures) == 2

    def test_create_molecule_list(self):
        """Two single-atom frames become two ``Molecule`` objects with energies."""
        frame_energies = [1.0, 2.0]
        zero_force = [np.array([0, 0, 0])]
        built = create_molecule_list(
            [np.array([[0, 0, 0]]), np.array([[1, 1, 1]])],  # orientations
            [None, None],  # orientations_pbc
            frame_energies,  # energies
            [zero_force, [np.array([0, 0, 0])]],  # forces
            ["H"],  # symbols
            0,  # charge
            1,  # multiplicity
            None,  # frozen_atoms
            [False],  # pbc_conditions
        )
        assert len(built) == 2
        assert isinstance(built[0], Molecule)
        assert isinstance(built[1], Molecule)
        assert built[0].energy == 1.0
        assert built[1].energy == 2.0

    @pytest.mark.parametrize(
        "line,allow_sign,expected",
        [
            ("0", True, True),
            ("1 2 3", True, True),
            ("+1 -2 +003 0", True, True),
            (" 10 20 30 ", True, True),
            ("+0 -0 0", True, True),
            ("+1 -2", False, False),  # signs rejected when allow_sign=False
            ("1 2 3", False, True),
            ("001 0002 3", False, True),
            ("", True, False),  # empty line
            (" ", True, False),  # only whitespace
            ("1.0 2 3", True, False),  # contains a float
            ("1e3 2 3", True, False),  # scientific notation is not an int
            ("1 two 3", True, False),  # contains a word
        ],
    )
    def test_line_of_all_integers(self, line, allow_sign, expected):
        """The helper returns exactly ``True``/``False`` for each line."""
        outcome = line_of_all_integers(line, allow_sign=allow_sign)
        assert outcome is expected

    @pytest.mark.parametrize(
        "line,expected",
        [
            # Accepted: integer first, then tokens with a decimal point
            # or an exponent.
            ("3 1.0 -2.3 4e-2", True),
            ("0 .5 5. 5.0 -0.3E+2", True),
            ("-1 +.3 -0.5e2", True),
            ("+4 .7", True),
            # Rejected: the trailing tokens are plain integers.
            ("3 1 2 3", False),
            # Rejected: an integer with no floats after it.
            ("+4", False),
            ("7 ", False),
            # Rejected: bad leading token or malformed float.
            ("3.0 1.0 2.0", False),  # leading token is a float
            ("x 1.0 2.0", False),  # leading token is not a number
            ("2 1.0 nope", False),  # float token is a word
            ("2 1.0 2.0e", False),  # exponent has no digits
            # Rejected: blank input.
            (" ", False),
            ("", False),
        ],
    )
    def test_line_of_integer_followed_by_floats(self, line, expected):
        """The helper returns exactly ``True``/``False`` for each line."""
        outcome = line_of_integer_followed_by_floats(line)
        assert outcome is expected

    def test_header_like_then_data_like(self):
        """An all-integer header line and an index-plus-floats data line."""
        # Described in the original test as a typical ORCA header/data pattern.
        header_line = "0 1 2 3 4 5"
        data_line = "0 0.123 -0.456 7.89 1e-2 .3"
        assert line_of_all_integers(header_line) is True
        assert line_of_integer_followed_by_floats(data_line) is True

    def test_trailing_and_leading_spaces(self):
        """Surrounding spaces do not change either helper's verdict."""
        assert line_of_all_integers(" 1 2 3 ") is True
        padded_data = " 5 1.0 2e0 .3 "
        assert line_of_integer_followed_by_floats(padded_data) is True

    def test_reject_plain_ints_as_floats(self):
        """A trailing token needs a decimal point or exponent to be a float."""
        for line, expected in (
            ("2 3", False),
            ("2 3.", True),  # decimal point present
            ("2 3e0", True),  # exponent present
        ):
            assert line_of_integer_followed_by_floats(line) is expected

    def test_clean_label(self):
        """Special characters in labels are replaced with underscore tokens."""
        cases = (
            # Spaces, commas, periods and parentheses map to "_".
            ("label with space", "label_with_space"),
            ("label,with,comma", "label_with_comma"),
            ("Fig. 1(a)", "Fig_1_a"),
            # Apostrophes become "prime" and asterisks become "star".
            ("O'Hara", "O_prime_Hara"),
            ("label*", "label_star"),
            # Several special characters at once.
            ("O'Hara* test, v1.0", "O_prime_Hara_star_test_v1_0"),
            # Edge cases: empty input, only special characters, leading and
            # trailing specials, and a run of consecutive specials.
            ("", ""),
            ("***", "star_star_star"),
            ("*label*", "star_label_star"),
            ("label...test", "label_test"),
        )
        for raw, cleaned in cases:
            assert clean_label(raw) == cleaned

    @pytest.mark.parametrize(
        "input_str, expected",
        [
            ("1-10", "id 1-10"),
            ("11", "id 11"),
            ("1-10,11", "id 1-10 or id 11"),
            ("1-10,11,14,19-30", "id 1-10 or id 11 or id 14 or id 19-30"),
        ],
    )
    def test_basic_conversion(self, input_str, expected):
        """Each comma-separated piece becomes an ``id ...`` term joined by ``or``."""
        selection = convert_string_indices_to_pymol_id_indices(input_str)
        assert selection == expected

    def test_conversion_strips_whitespace(self):
        """Spaces around the pieces do not appear in the selection string."""
        selection = convert_string_indices_to_pymol_id_indices(
            " 1-10, 11 ,14 , 19-30 "
        )
        assert selection == "id 1-10 or id 11 or id 14 or id 19-30"

    def test_trailing_comma_is_ignored(self):
        """A dangling comma does not add an extra term."""
        selection = convert_string_indices_to_pymol_id_indices("1-10,")
        assert selection == "id 1-10"

    @pytest.mark.parametrize("bad_input", ["", " ", ",,,", ", ,"])
    def test_raises_value_error_on_empty_or_invalid_input(self, bad_input):
        """Input with no usable index pieces raises ``ValueError``."""
        with pytest.raises(ValueError):
            convert_string_indices_to_pymol_id_indices(bad_input)
class TestNaturallySorted:
    """Ordering checks for ``naturally_sorted``."""

    def test_empty_list(self):
        """An empty list sorts to an empty list."""
        assert naturally_sorted([]) == []

    def test_single_item(self):
        """A one-element list comes back unchanged."""
        only = ["item1"]
        assert naturally_sorted(only) == ["item1"]

    def test_numeric_order(self):
        """Embedded numbers are compared by value, so z2 precedes z10."""
        shuffled = ["z10", "z2", "z1"]
        assert naturally_sorted(shuffled) == ["z1", "z2", "z10"]

    def test_mixed_case(self):
        """Upper- and lower-case prefixes are interleaved by their numbers."""
        shuffled = ["Z1", "z2", "Z10", "z1"]
        assert naturally_sorted(shuffled) == ["Z1", "z1", "z2", "Z10"]

    def test_alphanumeric(self):
        """Items are grouped by letter prefix, then ordered by number."""
        shuffled = ["a11", "a1", "b2", "b10"]
        assert naturally_sorted(shuffled) == ["a1", "a11", "b2", "b10"]

    def test_file_names(self):
        """Numbered file names sort by their numeric part."""
        shuffled = ["file10.txt", "file2.txt", "file1.txt"]
        in_order = ["file1.txt", "file2.txt", "file10.txt"]
        assert naturally_sorted(shuffled) == in_order

    def test_special_characters(self):
        """A hyphenated name sorts ahead of the underscore names here."""
        shuffled = ["item-2", "item_10", "item_1"]
        in_order = ["item-2", "item_1", "item_10"]
        assert naturally_sorted(shuffled) == in_order

    def test_mixed_types(self):
        """Empty string first, then pure numbers, then alphabetic entries."""
        shuffled = ["100", "2", "abc", "", "Z", "z1"]
        in_order = ["", "2", "100", "abc", "Z", "z1"]
        assert naturally_sorted(shuffled) == in_order

    def test_large_numbers(self):
        """Multi-digit suffixes are still compared by value."""
        shuffled = ["item1000", "item999", "item10000"]
        in_order = ["item999", "item1000", "item10000"]
        assert naturally_sorted(shuffled) == in_order

    def test_no_numbers(self):
        """Words without digits sort alphabetically regardless of case."""
        shuffled = ["zebra", "Apple", "banana"]
        assert naturally_sorted(shuffled) == ["Apple", "banana", "zebra"]
class TestRunCommand:
    """Tests for the run_command utility function.

    ``mock_popen`` and ``capture_log`` are fixtures defined outside this file.
    """

    @staticmethod
    def _prime_process(mock_popen, stdout, stderr, returncode):
        """Set the output and return code of the mocked process."""
        process = mock_popen.return_value
        process.communicate.return_value = (stdout, stderr)
        process.returncode = returncode

    @staticmethod
    def _assert_spawned_once(mock_popen, argv):
        """Check the mock was called once with ``argv`` and piped text output."""
        mock_popen.assert_called_once_with(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

    def test_list_command_success(self, mock_popen):
        """A list command is passed through and its output is stripped."""
        self._prime_process(mock_popen, "dir contents\n", "", 0)
        output = run_command(["ls", "-l"])
        assert output == "dir contents"
        self._assert_spawned_once(mock_popen, ["ls", "-l"])

    def test_string_command_success(self, mock_popen):
        """A string command is split into an argument list before the call."""
        self._prime_process(mock_popen, "dir contents\n", "", 0)
        output = run_command("ls -l")
        assert output == "dir contents"
        self._assert_spawned_once(mock_popen, ["ls", "-l"])

    def test_string_command_with_quotes(self, mock_popen):
        """A quoted argument in a string command stays one list element."""
        self._prime_process(mock_popen, "committed\n", "", 0)
        output = run_command("git commit -m 'initial commit'")
        assert output == "committed"
        self._assert_spawned_once(
            mock_popen, ["git", "commit", "-m", "initial commit"]
        )

    def test_command_failure(self, mock_popen, capture_log):
        """A non-zero return code gives ``None`` and logs the stderr text."""
        self._prime_process(mock_popen, "", "command not found", 1)
        output = run_command(["invalid_cmd"])
        assert output is None
        logged = "Error running ['invalid_cmd']: command not found"
        assert logged in capture_log.text

    def test_command_exception(self, mock_popen, capture_log):
        """An ``OSError`` from the mock gives ``None`` and is logged."""
        mock_popen.side_effect = OSError("Permission denied")
        output = run_command(["ls", "-l"])
        assert output is None
        logged = "Exception while running ['ls', '-l']: Permission denied"
        assert logged in capture_log.text

    def test_invalid_input_type(self, capture_log):
        """A command that is neither ``str`` nor ``list`` gives ``None``."""
        output = run_command(123)
        assert output is None
        logged = "Invalid command type: <class 'int'>. Expected str or list."
        assert logged in capture_log.text
[docs] class TestReturnObjectsAndIndicesFromStringIndex: """Tests for the return_objects_and_indices_from_string_index utility function."""
[docs] def test_single_index_string(self): """Test single index as a string (1-based).""" objects = ["a", "b", "c", "d", "e"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, "1") ) assert result_objects == "a" assert result_indices == 1
[docs] def test_single_index_middle(self): """Test single index in middle of list.""" objects = ["a", "b", "c", "d", "e"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, "3") ) assert result_objects == "c" assert result_indices == 3
[docs] def test_single_index_last(self): """Test single index at end of list.""" objects = ["a", "b", "c", "d", "e"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, "5") ) assert result_objects == "e" assert result_indices == 5
[docs] def test_single_negative_index(self): """Test negative index (last item).""" objects = ["a", "b", "c", "d", "e"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, "-1") ) assert result_objects == "e" assert result_indices == 5
[docs] def test_slice_range(self): """Test slice with start and stop (1-based, exclusive stop).""" objects = ["a", "b", "c", "d", "e"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, "2:4") ) assert result_objects == ["b", "c"] assert result_indices == [2, 3]
[docs] def test_slice_from_start(self): """Test slice from beginning.""" objects = ["a", "b", "c", "d", "e"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, "1:3") ) assert result_objects == ["a", "b"] assert result_indices == [1, 2]
[docs] def test_slice_to_end(self): """Test slice to end using open-ended slice.""" objects = ["a", "b", "c", "d", "e"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, "3:") ) assert result_objects == ["c", "d", "e"] assert result_indices == [3, 4, 5]
[docs] def test_slice_from_beginning(self): """Test slice from beginning to index.""" objects = ["a", "b", "c", "d", "e"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, ":3") ) assert result_objects == ["a", "b"] assert result_indices == [1, 2]
[docs] def test_slice_all(self): """Test slice selecting all elements using ':'.""" objects = ["a", "b", "c", "d", "e"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, ":") ) assert result_objects == ["a", "b", "c", "d", "e"] assert result_indices == [1, 2, 3, 4, 5]
[docs] def test_slice_with_step(self): """Test slice with step parameter.""" objects = ["a", "b", "c", "d", "e", "f", "g", "h"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, "1:8:2") ) assert result_objects == ["a", "c", "e", "g"] assert result_indices == [1, 3, 5, 7]
[docs] def test_user_defined_range(self): """Test user-defined range format (comma-separated).""" objects = ["a", "b", "c", "d", "e"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, "1,3,5") ) assert result_objects == ["a", "c", "e"] assert result_indices == [1, 3, 5]
[docs] def test_user_defined_range_with_hyphen(self): """Test user-defined range with hyphen notation.""" objects = ["a", "b", "c", "d", "e", "f", "g", "h"] result_objects, result_indices = ( return_objects_and_indices_from_string_index(objects, "1-3") ) assert result_objects == ["a", "b", "c"] assert result_indices == [1, 2, 3]
def test_user_defined_range_complex(self):
    """Mixed spec '1-3,5,7-8' combines ranges and single indices."""
    letters = ["a", "b", "c", "d", "e", "f", "g", "h"]
    picked, positions = return_objects_and_indices_from_string_index(
        letters, "1-3,5,7-8"
    )
    assert picked == ["a", "b", "c", "e", "g", "h"]
    assert positions == [1, 2, 3, 5, 7, 8]
def test_range_with_brackets(self):
    """A bracketed range '[1-3]' yields the first three objects."""
    letters = ["a", "b", "c", "d", "e"]
    picked, positions = return_objects_and_indices_from_string_index(
        letters, "[1-3]"
    )
    assert picked == ["a", "b", "c"]
    assert positions == [1, 2, 3]
def test_with_integer_objects(self):
    """Selection works on a list of integers, not only strings."""
    numbers = [10, 20, 30, 40, 50]
    picked, positions = return_objects_and_indices_from_string_index(
        numbers, "2:4"
    )
    assert picked == [20, 30]
    assert positions == [2, 3]
def test_with_mixed_objects(self):
    """Selection returns objects of heterogeneous types unchanged."""
    assorted = [1, "two", 3.0, [4], {"five": 5}]
    picked, positions = return_objects_and_indices_from_string_index(
        assorted, "1,3,5"
    )
    assert picked == [1, 3.0, {"five": 5}]
    assert positions == [1, 3, 5]
def test_specified_indices_5_to_8(self):
    """Slice '5:8' reports the requested indices 5, 6 and 7."""
    letters = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
    picked, positions = return_objects_and_indices_from_string_index(
        letters, "5:8"
    )
    assert picked == ["e", "f", "g"]
    # Indices echo the positions asked for, not a renumbering from 1.
    assert positions == [5, 6, 7]
def test_empty_list_raises_index_error(self):
    """Index '1' on an empty list raises IndexError."""
    with pytest.raises(IndexError):
        return_objects_and_indices_from_string_index([], "1")
def test_index_zero_raises_value_error(self):
    """Index '0' raises ValueError because indices are 1-based."""
    letters = ["a", "b", "c"]
    with pytest.raises(ValueError):
        return_objects_and_indices_from_string_index(letters, "0")
def test_out_of_range_raises_index_error(self):
    """Index '10' on a three-item list raises IndexError."""
    letters = ["a", "b", "c"]
    with pytest.raises(IndexError):
        return_objects_and_indices_from_string_index(letters, "10")
[docs] class TestDeduplicateStringKeywords: """Tests for deduplicate_string_keywords utility function."""
def test_empty_route_string_returns_unchanged(self):
    """An empty route string comes back as an empty string."""
    outcome = deduplicate_string_keywords("", "cosmors")
    assert outcome == ""
def test_none_route_string_returns_unchanged(self):
    """A route string of None is passed through as None."""
    outcome = deduplicate_string_keywords(None, "cosmors")
    assert outcome is None
def test_empty_keywords_returns_unchanged(self):
    """An empty keyword string leaves the route string untouched."""
    line = "! m062x def2-tzvp cosmors"
    outcome = deduplicate_string_keywords(line, "")
    assert outcome == line
def test_empty_keywords_list_returns_unchanged(self):
    """An empty keyword list leaves the route string untouched."""
    line = "! m062x def2-tzvp cosmors"
    outcome = deduplicate_string_keywords(line, [])
    assert outcome == line
def test_no_duplicates_returns_unchanged(self):
    """A keyword that occurs only once is left where it is."""
    line = "! m062x def2-tzvp COSMORS(water)"
    outcome = deduplicate_string_keywords(line, "cosmors")
    assert outcome == line
def test_bare_keyword_duplicate_keeps_first(self):
    """Of two bare occurrences, only the first one survives."""
    line = "! m062x cosmors def2-tzvp cosmors"
    expected = "! m062x cosmors def2-tzvp"
    cleaned = deduplicate_string_keywords(line, "cosmors")
    assert cleaned == expected
def test_bare_and_args_keyword_keeps_args_form(self):
    """A bare keyword is dropped when a later keyword(args) form exists."""
    line = "! m062x def2-tzvp cosmors defgrid2 COSMORS(water)"
    expected = "! m062x def2-tzvp defgrid2 COSMORS(water)"
    cleaned = deduplicate_string_keywords(line, "cosmors")
    assert cleaned == expected
def test_args_and_bare_keyword_keeps_args_form(self):
    """A bare keyword is dropped when keyword(args) appears before it."""
    line = "! m062x COSMORS(water) def2-tzvp cosmors"
    expected = "! m062x COSMORS(water) def2-tzvp"
    cleaned = deduplicate_string_keywords(line, "cosmors")
    assert cleaned == expected
def test_case_insensitive_matching(self):
    """Tokens are matched against the keyword regardless of letter case."""
    line = "! m062x CoSmOrS def2-tzvp COSMORS(water)"
    expected = "! m062x def2-tzvp COSMORS(water)"
    cleaned = deduplicate_string_keywords(line, "cosmors")
    assert cleaned == expected
def test_keyword_as_string(self):
    """The keyword argument may be given as a plain string."""
    line = "! m062x smd def2-tzvp SMD(water)"
    expected = "! m062x def2-tzvp SMD(water)"
    cleaned = deduplicate_string_keywords(line, "smd")
    assert cleaned == expected
def test_keyword_as_list(self):
    """The keyword argument may be given as a one-element list."""
    line = "! m062x smd def2-tzvp SMD(water)"
    expected = "! m062x def2-tzvp SMD(water)"
    cleaned = deduplicate_string_keywords(line, ["smd"])
    assert cleaned == expected
def test_keyword_as_tuple(self):
    """The keyword argument may be given as a one-element tuple."""
    line = "! m062x smd def2-tzvp SMD(water)"
    expected = "! m062x def2-tzvp SMD(water)"
    cleaned = deduplicate_string_keywords(line, ("smd",))
    assert cleaned == expected
def test_multiple_keywords_deduplicated(self):
    """Several keywords can be deduplicated by a single call."""
    line = "! m062x smd cosmors def2-tzvp SMD(water) COSMORS(methanol)"
    expected = "! m062x def2-tzvp SMD(water) COSMORS(methanol)"
    cleaned = deduplicate_string_keywords(line, ["smd", "cosmors"])
    assert cleaned == expected
def test_unrelated_tokens_preserved(self):
    """Tokens other than the given keyword stay in the route string."""
    line = "! m062x def2-tzvp defgrid2 COSMORS(water)"
    cleaned = deduplicate_string_keywords(line, "cosmors")
    assert cleaned == line
def test_keyword_with_longer_args_preferred_over_shorter(self):
    """Of two keyword(args) forms, the one with longer args is kept."""
    line = "! COSMORS(water) def2-tzvp COSMORS(water_long)"
    expected = "! def2-tzvp COSMORS(water_long)"
    cleaned = deduplicate_string_keywords(line, "cosmors")
    assert cleaned == expected
def test_docstring_example(self):
    """Run the example said to come from the function's docstring."""
    line = "! m062x def2-tzvp cosmors defgrid2 COSMORS(water)"
    expected = "! m062x def2-tzvp defgrid2 COSMORS(water)"
    cleaned = deduplicate_string_keywords(line, "cosmors")
    assert cleaned == expected
def test_keyword_not_present_returns_unchanged(self):
    """A route string that lacks the keyword is returned as given."""
    line = "! m062x def2-tzvp defgrid2"
    cleaned = deduplicate_string_keywords(line, "cosmors")
    assert cleaned == line
def test_three_bare_duplicates_keeps_first(self):
    """Of three bare occurrences, only the first one survives."""
    line = "! m062x cosmors def2-tzvp cosmors defgrid2 cosmors"
    expected = "! m062x cosmors def2-tzvp defgrid2"
    cleaned = deduplicate_string_keywords(line, "cosmors")
    assert cleaned == expected
def test_mixed_case_keyword_argument(self):
    """An upper-case keyword argument still matches lower-case tokens."""
    line = "! m062x cosmors def2-tzvp COSMORS"
    expected = "! m062x cosmors def2-tzvp"
    cleaned = deduplicate_string_keywords(line, "COSMORS")
    assert cleaned == expected