示例#1
0
def test_parse_lrg5():
    """Test that all expected exon labels are parsed from the LRG_5 file."""
    # Parse the LRG_5 fixture and extract its exon records.
    lrg5_root = lrg_parser.set_root(lrg_parser.parse_file('data/LRG_5.xml'))
    lrg5_exon_data_tuple = lrg_parser.lrg_parse(lrg5_root)
    # zip(*records) transposes the records, so index 0 holds the first
    # element of every record, which this test treats as the exon label.
    lrg5_exons = list(zip(*lrg5_exon_data_tuple))[0]
    # Read the comma-separated exon label truthset from local data; the
    # context manager guarantees the file handle is closed.
    with open('data/LRG5_exons.list', 'r') as truthset_file:
        lrg5_truthset = truthset_file.read().split(",")
    # Compare as sets: order and duplicates are irrelevant here.
    assert set(lrg5_exons) == set(lrg5_truthset)
示例#2
0
def test_parse_lrg1():
    """Test that all expected exon labels are parsed from the LRG_1 file."""
    # Parse the LRG_1 fixture and extract its exon records.
    lrg1_root = lrg_parser.set_root(lrg_parser.parse_file('data/LRG_1.xml'))
    lrg1_exon_data_tuple = lrg_parser.lrg_parse(lrg1_root)
    # Exon labels are the first element of each tuple returned by
    # lrg_parser.lrg_parse(). zip() is used to merge the tuples by matching
    # element indexes, e.g.:
    #     a = [(1,2,3), (4,5,6)]
    #     zip(*a) = [(1,4), (2,5), (3,6)]
    lrg1_exons = list(zip(*lrg1_exon_data_tuple))[0]
    # Read the comma-separated exon label truthset from local data; the
    # context manager guarantees the file handle is closed.
    with open('data/LRG1_exons.list', 'r') as truthset_file:
        lrg1_truthset = truthset_file.read().split(",")
    # Compare as sets: order and duplicates are irrelevant here.
    assert set(lrg1_exons) == set(lrg1_truthset)
示例#3
0
def test_convert_lrg5():
    """Assert LRG_5 exon regions are accurately converted to GRCh38.p12 coordinates."""
    lrg5_root = lrg_parser.set_root(lrg_parser.parse_file('data/LRG_5.xml'))
    lrg5_exon_data_tuple = lrg_parser.lrg_parse(lrg5_root)
    lrg5_coordinates = lrg_parser.convert_coords(lrg5_root,
                                                 lrg5_exon_data_tuple)
    # Read the truthset rows inside a context manager so the file handle is
    # closed even if a later assertion fails.
    with open('data/LRG_5_GRCh38_p12_coordinates.csv', 'r') as truthset_file:
        truthset_rows = [
            tuple(line.strip().split(",")) for line in truthset_file
        ]
    # Drop rows whose first field starts with '#'. These lines contain the
    # headers of the truthset columns.
    lrg5_coords_truthset = [
        row for row in truthset_rows if not row[0].startswith('#')
    ]
    # Printed so the converted values appear in pytest's captured output
    # when an assertion below fails.
    print(lrg5_coordinates)
    # Index into the converted coordinates (rather than zip) so that a
    # result shorter than the truthset fails loudly with IndexError.
    # Fields 1 and 2 are compared; presumably the start and end
    # coordinates -- confirm against the CSV header.
    for index, expected in enumerate(lrg5_coords_truthset):
        converted = lrg5_coordinates[index]
        assert converted[1] == expected[1]
        assert converted[2] == expected[2]
示例#4
0
def test_set_root_from_file():
    """Check that set_root converts the data parsed from a file input
    into an ElementTree root element ready for parsing."""
    # Load the fixture first, then hand the result to set_root.
    xml_data = lrg_parser.parse_file('data/LRG_1.xml')
    root_element = lrg_parser.set_root(xml_data)
    assert isinstance(root_element, ET.Element)
示例#5
0
def test_LRG_input():
    """Check that parse_file returns a str when given the path of an
    existing file."""
    parsed = lrg_parser.parse_file('data/LRG_1.xml')
    assert isinstance(parsed, str)