Пример #1
0
def test_add_domain_attribute():
    """Check adding, overwriting and reading attributes on a Domain.

    The domain under test is the first domain of protein O00401 after
    testset_1.fasta and TS1_domains_idr.tsv have been loaded.
    """
    data_root = shephard.get_data('test_data')
    fasta_path = f'{data_root}/testset_1.fasta'
    domains_path = f'{data_root}/TS1_domains_idr.tsv'

    proteome_obj = uniprot.uniprot_fasta_to_proteome(fasta_path)
    interfaces.si_domains.add_domains_from_file(proteome_obj, domains_path)
    first_domain = proteome_obj.protein('O00401').domains[0]

    first_domain.add_attribute('test_attribute', 1)
    assert first_domain.attribute('test_attribute') == 1

    # re-adding an existing attribute name (default safe mode) must raise
    with pytest.raises(DomainException):
        first_domain.add_attribute('test_attribute', 20)

    # the rejected call above must have left the stored value untouched
    assert first_domain.attribute('test_attribute') == 1

    # with safe=False the existing value is overwritten
    first_domain.add_attribute('test_attribute', 20, safe=False)
    assert first_domain.attribute('test_attribute') == 20

    # overwriting did not create a second entry; a new name does
    assert len(first_domain.attributes) == 1
    first_domain.add_attribute('another_test_attribute', 'testval')
    assert len(first_domain.attributes) == 2

    # looking up an unknown attribute raises in the default (safe) mode...
    with pytest.raises(DomainException):
        assert first_domain.attribute('does not exist') == 20

    # ...and yields None when safe=False
    assert first_domain.attribute('does not exist', safe=False) is None
Пример #2
0
def test_add_domains_file():
    """Load TS1_domains_idr.tsv into proteomes under several option sets.

    Covers a plain load, a rejected duplicate load, and loads using the
    ``autoname`` and ``skip_bad`` keyword arguments.
    """
    data_root = shephard.get_data('test_data')
    fasta_path = f'{data_root}/testset_1.fasta'
    domains_path = f'{data_root}/TS1_domains_idr.tsv'

    def fresh_proteome():
        # every scenario below starts from a newly-read proteome
        return uniprot.uniprot_fasta_to_proteome(fasta_path)

    proteome_obj = fresh_proteome()
    interfaces.si_domains.add_domains_from_file(proteome_obj, domains_path)

    # loading the identical file a second time must be rejected
    with pytest.raises(ProteinException):
        interfaces.si_domains.add_domains_from_file(proteome_obj, domains_path)

    # a single load with autoname=True into a fresh proteome
    proteome_obj = fresh_proteome()
    interfaces.si_domains.add_domains_from_file(proteome_obj,
                                                domains_path,
                                                autoname=True)

    print('')

    # autoname=True lets the same file be added on top of an earlier
    # autoname=False load without raising
    proteome_obj = fresh_proteome()
    interfaces.si_domains.add_domains_from_file(proteome_obj,
                                                domains_path,
                                                autoname=False)
    interfaces.si_domains.add_domains_from_file(proteome_obj,
                                                domains_path,
                                                autoname=True)

    # a single load with skip_bad=True into a fresh proteome
    # NOTE(review): the effect of skip_bad is not visible from this test;
    # only that the call completes without raising is checked
    proteome_obj = fresh_proteome()
    interfaces.si_domains.add_domains_from_file(proteome_obj,
                                                domains_path,
                                                autoname=False,
                                                skip_bad=True)
Пример #3
0
def test_write_domain_with_attributes():
    """Prepare a domain carrying two attributes.

    The proteome/domain setup mirrors test_add_domain_attribute.
    NOTE(review): no write step or assertion is visible in this body;
    it may be incomplete - confirm against the full test module.
    """
    data_root = shephard.get_data('test_data')
    fasta_path = f'{data_root}/testset_1.fasta'
    domains_path = f'{data_root}/TS1_domains_idr.tsv'

    proteome_obj = uniprot.uniprot_fasta_to_proteome(fasta_path)
    interfaces.si_domains.add_domains_from_file(proteome_obj, domains_path)

    # first domain of O00401 gets one numeric and one string attribute
    first_domain = proteome_obj.protein('O00401').domains[0]
    first_domain.add_attribute('test_attribute_1', 1)
    first_domain.add_attribute('test_attribute_cat', 'cat')
Пример #4
0
def test_fasta_to_proteome_part_1():
    """Check the automatically assigned integer-style unique IDs.

    Reads testset_1.fasta repeatedly into the same proteome and verifies
    that the unique IDs are consecutive stringified integers, including
    after one protein has been removed.
    """
    data_root = shephard.get_data('test_data')
    print(data_root)
    fasta_path = f'{data_root}/testset_1.fasta'

    P = fasta.fasta_to_proteome(fasta_path)
    assert len(P.protein('1')) == 390
    assert len(P) == 9

    # unique IDs are '0', '1', ... in iteration order
    for position, uid in enumerate(P.proteins):
        assert uid == str(position)

    # reading the same file into the existing proteome appends nine more
    P = fasta.fasta_to_proteome(fasta_path, proteome=P)
    assert len(P) == 18

    for position, uid in enumerate(P.proteins):
        assert uid == str(position)

    # Remove one of the integer-indexed proteins, add the file again, and
    # make sure the removed ID really is gone while every other ID still
    # follows the expected count.
    P.remove_protein(10)
    P = fasta.fasta_to_proteome(fasta_path, proteome=P)

    expected_uid = 0
    for uid in P.proteins:
        if expected_uid == 10:
            # ID 10 was removed, so looking it up must fail; skip past it
            with pytest.raises(ProteomeException):
                assert P.protein(expected_uid)
            expected_uid += 1

        assert uid == str(expected_uid)
        expected_uid += 1
Пример #5
0
"""

import shephard
from shephard.interfaces import si_sites, si_domains, si_tracks, si_protein_attributes
from shephard.apis import uniprot

import pytest
import sys

# File names of test set 1, resolved relative to test_data_dir.
# Index 0 is the FASTA file that the fixtures below pass to build_proteome.
TS1_FILE = [
    'testset_1.fasta',
    'TS1_domains_idr.tsv',
    'TS1_domains_pscore.tsv',
    'TS1_sites.tsv',
    'TS1_tracks_pscore.tsv',
    'TS1_protein_attributes.tsv',
    'testset_1_ptms.tsv',
]

# Directory holding the packaged test data, looked up once at import time.
test_data_dir = shephard.get_data('test_data')


def build_proteome(fn):
    """Return a proteome read from the file named ``fn`` in test_data_dir."""
    fasta_path = f'{test_data_dir}/{fn}'
    return uniprot.uniprot_fasta_to_proteome(fasta_path)


@pytest.fixture
def TS1(request):
    """Fixture: a proteome built from the first TS1_FILE entry."""
    return build_proteome(TS1_FILE[0])


@pytest.fixture
def TS1_domains(request):
    # Fixture that starts from a proteome built from the first TS1_FILE entry.
    # NOTE(review): the visible body stops after this assignment (nothing is
    # returned and no domains are loaded) - it looks truncated; confirm
    # against the full module.
    TS1_proteome = build_proteome(TS1_FILE[0])
Пример #6
0
def test_fasta_to_proteome_part_2():
    """Check the unique-ID options of fasta.fasta_to_proteome.

    testset_1.fasta is read into one proteome several times, using a
    header-parsing function, the default automatic IDs, and the full header
    as unique ID. The proteome size is checked after each read, and finally
    every expected unique ID is looked up.
    """
    def header_parser(s):
        # second '|'-separated field of the FASTA header
        return s.split('|')[1]

    test_data_dir = shephard.get_data('test_data')
    print(test_data_dir)
    fasta_path = '%s/%s' % (test_data_dir, 'testset_1.fasta')

    # check we've read in the proteome OK
    P = fasta.fasta_to_proteome(fasta_path, build_unique_ID=header_parser)
    assert len(P) == 9

    # re-reading with the same header parser yields duplicate unique IDs,
    # which must raise
    with pytest.raises(ProteomeException):
        fasta.fasta_to_proteome(fasta_path,
                                build_unique_ID=header_parser,
                                proteome=P)

    # with the default unique IDs the same sequences are added again
    P = fasta.fasta_to_proteome(fasta_path, proteome=P)
    assert len(P) == 18

    # with force_overwrite=True the duplicate IDs are accepted, but the
    # proteome must not grow
    P = fasta.fasta_to_proteome(fasta_path,
                                build_unique_ID=header_parser,
                                proteome=P,
                                force_overwrite=True)
    assert len(P) == 18

    # another read with default unique IDs adds nine more entries
    P = fasta.fasta_to_proteome(fasta_path, proteome=P)
    assert len(P) == 27

    # the full FASTA header as unique ID adds nine more entries
    P = fasta.fasta_to_proteome(fasta_path,
                                proteome=P,
                                use_header_as_unique_ID=True)
    assert len(P) == 36

    # passing both use_header_as_unique_ID and build_unique_ID in the same
    # call must raise
    with pytest.raises(APIException):
        fasta.fasta_to_proteome(fasta_path,
                                proteome=P,
                                use_header_as_unique_ID=True,
                                build_unique_ID=header_parser)

    # IDs produced by header_parser
    accession_uids = [
        'O00401', 'O00470', 'O00472', 'O00499', 'O00629', 'O00712', 'O00716',
        'O14786', 'Q9UJX3',
    ]

    # IDs produced by the two default-ID reads ('0' .. '17')
    integer_uids = [str(i) for i in range(18)]

    # IDs produced by use_header_as_unique_ID=True; these must match the
    # FASTA header lines exactly (organism is 'Homo sapiens', OX=9606)
    header_uids = [
        'sp|O00401|WASL_HUMAN Neural Wiskott-Aldrich syndrome protein OS=Homo sapiens OX=9606 GN=WASL PE=1 SV=2',
        'sp|O00470|MEIS1_HUMAN Homeobox protein Meis1 OS=Homo sapiens OX=9606 GN=MEIS1 PE=1 SV=1',
        'sp|O00472|ELL2_HUMAN RNA polymerase II elongation factor ELL2 OS=Homo sapiens OX=9606 GN=ELL2 PE=1 SV=2',
        'sp|O00499|BIN1_HUMAN Myc box-dependent-interacting protein 1 OS=Homo sapiens OX=9606 GN=BIN1 PE=1 SV=1',
        'sp|O00629|IMA3_HUMAN Importin subunit alpha-3 OS=Homo sapiens OX=9606 GN=KPNA4 PE=1 SV=1',
        'sp|O00712|NFIB_HUMAN Nuclear factor 1 B-type OS=Homo sapiens OX=9606 GN=NFIB PE=1 SV=2',
        'sp|O00716|E2F3_HUMAN Transcription factor E2F3 OS=Homo sapiens OX=9606 GN=E2F3 PE=1 SV=1',
        'sp|O14786|NRP1_HUMAN Neuropilin-1 OS=Homo sapiens OX=9606 GN=NRP1 PE=1 SV=3',
        'sp|Q9UJX3|APC7_HUMAN Anaphase-promoting complex subunit 7 OS=Homo sapiens OX=9606 GN=ANAPC7 PE=1 SV=4',
    ]

    expected_protein_uids = accession_uids + integer_uids + header_uids

    # CHECK all the unique IDs expected can be read in
    for i in expected_protein_uids:
        P.protein(i)
Пример #7
0
def test_add_protein():
    """Check protein lookup and manual protein addition/removal.

    Proteomes are built from testset_1.fasta with UniProt accessions,
    default integer IDs and full FASTA headers as unique IDs; sequence
    lengths are compared for each. Proteins are then added, overwritten
    and removed by hand.
    """
    # creating proteome and adding protein
    test_data_dir = shephard.get_data('test_data')
    fasta_path = '%s/%s' % (test_data_dir, 'testset_1.fasta')

    P = uniprot.uniprot_fasta_to_proteome(fasta_path)
    lengths_by_accession = {
        'O00401': 505,
        'O00470': 390,
        'O00472': 640,
        'O00499': 593,
        'O00629': 521,
        'O00712': 420,
        'O00716': 465,
        'O14786': 923,
        'Q9UJX3': 599,
    }
    for uid, expected_length in lengths_by_accession.items():
        assert len(P.protein(uid)) == expected_length, uid

    # creating a proteome from a FASTA file (using default unique key)
    P = fasta.fasta_to_proteome(fasta_path)
    lengths_by_index = {
        '1': 390,
        '2': 640,
        '3': 593,
        '4': 521,
        '5': 420,
        '6': 465,
        '7': 923,
        '8': 599,
    }
    for uid, expected_length in lengths_by_index.items():
        assert len(P.protein(uid)) == expected_length, uid

    # create a proteome where FASTA header is used as uniqueID; the keys
    # must match the FASTA header lines exactly (organism is 'Homo sapiens',
    # OX=9606)
    P = fasta.fasta_to_proteome(fasta_path, use_header_as_unique_ID=True)
    lengths_by_header = {
        'sp|O00470|MEIS1_HUMAN Homeobox protein Meis1 OS=Homo sapiens OX=9606 GN=MEIS1 PE=1 SV=1': 390,
        'sp|O00472|ELL2_HUMAN RNA polymerase II elongation factor ELL2 OS=Homo sapiens OX=9606 GN=ELL2 PE=1 SV=2': 640,
        'sp|O00499|BIN1_HUMAN Myc box-dependent-interacting protein 1 OS=Homo sapiens OX=9606 GN=BIN1 PE=1 SV=1': 593,
        'sp|O00629|IMA3_HUMAN Importin subunit alpha-3 OS=Homo sapiens OX=9606 GN=KPNA4 PE=1 SV=1': 521,
        'sp|O00712|NFIB_HUMAN Nuclear factor 1 B-type OS=Homo sapiens OX=9606 GN=NFIB PE=1 SV=2': 420,
        'sp|O00716|E2F3_HUMAN Transcription factor E2F3 OS=Homo sapiens OX=9606 GN=E2F3 PE=1 SV=1': 465,
        'sp|O14786|NRP1_HUMAN Neuropilin-1 OS=Homo sapiens OX=9606 GN=NRP1 PE=1 SV=3': 923,
        'sp|Q9UJX3|APC7_HUMAN Anaphase-promoting complex subunit 7 OS=Homo sapiens OX=9606 GN=ANAPC7 PE=1 SV=4': 599,
    }
    for uid, expected_length in lengths_by_header.items():
        assert len(P.protein(uid)) == expected_length, uid

    # check manually adding proteins
    local_seq = 'PPPPP'
    P.add_protein(local_seq, '5pp', 'U5P')
    assert P.protein('U5P').sequence == local_seq
    assert P.protein('U5P').name == '5pp'

    # adding the same unique ID again should trigger an exception...
    with pytest.raises(ProteomeException):
        P.add_protein(local_seq, '5pp', 'U5P')

    # ...unless force_overwrite is set, in which case the entry is replaced
    P.add_protein('ASDF', '5pp', 'U5P', force_overwrite=True)
    assert P.protein('U5P').sequence == 'ASDF'

    # build a proteome directly from a list of protein dictionaries, using
    # a non-string (float) unique ID
    p1 = {
        'sequence': 'ASDFGH',
        'name': "Test protein 1",
        'unique_ID': 1.23,
        "attributes": None
    }
    protein_list = [p1]

    # the protein can be fetched with the float or its string form
    P = proteome.Proteome(protein_list)
    print(P.proteins)
    assert P.protein(1.23).sequence == 'ASDFGH'
    assert P.protein("1.23").sequence == 'ASDFGH'

    # once removed, looking it up must raise
    P.remove_protein(1.23)
    with pytest.raises(ProteomeException):
        assert P.protein(1.23).sequence == 'ASDFGH'