Exemplo n.º 1
0
if __name__ == '__main__':

    # Run the scripts-directory check, then collect the command-line arguments as a plain dictionary
    fxn.check_scripts_dir()
    input_args = vars(args())

    # Codon selection is driven by the requested codon usage option and the upper-cased species name
    codons = fxn.get_optimal_codons(input_args['codon_usage'],
                                    input_args['species'].upper())
    linker_dict = fxn.get_linker_dict()

    # Reference data and functionality information, each keyed by chain name ('TRA' / 'TRB')
    tcr_dat = {}
    tcr_functionality = {}

    for c in ['TRA', 'TRB']:

        tmp_tcr_dat, tmp_functionality = fxn.get_imgt_data(
            c, st.gene_types, input_args['species'].upper())
        tcr_dat[c] = tmp_tcr_dat
        tcr_functionality[c] = tmp_functionality

    # Then go through in file and stitch each TCR on each line
    if not os.path.isfile(input_args['in_file']):
        raise IOError(
            input_args['in_file'] +
            " not detected - please check and specify in file again.")

    # TODO opener function for gzipped
    # Plain 'r' is used rather than 'rU': open() rejects the 'U' flag from Python 3.11 onwards, and
    # Python 3 text mode already applies universal newline handling by default
    with open(input_args['in_file'], 'r') as in_file:

        # Running line counter, plus the output rows seeded with the tab-joined header row
        line_count = 0
        out_data = ['\t'.join(out_headers)]
Exemplo n.º 2
0
# Region label for each gene segment type, keyed by a one-letter code
regions = dict(
    v='V-REGION',
    j='J-REGION',
    c='EX1+EX2+EX3+EX4',
    l='L-PART1+L-PART2',
)
# Just the region labels, in the same order as the mapping above (handed to fxn.get_imgt_data below)
gene_types = [regions[code] for code in regions]

if __name__ == '__main__':

    # Run the scripts-directory check, then let sort_input turn the parsed command-line options into
    # the argument dictionary, the chain in use and the codon data
    fxn.check_scripts_dir()
    cli_options = vars(args())
    input_args, chain, codons = fxn.sort_input(cli_options)

    # Load the reference data for this chain and species
    imgt_dat, tcr_functionality = fxn.get_imgt_data(
        chain, gene_types, input_args['species'])

    # Stitch the sequence; the returned name parts are pipe-joined into the FASTA header suffix
    out_list, stitched = stitch(
        input_args, chain, imgt_dat, tcr_functionality, codons)
    out_str = '|'.join(out_list)
    out_str += '(L)'

    print(
        '----------------------------------------------------------------------------------------------'
    )

    # Nucleotide record first, then its translation, each under a prefixed header
    nt_fasta = fxn.fastafy('nt|' + out_str, stitched)
    print(nt_fasta)
    aa_seq = fxn.translate_nt(stitched)
    aa_fasta = fxn.fastafy('aa|' + out_str, aa_seq)
    print(aa_fasta)

    # When an amino acid sequence was supplied, pull in the alignment helpers needed to compare against it
    if 'aa' in input_args:
        from Bio import pairwise2
        from Bio.pairwise2 import format_alignment
Exemplo n.º 3
0
        # Then stitch each individual chain...
        # 'TR1'/'TR2' are generic slot names; convert_chains maps them to the actual chain for this receptor
        for ref_chain in ['TR1', 'TR2']:
            chain = convert_chains[receptor][ref_chain]

            # Blank this chain's output and log elements before processing it
            window[ref_chain + '_out'].update('')
            window[ref_chain + '_log'].update('')

            # Record every warning raised while handling this chain into chain_log instead of emitting it
            with warnings.catch_warnings(record=True) as chain_log:
                warnings.simplefilter("always")

                # Only proceed when the V, J and CDR3 fields for this chain all hold a truthy value
                if values[ref_chain +
                          'V'] and values[ref_chain +
                                          'J'] and values[ref_chain + '_CDR3']:

                    try:
                        tcr_dat, functionality, partial = fxn.get_imgt_data(
                            chain, st.gene_types, species)

                        # If additional genes provided, just add them to all possible gene segment types
                        # (the comparison treats the placeholder text plus a newline as "nothing entered")
                        if values['additional_genes'] != extra_gene_text + '\n':
                            for extra_gene in outputs['additional_fastas']:
                                # The first field of each entry is expected to look like 'GENE*ALLELE'
                                gene, allele = extra_gene[0].split('*')

                                for gene_type in tcr_dat.keys():

                                    # Create an empty per-gene container the first time a gene is seen
                                    if gene not in tcr_dat[gene_type]:
                                        tcr_dat[gene_type][
                                            gene] = coll.defaultdict(list)

                                    # NOTE(review): warnings.warn() returns None, so with the "always" filter
                                    # above this 'raise' would raise a TypeError after recording the warning
                                    # rather than raising the warning itself - confirm this is intended
                                    if allele in tcr_dat[gene_type][gene]:
                                        raise warnings.warn(
                                            "User provided gene/allele combination "
Exemplo n.º 4
0
    else:  # If not explicitly provided, infer from input TSV headers
        with fxn.opener(input_args['in_file']) as in_file:
            # Only the first line is inspected: the loop always ends via the 'break' or the raise below
            # NOTE(review): if the file has no lines at all, 'receptor' is not assigned in this branch
            for line in in_file:
                # The line must mention exactly one of the two V gene prefixes to be unambiguous
                if 'TRAV' in line and 'TRGV' not in line:
                    receptor = 'TRA/TRB'
                elif 'TRGV' in line and 'TRAV' not in line:
                    receptor = 'TRG/TRD'
                else:
                    raise IOError("Unable to determine receptor from input file header, please check template. ")
                break

    # Define the individual receptors (chains or loci, i.e. TRA and TRB or TRG and TRD) in play
    r1, r2 = receptor.split('/')
    for c in [r1, r2]:

        # Load the reference data for this chain; results are stored per chain in the dictionaries below
        # ('partial' is overwritten on each pass and only used within the same iteration here)
        tmp_tcr_dat, tmp_functionality, partial = fxn.get_imgt_data(c, st.gene_types, species)
        tcr_dat[c] = tmp_tcr_dat
        tcr_functionality[c] = tmp_functionality

        # Optionally merge in additional genes; doing so also switches on 'skip_c_checks'
        # NOTE(review): 'skip_c_checks' is only set here when the 'extra_genes' key exists in input_args
        if 'extra_genes' in input_args:
            if input_args['extra_genes']:
                tcr_dat[c], tcr_functionality[c] = fxn.get_additional_genes(tcr_dat[c], tcr_functionality[c])
                input_args['skip_c_checks'] = True
            else:
                input_args['skip_c_checks'] = False

        # Allow for provision of preferred alleles
        # (read from the given path, using this chain's reference data and its 'partial' value)
        if input_args['preferred_alleles_path']:
            preferences[c] = fxn.get_preferred_alleles(input_args['preferred_alleles_path'], list(fxn.regions.values()),
                                                       tcr_dat[c], partial, c)
        else:
Exemplo n.º 5
0
if __name__ == '__main__':

    # TODO move all this to one large bracketing function?
    # Get input arguments, determine the TCR chain in use, get codon table, then load the IMGT data in
    fxn.check_scripts_dir()
    input_args, chain, codons = fxn.sort_input(vars(args()))

    # Region label for each gene segment type, keyed by the one-letter code used in input_args
    regions = {
        'v': 'V-REGION',
        'j': 'J-REGION',
        'c': 'EX1+EX2+EX3+EX4',
        'l': 'L-PART1+L-PART2'
    }
    # Materialise the labels as a real list so the helper receives the same type on Python 2 and 3
    gene_types = list(regions.values())
    imgt_dat, functionality = fxn.get_imgt_data(chain, gene_types)

    # Then find each of the appropriate sequences
    done = {}
    for r in regions:

        # An explicit allele is given as 'GENE*ALLELE'; otherwise the prototypical '01' allele is assumed
        if '*' in input_args[r]:
            gene, allele = input_args[r].split('*')
            if allele not in imgt_dat[regions[r]][gene]:
                # Single pre-built string passed to print(): valid syntax on Python 3 and yields the same
                # output as the former Python 2 print statement
                print("\tCannot find {} gene {}: attempting prototypical allele ({}*01)".format(
                    r.upper(), input_args[r], gene))
                allele = '01'
        else:
            gene = input_args[r]
            allele = '01'