Exemplo n.º 1
0
def main(aligner_fname, cascade_fname, image_fnames):
    """Align the first detected face of every image.

    An ``Aligner`` is built from ``aligner_fname`` and a cascade is loaded
    from ``cascade_fname``. Each entry of ``image_fnames`` is then opened
    with ``pvImage``, passed through ``detect_faces`` and the first
    detection is handed to ``Aligner.align_face`` together with the image.

    Returns:
        A list holding one ``align_face`` result per entry of
        ``image_fnames``, in input order.
    """
    face_aligner = Aligner(aligner_fname)
    face_cascade = load_cascade(cascade_fname)

    aligned = []
    for fname in image_fnames:
        image = pvImage(fname)
        # Only the first detection is used; the [0] lookup raises if
        # detect_faces returns an empty sequence.
        first_face = detect_faces(image, face_cascade)[0]
        aligned.append(face_aligner.align_face(first_face, image))
    return aligned
Exemplo n.º 2
0
def main(epitope_list=None,
         alignments_dir=None,
         alignment_score_threshold=None,
         slope_parameter=None,
         output_file=None):
    """Compute a quality score per neoantigen and write a tab-separated report.

    Args:
        epitope_list: Path to a tab-separated file with the columns ``id``,
            ``sample``, ``epitope``, ``wt_score`` and ``mt_score``.
        alignments_dir: Directory containing one
            ``neoantigens_<sample>_iedb.xml`` file per sample.
        alignment_score_threshold: First argument of ``Aligner.compute_rval``.
        slope_parameter: Second argument of ``Aligner.compute_rval``.
        output_file: Path of the report to create (overwritten if present).

    The report has one row per neoantigen; its quality is the MHC amplitude
    (``wt_score / mt_score``) multiplied by the value returned by
    ``Aligner.get_rval``.
    """
    # Compute MHC amplitudes for all neoantigens
    amplitude_by_id = {}
    peptide_by_id = {}
    sample_by_id = {}

    with open(epitope_list) as handle:
        for row in csv.DictReader(handle, delimiter='\t'):
            raw_id = row['id']
            sample_name = row['sample']
            mutant_peptide = row['epitope']
            raw_wt_kd = row['wt_score']
            mt_kd = float(row['mt_score'])
            # A wild-type score given as the literal text 'nan' is replaced
            # by 1000.
            wt_kd = 1000. if raw_wt_kd == 'nan' else float(raw_wt_kd)
            neo_id = int(raw_id)
            peptide_by_id[neo_id] = mutant_peptide.upper()
            amplitude_by_id[neo_id] = wt_kd / mt_kd
            sample_by_id[neo_id] = sample_name

    # Compute TCR-recognition probabilities for all neoantigens
    recognizer = Aligner()
    for sample_name in set(sample_by_id.values()):
        recognizer.read_all_blast_alignments(
            join(alignments_dir, f'neoantigens_{sample_name}_iedb.xml'))
    recognizer.compute_rval(alignment_score_threshold, slope_parameter)

    # Compute qualities for all epitopes and write the result
    with open(output_file, 'w') as report:
        columns = (
            'Sample', 'NeoantigenID', 'MT.Peptide.Form', 'NeoantigenQuality',
            'NeoantigenAlignment', 'IEDB_EpitopeAlignment', 'AlignmentScore',
            'IEDB_Epitope',
        )
        report.write('\t'.join(columns) + '\n')
        for neo_id, peptide in peptide_by_id.items():
            amplitude = amplitude_by_id[neo_id]
            r_val, species, alignment = recognizer.get_rval(neo_id)

            # The alignment record is read positionally: neoantigen side,
            # epitope side, then the score.
            neo_alignment = alignment[0]
            epitope_alignment = alignment[1]
            score = alignment[2]

            quality = amplitude * r_val
            fields = (
                sample_by_id[neo_id], neo_id, peptide, quality,
                neo_alignment, epitope_alignment, score, species,
            )
            report.write('\t'.join(str(field) for field in fields) + '\n')
Exemplo n.º 3
0
def main(argv):

    '''
    Compute the recognition potential of every neoantigen and write the
    result as a tab-separated table.

    command line parameters (argv[0] is ignored):
    neofile - argv[1], text file with neoantigen data (supplementary data)
    alignmentDirectory - argv[2], folder with precomputed alignments
    a - argv[3], midpoint parameter of the logistic function, alignment score threshold
    k - argv[4], slope parameter of the logistic function
    outfile - argv[5], path to a file where to output neoantigen fitness computation
    nmerl - argv[6], converted to float and forwarded to readNeoantigens
            (presumably the peptide length to keep -- confirm against readNeoantigens)
    '''

    neofile = argv[1]
    alignmentDirectory = argv[2]
    a = float(argv[3])
    k = float(argv[4])
    # Taken from the argv parameter like every other argument, so that
    # main() honours the list it is called with instead of sys.argv.
    outfile = argv[5]
    nmerl = float(argv[6])

    [neoantigens, samples] = readNeoantigens(neofile, nmerl)

    # Compute TCR-recognition probabilities for all neoantigens
    aligner = Aligner()
    for sample in samples:
        xmlpath = alignmentDirectory + "/neoantigens_" + sample + "_iedb.xml"
        aligner.readAllBlastAlignments(xmlpath)
    aligner.computeR(a, k)

    # Write neoantigen recognition potential
    # NOTE(review): the "MutatedPeptide" column is filled with
    # neoantigen.position below -- confirm the intended column name.
    header = ["NeoantigenID", "Mutation", "Sample", "MutatedPeptide",
              "ResidueChangeClass", "MutantPeptide", "WildtypePeptide",
              "A", "R", "Excluded", "NeoantigenRecognitionPotential"]
    # The context manager closes (and flushes) the report even if a row fails.
    with open(outfile, 'w') as of:
        of.write("\t".join(header) + "\n")
        for i in neoantigens:
            neoantigen = neoantigens[i]
            w = neoantigen.getWeight()  # excludes neoantigens that mutated from a nonhydrophobic residue on position 2 or 9
            A = neoantigen.getA()  # MHC amplitude A
            mtpeptide = neoantigen.mtPeptide  # mutant peptide
            wtpeptide = neoantigen.wtPeptide
            R = aligner.getR(i)

            # Residue change:
            # HH: from hydrophobic to hydrophobic,
            # NN: from non-hydrophobic to non-hydrophobic
            # HN: from hydrophobic to non-hydrophobic,
            # NH: from non-hydrophobic to hydrophobic
            # other (WW, WH, HW, NW, WN) which include aminoacids without a clear classification
            residueChange = neoantigen.residueChange

            fitnessCost = A * R * w

            row = [i, neoantigen.mid, neoantigen.sample, neoantigen.position,
                   residueChange, mtpeptide, wtpeptide, A, R, 1 - w,
                   fitnessCost]
            of.write("\t".join(map(str, row)) + "\n")
    def alignOneChunk(pathToHtkModel, path_TO_OUTPUT, lyrics,
                      currPathToAudioFile, isLyricsFromFile, withSynthesis):
        """Align one audio chunk and return the path of the aligned output.

        The output directory ``path_TO_OUTPUT`` is created with ``os.mkdir``
        when it is not already a directory. An ``Aligner`` is built from the
        remaining arguments, and the output path is the base name (without
        extension) of the aligner's ``pathToAudioFile`` placed inside
        ``path_TO_OUTPUT`` with ``HTK_MLF_ALIGNED_SUFFIX`` appended.
        """
        outputDirMissing = not os.path.isdir(path_TO_OUTPUT)
        if outputDirMissing:
            os.mkdir(path_TO_OUTPUT)

        aligner = Aligner(pathToHtkModel, currPathToAudioFile, lyrics,
                          isLyricsFromFile, withSynthesis)

        # The file name is taken from the aligner, not from the argument.
        audioFileName = os.path.basename(aligner.pathToAudioFile)
        audioStem = os.path.splitext(audioFileName)[0]
        alignedMlfPath = (os.path.join(path_TO_OUTPUT, audioStem)
                          + HTK_MLF_ALIGNED_SUFFIX)

        aligner.alignAudio(0, path_TO_OUTPUT, alignedMlfPath)

        return alignedMlfPath
Exemplo n.º 5
0
def main(argv):

    '''
    Compute the fitness of every neoantigen and write the result as a
    tab-separated table.

    command line parameters (argv[0] is ignored):
    neofile - argv[1], text file with neoantigen data (supplementary data)
    alignmentDirectory - argv[2], folder with precomputed alignments; accepted
                         for positional compatibility but not used, because
                         alignments are read from xmlpath instead
    a - argv[3], midpoint parameter of the logistic function, alignment score threshold
    k - argv[4], slope parameter of the logistic function
    outfile - argv[5], path to a file where to output neoantigen fitness computation
    xmlpath - argv[6], alignment file handed to Aligner.readAllBlastAlignments
              (presumably BLAST output -- confirm against Aligner)
    '''

    neofile = argv[1]
    a = float(argv[3])
    k = float(argv[4])
    # Taken from the argv parameter like the other arguments, so that
    # main() honours the list it is called with instead of sys.argv.
    outfile = argv[5]
    xmlpath = argv[6]

    [neoantigens, samples] = readNeoantigens(neofile)

    # Compute TCR-recognition probabilities for all neoantigens.
    # A single alignment file is read rather than one file per sample.
    aligner = Aligner()
    aligner.readAllBlastAlignments(xmlpath)
    aligner.computeR(a, k)

    # Write neoantigen recognition potential
    header = ["NeoantigenID", "Mutation", "Sample", "MutantPeptide",
              "WildtypePeptide", "A", "R", "w", "wc", "Fitness"]
    # The context manager closes (and flushes) the report even if a row fails.
    with open(outfile, 'w') as of:
        of.write("\t".join(header) + "\n")
        for i in neoantigens:
            neoantigen = neoantigens[i]
            w = neoantigen.getHydro()  # calculates neoantigen fraction based on Luksza definition
            wc = neoantigen.getConsortiumHydro()  # calculates neoantigen fraction based on Consortium's definition (https://doi.org/10.1016/j.cell.2020.09.015)
            A = neoantigen.getA()  # MHC amplitude A
            mtpeptide = neoantigen.mtPeptide  # mutant peptide
            wtpeptide = neoantigen.wtPeptide
            R = aligner.getR(i)

            # Only w enters the fitness; wc is reported alongside it.
            fitnessCost = A * R * w

            row = [i, neoantigen.mid, neoantigen.sample, mtpeptide, wtpeptide,
                   A, R, w, wc, fitnessCost]
            of.write("\t".join(map(str, row)) + "\n")
import sys
from Aligner import Aligner
from SentenceSplitter import SentenceSplitter
import pickle
import codecs

# Decode standard input as UTF-8. On Python 3 the byte stream lives in
# sys.stdin.buffer; where that attribute is absent (Python 2) sys.stdin
# itself yields bytes, so it is used directly.
char_stream = codecs.getreader("utf-8")(getattr(sys.stdin, 'buffer', sys.stdin))

# NOTE: unpickling can execute arbitrary code, so these dictionary files
# must come from a trusted source.
with open('../../dictionaries/dictionary.pkl', 'rb') as pkl_file:
    lang_dict = pickle.load(pkl_file)
with open('../../dictionaries/rev_dictionary.pkl', 'rb') as pkl_file:
    rev_lang_dict = pickle.load(pkl_file)
aligner = Aligner(lang_dict, rev_lang_dict)
splitter = SentenceSplitter()

# Each input line is expected to hold a sentence and its translation
# separated by a single tab.
for line in char_stream:
    try:
        sentence, translation = line.strip().split('\t')
        sentence, _ = splitter.split_sentence(sentence)
        translation, _ = splitter.split_english_sentence(translation)
        aligner.print_dict_alignments(sentence, translation, 5)
    except ValueError:
        # Best effort: lines that do not unpack into exactly two fields
        # (or any other ValueError raised while processing) are skipped.
        pass