Пример #1
0
    def regress_cnv(self):
        """Estimate clone frequencies per tumor using only CNV-free sites.

        For every tumor in ``self.v_obs`` the positions whose entry in
        ``self._CNV_file[tumor]`` is not ``'normal'`` are removed from the
        observed values and from every sequence in ``self.ini_clone_seq``.
        The reduced data are handed to ``self.make_Cmatrix`` and
        ``self.do_nnls0``.

        Side effects:
            ``self.Tumor2Clone_frequency`` maps ``'T-' + tumor`` to the
            mapping returned by ``do_nnls0``.
            ``self.hitclone_seq_builder`` is the result of
            ``MegaAlignment.UpMeg`` applied to the full-length sequences of
            all clones that received a frequency > 0 in at least one tumor.
        """
        Align = MegaAlignment()
        self.Tumor2Clone_frequency = {}
        HitCloSeq_dic = {}
        for tumor in self.v_obs:
            v_obs_single = self.v_obs[tumor]
            CNVls = self._CNV_file[tumor]

            # Split the site indices once; the kept indices are reused for
            # every clone instead of re-scanning the removed list per site.
            KeepPosi = []
            RmSNVPosi = []
            for c, state in enumerate(CNVls):
                if state == 'normal':
                    KeepPosi.append(c)
                else:
                    RmSNVPosi.append(c)

            v_obs_single_sub = [v_obs_single[c] for c in KeepPosi]
            Seq_dic_sub = {}
            for Clo in self.ini_clone_order:
                OldSeq = self.ini_clone_seq[Clo]
                Seq_dic_sub[Clo] = ''.join([OldSeq[c] for c in KeepPosi])

            print(tumor, 'exclude bad SNVs for clone frequency computation', RmSNVPosi)
            Cmatrix_noCNV = self.make_Cmatrix(Seq_dic_sub)
            Clone2Freq = self.do_nnls0(Cmatrix_noCNV, v_obs_single_sub)
            self.Tumor2Clone_frequency['T-' + tumor] = Clone2Freq
            for Clo in Clone2Freq:
                if Clone2Freq[Clo] > 0:
                    # Frequency keys carry no '#', sequence keys do.
                    HitCloSeq_dic['#' + Clo] = self.ini_clone_seq['#' + Clo]
        self.hitclone_seq_builder = Align.UpMeg(HitCloSeq_dic, [])
Пример #2
0
    def do_mega_mp(self, alignment_builder, mega_id):
        """Run the external MP analysis and collect its outputs.

        The alignment is saved to ``self._alignment_file``, the command
        returned by ``self._command_line_string()`` is executed, and, if
        ``self._newick_file`` exists afterwards, its lines are loaded into
        ``self._newick_trees`` and ``self.best_align_result`` is computed via
        ``MakeAncSeqMPMin.get_best_alignment``. Temporary files are cleaned
        up in both cases.

        Args:
            alignment_builder: alignment passed to
                ``MegaAlignment.save_mega_alignment_to_file``.
            mega_id: identifier forwarded to ``self._update_file_names``.

        Returns:
            ``True`` if the newick output file was produced, else ``False``.
        """
        self._newick_trees = []
        print('constructing MP tree')
        result = False
        self._update_file_names(mega_id)
        Align = MegaAlignment()
        Align.save_mega_alignment_to_file(self._alignment_file,
                                          alignment_builder)
        cl = self._command_line_string()
        os.system(cl)
        if os.path.isfile(self._newick_file):
            result = True
            # Context manager guarantees the handle is closed even if
            # reading fails.
            with open(self._newick_file, 'r') as nf:
                self._newick_trees.extend(nf.readlines())
            files = self._get_ancestral_states_files()
            self._retrieve_ancestral_states()
            seq_maker = MakeAncSeqMPMin()

            # NOTE(review): this reads ``self.newick_trees`` (no leading
            # underscore) while the list filled above is
            # ``self._newick_trees`` -- presumably a property defined on the
            # class; confirm.
            self.best_align_result = seq_maker.get_best_alignment(
                files, self._mega_id, True, self.newick_trees)

        self._cleanup_temp_files()
        return result
Пример #3
0
 def extract_hitseq(self, seq_buil, CloFre, Cut):
     """Return ``{'#' + clone: sequence}`` for clones with frequency > ``Cut``.

     ``seq_buil`` is parsed with ``MegaAlignment.name2seq``; ``CloFre`` maps
     clone names (without the leading ``'#'``) to frequencies.
     """
     _, name_to_seq = MegaAlignment().name2seq(seq_buil)
     return {
         '#' + clone: name_to_seq['#' + clone]
         for clone in CloFre
         if CloFre[clone] > Cut
     }
Пример #4
0
 def findcombohit(self, seq_builder):
     """Return ``'y'`` if any sequence name contains ``'Clu'``, else ``'n'``."""
     names, _ = MegaAlignment().name2seq(seq_builder)
     has_cluster_name = any('Clu' in name for name in names)
     return 'y' if has_cluster_name else 'n'
Пример #5
0
    def ReNameCloFreMeg(self, seqs, CloFre, Name):
        """Rename clones and rebuild the alignment and frequency tables.

        Args:
            seqs: alignment parsed with ``MegaAlignment.name2seq``.
            CloFre: ``{tumor_key: {clone: frequency}}``. If it is empty it is
                filled IN PLACE with a single ``'T-A'`` entry that gives every
                sequence in ``seqs`` a frequency of 1.
            Name: ``'list'`` names each clone by concatenating
                ``tumor_key[2:] + rank`` for every tumor where it is present;
                any other value yields ``'Clone1'``, ``'Clone2'``, ... in
                order of first appearance.

        Returns:
            ``(out, NewT2C2F, NewCloOrder)``: the alignment as a list of
            lines (header, an all-``'A'`` ``#hg19`` sequence, then the renamed
            clones), the frequency table restricted to clones with
            frequency > 0 and keyed by the new names, and the new names in
            output order.
        """
        Align = MegaAlignment()
        CloFreAnalize = CloneFrequencyAnalizer()

        NameOrder, Clo2Seq = Align.name2seq(seqs)
        if CloFre == {}:
            CloFre['T-A'] = {}
            for Clo in Clo2Seq:
                CloFre['T-A'][Clo[1:]] = 1

        Len = len(Clo2Seq[NameOrder[0]])
        out = [
            '#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ', '#hg19',
            'A' * Len
        ]

        Old2NewCloLs = {}
        Old2NewCloNum = {}
        CloOrder = []
        Num = 1
        for Tu in CloFre:
            Clo2Fre = CloFre[Tu]
            HitClo = [Clo for Clo in Clo2Fre if Clo2Fre[Clo] > 0]
            TuLabel = Tu[2:]
            # Sort is expected to order the hit clones from large frequency.
            CloLs, _ = CloFreAnalize.Sort(HitClo, Clo2Fre)
            for Rank, Clo in enumerate(CloLs, start=1):
                if Clo not in Old2NewCloLs:
                    Old2NewCloLs[Clo] = ''
                    Old2NewCloNum[Clo] = 'Clone' + str(Num)
                    CloOrder.append(Clo)
                    Num += 1
                Old2NewCloLs[Clo] += TuLabel + str(Rank)

        Old2NewClo = Old2NewCloLs if Name == 'list' else Old2NewCloNum

        NewCloOrder = []
        for Clo in CloOrder:
            NewCloOrder.append(Old2NewClo[Clo])
            out += ['#' + Old2NewClo[Clo], Clo2Seq['#' + Clo]]

        NewT2C2F = {}
        for Tu in CloFre:
            C2F = CloFre[Tu]
            NewT2C2F[Tu] = {
                Old2NewClo[C]: C2F[C]
                for C in C2F if C2F[C] > 0
            }
        return out, NewT2C2F, NewCloOrder
Пример #6
0
 def __init__(self, ini_seq_builder, v_obs, clone_frequencies, CNV,
              freq_cutoff):
     """Store the inputs and parse the initial clone alignment.

     ``ini_seq_builder`` is parsed with ``MegaAlignment.name2seq`` into
     ``self.clone_order`` / ``self.clone_seq``; ``self.snv_num`` is the
     length of the first clone sequence.
     """
     self.freq_cutoff = freq_cutoff
     self.CloFreCutOff = freq_cutoff
     self.Tu2CloFre = clone_frequencies
     self.v_obs = v_obs
     self._CNV_file = CNV

     names, name_to_seq = MegaAlignment().name2seq(ini_seq_builder)
     self.clone_order = names
     self.clone_seq = name_to_seq
     self.snv_num = len(name_to_seq[names[0]])
Пример #7
0
    def get_decomposed_seq(self):
        """Try to decompose tumor genotypes into cluster-based clones.

        SNV clusters are generated per tumor with
        ``SNPClusterGenerator_cnv1.cluster_cnv``. For each tumor with cluster
        information, ``get_candidate_decomposed_clones`` is tried; when it
        yields no decomposition (or there is no cluster information) the
        tumor contributes its original hit clones via ``extract_hitseq``.

        Returns:
            ``(sequence_dict, message)``. ``self.clone_seq`` is returned
            unchanged when nothing was decomposed or when a decomposed tumor
            genotype is still found among the collected sequences; otherwise
            the collected sequences and ``'decomposed' + str(tumor_list)``.
        """
        Align = MegaAlignment()
        _, tumor_to_seq = Align.name2seq(self.tumor_seqs)
        print('make SNV clusters')
        cluster_maker = SNPClusterGenerator_cnv1(self.ini_seq_builder,
                                                 self.v_obs, self.Tu2CloFre,
                                                 self._CNV_file,
                                                 self.freq_cutoff)
        # Per tumor: [] or [seq_builder, {tumor: {clone: frequency}}].
        tumor_to_cluster = cluster_maker.cluster_cnv()
        print('Decompose incorrect sample genotype clones')

        collected_seqs = {}
        decomposed_tumor_seqs = []
        decomposed_tumors = []

        for tumor in tumor_to_cluster:
            cluster_info = tumor_to_cluster[tumor]
            candidate_builder = None
            decomposed_seq = ''
            if cluster_info != []:
                candidate_builder, decomposed_seq = (
                    self.get_candidate_decomposed_clones(
                        tumor, cluster_info, tumor_to_seq['#' + tumor]))

            if decomposed_seq != '':
                _, candidate_seqs = Align.name2seq(candidate_builder)
                collected_seqs.update(candidate_seqs)
                decomposed_tumor_seqs.append(decomposed_seq)
                decomposed_tumors.append(tumor)
                continue

            # No usable decomposition: keep the tumor's original hit clones.
            collected_seqs.update(
                self.extract_hitseq(self.ini_seq_builder,
                                    self.Tu2CloFre['T-' + tumor],
                                    self.freq_cutoff))

        if not decomposed_tumors:
            return self.clone_seq, 'no decomposed clone was made'

        for tumor_seq in decomposed_tumor_seqs:
            if Align.find_redundant(tumor_seq, collected_seqs) != []:
                return self.clone_seq, 'tumor genotype that was decomposed was hit in different tumor: failed decomposition'

        return collected_seqs, 'decomposed' + str(decomposed_tumors)
0
 def get_best_alignment(self, files, ID, remove_redundant, tree_list):
     """Parse an ancestral-states file and build a non-redundant alignment.

     Only ``files[0]`` is read. Lines containing ``'Index '`` act as table
     headers: they are skipped themselves, and every other line that follows
     the first header is split on single spaces (empty fields dropped) and
     passed on to ``self.GetRel``.

     Args:
         files: sequence of file paths; the first one is parsed.
         ID: unused here.
         remove_redundant: unused here; redundant sequences are always
             removed via ``MegaAlignment.remove_redund_seqs``.
         tree_list: sequence of trees; the first one is returned.

     Returns:
         ``(alignment, tree_list[0])``.
     """
     Align = MegaAlignment()
     AncFile = files[0]
     print('processing file: ' + AncFile)
     # Context manager so the handle is closed deterministically.
     with open(AncFile, 'r') as handle:
         raw_lines = handle.readlines()

     Lines = []
     in_table = False
     for line in raw_lines:
         if 'Index ' in line:
             in_table = True
         elif in_table:
             # split(' ') on purpose: only spaces separate fields, tabs stay
             # inside a field.
             Lines.append(
                 [field for field in line.strip().split(' ') if field != ''])

     Anc2Seq = self.GetRel(Lines)
     outseq = Align.UpMeg(Anc2Seq, [])
     outseq = Align.remove_redund_seqs(outseq)
     return outseq, tree_list[0]
Пример #9
0
    def MLancetor(self, seqs, Nwk):
        """Infer ancestral sequences for ``seqs`` on the tree ``Nwk``.

        Populates ``self.CellLs``, ``self.Cell2Seq``, ``self.SNVnum``,
        ``self.ancestor_states``, ``self.offspring2ancestor``,
        ``self.code2cell``, ``self.RescaledTree``, ``self.nodeid2seq`` and
        ``self.offspring2ancestor_withou_redunSeq``.
        """
        aligner = MegaAlignment()
        self.CellLs, self.Cell2Seq = aligner.name2seq(seqs)
        self.SNVnum = len(self.Cell2Seq[self.CellLs[0]])

        ancestor_runner = MegaAncestor()
        ancestor_runner.alignment_file = seqs
        ancestor_runner.input_tree_file = Nwk

        (self.ancestor_states, self.offspring2ancestor, _cell2code,
         self.code2cell) = ancestor_runner.retrieve_ancestor_states()
        self.RescaledTree = ancestor_runner.get_scaledNWK()
        self.nodeid2seq = {}
        print('SNV count', self.SNVnum)

        site_range = range(self.SNVnum)
        for node in self.ancestor_states:
            states = self.ancestor_states[node]
            # Each state entry is tab-separated; the first field is the base.
            self.nodeid2seq[node] = ''.join(
                [states[site].split('\t')[0] for site in site_range])

        print(self.nodeid2seq)
        print(self.offspring2ancestor)
        print(self.code2cell)
        self.offspring2ancestor_withou_redunSeq = self.RemoveRedun()
Пример #10
0
    def do_mega_ancestor(self):
        """Run the external ancestral-sequence inference.

        The alignment and tree currently held in ``self._alignment_file`` and
        ``self._input_tree_file`` are captured first, because
        ``self._update_file_names('Ancestor')`` is called next and the same
        attributes are then used as output file paths. After executing
        the command from ``self._command_line_string()``, the first line of
        the ``.nwk`` file next to ``self._ancestor_file`` is stored in
        ``self.branchLengthTree``.

        Returns:
            ``True`` if ``self._ancestor_file`` was produced, else ``False``.
        """
        print('infer ancestral sequences')
        result = False
        alignment_builder = self._alignment_file
        tree_builder = self._input_tree_file
        self._update_file_names('Ancestor')

        Align = MegaAlignment()
        Align.save_mega_alignment_to_file(self._alignment_file,
                                          alignment_builder)
        self.save_str_to_file(tree_builder, self._input_tree_file)

        cl = self._command_line_string()
        os.system(cl)
        if os.path.isfile(self._ancestor_file):
            result = True
            # Same base name as the ancestor file, with the last four
            # characters (the extension) replaced by '.nwk'.
            tree_path = self._ancestor_file[:-4] + '.nwk'
            with open(tree_path, 'r') as tree_handle:
                self.branchLengthTree = tree_handle.readlines()[0]

        return result
Пример #11
0
    def cluster_cnv(self):
        """Cluster SNVs per tumor using only CNV-free sites.

        For each tumor the sites marked ``'normal'`` in
        ``self._CNV_file[tumor]`` are selected, estimated and observed SNV
        frequencies are compared, and ``self.GetExp2CluSNV`` proposes cluster
        sequences. When clusters are found, clone frequencies are
        re-estimated with ``CloneFrequencyComputer_cnv1`` over the hit
        clones plus the clusters.

        Returns:
            ``self.tumor2clusters``: per tumor either ``[]`` or
            ``[hitclone_seq_builder, Tumor2Clone_frequency]``.
        """
        self.tumor2clusters = {}
        Align = MegaAlignment()
        for tumor in self.v_obs:
            self.ObsSNVLs_all = self.v_obs[tumor]
            clone2frequency = self.Tu2CloFre['T-' + tumor]
            CNVlist = self._CNV_file[tumor]

            NoCNVPosi = [
                site for site in range(self.snv_num)
                if CNVlist[site] == 'normal'
            ]
            ObsSNV_sub = [self.ObsSNVLs_all[site] for site in NoCNVPosi]

            # Clones only get an entry when at least one site is kept.
            clone_seq_dic_sub = {}
            if NoCNVPosi:
                for Clone in self.clone_seq:
                    full_seq = self.clone_seq[Clone]
                    clone_seq_dic_sub[Clone] = ''.join(
                        [full_seq[site] for site in NoCNVPosi])

            Site2EstSNV_sub = Calculation.compute_estimated_SNVfrequency1(
                self, clone2frequency, clone_seq_dic_sub)
            Exp2Diff2ObsSNVID = Calculation.compute_diff1(
                self, ObsSNV_sub, Site2EstSNV_sub)
            Clu2Seq = self.GetExp2CluSNV(Exp2Diff2ObsSNVID, tumor, NoCNVPosi)

            if Clu2Seq == {}:
                self.tumor2clusters[tumor] = []
                continue

            seq_list_sub = []
            for HitClo in clone2frequency:
                if clone2frequency[HitClo] > 0:
                    seq_list_sub.extend(
                        ['#' + HitClo, clone_seq_dic_sub['#' + HitClo]])
            for Clu in Clu2Seq:
                seq_list_sub.extend([Clu, Clu2Seq[Clu]])

            # NOTE(review): the observations passed here are the reduced
            # (CNV-free) list while CNVlist is the full-length list --
            # confirm the two stay aligned when some sites are not 'normal'.
            freq_computer = CloneFrequencyComputer_cnv1(
                seq_list_sub, {tumor: ObsSNV_sub}, {tumor: CNVlist},
                self.freq_cutoff)
            freq_computer.regress_cnv()

            self.tumor2clusters[tumor] = [
                freq_computer.hitclone_seq_builder,
                freq_computer.Tumor2Clone_frequency
            ]

        return self.tumor2clusters
Пример #12
0
 def __init__(self, seqs_with_ancestor, v_obs, CNV_info, freq_cutoff):
     """Store the inputs and parse the clone alignment.

     ``seqs_with_ancestor`` is parsed with ``MegaAlignment.name2seq`` into
     ``self.ini_clone_order`` and ``self.ini_clone_seq``.
     """
     self.CutOff = freq_cutoff
     clone_names, clone_seqs = MegaAlignment().name2seq(seqs_with_ancestor)
     self.ini_clone_order = clone_names
     self.ini_clone_seq = clone_seqs
     self._CNV_file = CNV_info
     self.v_obs = v_obs
Пример #13
0
    def get_candidate_decomposed_clones(self, target_tumor, CluInf_tu, Tuseq):
        """Build and test candidate clones made of a root plus SNV clusters.

        The alignment in ``CluInf_tu[0]`` is parsed; every name containing
        ``'Clu'`` other than ``'#Clu0'`` is treated as a cluster, every other
        name (and ``'#Clu0'``) as a root. For each root and each cluster
        combination returned by ``self.combinations`` a candidate sequence is
        built from the union of their mutation positions, provided that the
        clusters share no mutated position with the root and the candidate is
        not already present in ``self.clone_seq``. The candidates, together
        with ``self.clone_seq``, are then regressed against the tumor's
        observations with ``CloneFrequencyComputer_cnv1``.

        Args:
            target_tumor: tumor name, used as key into ``self.v_obs`` and
                ``self._CNV_file`` and as prefix of candidate names.
            CluInf_tu: cluster information for the tumor; only element 0 (an
                alignment builder) is used.
            Tuseq: genotype sequence of the tumor itself.

        Returns:
            ``(seq_builder, tumor_seq)``. On success the hit-clone builder of
            the regression and ``Tuseq``; in every failure case
            ``(CluInf_tu[0], '')``.
        """
        Align = MegaAlignment()

        NameOrder, Name2Seq = Align.name2seq(CluInf_tu[0])

        LenSeq = len(Name2Seq[NameOrder[0]])

        SigCluLs = []
        for Name in NameOrder:  # collect every cluster name except '#Clu0'
            if Name != '#Clu0' and Name.find('Clu') != -1:
                SigCluLs.append(Name)
        # presumably maps a combination ID to a list of cluster names (it is
        # iterated that way below) -- verify against self.combinations
        CluCombLs, IDend = self.combinations([], SigCluLs, 0, {})
        print(target_tumor, 'make cluster comb', SigCluLs, CluCombLs,
              NameOrder)

        if CluCombLs != {}:

            CloCan2Seq = {}
            Got_Candidate = 'n'
            for Root in NameOrder:  # a root is '#Clu0' or any non-cluster (initial candidate) clone
                if Root == '#Clu0' or Root.find('Clu') == -1:
                    RootSeq = Name2Seq[Root]
                    if Root == '#Clu0':
                        CloCan2Seq['#' + target_tumor +
                                   'Clu0'] = RootSeq  # '#Clu0' itself is kept as a candidate clone
                    RootMut = Align.GetMutPos(RootSeq)
                    Got_Candidate = 'y'
                    if CluCombLs != {}:
                        for ID in CluCombLs:
                            CluLs = CluCombLs[ID]

                            # Pool the mutation positions of all clusters in
                            # this combination and build the name suffix.
                            CluN = ''
                            MutPosLs = []
                            for Clu in CluLs:
                                Seq = Name2Seq[Clu]
                                CluMut = Align.GetMutPos(Seq)
                                MutPosLs += CluMut
                                CluN += Clu.replace('#', '')

                            # Reject the combination if any cluster position
                            # is already mutated in the root.
                            Good = 'y'
                            for Mut in MutPosLs:
                                if RootMut.count(Mut) != 0: Good = 'n'

                            if Good == 'y':
                                AllMutPosLs = MutPosLs + RootMut
                                # presumably an all-'A' sequence with 'T' at
                                # the pooled positions -- verify in ModSeq
                                Seq = Align.ModSeq('A' * LenSeq, AllMutPosLs,
                                                   'T', LenSeq)
                                Redun_ls = Align.find_redundant(
                                    Seq,
                                    self.clone_seq)  # compare against all existing clones

                                if Redun_ls == []:
                                    CloCan2Seq['#' + target_tumor +
                                               Root.replace('#', '') +
                                               CluN] = Seq

            if CloCan2Seq != {}:

                # Regress candidates together with all existing clones.
                CloCan2Seq.update(self.clone_seq)
                Can_list = list(CloCan2Seq.keys())

                new_seq = Align.UpMeg(CloCan2Seq, Can_list)

                clone_frequency_combo = CloneFrequencyComputer_cnv1(
                    new_seq, {target_tumor: self.v_obs[target_tumor]},
                    {target_tumor: self._CNV_file[target_tumor]},
                    self.freq_cutoff)
                clone_frequency_combo.regress_cnv()
                # 'y' when at least one hit clone name contains 'Clu'
                CluComboHit = self.findcombohit(
                    clone_frequency_combo.hitclone_seq_builder)
                if CluComboHit == 'y':
                    print(
                        'test the quality of clustercombo, by removing tumor seq (if any)'
                    )
                    hit_seq_ls, hit_seq_dic = Align.name2seq(
                        clone_frequency_combo.hitclone_seq_builder)
                    Tuseq_ls = Align.find_redundant(Tuseq, hit_seq_dic)
                    if Tuseq_ls == []:
                        print(
                            'tumor genotype did not hit, so clustercombo is good'
                        )
                        return clone_frequency_combo.hitclone_seq_builder, Tuseq
                    else:
                        print(
                            'tumor genotype was hit, so test if clustercombo still hit without tumor genotype: testing if clustercombo genotypes fit well'
                        )
                        # Drop every candidate identical to the tumor
                        # genotype and regress once more.
                        Tuseq_ls = Align.find_redundant(Tuseq, CloCan2Seq)
                        sub_hit_seq = []
                        for seqname in CloCan2Seq:
                            if Tuseq_ls.count(seqname) == 0:
                                sub_hit_seq += [seqname, CloCan2Seq[seqname]]

                        clone_frequency_combo_new = CloneFrequencyComputer_cnv1(
                            sub_hit_seq,
                            {target_tumor: self.v_obs[target_tumor]},
                            {target_tumor: self._CNV_file[target_tumor]},
                            self.freq_cutoff)
                        clone_frequency_combo_new.regress_cnv()
                        CluComboHit = self.findcombohit(
                            clone_frequency_combo_new.hitclone_seq_builder)
                        if CluComboHit == 'y':

                            return clone_frequency_combo_new.hitclone_seq_builder, Tuseq
                        else:
                            return CluInf_tu[0], ''
                else:
                    return CluInf_tu[0], ''

            else:
                return CluInf_tu[0], ''
        # No cluster combination was available.
        return CluInf_tu[0], ''
Пример #14
0
 def get_tree_with_branchLen(self, ID):
     """Output the rescaled tree and the node sequences under prefix ``ID``.

     ``self.RescaledTree`` is handed to ``self.GetOut`` with the name
     ``ID + '.nwk'``; ``self.nodeid2seq`` is converted with
     ``MegaAlignment.UpMeg`` and saved as ``ID + '_NodeSeq.meg'``.
     """
     aligner = MegaAlignment()
     tree_path = ID + '.nwk'
     self.GetOut(tree_path, self.RescaledTree)
     node_alignment = aligner.UpMeg(self.nodeid2seq, [])
     node_seq_path = ID + '_NodeSeq.meg'
     aligner.save_mega_alignment_to_file(node_seq_path, node_alignment)
Пример #15
0
    def remove_insignificant_clones(self, v_obs, CloFre_clone,
                                    clone_seq_builder, Tu2CNV, Cut):
        """Drop clones whose SNV frequencies are not distinguishable.

        Per tumor, clones with frequency > 0 are ordered by decreasing
        number of mutated positions. For every ordered pair (first, second)
        the observed frequencies at CNV-free (``'normal'``) sites are split
        into sites mutated (``'T'``) in both clones ("shared") and sites
        mutated only in the first clone ("unique"). A Welch t-test
        (``scipy.stats.ttest_ind`` with ``equal_var=False``) compares the two
        groups; with fewer than 3 values, or fewer than 1% of the sites, in
        either group the p-value is taken as 1. When the p-value exceeds
        ``Cut`` one clone of the pair is marked insignificant: the first one
        if it is a cluster clone (name contains ``'Clu'``) and the second is
        not, otherwise the second.

        Args:
            v_obs: ``{tumor: observed values per site}``.
            CloFre_clone: ``{'T-' + tumor: {clone: frequency}}``.
            clone_seq_builder: alignment parsed with
                ``MegaAlignment.name2seq``.
            Tu2CNV: ``{tumor: CNV state per site}``.
            Cut: p-value threshold.

        Returns:
            ``(new_seq_builder, new_clone_freq)`` restricted to the clones
            that are present and not insignificant in their tumor.
        """
        Align = MegaAlignment()
        _, clone_seq_dic = Align.name2seq(clone_seq_builder)
        new_clone_freq = {}
        new_clone_seq_dic = {}
        for tumor in v_obs:
            CNV = Tu2CNV[tumor]
            Clo2Fre = CloFre_clone['T-' + tumor]
            ObsFre = v_obs[tumor]

            # Group present clones by mutation count, then order the groups
            # from most to fewest mutations.
            MutNum2Clo = {}
            for Clo in Clo2Fre:
                if Clo2Fre[Clo] > 0:
                    MutNum = len(Align.GetMutPos(clone_seq_dic['#' + Clo]))
                    MutNum2Clo.setdefault(MutNum, []).append(Clo)
            clone_order = []
            for MutNum in sorted(MutNum2Clo, reverse=True):
                clone_order += MutNum2Clo[MutNum]

            CloNum = len(clone_order)
            InsigLs = []
            for C1 in range(CloNum - 1):
                Name1 = clone_order[C1]
                Clo1 = clone_seq_dic['#' + Name1]
                num_sites = len(Clo1)
                Min_num = 0.01 * num_sites
                for C2 in range(C1 + 1, CloNum):
                    Name2 = clone_order[C2]
                    Clo2 = clone_seq_dic['#' + Name2]

                    Share = []
                    Unique = []
                    for c in range(num_sites):
                        if CNV[c] != 'normal' or Clo1[c] != 'T':
                            continue
                        if Clo2[c] == 'T':
                            Share.append(ObsFre[c])
                        elif Clo2[c] == 'A':
                            Unique.append(ObsFre[c])

                    too_few = (len(Share) < 3 or len(Unique) < 3
                               or len(Share) < Min_num
                               or len(Unique) < Min_num)
                    if too_few:
                        P = 1
                    else:
                        # Last element of the result is the p-value.
                        P = scipy.stats.ttest_ind(Share,
                                                  Unique,
                                                  equal_var=False)[-1]
                    if P > Cut:
                        if Name1.find('Clu') != -1 and Name2.find('Clu') == -1:
                            InsigLs.append(Name1)
                        else:
                            InsigLs.append(Name2)

            InsigSet = set(InsigLs)
            InsigLs = list(InsigSet)
            if InsigLs != []: print('insignificant clones', tumor, InsigLs)

            new_clone_fre_in = {}
            for Clo in Clo2Fre:
                if Clo2Fre[Clo] > 0 and Clo not in InsigSet:
                    new_clone_fre_in[Clo] = Clo2Fre[Clo]
                    new_clone_seq_dic['#' + Clo] = clone_seq_dic['#' + Clo]
            new_clone_freq['T-' + tumor] = new_clone_fre_in
        new_seq_builder = Align.UpMeg(new_clone_seq_dic, [])

        return new_seq_builder, new_clone_freq