Пример #1
0
    def remove_ancestral_decomposed(self,
                                    remove_tumor_and_rename_decomposed_seq,
                                    Error_rate, tumor_seqs):
        """Filter out decomposed ('Clu') sequences flagged by the mutation test.

        A sequence whose name contains 'Clu' is dropped when, compared with
        any other sequence except '#hg19':
          * the other sequence is also a 'Clu' sequence and
            ``CountAdditionalMut`` returns 0, or
          * the other sequence is not a 'Clu' sequence and
            ``CountAdditionalMut / len(sequence)`` is below ``Error_rate``.

        Tumor sequences from ``tumor_seqs`` are appended when no kept sequence
        name starts (before 'Clu') with the tumor name.

        Args:
            remove_tumor_and_rename_decomposed_seq: alignment parsed with
                ``MegaAlignment.name2seq``.
            Error_rate: threshold on the per-site additional-mutation ratio.
            tumor_seqs: alignment of the tumor sequences.

        Returns:
            A list of MEGA-format lines (header, name/sequence pairs and a
            trailing all-'A' '#hg19' record).
        """
        Align = MegaAlignment()
        SeqOrderIni, Meg2Seq = Align.name2seq(
            remove_tumor_and_rename_decomposed_seq)
        TuLs, Tu2Seq = Align.name2seq(tumor_seqs)
        good_seq = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ']

        removed = set()  # membership only, so a set replaces list.count()
        seq_len = None
        for name1 in SeqOrderIni:
            if name1.find('Clu') == -1:
                continue
            seq1 = Meg2Seq[name1]
            seq_len = len(seq1)
            for name2 in SeqOrderIni:
                if name2 == '#hg19' or name1 == name2:
                    continue
                # NOTE(review): presumably counts mutations present in seq1
                # but absent from seq2 -- confirm against MegaAlignment.
                extra_mut_num = Align.CountAdditionalMut(seq1, Meg2Seq[name2])
                if name2.find('Clu') != -1:
                    if extra_mut_num == 0:
                        removed.add(name1)
                elif 1.0 * extra_mut_num / len(seq1) < Error_rate:
                    removed.add(name1)

        if seq_len is None:
            # No 'Clu' sequence was present: the original code crashed here
            # with an unbound ``seq1``.  Fall back to any available sequence.
            if SeqOrderIni:
                seq_len = len(Meg2Seq[SeqOrderIni[0]])
            elif TuLs:
                seq_len = len(Tu2Seq[TuLs[0]])
            else:
                seq_len = 0

        AddedTuLs = []
        for Name in SeqOrderIni:
            if Name not in removed:
                good_seq += [Name, Meg2Seq[Name]]
                AddedTuLs.append(Name.split('Clu')[0])
        for Tu in TuLs:
            if Tu not in AddedTuLs:
                good_seq += [Tu, Tu2Seq[Tu]]
        good_seq += ['#hg19', 'A' * seq_len]
        return good_seq
Пример #2
0
   def __init__(self, seqs, num_support_position, Cell2PPls, initial_seq_builder, OutFileName):
       """Store the inputs and parse both alignments.

       ``seqs`` and ``initial_seq_builder`` are parsed with
       ``MegaAlignment.name2seq``; ``SNVnum`` is the length of the first
       sequence of ``seqs`` and ``InMeg`` is ``AddNormal(seqs)``.
       """
       self.cut = num_support_position
       aligner = MegaAlignment()
       self.ini_seqs_builder = seqs

       cell_names, cell_to_seq = aligner.name2seq(seqs)
       self.CellLs = cell_names
       self.Cell2Seq = cell_to_seq
       self.SNVnum = len(cell_to_seq[cell_names[0]])

       self.InMeg = aligner.AddNormal(seqs)
       # Only the name -> sequence mapping of the initial alignment is kept.
       _initial_names, self.Cell2iniSeq = aligner.name2seq(initial_seq_builder)

       self.Cell2PPls = Cell2PPls
       self.out_file_name = OutFileName
Пример #3
0
    def remove_tumor_and_rename_decomposed(
            self, tumor2seqs_with_decompose, seqs_with_ancestor, tumor_seqs,
            REP, clone_frequency_for_seqs_with_ancestor):
        """Assemble one alignment from tumor, decomposed and hit-clone records.

        Tumors that have decomposed clones (plus the entries that
        ``make_similar_seq_dic`` lists for them) form a removal set.  For every
        tumor the tumor sequence is emitted unless the tumor is in that set;
        then either its decomposed clones (names containing '#Clu' are renamed
        to '#<tumor><name>REP<REP>') or, when there are none, its clones with
        positive frequency are emitted.  '#Node...' / 'Node...' names and
        names whose prefix before 'Clu' is in the removal set are skipped.

        Returns:
            The result of ``RmRedunSeq`` on the assembled lines, followed by
            an all-'A' '#hg19' record.
        """
        aligner = MegaAlignment()
        _, ancestor_name2seq = aligner.name2seq(seqs_with_ancestor)
        tumor_names, tumor_name2seq = aligner.name2seq(tumor_seqs)
        snv_count = len(tumor_name2seq[tumor_names[0]])
        similar_groups = aligner.identify_similar_seq(tumor_seqs, 0)
        tumor2identical = aligner.make_similar_seq_dic(similar_groups)

        # First pass: collect everything identical to a decomposed tumor.
        removed = set()
        for tumor in tumor2seqs_with_decompose:
            if tumor2seqs_with_decompose[tumor] != []:
                removed.update(tumor2identical['T-' + tumor])

        records = ['MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ']
        emitted = set()  # names already written, to avoid duplicates

        # Second pass: write the records tumor by tumor.
        for tumor in tumor2seqs_with_decompose:
            decomposed = tumor2seqs_with_decompose[tumor]
            tumor_label = '#' + tumor
            if tumor not in removed and tumor_label not in emitted:
                records += [tumor_label, tumor_name2seq[tumor_label]]
                emitted.add(tumor_label)

            if decomposed != []:
                _, cluster2seq = aligner.name2seq(decomposed)
                for cluster in cluster2seq:
                    sequence = cluster2seq[cluster]
                    owner = cluster[1:].split('Clu')[0]
                    if owner in removed or cluster.find('#Node') != -1:
                        continue
                    label = cluster
                    if label.find('#Clu') != -1:
                        label = '#' + tumor + label[1:] + 'REP' + str(REP)
                    if label not in emitted:
                        records += [label, sequence]
                        emitted.add(label)
            else:
                hit_clones = clone_frequency_for_seqs_with_ancestor['T-' + tumor]
                for clone in hit_clones:
                    if not hit_clones[clone] > 0:
                        continue
                    owner = clone.split('Clu')[0]
                    if owner in removed or clone[:4] == 'Node':
                        continue
                    label = '#' + clone
                    if label not in emitted:
                        records += [label, ancestor_name2seq[label]]
                        emitted.add(label)

        deduplicated = aligner.RmRedunSeq(records)
        deduplicated += ['#hg19', ('A' * snv_count)]
        return deduplicated
    def EstimateSNVfre(self, Tu2CloFre, clone_seq0, ReadCount):
        """Estimate per-tumor SNV frequencies from clone frequencies.

        For every tumor and SNV position the estimate is the sum of
        ``frequency / 2`` over the clones whose sequence has 'T' at that
        position.  The observed frequency is ``alt / (alt + ref)`` taken from
        ``ReadCount['<tumor>:alt']`` and ``ReadCount['<tumor>:ref']``.

        Args:
            Tu2CloFre: ``{tumor_key: {clone: frequency}}``; the part of
                ``tumor_key`` after the last '-' is used as the tumor name.
            clone_seq0: alignment parsed with ``MegaAlignment.name2seq``;
                clones are looked up as '#<clone>'.
            ReadCount: mapping holding the ':alt' and ':ref' count lists.

        Returns:
            ``(tumor2estSNV, tumor2diff)``: per tumor, the list of estimated
            frequencies and the list of ``estimate - observed`` differences.

        Raises:
            ZeroDivisionError: if a position has zero alt and zero ref reads.
        """
        Align = MegaAlignment()
        _, clone_seq = Align.name2seq(clone_seq0)

        tumor2estSNV = {}
        tumor2diff = {}

        for tumor_key in Tu2CloFre:
            clone2frequency = Tu2CloFre[tumor_key]
            tumor = tumor_key.split('-')[-1]
            alt_counts = ReadCount[tumor + ':alt']
            ref_counts = ReadCount[tumor + ':ref']
            snv_num = len(ref_counts)

            # The per-clone contribution does not depend on the position, so
            # compute it once instead of once per SNV.
            contributions = []
            for Clo in clone2frequency:
                frequency = clone2frequency[Clo]
                if str(frequency).find('e') != -1:
                    # Values printed in scientific notation are treated as 0.
                    half = 0
                else:
                    # 2.0 forces true division so integer frequencies are not
                    # truncated under Python 2.
                    half = frequency / 2.0
                contributions.append((clone_seq['#' + Clo], half))

            estSNVfreLs = []
            DiffLs = []
            for c in range(snv_num):
                estSNVfre = 0
                for seq, half in contributions:
                    if seq[c] == 'T':
                        estSNVfre += half
                estSNVfreLs.append(estSNVfre)

                alt = float(alt_counts[c])
                ref = float(ref_counts[c])
                Obs = alt / (alt + ref)
                DiffLs.append(estSNVfre - Obs)

            tumor2estSNV[tumor] = estSNVfreLs
            tumor2diff[tumor] = DiffLs
        return tumor2estSNV, tumor2diff
Пример #5
0
 def __init__(self, seqs_with_ancestor, v_obs, CNV_info, freq_cutoff):
     """Keep the inputs and the parsed clone alignment on the instance."""
     self.CutOff = freq_cutoff
     aligner = MegaAlignment()
     # name2seq yields the sequence order and the name -> sequence mapping.
     parsed = aligner.name2seq(seqs_with_ancestor)
     self.ini_clone_order, self.ini_clone_seq = parsed
     self._CNV_file = CNV_info
     self.v_obs = v_obs
 def findcombohit(self,seq_builder):
     """Return 'y' if any sequence name in ``seq_builder`` contains 'Clu', else 'n'."""
     aligner = MegaAlignment()
     names, _ = aligner.name2seq(seq_builder)
     for name in names:
         if 'Clu' in name:
             return 'y'
     return 'n'
 def extract_hitseq(self,seq_buil,CloFre,Cut):
     """Return ``{'#clone': sequence}`` for clones whose frequency exceeds ``Cut``.

     ``CloFre`` maps clone names (without '#') to frequencies; sequences are
     taken from ``seq_buil`` parsed with ``MegaAlignment.name2seq``.
     """
     aligner = MegaAlignment()
     _, name2seq = aligner.name2seq(seq_buil)
     hits = {}
     for clone in CloFre:
         if CloFre[clone] > Cut:
             label = '#' + clone
             hits[label] = name2seq[label]
     return hits
    def __init__(self, cluster_information, original_seq, tumor_seq, tsp_list,
                 clone_frequency_cutoff, CNV_info, ReadCountTable):
        """Store the inputs and derive the alignment/tsp based attributes.

        Parses ``original_seq`` and ``tumor_seq``, records the positions
        returned by ``GetSharePosi1(original sequences, 'T')``, the observed
        alt frequencies from ``tsp_information`` and the similar-sequence
        dictionary of the tumor alignment.
        """
        self.Tu2Cluster = cluster_information
        self.CNV_info = CNV_info
        self.ReadCountTable = ReadCountTable

        aligner = MegaAlignment()
        self.OriAncOrder, self.OriAnc2Seq0 = aligner.name2seq(original_seq)
        self.TOrder, self.T2Seq = aligner.name2seq(tumor_seq)
        self.SharePosi = aligner.GetSharePosi1(self.OriAnc2Seq0, 'T')

        self.all_tsp = tsp_information(tsp_list)
        # The same cutoff is exposed under two attribute names.
        self.CloFreCutOff = clone_frequency_cutoff
        self.v_obs = self.all_tsp.tumor2alt_frequency()

        similar_groups = aligner.identify_similar_seq(tumor_seq, 0)
        self.identical_seq = aligner.make_similar_seq_dic(similar_groups)

        self.freq_cutoff = self.CloFreCutOff
Пример #9
0
    def finalize_results(self, decomposed_seq_builder,
                         decomposed_Tumor2Clone_frequency,
                         origianl_seq_builder, original_Tumor2Clone_frequency,
                         REP):
        """Merge original and decomposed clone results into one alignment.

        For tumors without a (non-empty) decomposed frequency table the
        original table is reused and the sequences of its positive-frequency
        clones are taken from the original alignment.  Otherwise the
        positive-frequency decomposed clones are kept; a clone whose name
        contains 'Clu' but no 'REP', or contains 'REP<REP-1>', gets the suffix
        'REP<REP>' appended.  Other clones keep their name and use the
        original sequence when one exists, else the decomposed one.

        Returns:
            ``(rename_seq_builder, NewCloFre)``: the alignment built by
            ``UpMeg`` and the per-tumor clone-frequency dictionary.
        """
        Align = MegaAlignment()
        _, DecomSeqDic = Align.name2seq(decomposed_seq_builder)
        _, OriSeqDic = Align.name2seq(origianl_seq_builder)
        NewCloSeqDic = {}
        NewCloFre = {}
        for Tu in original_Tumor2Clone_frequency:
            # ``in`` replaces dict.has_key(), which no longer exists in
            # Python 3; the two formerly duplicated branches are merged.
            if (Tu not in decomposed_Tumor2Clone_frequency
                    or decomposed_Tumor2Clone_frequency[Tu] == {}):
                CloFre = original_Tumor2Clone_frequency[Tu]
                for Clo in CloFre:
                    if CloFre[Clo] > 0:
                        NewCloSeqDic['#' + Clo] = OriSeqDic['#' + Clo]
            else:
                CloFre0 = decomposed_Tumor2Clone_frequency[Tu]
                CloFre = {}
                for Clo in CloFre0:
                    Fre = CloFre0[Clo]
                    if Fre > 0:
                        if (Clo.find('Clu') != -1 and Clo.find('REP')
                                == -1) or Clo.find('REP' + str(REP - 1)) != -1:
                            CloFre[Clo + 'REP' + str(REP)] = Fre
                            NewCloSeqDic['#' + Clo + 'REP' +
                                         str(REP)] = DecomSeqDic['#' + Clo]
                        else:
                            CloFre[Clo] = Fre
                            if '#' + Clo in OriSeqDic:
                                NewCloSeqDic['#' + Clo] = OriSeqDic['#' + Clo]
                            else:
                                NewCloSeqDic['#' + Clo] = DecomSeqDic['#' +
                                                                      Clo]
            NewCloFre[Tu] = CloFre
        rename_seq_builder = Align.UpMeg(NewCloSeqDic, [])
        return rename_seq_builder, NewCloFre
Пример #10
0
 def __init__(self, tumor_seq, tsp_list, mao_file):
     """Parse the tumor alignment and load the observed SNV frequencies.

     ``Len`` is the length of the first tumor sequence; ``Tu2SNV`` is the
     result of ``tsp_information(tsp_list).tumor2alt_frequency()``.
     """
     aligner = MegaAlignment()
     names, name2seq = aligner.name2seq(tumor_seq)
     self.tumor_list = names
     self.tumor2seq = name2seq
     self.Len = len(name2seq[names[0]])
     self.mao_file = mao_file
     self.tsp_list = tsp_list
     self.Tu2SNV = tsp_information(tsp_list).tumor2alt_frequency()
Пример #11
0
    def add_back_CNVSNV(self, DecomTu2Seq_builder_sub, CNV_information,
                        original_seqs_builder_all,
                        original_Tumor2Clone_frequency, tsp_list):
        """Expand sub-alignments back to all positions listed in the CNV info.

        For each tumor with a non-empty sub-alignment, every clone that is not
        already in the original full alignment is expanded position by
        position: positions marked 'normal' in ``CNV_information[tumor]``
        consume the next character of the sub-sequence, other positions
        become 'A' when the observed SNV frequency is 0 and '?' otherwise.
        Clones present in the original alignment keep their original
        sequence.  Tumors with an empty sub-alignment contribute their
        positive-frequency original clones.

        Returns:
            The alignment built by ``UpMeg`` from the collected sequences.
        """
        all_tsp = tsp_information(tsp_list)
        v_obs = all_tsp.tumor2alt_frequency()
        Seq_all_dic = {}
        Align = MegaAlignment()
        _, Original_clodic_all = Align.name2seq(original_seqs_builder_all)
        for Tumor in DecomTu2Seq_builder_sub:
            Seq_builder_sub = DecomTu2Seq_builder_sub[Tumor]
            if Seq_builder_sub != []:
                SNVfre_list = v_obs[Tumor]
                _, Clo2Seq = Align.name2seq(Seq_builder_sub)
                CNVinfo = CNV_information[Tumor]
                for Clo in Clo2Seq:
                    # ``in`` replaces the Python-2-only dict.has_key().
                    if Clo in Original_clodic_all:
                        # The original sequence wins, so skip the expansion.
                        Seq_all_dic[Clo] = Original_clodic_all[Clo]
                        continue
                    Seq_sub = Clo2Seq[Clo]
                    c_seq = 0  # next unread position of the sub-sequence
                    expanded = []
                    for c_all, state in enumerate(CNVinfo):
                        if state == 'normal':
                            expanded.append(Seq_sub[c_seq])
                            c_seq += 1
                        elif SNVfre_list[c_all] == 0:
                            expanded.append('A')
                        else:
                            expanded.append('?')
                    Seq_all_dic[Clo] = ''.join(expanded)
            else:
                CloFre = original_Tumor2Clone_frequency['T-' + Tumor]
                for Clo in CloFre:
                    if CloFre[Clo] > 0 and '#' + Clo not in Seq_all_dic:
                        Seq_all_dic['#' + Clo] = Original_clodic_all['#' +
                                                                     Clo]

        decom_all_seq_builder = Align.UpMeg(Seq_all_dic, [])

        return decom_all_seq_builder
Пример #12
0
    def ReNameCloFreMeg(self, seqs, CloFre, Name):
        """Rename clones and rebuild the alignment and frequency tables.

        Clones with positive frequency are visited tumor by tumor in the order
        returned by ``CloneFrequencyAnalizer.Sort``.  Each clone gets two
        candidate names: 'Clone<N>' (N counts clones in first-seen order) and
        a concatenation of '<tumor><rank>' for every tumor where it occurs
        (the tumor key with its first two characters removed).  ``Name ==
        'list'`` selects the concatenated names, anything else the numbered
        ones.

        Args:
            seqs: alignment parsed with ``MegaAlignment.name2seq``.
            CloFre: ``{tumor_key: {clone: frequency}}``.  When empty, a single
                tumor 'T-A' holding every clone at frequency 1 is assumed; the
                caller's dictionary is no longer modified in that case.
            Name: naming scheme selector (see above).

        Returns:
            ``(out, NewT2C2F, NewCloOrder)``: MEGA lines starting with an
            all-'A' '#hg19' record, the renamed frequency table and the new
            clone names in first-seen order.
        """
        Align = MegaAlignment()
        CloFreAnalize = CloneFrequencyAnalizer()

        NameOrder, Clo2Seq = Align.name2seq(seqs)
        if CloFre == {}:
            # Build the default locally instead of mutating the argument.
            CloFre = {'T-A': {Clo[1:]: 1 for Clo in Clo2Seq}}

        Len = len(Clo2Seq[NameOrder[0]])
        out = [
            '#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ', '#hg19',
            'A' * Len
        ]
        Old2NewCloLs = {}
        Old2NewCloNum = {}
        CloOrder = []
        Num = 1
        for Tu in CloFre:
            Clo2Fre = CloFre[Tu]
            HitClo = [Clo for Clo in Clo2Fre if Clo2Fre[Clo] > 0]
            TuLabel = Tu[2:]
            # Sort is documented in the original as "from large frequency".
            CloLs, _ = CloFreAnalize.Sort(HitClo, Clo2Fre)
            for Rank, Clo in enumerate(CloLs, 1):
                if Clo not in Old2NewCloLs:
                    Old2NewCloLs[Clo] = ''
                    Old2NewCloNum[Clo] = 'Clone' + str(Num)
                    CloOrder.append(Clo)
                    Num += 1
                Old2NewCloLs[Clo] += TuLabel + str(Rank)

        if Name == 'list':
            Old2NewClo = Old2NewCloLs
        else:
            Old2NewClo = Old2NewCloNum

        NewCloOrder = []
        for Clo in CloOrder:
            NewCloOrder.append(Old2NewClo[Clo])
            out += ['#' + Old2NewClo[Clo], Clo2Seq['#' + Clo]]

        NewT2C2F = {}
        for Tu in CloFre:
            C2F = CloFre[Tu]
            NewC2F = {}
            for C in C2F:
                if C2F[C] > 0:
                    NewC2F[Old2NewClo[C]] = C2F[C]
            NewT2C2F[Tu] = NewC2F
        return out, NewT2C2F, NewCloOrder
Пример #13
0
    def find_decomposed_clone(self, no_back_para_mut_decomposed_seq, REP,
                              Tree):
        """Classify the clones decomposed in round ``REP`` using the tree text.

        A clone is considered when its name contains 'Clu' and ends with
        'REP<REP>'.  Its branch length is read from ``Tree`` as the text
        between '<name without first char>:' and the next ',' or ')'; clones
        with a non-zero length are "tips".  A tip is scheduled for removal
        when ``CountAdditionalMut(tip, other)`` is 0 for some other sequence.

        Returns:
            ``(status, removed, builder)`` where ``status`` is 'n' (no clone
            of this round), 'y' (at least one tip) or 'anc' (only zero-length
            clones), ``removed`` lists the dropped tips and ``builder`` is the
            input alignment, or a rebuilt one when something was removed.
        """
        aligner = MegaAlignment()
        clone_names, clone2seq = aligner.name2seq(
            no_back_para_mut_decomposed_seq)

        round_suffix = 'REP' + str(REP)
        found_round_clone = False
        tip_clones = []
        for clone in clone_names:
            if 'Clu' not in clone or not clone.endswith(round_suffix):
                continue
            found_round_clone = True
            # Jump just past "<name>:" (name is stored with a leading marker
            # character that is absent from the tree text).
            cursor = Tree.find(clone[1:] + ':') + len(clone)
            length_text = ''
            while True:
                symbol = Tree[cursor]
                cursor += 1
                if symbol == ',' or symbol == ')':
                    break
                length_text += symbol
            if float(length_text) != 0:
                tip_clones.append(clone)

        removed = []
        if not found_round_clone:
            status = 'n'
        elif tip_clones != []:
            status = 'y'
            for tip in tip_clones:
                tip_seq = clone2seq[tip]
                extra_counts = [
                    aligner.CountAdditionalMut(tip_seq, clone2seq[other])
                    for other in clone2seq if other != tip
                ]
                if 0 in extra_counts:
                    removed.append(tip)
        else:
            status = 'anc'

        if removed == []:
            builder = no_back_para_mut_decomposed_seq
        else:
            kept = {}
            for clone in clone2seq:
                if clone not in removed:
                    kept[clone] = clone2seq[clone]
            builder = aligner.UpMeg(kept, [])
        return status, removed, builder
Пример #14
0
 def add_back_anc(self, Sub_seq_builder, All_seq_builder):
     """Overlay the sub-alignment on the full alignment.

     For every sequence name in ``All_seq_builder`` the sequence from
     ``Sub_seq_builder`` is used when that name exists there, otherwise the
     one from ``All_seq_builder``.  Names that occur only in the
     sub-alignment are not included.

     Returns:
         The alignment built by ``MegaAlignment.UpMeg`` from the merged
         name -> sequence mapping.
     """
     Align = MegaAlignment()
     _, Sub = Align.name2seq(Sub_seq_builder)
     _, All = Align.name2seq(All_seq_builder)
     Clo2Seq = {}
     for Clo in All:
         # ``in`` replaces dict.has_key(), which was removed in Python 3.
         Clo2Seq[Clo] = Sub[Clo] if Clo in Sub else All[Clo]
     return Align.UpMeg(Clo2Seq, [])
Пример #15
0
 def __init__(self, ini_seq_builder, v_obs, clone_frequencies, CNV,
              freq_cutoff):
     """Store the inputs and parse the initial clone alignment.

     ``snv_num`` is the length of the first sequence of the alignment.
     """
     # The cutoff is kept under two attribute names.
     self.freq_cutoff = freq_cutoff
     self.CloFreCutOff = freq_cutoff
     self.Tu2CloFre = clone_frequencies
     self.v_obs = v_obs
     order, name2seq = MegaAlignment().name2seq(ini_seq_builder)
     self.clone_order = order
     self.clone_seq = name2seq
     self._CNV_file = CNV
     self.snv_num = len(name2seq[order[0]])
    def get_decomposed_seq(self):
        """Try to decompose tumor genotypes into clones using SNV clusters.

        Clusters come from ``SNPClusterGenerator_cnv1(...).cluster_cnv()``.
        For a tumor with cluster information,
        ``get_candidate_decomposed_clones`` is asked for candidates; when it
        reports a non-empty second value the candidate sequences are pooled
        and the tumor counts as decomposed.  In every other case the tumor's
        original clones above ``freq_cutoff`` are pooled instead.

        Returns:
            ``(sequences, message)``.  ``self.clone_seq`` is returned with an
            explanatory message when nothing was decomposed or when
            ``find_redundant`` finds a decomposed tumor genotype among the
            pooled sequences; otherwise the pooled ``{name: sequence}``
            dictionary and 'decomposed' + the list of decomposed tumors.
        """
        aligner = MegaAlignment()
        _, tumor2seq = aligner.name2seq(self.tumor_seqs)
        print('make SNV clusters')
        cluster_maker = SNPClusterGenerator_cnv1(
            self.ini_seq_builder, self.v_obs, self.Tu2CloFre, self._CNV_file,
            self.freq_cutoff)
        # Original note: Tu2Cluster={tumor:[[seq_builder,{tumor:{clone frequency}}]]}
        tumor2clusters = cluster_maker.cluster_cnv()
        print('Decompose incorrect sample genotype clones')

        pooled_hits = {}
        convolved_tumor_seqs = []
        decomposed_tumors = []

        for tumor in tumor2clusters:
            cluster_info = tumor2clusters[tumor]
            convolved = ''
            if cluster_info != []:
                candidate_builder, convolved = self.get_candidate_decomposed_clones(
                    tumor, cluster_info, tumor2seq['#' + tumor])

            if convolved != '':
                _, candidate_dic = aligner.name2seq(candidate_builder)
                pooled_hits.update(candidate_dic)
                convolved_tumor_seqs.append(convolved)
                decomposed_tumors.append(tumor)
            else:
                # No clusters, or no usable candidate: keep the original hits.
                original_hits = self.extract_hitseq(
                    self.ini_seq_builder, self.Tu2CloFre['T-' + tumor],
                    self.freq_cutoff)
                pooled_hits.update(original_hits)

        if decomposed_tumors == []:
            return self.clone_seq, 'no decomposed clone was made'

        for convolved in convolved_tumor_seqs:
            if aligner.find_redundant(convolved, pooled_hits) != []:
                return self.clone_seq, 'tumor genotype that was decomposed was hit in different tumor: failed decomposition'

        return pooled_hits, 'decomposed' + str(decomposed_tumors)
 def __init__(self, ini_seq_builder, tsp_list, clone_frequencies, CNV,
              freq_cutoff):
     """Store the inputs, load the tsp data and parse the clone alignment.

     ``v_obs`` is ``tsp_information(tsp_list).tumor2alt_frequency()``.
     """
     self.tsp_list = tsp_list
     # The cutoff is kept under two attribute names.
     self.freq_cutoff = freq_cutoff
     self.Tu2CloFre = clone_frequencies
     tsp_data = tsp_information(tsp_list)
     self.all_tsp = tsp_data
     self.CloFreCutOff = freq_cutoff
     self.v_obs = tsp_data.tumor2alt_frequency()
     order, name2seq = MegaAlignment().name2seq(ini_seq_builder)
     self.clone_order = order
     self.clone_seq = name2seq
     self._CNV_file = CNV
Пример #18
0
    def _remove_redund_seqs(self, Meg):
        """Collapse sequences that ``_count_diff_num`` reports as identical.

        Every sequence is compared with every sequence (itself included);
        names with a difference count of 0 form its identity group.  One
        record is emitted per group, named by this preference: '#hg19' first,
        then a name containing neither 'Node' nor 'Clu', then a non-'Node'
        name over a 'Node' name, and between two 'Clu' names the one
        containing 'REP'.

        Args:
            Meg: alignment parsed with ``MegaAlignment.name2seq``.

        Returns:
            MEGA-format lines: the header followed by name/sequence pairs.
        """
        # Parenthesised single-argument form prints the same text under
        # Python 2 and Python 3.
        print('removing redundant seqs...')
        Align = MegaAlignment()
        NameOrder, Name2Seq = Align.name2seq(Meg)

        out2 = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;']
        Name2IdenLs = {}
        for Ref in NameOrder:
            RefSeq = Name2Seq[Ref]
            # Ref ends up listed twice (seed + self comparison); harmless,
            # kept so the candidate order matches the original.
            Name2IdenLs[Ref] = [Ref]
            for Tar in NameOrder:
                if self._count_diff_num(RefSeq, Name2Seq[Tar]) == 0:
                    Name2IdenLs[Ref].append(Tar)

        Done = set()  # names already covered by an emitted group
        for Name in Name2Seq:
            if Name in Done:
                continue
            IdenLs = Name2IdenLs[Name]
            GoodName = ''
            for i in IdenLs:
                if GoodName == '': GoodName = i
                elif i == '#hg19': GoodName = i
                elif GoodName == '#hg19': pass
                elif i.find('Node') == -1 and i.find('Clu') == -1:
                    GoodName = i
                elif GoodName.find('Node') == -1 and GoodName.find(
                        'Clu') == -1:
                    pass
                elif GoodName.find('Node') != -1:
                    GoodName = i
                elif i.find('Node') != -1:
                    pass
                elif i.find('Clu') != -1 and GoodName.find(
                        'Clu') != -1 and i.find(
                            'REP') != -1 and GoodName.find('REP') == -1:
                    GoodName = i
                else:
                    pass
            out2 += [GoodName, Name2Seq[Name]]
            Done.update(IdenLs)

        return out2
Пример #19
0
 def clone_to_tumor_phylogeny(self, OriginalNwk, Tu2CloFre, CloSeqLs):
     """Prune the clone tree to the clones kept for each tumor.

     Within each tumor, a positive-frequency clone is kept when, against
     every other positive-frequency clone of that tumor, at least 5% of the
     positions are 'T' in it and 'A' in the other.  The clone with the most
     mutated positions (``GetMutPos``) is always kept for the tumor.  'hg19'
     is always kept.

     Args:
         OriginalNwk: tree passed to ``self.PruneTree``.
         Tu2CloFre: ``{tumor: {clone: frequency}}``.
         CloSeqLs: alignment parsed with ``MegaAlignment.name2seq``; clones
             are looked up as '#<clone>'.

     Returns:
         ``(Pruned_Root, Keep2TuLs)``: the pruned tree after
         ``self.RootTree`` and a mapping from each kept clone to the tumors
         it was kept for.
     """
     KeepLs = ['hg19']
     Keep2TuLs = {'hg19': []}
     Align = MegaAlignment()
     _, CloSeq = Align.name2seq(CloSeqLs)
     # Single-argument print() behaves identically on Python 2 and 3.
     print(Tu2CloFre)
     for Tu in Tu2CloFre:
         CloFre = Tu2CloFre[Tu]
         CloLs = [Clo for Clo in CloFre if CloFre[Clo] > 0]
         LarClo = ''
         LarMut = 0
         for Clo0 in CloLs:
             Seq0 = CloSeq['#' + Clo0]
             MutC = len(Align.GetMutPos(Seq0))
             if MutC > LarMut:
                 LarMut = MutC
                 LarClo = Clo0
             Keep = 'y'
             for Clo1 in CloLs:
                 if Clo0 == Clo1:
                     continue
                 Seq1 = CloSeq['#' + Clo1]
                 Len = len(Seq1)
                 UniMutNum = sum(
                     1 for c in range(Len)
                     if Seq0[c] == 'T' and Seq1[c] == 'A')
                 if 1.0 * UniMutNum / Len < 0.05:
                     Keep = 'n'
             if Keep == 'y':
                 if Clo0 not in KeepLs:
                     KeepLs.append(Clo0)
                     Keep2TuLs[Clo0] = []
                 Keep2TuLs[Clo0].append(Tu)
         # LarClo stays '' when the tumor has no hit clone with at least one
         # mutation; do not register an empty clone name in that case.
         if LarClo != '':
             if LarClo not in KeepLs:
                 KeepLs.append(LarClo)
                 Keep2TuLs[LarClo] = []
             Keep2TuLs[LarClo].append(Tu)
     RmLs = [Clo[1:] for Clo in CloSeq if Clo[1:] not in KeepLs]
     # %-formatting yields the same output as the old print statement under
     # both Python 2 and Python 3.
     print('remove ancestral clones %s' % (RmLs,))
     print('tumor ls for each clone %s' % (Keep2TuLs,))
     Pruned = self.PruneTree(OriginalNwk, KeepLs)
     Pruned_Root = self.RootTree(Pruned)
     return Pruned_Root, Keep2TuLs
Пример #20
0
    def compare_good_posi_number(self, Initial, After, IniSeq_buil,
                                 AftSeq_buil):
        """Decide whether the "after" result is at least as good as the initial.

        Compares ``count_good_posi`` of ``Initial`` and ``After`` and the
        number of positions returned by ``GetSharePosi1(sequences, 'A')`` for
        both alignments.

        Returns:
            'n' when the initial good-position count is larger than the
            after count, or when the after alignment has more shared 'A'
            positions than the initial one; otherwise 'y'.
        """
        IniCou = self.count_good_posi(Initial)
        AftCou = self.count_good_posi(After)
        Align = MegaAlignment()
        _, IniSeq_dic = Align.name2seq(IniSeq_buil)
        _, AftSeq_dic = Align.name2seq(AftSeq_buil)
        ShareIni = Align.GetSharePosi1(IniSeq_dic, 'A')
        ShareAft = Align.GetSharePosi1(AftSeq_dic, 'A')
        # %-formatting prints the same space-separated line as the former
        # Python 2 print statement and also works on Python 3.
        print('%s %s %s %s' % (IniCou, AftCou, len(ShareIni), len(ShareAft)))
        if IniCou > AftCou or len(ShareAft) > len(ShareIni):
            return 'n'
        return 'y'
 def __init__(self, seqs_with_ancestor, tsp_list, CNV_info, freq_cutoff,
              ReadCountTable):
     """Store the cutoff and, unless the alignment is ``{}``, the remaining state.

     When ``seqs_with_ancestor`` is not an empty dict the alignment is
     parsed, ``make_readcount`` is called, and ``SNVnum`` / ``CNVnum`` are
     set to the length of the first entry of ``ReadCountTable`` /
     ``CNV_info``.

     Raises:
         IndexError: if ``ReadCountTable`` or ``CNV_info`` is empty.
     """
     self.CutOff = freq_cutoff
     if seqs_with_ancestor != {}:
         Align = MegaAlignment()
         self.ini_seq_builder = seqs_with_ancestor
         self.ini_clone_order, self.ini_clone_seq = Align.name2seq(
             self.ini_seq_builder)
         self.tsp_list = tsp_list
         self.make_readcount()
         self._CNV_file = CNV_info
         self.ReadCountTable = ReadCountTable
         # list(d)[0] replaces d.keys()[0]: dict views are not subscriptable
         # on Python 3, and the first key is the same on Python 2.
         self.SNVnum = len(ReadCountTable[list(ReadCountTable)[0]])
         self.CNVnum = len(CNV_info[list(CNV_info)[0]])
Пример #22
0
    def find_new_clone(self, new_seq_buil, old_seq_buil):
        """Report whether every new sequence already exists in the old set.

        Each sequence of ``new_seq_buil`` except '#hg19' is checked with
        ``find_redundant`` against the sequences of ``old_seq_buil``.

        Returns:
            'n' if ``find_redundant`` returned an empty list for at least one
            new sequence, otherwise 'y'.
        """
        aligner = MegaAlignment()
        _, new_name2seq = aligner.name2seq(new_seq_buil)
        _, old_name2seq = aligner.name2seq(old_seq_buil)
        matches = [
            aligner.find_redundant(new_name2seq[name], old_name2seq)
            for name in new_name2seq if name != '#hg19'
        ]
        if [] in matches:
            return 'n'
        return 'y'
 def add_cluster_Cmat(self, seq_list):
     """Build the per-clone value lists and convert them to a matrix.

     Each sequence becomes a list with 0.5 where the character is 'T' and 0
     elsewhere, keyed by the sequence name without its first character.

     Returns:
         ``(Cmat_mat, Cmat_dic, Cmat_clone_order)`` where the first and last
         items come from ``self.convert_Cmatdic_to_mat(Cmat_dic)``.
     """
     names, name2seq = MegaAlignment().name2seq(seq_list)
     Cmat_dic = {}
     for raw_name in names:
         clone = raw_name[1:]
         sequence = name2seq['#' + clone]
         Cmat_dic[clone] = [0.5 if base == 'T' else 0 for base in sequence]
     Cmat_mat, Cmat_clone_order = self.convert_Cmatdic_to_mat(Cmat_dic)
     return Cmat_mat, Cmat_dic, Cmat_clone_order
Пример #24
0
    def MLancetor(self, seqs, Nwk):
        """Infer ancestral sequences with ``MegaAncestor`` and store them.

        Sets ``CellLs``, ``Cell2Seq``, ``SNVnum``, the four-way result of
        ``retrieve_ancestor_states`` (ancestor states, offspring->ancestor
        map, code->cell map), ``RescaledTree``, ``nodeid2seq`` and finally
        ``offspring2ancestor_withou_redunSeq`` from ``self.RemoveRedun()``.
        Each node sequence is the concatenation, over the first ``SNVnum``
        positions, of the text before the first tab of that position's state.
        """
        aligner = MegaAlignment()
        self.CellLs, self.Cell2Seq = aligner.name2seq(seqs)
        self.SNVnum = len(self.Cell2Seq[self.CellLs[0]])

        reconstructor = MegaAncestor()
        reconstructor.alignment_file = seqs
        reconstructor.input_tree_file = Nwk

        inferred = reconstructor.retrieve_ancestor_states()
        # The third item (cell -> code map) is not stored.
        (self.ancestor_states, self.offspring2ancestor, _cell2code,
         self.code2cell) = inferred
        self.RescaledTree = reconstructor.get_scaledNWK()
        self.nodeid2seq = {}
        print('SNV count', self.SNVnum)
        for node in self.ancestor_states:
            states = self.ancestor_states[node]
            nucleotides = [
                states[site].split('\t')[0] for site in range(self.SNVnum)
            ]
            self.nodeid2seq[node] = ''.join(nucleotides)

        print(self.nodeid2seq)
        print(self.offspring2ancestor)
        print(self.code2cell)
        self.offspring2ancestor_withou_redunSeq = self.RemoveRedun()
Пример #25
0
    def BranchDecClone(self, seq_list, clone_frequency, Tu2CNV):
        """Cut clones at positions that MegaMP flags as 'ToWild'.

        The alignment is saved to 'Test.meg' and run through ``MegaMP``.
        Positions whose info starts with 'ToWild' are "bad" and carry a list
        of clones to change.  For each tumor and each of its
        positive-frequency clones on such a list, a bad position whose CNV
        state is 'normal' or 'Bad-normal' is reset to 'A' (via ``ModSeq``)
        when another positive-frequency clone of the tumor that is not on
        that list has 'T' there.  Cut clones are stored as
        '<clone>Cut<tumor>' with the original frequency; every unchanged
        clone is recorded with frequency 1.  The result is passed through
        ``ListHitCloAndSeq`` and ``CombSimClo``.

        Args:
            seq_list: clone alignment (MEGA lines).
            clone_frequency: ``{tumor_key: {clone: frequency}}``; the first
                two characters of ``tumor_key`` are stripped to get the tumor.
            Tu2CNV: ``{tumor: per-position CNV state}``.

        Returns:
            ``(alignment, clone_frequency)``.  The inputs are returned
            unchanged when MegaMP fails or no bad position exists.
        """
        Align = MegaAlignment()
        TumorSampleExtract = tsp_information(self.tsp_list)
        CloFreAna = CloneFrequencyAnalizer()
        _, Clo2Seq = Align.name2seq(seq_list)
        Align.save_mega_alignment_to_file('Test.meg', seq_list)
        tree_builder = MegaMP()
        tree_builder.mao_file = self.mao_file
        run_id = 'branchdec_mega_alignment'  # renamed: ``id`` shadowed a builtin

        status = tree_builder.do_mega_mp(seq_list, run_id)
        if status != True:
            # Previously execution continued and crashed on the unbound
            # ``Good_posi_info``; without a tree nothing can be cut.
            print('failed to run megaMP')
            return seq_list, clone_frequency

        # True will execute code to remove redundant seqs (True is default)
        (seqs_with_ancestor, tree, nade_map, mask_seq,
         Good_posi_info) = tree_builder.alignment_least_back_parallel_muts(
             True)

        BadPosiLs = []  # positions with multiple mutations
        BadPosi2ChnageCloLs = {}
        for c in Good_posi_info:
            Posi_Inf = Good_posi_info[c]
            if Posi_Inf != ['Good'] and Posi_Inf[0] == 'ToWild':
                BadPosiLs.append(c)
                BadPosi2ChnageCloLs[c] = Posi_Inf[1][0]
        # %-formatting prints the same line as the former print statement on
        # both Python 2 and Python 3.
        print('bad positions %s' % (BadPosiLs,))
        if BadPosiLs == []:
            return seq_list, clone_frequency

        NewT2C2F = {}
        for TuKey in clone_frequency:
            NewC2F = {}
            # NOTE(review): the result is unused; the call is kept in case
            # make_single_tsp_list has side effects -- confirm and remove.
            TumorSampleExtract.make_single_tsp_list(TuKey)
            CloFreDic = clone_frequency[TuKey]
            Tu = TuKey[2:]
            CNV = Tu2CNV[Tu]

            for Clo in CloFreDic:  # original hit clones of the tumor
                ChangeOptions = 'n'
                if CloFreDic[Clo] > 0:
                    CSeq0 = Clo2Seq['#' + Clo]
                    ChangePosi = []  # positions to reset for this clone
                    for Bad in BadPosi2ChnageCloLs:
                        ChangeCloLs = BadPosi2ChnageCloLs[Bad]
                        if ChangeCloLs.count('#' + Clo) != 0 and (
                                CNV[Bad] == 'normal'
                                or CNV[Bad] == 'Bad-normal'):
                            Change = 'n'
                            # Is the mutation also carried by another hit
                            # clone that is not scheduled for change?
                            for Oth in CloFreDic:
                                if Oth != Clo and CloFreDic[Oth] > 0:
                                    Soth = Clo2Seq['#' + Oth]
                                    if Soth[Bad] == 'T' and ChangeCloLs.count(
                                            '#' + Oth) == 0:
                                        Change = 'y'
                            if Change == 'y':
                                ChangePosi.append(Bad)
                    print('change positions %s %s' % (Tu, ChangePosi))
                    if ChangePosi != []:
                        CutCloSeq = Align.ModSeq(CSeq0, ChangePosi, 'A',
                                                 self.Len)
                        NewC2F[Clo + 'Cut' + Tu] = CloFreDic[Clo]
                        Clo2Seq['#' + Clo + 'Cut' + Tu] = CutCloSeq
                        ChangeOptions = 'y'

                if ChangeOptions == 'n':
                    NewC2F[Clo] = 1
            NewT2C2F[Tu] = NewC2F

        hitseq_align, hitclone_frequency = CloFreAna.ListHitCloAndSeq(
            NewT2C2F, Clo2Seq)
        outSeqMaj, outSeqAmb, NewT2C2F = Align.CombSimClo(
            hitseq_align, hitclone_frequency, 0.0)
        return outSeqMaj, NewT2C2F
Пример #26
0
    def AdjDecClo(self, ID, SeqLs, NodeMap, BackFor, Tree):
        """Apply per-site base corrections to clone sequences.

        Args:
            ID: Unused by this method.
            SeqLs: Sequence builder parsed with ``MegaAlignment.name2seq``.
            NodeMap: Unused by this method.
            BackFor: Indexable by site position. An entry equal to
                ``['Good']`` leaves the site alone. Any other entry is read
                as ``[change, [names_a, names_b]]`` where ``change`` is
                ``'ToMut'`` (force ``'T'``) or ``'ToWild'`` (force ``'A'``).
            Tree: Passed through to ``TreeAnalizer.Get_branch_lenghth``.

        Returns:
            The result of ``MegaAlignment.RmRedunSeq`` on the corrected
            alignment, extended in place with a ``'#hg19'`` all-``'A'``
            sequence. Names containing ``'#Node'`` are left out.
        """
        Align = MegaAlignment()
        Ao, Anc2Seq = Align.name2seq(SeqLs)
        Len = len(Anc2Seq[Ao[0]])
        outNew = ['MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ']

        # Every cluster-derived name gets a (possibly empty) change table.
        Clu2Change = {}
        for Name in Anc2Seq:
            if 'Clu' in Name:
                Clu2Change[Name] = {}

        for Posi in range(Len):
            entry = BackFor[Posi]
            if entry == ['Good']:
                continue
            Change = entry[0]
            first_names = entry[1][0]
            second_names = entry[1][1]
            # Both leading names share the text that precedes 'Clu'.
            same_origin = (first_names[0].split('Clu')[0] ==
                           second_names[0].split('Clu')[0])
            # Exactly one non-cluster name paired with one cluster name.
            single_pair = (len(first_names) == 1 and len(second_names) == 1
                           and 'Clu' not in first_names[0]
                           and 'Clu' in second_names[0])
            if single_pair:
                ChangeCloLs = first_names if same_origin else second_names
            else:
                ChangeCloLs = second_names if same_origin else first_names

            if Change == 'ToMut':
                new_base = 'T'
            elif Change == 'ToWild':
                new_base = 'A'
            else:
                continue
            for Clo in ChangeCloLs:
                # `in`/setdefault instead of dict.has_key, which no longer
                # exists in Python 3.
                Clu2Change.setdefault(Clo, {})[Posi] = new_base

        for Clo in Anc2Seq:
            if Clo.find('#Node') != -1:
                continue
            TreeAna = TreeAnalizer()
            # Clo[1:] drops the leading character of the name before the
            # tree lookup.
            BraLen = TreeAna.Get_branch_lenghth(Tree, Clo[1:])
            CluSeq = Anc2Seq[Clo]
            if BraLen >= 1 and Clo in Clu2Change:
                Change = Clu2Change[Clo]
                CluSeq = ''.join(
                    Change.get(c, base) for c, base in enumerate(CluSeq))
            outNew += [Clo, CluSeq]

        outNew_without_redundant_seq = Align.RmRedunSeq(outNew)
        # Clo is the last name iterated above; its sequence length sizes the
        # all-'A' reference row.
        outNew_without_redundant_seq += ['#hg19', 'A' * len(Anc2Seq[Clo])]
        return outNew_without_redundant_seq
Пример #27
0
# NOTE(review): PP2, Align, OutMegFile, MEGAseqs_Corrected_1, Cut2 and InFile
# are not defined in this fragment; they are expected to exist already.
Cell2PPselected = PP2.get_PP_for_selected_nuc_corr()
Align.save_mega_alignment_to_file(OutMegFile[:-4]+'Correct2.meg', MEGAseqs_Corrected_1)

print('Compute final PP')
PP2 = PredictCellGenotype('Correct', MEGAseqs_Corrected_1, Cut2)
MEGAseqs_Corrected_2 = PP2.Correct_error5()
Cell2PPselected = PP2.get_PP_for_selected_nuc_corr()

print('clone annotation')
In0 = InFile
OutMegFile = In0[:-4]+'_BEAM.meg'
# `dir` shadows the builtin but is kept: it is a module-level name that code
# outside this fragment may read.
dir = os.getcwd()
MEGAseqs_Corrected = OutMegFile[:-4]+'Correct2.meg'
with open(MEGAseqs_Corrected, 'r') as corrected_handle:
    MEGAseqs_Corrected_1 = corrected_handle.readlines()
with open(In0, 'r') as input_handle:
    In = input_handle.readlines()
CellLs, Cell2Seq = Align.name2seq(In)
Cell2PPls = {}
CellC = 1
for Cell in CellLs:
    # One CSV per cell, numbered from 1 in the order of CellLs.
    PPoutF = dir+'\\All_alignment_PPseq-'+str(CellC)+'.csv'
    # Close the handle before os.remove below; removing a file that is still
    # open fails on Windows.
    with open(PPoutF, 'r') as pp_handle:
        PPout = pp_handle.readlines()
    # The cell name is the first double-quoted field of the first line.
    CellN = PPout[0].split('\"')[1]
    # Rows from the fourth line on hold the per-site values.
    PPout = PPout[3:]
    PPls = []
    for i in PPout:
        i = i.strip().split(',')
        PP = {'A': float(i[1]), 'T': float(i[4])}
        PPls.append(PP)
    Cell2PPls[CellN] = PPls
    os.remove(PPoutF)
    # Advance to the next cell's file; without this every iteration would
    # reopen the file that was just removed.
    CellC += 1
    def get_candidate_decomposed_clones(self, target_tumor, CluInf_tu,Tuseq):
        """Build cluster-combination genotypes and keep them if they are hit.

        Every root sequence ('#Clu0' or any name without 'Clu') is combined
        with every cluster combination whose mutated positions do not overlap
        the root's. Combinations that are not redundant with
        ``self.clone_seq`` become candidates, and clone frequencies are then
        re-estimated on candidates plus existing clones.

        Returns:
            ``(sequence_builder, Tuseq)`` when ``self.findcombohit`` accepts
            a fit, otherwise ``(CluInf_tu[0], '')``.
        """
        aligner = MegaAlignment()
        name_order, name_to_seq = aligner.name2seq(CluInf_tu[0])
        seq_length = len(name_to_seq[name_order[0]])

        # Clusters other than '#Clu0' are the ones that get combined.
        cluster_names = [
            name for name in name_order
            if name != '#Clu0' and 'Clu' in name
        ]
        combos, _ = self.combinations([], cluster_names, 0, {})
        print(target_tumor,'make cluster comb',cluster_names,combos,name_order)

        if combos == {}:
            return CluInf_tu[0], ''

        candidates = {}
        for root in name_order:
            if root != '#Clu0' and 'Clu' in root:
                continue
            root_seq = name_to_seq[root]
            if root == '#Clu0':
                # The root cluster is itself a candidate clone.
                candidates['#' + target_tumor + 'Clu0'] = root_seq
            root_muts = aligner.GetMutPos(root_seq)

            for combo_id in combos:
                combo_label = ''
                combo_muts = []
                for cluster in combos[combo_id]:
                    combo_muts += aligner.GetMutPos(name_to_seq[cluster])
                    combo_label += cluster.replace('#', '')

                # Skip combinations that share a mutated site with the root.
                if any(root_muts.count(mut) != 0 for mut in combo_muts):
                    continue

                merged = aligner.ModSeq('A' * seq_length,
                                        combo_muts + root_muts, 'T',
                                        seq_length)
                if aligner.find_redundant(merged, self.clone_seq) == []:
                    key = '#' + target_tumor + root.replace('#', '') + combo_label
                    candidates[key] = merged

        if candidates == {}:
            return CluInf_tu[0], ''

        def fit_frequencies(seq_builder):
            # Fresh single-tumor dicts are built for every fit.
            computer = CloneFrequencyComputer_cnv1(
                seq_builder,
                {target_tumor: self.v_obs[target_tumor]},
                {target_tumor: self._CNV_file[target_tumor]},
                self.freq_cutoff)
            computer.regress_cnv()
            return computer

        candidates.update(self.clone_seq)
        first_fit = fit_frequencies(
            aligner.UpMeg(candidates, list(candidates.keys())))
        if self.findcombohit(first_fit.hitclone_seq_builder) != 'y':
            return CluInf_tu[0], ''

        print('test the quality of clustercombo, by removing tumor seq (if any)')
        _, hit_seqs = aligner.name2seq(first_fit.hitclone_seq_builder)
        if aligner.find_redundant(Tuseq, hit_seqs) == []:
            print('tumor genotype did not hit, so clustercombo is good')
            return first_fit.hitclone_seq_builder, Tuseq

        print('tumor genotype was hit, so test if clustercombo still hit without tumor genotype: testing if clustercombo genotypes fit well')
        tumor_like = aligner.find_redundant(Tuseq, candidates)
        reduced_builder = []
        for seq_name in candidates:
            if seq_name not in tumor_like:
                reduced_builder.extend([seq_name, candidates[seq_name]])

        second_fit = fit_frequencies(reduced_builder)
        if self.findcombohit(second_fit.hitclone_seq_builder) == 'y':
            return second_fit.hitclone_seq_builder, Tuseq
        return CluInf_tu[0], ''
    def get_candidate_decomposed_clones(self, target_tumor):
        """Combine hit clusters of ``target_tumor`` into candidate clones.

        Reads ``self.ClusterInfo`` as a 3-item sequence: ``[0]`` maps cluster
        names to ``'<center>-<sign>'`` strings, ``[1]`` maps hit names to
        frequencies, ``[2]`` is a sequence builder for
        ``MegaAlignment.name2seq``. Also reads ``self.identical_seq``,
        ``self.T2Seq``, ``self.CNV_info``, ``self.v_obs``,
        ``self.freq_cutoff`` and ``self.OriAnc2Seq0``.

        Returns:
            ``(out2, NewClone2Freq)``, a MEGA-style sequence list and a
            clone-to-frequency dict, when at least one cluster-derived
            candidate gets a frequency above 0.02; otherwise ``([], {})``.
        """
        Align = MegaAlignment()
        CluInf_tu = self.ClusterInfo  #[target_tumor]
        NameOrder, Name2Seq = Align.name2seq(CluInf_tu[2])
        HitCloCluLs = CluInf_tu[1]  #['T-'+target_tumor]
        # Not used below; the lookup still raises KeyError for an unknown tumor.
        TuIdentical_seq = self.identical_seq['T-' + target_tumor]
        LenSeq = len(Name2Seq[NameOrder[0]])
        # Rebound further down as a 'y'/'n' flag before it is read again.
        TuSeq = self.T2Seq['#' + target_tumor]
        Clu2center = CluInf_tu[0]
        SigCluLs = []
        HitCloLs = []
        HitCloSeq_dic = {}
        RootClu = ''
        LarCen = 0.0
        # Split hits above the 0.02 frequency threshold into this tumor's
        # clusters (SigCluLs) and every other hit (HitCloLs).
        for Hit in HitCloCluLs:
            if HitCloCluLs[Hit] > 0.02:
                # Cluster hits start with '<tumor>Clu' and carry no 'REP'.
                if Hit[:len(target_tumor +
                            'Clu')] == target_tumor + 'Clu' and Hit.find(
                                'REP') == -1:
                    SigCluLs.append(Hit)
                    CluName = 'Clu' + Hit.split('Clu')[-1]
                    Center = float(Clu2center[CluName].split('-')[0])

                    for CluN in Clu2center:
                        Center2 = float(Clu2center[CluN].split('-')[0])
                        Sign = Clu2center[CluN].split('-')[1]
                        # Other clusters with the same center are pulled in too.
                        if Center == Center2 and CluName != CluN:
                            SigCluLs.append(target_tumor + CluN)
                        # Track the largest center seen so far; a cluster whose
                        # center equals the current hit's center becomes the
                        # root. Ties only advance for 'Pos' clusters.
                        if LarCen < Center2:  # and Sign=='Pos':  #Pos for middle cut, Neg for K-means
                            LarCen = Center2
                            if Center == Center2: RootClu = target_tumor + CluN
                        elif LarCen <= Center2 and Sign == 'Pos':  #Pos for middle cut, Neg for K-means
                            LarCen = Center2
                            if Center == Center2: RootClu = target_tumor + CluN

                else:
                    HitCloLs.append(Hit)
                    HitCloSeq_dic['#' + Hit] = Name2Seq['#' + Hit]

        # The root cluster is treated as a base clone, not as a cluster to add.
        if RootClu != '':
            SigCluLs.remove(RootClu)
            HitCloLs.append(RootClu)
        if SigCluLs != []:
            CluCombLs, IDend = self.combinations([], SigCluLs, 0, {})
        else:
            CluCombLs = {}
        if RootClu != '' or CluCombLs != {}:
            print 'make cluster comb'
            CloCan2Seq = {}
            Got_Candidate = 'n'
            # Each base clone is a candidate on its own and a root for every
            # cluster combination.
            for Root in HitCloLs:
                RootSeq = Name2Seq['#' + Root]
                LenSeq = len(RootSeq)
                RootMut = Align.GetMutPos(RootSeq)
                CloCan2Seq['#' + Root] = RootSeq
                Got_Candidate = 'y'
                if CluCombLs != {}:
                    for ID in CluCombLs:
                        CluLs = CluCombLs[ID]
                        CluN = ''
                        MutPosLs = []
                        for Clu in CluLs:
                            Seq = Name2Seq['#' + Clu]
                            CluMut = Align.GetMutPos(Seq)
                            MutPosLs += CluMut
                            CluN += Clu.replace(target_tumor + 'Clu', 'Clu')

                        MutPosLs = list(set(MutPosLs))
                        # Only combine when the combination shares no mutated
                        # position with the root.
                        Go = 'y'
                        for Mut in MutPosLs:
                            if RootMut.count(Mut) != 0: Go = 'n'

                        if Go == 'y':
                            AllMutPosLs = MutPosLs + RootMut
                            Seq = Align.ModSeq('A' * LenSeq, AllMutPosLs, 'T',
                                               LenSeq)
                            Redun_ls = Align.find_redundant(Seq, HitCloSeq_dic)

                            # Keep the combined genotype only when
                            # find_redundant reports nothing among the hit
                            # clones.
                            if Redun_ls == []:
                                CloCan2Seq['#' + target_tumor + Root.replace(
                                    target_tumor + 'Clu', 'Clu') + CluN] = Seq
                                Got_Candidate = 'y'

            if Got_Candidate == 'y':
                Can_list = CloCan2Seq.keys()

                # The result is not read again in this method.
                new_seq = Align.UpMeg(CloCan2Seq, Can_list)
                alt_frequency = []
                CNVls = self.CNV_info[target_tumor]
                Len = len(CNVls)
                c = 0
                TuMatPosi = []
                tumor_genotype = ''
                # Over 'normal' CNV sites only: collect observed frequencies
                # and build the tumor's own presence/absence genotype.
                while c < Len:
                    if CNVls[c] == 'normal':
                        alt_frequency.append(self.v_obs[target_tumor][c])
                        if self.v_obs[target_tumor][c] > 0:
                            TuMatPosi.append(c)
                            tumor_genotype += 'T'
                        else:
                            tumor_genotype += 'A'
                    c += 1

                # Built with empty inputs; only its helper methods are called.
                clone_frequency = CloneFrequencyComputer_cnv1({}, {}, {},
                                                              self.freq_cutoff,
                                                              {})

                MutWildAlleleCount_noCNV = clone_frequency.make_mut_wild_allele_count_noCNV(
                    {}, Can_list,
                    CloCan2Seq)  #PreAbsCNV, clone_order, SNV_seq, Tu2CloFre
                Cmatrix_noCNV, Cmatrix_noCNV_dic = clone_frequency.make_Min(
                    Can_list, CloCan2Seq, MutWildAlleleCount_noCNV)
                Clone2Freq = clone_frequency.do_nnls0(Cmatrix_noCNV, Can_list,
                                                      alt_frequency)

                out2 = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ']
                AllMut = []
                NewClone2Freq = {}
                CluHit = 'n'
                for Clo0 in Clone2Freq:
                    NewClone2Freq[Clo0] = Clone2Freq[Clo0]
                    if Clone2Freq[Clo0] > 0.02:

                        SeqMutPos = Align.GetMutPos(CloCan2Seq['#' + Clo0])
                        # TuSeq stays 'y' only if the candidate carries every
                        # position at which the tumor has a mutation.
                        TuSeq = 'y'
                        for Mut in SeqMutPos:
                            if TuMatPosi.count(Mut) != 0: AllMut.append(Mut)
                        for Mut in TuMatPosi:
                            if SeqMutPos.count(Mut) == 0: TuSeq = 'n'
                        # Look for an original sequence equal to the candidate
                        # over the first Len sites; the last match wins.
                        Iden = 'n'
                        for OriClo in self.OriAnc2Seq0:
                            c = 0
                            Dif = 'n'
                            while c < Len:
                                if self.OriAnc2Seq0[OriClo][c] != CloCan2Seq[
                                        '#' + Clo0][c]:
                                    Dif = 'y'
                                c += 1
                            if Dif == 'n': Iden = OriClo
                        if Iden != 'n':
                            # Report under the original name; its frequency
                            # moves there.
                            out2 += [Iden, self.OriAnc2Seq0[Iden]]
                            NewClone2Freq[Iden[1:]] = Clone2Freq[Clo0]
                            NewClone2Freq[Clo0] = 0
                        elif TuSeq == 'n':

                            # New genotype; a doubled tumor prefix in the name
                            # is collapsed to one.
                            out2 += [
                                '#' + Clo0.replace(target_tumor + target_tumor,
                                                   target_tumor),
                                CloCan2Seq['#' + Clo0]
                            ]
                            if Clo0.find('Clu') != -1 and Clo0.find(
                                    'REP') == -1:
                                CluHit = 'y'
                        else:
                            # Candidate covers every tumor mutation: report it
                            # under the tumor's name.
                            out2 += [
                                '#' + target_tumor, CloCan2Seq['#' + Clo0]
                            ]
                            NewClone2Freq[target_tumor] = Clone2Freq[Clo0]
                            NewClone2Freq[Clo0] = 0
                AllMut = list(set(AllMut))
                # Some tumor mutations are in no reported clone: add the
                # tumor genotype itself.
                if len(AllMut) < len(TuMatPosi):
                    out2 += ['#' + target_tumor, tumor_genotype]
                if CluHit == 'y':

                    return out2, NewClone2Freq

        return [], {}
Пример #30
0
    def remove_insignificant_clones(self, v_obs, CloFre_clone,
                                    clone_seq_builder, Tu2CNV, Cut):
        """Drop clones whose private SNVs look no different from shared SNVs.

        Within each tumor, clones with a positive frequency are ordered by
        decreasing mutation count and compared pairwise. For each pair the
        observed frequencies at 'normal' CNV sites are split into sites
        mutated in both clones and sites mutated only in the first one, and
        the two groups are compared with an unequal-variance t-test.

        Args:
            v_obs: Tumor name -> observed frequency per site.
            CloFre_clone: ``'T-' + tumor`` -> {clone name: frequency}.
            clone_seq_builder: Sequence builder for ``MegaAlignment.name2seq``.
            Tu2CNV: Tumor name -> per-site CNV labels.
            Cut: P-value above which a pair counts as indistinguishable.

        Returns:
            ``(new_seq_builder, new_clone_freq)``: the result of
            ``MegaAlignment.UpMeg`` on the kept sequences, and
            ``'T-' + tumor`` -> {clone name: frequency} for the kept clones.
        """
        Align = MegaAlignment()
        _, clone_seq_dic = Align.name2seq(clone_seq_builder)
        new_clone_freq = {}
        new_clone_seq_dic = {}
        for tumor in v_obs:
            CNV = Tu2CNV[tumor]
            Clo2Fre = CloFre_clone['T-' + tumor]
            ObsFre = v_obs[tumor]

            # Group present clones by their number of mutated positions.
            MutNum2Clo = {}
            for Clo in Clo2Fre:
                if Clo2Fre[Clo] > 0:
                    MutNum = len(Align.GetMutPos(clone_seq_dic['#' + Clo]))
                    MutNum2Clo.setdefault(MutNum, []).append(Clo)
            # Most-mutated clones first, so the first clone of each pair has
            # at least as many mutations as the second.
            clone_order = []
            for MutNum in sorted(MutNum2Clo, reverse=True):
                clone_order += MutNum2Clo[MutNum]

            CloNum = len(clone_order)
            InsigLs = []
            for C1 in range(CloNum - 1):
                Clo1 = clone_seq_dic['#' + clone_order[C1]]
                num_sites = len(Clo1)
                # Each group must hold at least 1% of the sites.
                Min_num = 0.01 * num_sites
                for C2 in range(C1 + 1, CloNum):
                    Clo2 = clone_seq_dic['#' + clone_order[C2]]

                    Share = []
                    Unique = []
                    for c in range(num_sites):
                        if CNV[c] != 'normal' or Clo1[c] != 'T':
                            continue
                        if Clo2[c] == 'T':
                            Share.append(ObsFre[c])
                        elif Clo2[c] == 'A':
                            Unique.append(ObsFre[c])

                    smallest_group = min(len(Share), len(Unique))
                    if smallest_group < 3 or smallest_group < Min_num:
                        # Too few sites to test: treat as indistinguishable.
                        P = 1
                    else:
                        # The last item of the result is the p-value.
                        P = scipy.stats.ttest_ind(Share,
                                                  Unique,
                                                  equal_var=False)[-1]
                    if P > Cut:
                        # Drop the first clone only when it is cluster-derived
                        # and the second is not; otherwise drop the second.
                        if (clone_order[C1].find('Clu') != -1
                                and clone_order[C2].find('Clu') == -1):
                            InsigLs.append(clone_order[C1])
                        else:
                            InsigLs.append(clone_order[C2])

            InsigLs = list(set(InsigLs))
            if InsigLs != []:
                # One pre-formatted argument prints the same text on
                # Python 2 and Python 3.
                print('insignificant clones %s %s' % (tumor, InsigLs))
            insignificant = set(InsigLs)
            new_clone_fre_in = {}
            for Clo in Clo2Fre:
                if Clo2Fre[Clo] > 0 and Clo not in insignificant:
                    new_clone_fre_in[Clo] = Clo2Fre[Clo]
                    new_clone_seq_dic['#' + Clo] = clone_seq_dic['#' + Clo]
            new_clone_freq['T-' + tumor] = new_clone_fre_in
        new_seq_builder = Align.UpMeg(new_clone_seq_dic, [])

        return new_seq_builder, new_clone_freq