Exemplo n.º 1
0
    def save(self, *args, **kwargs):
        """Back-fill gene identifiers on the first save, then persist.

        The mapping step runs only when the instance has no primary key yet
        and the module-level ``MAPPING`` switch is truthy:

        1. ``species`` and ``taxid`` are brought in line with each other
           (``species`` wins when both are set).
        2. Every non-empty identifier (Entrez ID, Ensembl ID, symbol, name)
           is passed as a string to the mapper ``m`` together with the taxid.
        3. When the first element of the mapper result is a non-zero ``int``
           it is used to fetch the ``Entrez`` record, whose values fill in
           only those fields that are still empty on this instance.

        The parent ``save`` is always called with the unchanged arguments.
        """
        is_new = not self.pk
        if is_new and MAPPING:
            # Reconcile species and taxid; an explicit species takes priority.
            if self.species:
                taxid = self.species.taxid
                self.taxid = taxid
            elif self.taxid:
                taxid = self.taxid
                self.species = Species.objects.get(taxid=taxid)
            else:
                taxid = None

            known = (self.entrez_gene_id, self.ensembl_gene_id,
                     self.symbol, self.name)
            mapped = m([str(value) for value in known if value], taxid)
            entrez_gene_id = mapped[0]

            if entrez_gene_id and isinstance(entrez_gene_id, int):
                entrez = Entrez.objects.get(entrez_gene_id=entrez_gene_id)
                # Values already present on the instance are never overwritten.
                fallbacks = (
                    ('entrez_gene_id', entrez_gene_id),
                    ('ensembl_gene_id', entrez.ensembl_gene_id),
                    ('symbol', entrez.gene_symbol),
                    ('name', entrez.gene_name),
                )
                for field, fallback in fallbacks:
                    setattr(self, field, getattr(self, field) or fallback)
                if not self.taxid:
                    # Neither species nor taxid was supplied: take the
                    # species from the Entrez record's taxid.
                    taxid = entrez.taxid
                    self.species = Species.objects.get(taxid=taxid)

        super(Factor, self).save(*args, **kwargs)
Exemplo n.º 2
0
    def save(self, *args,
             **kwargs):  # TODO: should be split into several methods
        """Complete missing gene annotation on the first save, then persist.

        The mapping step runs only when the instance has no primary key yet
        and the module-level ``MAPPING`` switch is truthy:

        1. ``species`` and ``taxid`` are synchronised (``species`` wins).
        2. All non-empty identifiers (Entrez ID, Ensembl ID, symbol, name)
           are passed as strings to the mapper ``m`` along with the taxid.
        3. If the first element of the mapper result is a non-zero ``int``
           it is treated as the Entrez gene ID and:

           * for taxid 4932 the Ensembl gene ID is taken from the second
             element of the mapper result and symbol, name and description
             are overwritten with the output of ``retrieve``;
           * otherwise the ``Entrez`` record is looked up and used to fill
             in fields that are still empty. A missing record is tolerated:
             only the mapped Entrez gene ID is stored in that case.

        The parent ``save`` is always called with the unchanged arguments.
        """
        if not self.pk and MAPPING:
            # Reconcile species and taxid; an explicit species takes priority.
            if self.species:
                self.taxid = taxid = self.species.taxid
            elif self.taxid:
                taxid = self.taxid
                self.species = Species.objects.get(taxid=taxid)
            else:
                taxid = None

            identifiers = [
                self.entrez_gene_id, self.ensembl_gene_id, self.symbol,
                self.name
            ]
            ids = m([str(value) for value in identifiers if value], taxid)
            entrez_gene_id = ids[0]

            if entrez_gene_id and isinstance(entrez_gene_id, int):
                self.entrez_gene_id = self.entrez_gene_id or entrez_gene_id
                # NOTE(review): 4932 is presumably the NCBI taxonomy ID of
                # Saccharomyces cerevisiae (the original comment mentions
                # SGD) -- confirm.
                if self.taxid == 4932:
                    self.ensembl_gene_id = ids[1]['ensembl_gene']  # or maybe sgd
                    annotation = retrieve(self.ensembl_gene_id)
                    self.symbol = annotation['symbol']
                    self.name = annotation['name']
                    self.description = annotation['description']
                    # NOTE(review): the check reads ``function`` but the
                    # assignment targets ``functional_description`` --
                    # confirm this is intended.
                    if not self.function:
                        self.functional_description = self.description
                    if self.symbol and self.name:
                        # Append the first run of digits found in the symbol
                        # to the name.
                        number = re.search(r'\d+', self.symbol)
                        if number:
                            self.name += " " + number.group(0)
                else:
                    try:
                        entrez = Entrez.objects.get(
                            entrez_gene_id=entrez_gene_id)
                    except ObjectDoesNotExist:
                        # No local Entrez record: keep just the mapped ID.
                        self.entrez_gene_id = entrez_gene_id
                    else:
                        # Everything that needs ``entrez`` lives in this
                        # ``else`` clause, because the name is unbound when
                        # the lookup fails.
                        self.ensembl_gene_id = self.ensembl_gene_id or entrez.ensembl_gene_id
                        self.symbol = self.symbol or entrez.gene_symbol
                        self.name = self.name or entrez.gene_name
                        if not self.taxid:
                            taxid = entrez.taxid
                            self.species = Species.objects.get(taxid=taxid)

        super(Factor, self).save(*args, **kwargs)
Exemplo n.º 3
0
    def save(self, *args, **kwargs):
        """Complete missing gene annotation on the first save, then persist.

        The mapping step runs only when the instance has no primary key yet
        and the module-level ``MAPPING`` switch is truthy:

        1. ``species`` and ``taxid`` are synchronised (``species`` wins).
        2. All non-empty identifiers (Entrez ID, Ensembl ID, symbol, name)
           are passed as strings to the mapper ``m`` along with the taxid.
        3. If the first element of the mapper result is a non-zero ``int``
           it is treated as the Entrez gene ID and:

           * for taxid 4932 the Ensembl gene ID is taken from the second
             element of the mapper result and symbol, name and description
             are overwritten with the output of ``retrieve``;
           * otherwise the ``Entrez`` record is looked up and used to fill
             in fields that are still empty. A missing record is tolerated:
             only the mapped Entrez gene ID is stored in that case.

        The parent ``save`` is always called with the unchanged arguments.
        """
        if not self.pk and MAPPING:
            # Reconcile species and taxid; an explicit species takes priority.
            if self.species:
                self.taxid = taxid = self.species.taxid
            elif self.taxid:
                taxid = self.taxid
                self.species = Species.objects.get(taxid=taxid)
            else:
                taxid = None

            identifiers = [self.entrez_gene_id, self.ensembl_gene_id, self.symbol, self.name]
            ids = m([str(value) for value in identifiers if value], taxid)
            entrez_gene_id = ids[0]

            if entrez_gene_id and isinstance(entrez_gene_id, int):
                self.entrez_gene_id = self.entrez_gene_id or entrez_gene_id
                # NOTE(review): 4932 is presumably the NCBI taxonomy ID of
                # Saccharomyces cerevisiae (the original comment mentions
                # SGD) -- confirm.
                if self.taxid == 4932:
                    self.ensembl_gene_id = ids[1]['ensembl_gene']  # or maybe sgd
                    annotation = retrieve(self.ensembl_gene_id)
                    self.symbol = annotation['symbol']
                    self.name = annotation['name']
                    self.description = annotation['description']
                    # NOTE(review): the check reads ``function`` but the
                    # assignment targets ``functional_description`` --
                    # confirm this is intended.
                    if not self.function:
                        self.functional_description = self.description
                    if self.symbol and self.name:
                        # Append the first run of digits found in the symbol
                        # to the name.
                        number = re.search(r'\d+', self.symbol)
                        if number:
                            self.name += " " + number.group(0)
                else:
                    try:
                        entrez = Entrez.objects.get(entrez_gene_id=entrez_gene_id)
                    except ObjectDoesNotExist:
                        # No local Entrez record: keep just the mapped ID.
                        self.entrez_gene_id = entrez_gene_id
                    else:
                        # Everything that needs ``entrez`` lives in this
                        # ``else`` clause, because the name is unbound when
                        # the lookup fails.
                        self.ensembl_gene_id = self.ensembl_gene_id or entrez.ensembl_gene_id
                        self.symbol = self.symbol or entrez.gene_symbol
                        self.name = self.name or entrez.gene_name
                        if not self.taxid:
                            taxid = entrez.taxid
                            self.species = Species.objects.get(taxid=taxid)

        super(Factor, self).save(*args, **kwargs)
Exemplo n.º 4
0
def main(memory=True, header=True):
    """Integrate the per-database interaction files into one merged table.

    This is Python 2 code (``print`` statements, the ``file`` builtin). It
    relies on the module-level names ``databases``, ``path``, ``taxid_list``,
    ``m`` and ``bufcount``.

    Steps:

    1. Remove the excluded sources from the global ``databases`` list (the
       list is mutated in place) and change the working directory to ``path``.
    2. Collect: for every database except BioGRID, read
       ``<path>/<database>/interactions.txt`` and record the aliases found in
       columns 0 and 1 of every row whose two taxid columns (6 and 7) are
       equal and listed in ``taxid_list``.
    3. Map: resolve every collected alias with ``m(...)`` and keep the first
       element of its result as the unique ID.
    4. Write ``<path>/integrated.txt``: one line per accepted row consisting
       of source ID, target ID and the original row, tab separated. BioGRID
       rows use the first ``'; '``-separated part of each alias as the ID.
    5. Merge: read ``integrated.txt`` back, combine rows that share the same
       (source, target) pair, fold reverse-direction pairs together depending
       on whether they look directional ("regulatory" type or a non-empty
       modification), compute a score and write ``<path>/merged.txt``.

    Args:
        memory: If true, each input file is read completely into memory;
            otherwise it is streamed with ``fileinput`` and its line count
            is obtained from ``bufcount``.
        header: If true, a column header line is written at the top of
            ``merged.txt``.
    """
    remove = ['AfCS'] # sources to exclude ('STRING' was once listed here too)
    for i in remove:
        if i in databases:
            databases.remove(i) # excluded as problematic; mutates the global list

    os.chdir(path)

    # Collect
    # NOTE(review): none of the files opened in this function is managed by
    # a ``with`` block; the input files read via ``file(...).read()`` are
    # never closed explicitly.
    output = open(os.path.join(path, 'integrated.txt'), 'w')

    # GeneList[taxid][alias] -> unique ID ('' until the mapping step fills it)
    GeneList = {}
    for database in databases:
        #if database != "BioGRID": continue
        data_file = os.path.join(path, database, 'interactions.txt')
        # BioGRID is skipped here: its aliases are used directly as IDs when
        # integrated.txt is written below, so they need no mapping.
        if database != "BioGRID":

            # Load the file into memory or stream it from disk.
            if memory:
                data = file(data_file).read().split('\n')
                L = len(data); n = 0; PB = 0 # progress counter: total, lines seen, last percent printed
            else:
                data = fileinput.input([data_file])
                L = bufcount(data_file); n = 0; PB = 0
            print("%s(%s):" % (database, L))

            for i in data:

                # Print the progress percentage whenever it changes.
                # NOTE(review): integer percentage assumes Python 2 division
                # and an int L -- bufcount's return type is not visible here.
                n += 1; PA = 100*n/L
                if PA != PB: print PA,
                PB = PA

                # NOTE(review): when streaming, fileinput yields lines that
                # still end in '\n', so a blank line is not skipped here and
                # the column access below would presumably fail -- confirm.
                if not i: continue
                s = i.split('\t')
                # Keep only rows where both taxids are present, equal and in taxid_list.
                if s[6] and s[6] == s[7] and int(s[6]) in taxid_list and int(s[7]) in taxid_list :
                    alias_a, taxid_a = s[0], int(s[6])
                    alias_b, taxid_b = s[1], int(s[7])

                    if taxid_a not in GeneList: GeneList[taxid_a] = {}
                    if alias_a not in GeneList[taxid_a]: GeneList[taxid_a][alias_a] = ''
                    if taxid_b not in GeneList: GeneList[taxid_b] = {}
                    if alias_b not in GeneList[taxid_b]: GeneList[taxid_b][alias_b] = ''

    # Map:
    #    print '\n'
    #for taxid, genes in GeneList.items():
    #    print taxid, len(genes)
    #print('')

    UniqueIDs = {} # never used afterwards
    for taxid, aliases in GeneList.items():
        #Ma = M(taxid)   # Alter Map to convert int to string!
        #print taxid, len(GeneList[taxid])

        L = len(GeneList[taxid]); n = 0; PB = 0 # restart the progress counter

        for alias, unique_id in aliases.items():

            n += 1; PA = 100*n/L # progress percentage
            if PA != PB: print PA,
            PB = PA

            # An alias may hold several '; '-separated names; all of them
            # are passed to the mapper and the first element of its result
            # is stored as the unique ID.
            GeneList[taxid][alias] = m(alias.split('; '), taxid)[0]
        #print('')

    # Second pass over every database (BioGRID included this time): write
    # "source<TAB>target<TAB>original row" to integrated.txt.
    for database in databases:
        data_file = os.path.join(path, database, 'interactions.txt')

        # Load file in memory or read from disk:
        if memory:
            data = file(data_file).read().split('\n')
            L = len(data); n = 0; PB = 0 # restart the progress counter

        else:
            data = fileinput.input([data_file])
            L = bufcount(data_file); n = 0; PB = 0

        for i in data:

            n += 1; PA = 100*n/L # progress percentage
            if PA != PB: print PA,
            PB = PA

            if i != '' and i != '\n':
                s = i.split('\t')
                # Same row filter as in the collection pass.
                if s[6] != "" and s[6] == s[7] and int(s[6])  in taxid_list and int(s[7]) in taxid_list:
                    alias_a, taxid_a = s[0], int(s[6])
                    alias_b, taxid_b = s[1], int(s[7])

                    # Non-BioGRID rows use the mapped unique IDs; BioGRID
                    # rows use the first part of each alias as the ID.
                    if database != "BioGRID": source, target = GeneList[taxid_a][alias_a], GeneList[taxid_b][alias_b]
                    else: source, target = alias_a.split('; ')[0], alias_b.split('; ')[0]
                    output.write('\t'.join(map(str, [source,target,i]))+'\n')


        #read line from file without loading into memory
    ##    GeneAlias_List[taxid] =
    ##    for taxid in GenAlias_List:
    ##        import Map with taxid

    output.close()
    GeneList = '' # drop the reference to the alias table

    # Merge
    ##MergedInteractions = {}
    # Combine(IdX, IdY) merges two interaction records field by field over
    # the first ten fields. Fields 6 and 7 (the taxids, according to the
    # header written below) are copied from IdY unchanged. Every other field
    # is treated as a '; '-separated list: IdY's values come first, followed
    # by the non-empty values of IdX that IdY lacks; one empty entry is then
    # removed if present.
    def Combine(IdX, IdY):
        Together = []
        for n in range(0, 10):
            if n != 6 and n != 7:
                Xs = IdX[n].split('; ')
    ##            print  Xs
                Ys = IdY[n].split('; ')
    ##            print Ys
                for a in Xs:
                    if a != "" and a not in Ys:
                        Ys.append(a)
    ##            print Ys
                if '' in Ys: Ys.remove('')      # drop an empty entry left over from an empty field
                Together.append('; '.join(Ys))
            else:
                Together.append(IdY[n])
        return Together

    # I[IdA][IdB] -> list of record fields (the original row's columns)
    I = {}
    if memory:
        data = file(os.path.join(path, 'integrated.txt')).read().split('\n')
        print len(data)
    else:
        data = fileinput.input([os.path.join(path, 'integrated.txt')])

    output = open(os.path.join(path, 'merged.txt'), 'w')

    for i in data:
        if not i: continue
        s = i.split('\t')
        try:
            IdA, IdB = int(s[0]), int(s[1])
            if IdA not in I:
                I[IdA] = {IdB:s[2:]}
            elif IdA in I and IdB not in I[IdA]:
                I[IdA][IdB] = s[2:]
            else:
                # The same directed pair was seen before: merge the records.
                I[IdA][IdB] = Combine(I[IdA][IdB], s[2:])
    ##                print Together

        # NOTE(review): the bare except silently skips rows whose IDs are
        # not integers, but it also hides any other error raised in the try
        # body (including errors inside Combine).
        except: pass #print s
    input = '' # NOTE(review): rebinds the builtin name locally; never read afterwards

    # Fold reverse-direction duplicates (A->B and B->A). A record counts as
    # directional when its experimental_system_type (field 2) contains
    # "regulatory" or its modification (field 5) is non-empty.
    # Under Python 2 ``.items()`` returns a list snapshot, so deleting
    # entries inside these loops is safe.
    # NOTE(review): when IdA == IdB the forward and reverse entries are the
    # same object, so a non-directional self-interaction is deleted by the
    # first branch -- confirm that this is intended.
    for IdA, IdBs in I.items():
        for IdB, info in IdBs.items():
            if IdB in I and IdA in I[IdB]:
                # Neither direction is directional: keep one merged record under A->B.
                if ("regulatory" not in I[IdB][IdA][2] and I[IdB][IdA][5] == '') and ("regulatory" not in I[IdA][IdB][2] and I[IdA][IdB][5] == ''):
                    I[IdA][IdB] = Combine(I[IdB][IdA], I[IdA][IdB])
    ##                print I[IdA][IdB]
                    del I[IdB][IdA]
    ##                alias_a = I[IdA][IdB][0]
    ##                alias_b = I[IdA][IdB][1]
    ##                experimental_system_type = I[IdA][IdB][2]

                    #put both together into IdA[IdB]
                    #delete the IdB[IdA]
                # Only A->B is directional: merge the reverse record into A->B.
                elif ("regulatory" in I[IdA][IdB][2] or I[IdA][IdB][5] != '') and ("regulatory" not in I[IdB][IdA][2] and I[IdB][IdA][5] == ''):
                    I[IdA][IdB] = Combine(I[IdB][IdA], I[IdA][IdB])
    ##                print I[IdA][IdB]
                    del I[IdB][IdA]

                    #put both together into IdA[IdB]
                    #delete the IdB[IdA]
                # Only B->A is directional: merge the forward record into B->A.
                elif ("regulatory" not in I[IdA][IdB][2] and I[IdA][IdB][5] == '') and ("regulatory"  in I[IdB][IdA][2] or I[IdB][IdA][5] != ''):
                    I[IdB][IdA] = Combine(I[IdA][IdB], I[IdB][IdA])
    ##                print I[IdA][IdB]
                    del I[IdA][IdB]
                    #put both together into IdB[IdA]
                    #delete the IdA[IdB]
    ##            if ("regulatory" in I[IdA][IdB][2] or I[IdA][IdB][5] != '') and ("regulatory"  in I[IdB][IdA][2] or I[IdB][IdA][5] == ''):
    ##                pass

                    # If both directions are directional, both records are kept.

    if header: output.write('\t'.join(['unique_id_a', 'unique_id_b', 'alias_a', 'alias_b', 'experimental_system_type',
                                       'interaction_type', 'experimental_system', 'modification','taxid_a', 'taxid_b',
                                       'pmid',  'source_database', 'score'])+'\n')
    for IdA, IdBs in I.items():
        for IdB, info in IdBs.items():

            # The score is the total number of entries across the five
            # '; '-separated fields below (one empty entry per field is
            # dropped before counting).
            experimental_system_type = info[2].split('; ')
            if '' in experimental_system_type: experimental_system_type.remove('')

            experimental_system = info[4].split('; ')
            if '' in experimental_system: experimental_system.remove('')

            modification = info[5].split('; ')
            if '' in modification: modification.remove('')

            pmid = info[8].split('; ')
            if '' in pmid:  pmid.remove('')

            source_database = info[9].split('; ')
            if '' in source_database: source_database.remove('')

            score = str(len(experimental_system_type)+len(experimental_system)+len(modification)+len(pmid)+len(source_database))
            # ``info`` is the same list object as I[IdA][IdB], so the score
            # appended here is part of the joined ``info`` written below.
            I[IdA][IdB].append(score)
            output.write('\t'.join(map(str, [IdA, IdB, '\t'.join(info)]))+'\n')
    output.close()