示例#1
0
def _parse_gene_record(gene_file):
    """
    Collect the fields of one flat-file gene record into a dict.

    A line whose first column is not a space starts a new field (its first
    word is the field keyword); lines starting with a space continue the
    current field; lines starting with '/' are skipped.
    NOTE(review): this layout looks like the KEGG GENES flat-file format --
    confirm against the data source.

    @param gene_file: iterable yielding the text lines of the record
    @return: dict with 'ENTRY', 'NTSEQLEN' (int), 'ORGANISM_SHORT', 'ORGANISM'
             and one concatenated string per other field keyword
    @rtype: dict
    """
    current_info_type = ''
    compound_info = dict()
    for raw_line in gene_file:
        # Drop the line terminator; also tolerate Windows "\r\n" endings.
        line = raw_line.rstrip('\r\n')
        if line.startswith('/'):
            continue
        infos = line.split(' ')
        if not line.startswith(' '):
            # A keyword in column 0 opens a new field.
            current_info_type = infos[0]
            infos = infos[1:]
            if current_info_type in ('ENTRY', 'NTSEQ'):
                # Only the first non-empty token after the keyword is used.
                first_token = next(
                    (item for item in infos if item != ''), None)
                if first_token is not None:
                    if current_info_type == 'ENTRY':
                        compound_info['ENTRY'] = first_token
                    else:
                        compound_info['NTSEQLEN'] = int(first_token)
            elif current_info_type == 'ORGANISM':
                # "<short code>  <full name>": split on the first double
                # space. A line without a full name yields '' instead of
                # raising IndexError.
                organism_text = line[len('ORGANISM'):].strip()
                short_code, _, full_name = organism_text.partition('  ')
                compound_info['ORGANISM_SHORT'] = short_code
                compound_info['ORGANISM'] = full_name.strip()
        # ENTRY / ORGANISM were handled above, and the NTSEQ header line only
        # carries the length; none of them is accumulated as field text.
        if (current_info_type in ('ENTRY', 'ORGANISM')
                or line.startswith('NTSEQ')):
            continue
        # NOTE(review): tokens are joined WITHOUT spaces. That is what a
        # multi-line sequence needs, but it also removes the spaces from
        # fields such as DEFINITION; kept as is because stored data may
        # depend on it.
        for info_item in infos:
            try:
                compound_info[current_info_type] += info_item
            except (KeyError, TypeError):
                # First token of this field (or the slot holds a non-string).
                compound_info[current_info_type] = info_item
    return compound_info


def saveGeneDataToDB(gene_file):
    """
    Parse one gene record from gene_file and save it as a gene object.

    The NAME field is split on ';': the first part becomes the name, every
    further part is appended to the nickname string followed by '_'.
    If a required field is missing or the gene cannot be built, the traceback
    is printed and nothing is saved. A failing save() is reported the same
    way, followed by a "can not be saved" message.

    @param gene_file: iterable yielding the text lines of the record
                      (e.g. an open file)
    @return: None
    """
    compound_info = _parse_gene_record(gene_file)
    try:
        names = compound_info['NAME'].split(';')
        compound_info['NAME'] = names[0]
        # Every alias keeps a trailing '_' (existing stored format).
        compound_info['NICKNAME'] = ''.join(
            alias + '_' for alias in names[1:])
        new_gene = gene(gene_id=compound_info['ENTRY'])
        new_gene.name = compound_info['NAME']
        new_gene.nicknames = compound_info['NICKNAME']
        new_gene.definition = compound_info['DEFINITION']
        new_gene.organism_short = compound_info['ORGANISM_SHORT']
        new_gene.organism = compound_info['ORGANISM']
        new_gene.position = compound_info['POSITION']
        new_gene.ntseq_length = compound_info['NTSEQLEN']
        new_gene.ntseq = compound_info['NTSEQ']
        try:
            new_gene.save()
        except Exception:
            traceback.print_exc()
            # Single parenthesised argument: prints identically on
            # Python 2 and Python 3.
            print('%s can not be saved' % compound_info['NAME'])
    except Exception:
        # Best effort: report the problem (e.g. a missing field) and return.
        traceback.print_exc()
示例#2
0
    def get_or_create_gene(self, gid):
        """
        Return the gene stored under gid, or fetch it from NCBI and store it.

        The database is queried first. If that lookup fails, the FASTA record
        for gid is downloaded from NCBI efetch (db=nuccore) and parsed with
        parse_fasta_str. For each parsed entry, details are requested from
        self.retrive_gene_detain; the first entry that has details and saves
        successfully is returned.

        @param gid: gene id
        @type gid: str
        @return: the stored or newly saved gene object, or None if no entry
                 could be saved
        @rtype: gene
        """
        # Look in the database first.
        try:
            return gene.objects.get(gene_id=gid)
        except Exception:
            # Any lookup failure falls through to the remote fetch, as before;
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            pass

        # Not stored yet: fetch from NCBI.
        base_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&rettype=fasta&id='
        response = urllib2.urlopen(urllib2.Request(base_url + gid))
        try:
            fasta_text = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        gene_dict = parse_fasta_str(fasta_text)
        for header in gene_dict:
            sequence = gene_dict[header]
            # Assumes a header of the form "<tag>|<id>|..."; a header without
            # '|' raises IndexError here, as it did before.
            fetched_id = header.split('|')[1]
            # Get detail information.
            detail_info = self.retrive_gene_detain(fetched_id)
            if detail_info is None:
                continue
            new_gene_obj = gene(gene_id=fetched_id)
            new_gene_obj.name = detail_info['name']
            new_gene_obj.definition = detail_info['definition']
            new_gene_obj.organism = detail_info['organism']
            new_gene_obj.ntseq = sequence
            new_gene_obj.ntseq_length = len(sequence)
            try:
                new_gene_obj.save()
            except Exception:
                # Best effort: report the failure and try the next entry
                # instead of silently discarding the error.
                import traceback
                traceback.print_exc()
                continue
            return new_gene_obj
        return None