Exemplo n.º 1
0
    def blast_against_genome(self, ctx, params):
        """Run a blastp search of a query sequence against one genome.

        The query is written to ``tmp_seq`` as a one-record FASTA file, the
        genome is fetched with ``script_util.get_genome``, every feature that
        has a ``protein_translation`` is written to
        ``blast_db/tmp_genome_fasta``, and ``formatdb`` / ``blastall`` are run
        on those files.  The ``tmp_out`` report is then handed to
        ``script_util.extract_blast_output``.  All files live in the current
        working directory.

        Args:
            ctx: context object; ``ctx['token']`` is passed to
                ``script_util.get_genome``.
            params: mapping with a ``'query'`` entry holding the query
                sequence (must be longer than 5 characters).

        Returns:
            A one-element list wrapping the value returned by
            ``script_util.extract_blast_output``.

        Raises:
            ValueError: if the query is 5 characters or shorter, or if the
                extracted result is not a ``basestring``.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN blast_against_genome

        # Reject a too-short query up front, before any file is created, so
        # the caller gets a clear error instead of a failure further down.
        if len(params['query']) <= 5:
            raise ValueError('Query sequence is too short: more than 5 '
                             'characters are required.')
        sequence = params['query']

        genome_id = 'Bifidobacterium_animalis_subsp._lactis_AD011'
        workspaceid = 'plane83:1436884411390'

        # Write the query as a single FASTA record named "input_seq".
        with open('tmp_seq', 'w') as query_file:
            query_file.write(">input_seq\n")
            query_file.write(sequence)

        try:
            # Pass the identifiers themselves, not the literal variable names.
            genome = script_util.get_genome(genome_id, workspaceid,
                                            ctx['token'])

            # Round-trip through JSON in memory (no temporary file) so the
            # genome is handled as plain JSON data.
            genome_data = json.loads(json.dumps(genome))

            # Start from an empty blast_db directory.
            if os.path.exists('blast_db'):
                for stale_file in glob.glob('blast_db/*'):
                    os.remove(stale_file)
            else:
                os.makedirs('blast_db')

            # Only features carrying a protein translation go into the
            # protein database.
            with open('blast_db/tmp_genome_fasta', 'w') as fasta:
                for gene in genome_data['data']['features']:
                    if 'protein_translation' in gene:
                        fasta.write(">" + gene['id'] + "\n" +
                                    gene['protein_translation'] + "\n")

            # Format the protein database, then run the search.
            os.system("formatdb -i blast_db/tmp_genome_fasta -p T")
            os.system(
                "blastall -p blastp -i tmp_seq -m 9 -o tmp_out "
                "-d blast_db/tmp_genome_fasta"
            )
        finally:
            # The query file is never needed after the search, even on error.
            os.remove('tmp_seq')

        print("test")
        res = script_util.extract_blast_output('tmp_out')
        # 'tmp_out' is a file name; "input_seq" is only the FASTA header and
        # has no file of its own to delete.
        os.remove('tmp_out')

        returnVal = res
        #END blast_against_genome

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method blast_against_genome return value ' +
                             'returnVal is not type basestring as required.')

        # return the results
        return [returnVal]
Exemplo n.º 2
0
    def blast_against_genome(self, ctx, params):
        """Run a blastp search of a query sequence against one genome.

        The query is written to ``tmp_seq`` as a one-record FASTA file, the
        genome is fetched with ``script_util.get_genome``, every feature that
        has a ``protein_translation`` is written to
        ``blast_db/tmp_genome_fasta``, and ``formatdb`` / ``blastall`` are run
        on those files.  The ``tmp_out`` report is then handed to
        ``script_util.extract_blast_output``.  All files live in the current
        working directory.

        Args:
            ctx: context object; ``ctx['token']`` is passed to
                ``script_util.get_genome``.
            params: mapping with a ``'query'`` entry holding the query
                sequence (must be longer than 5 characters).

        Returns:
            A one-element list wrapping the value returned by
            ``script_util.extract_blast_output``.

        Raises:
            ValueError: if the query is 5 characters or shorter, or if the
                extracted result is not a ``basestring``.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN blast_against_genome

        # Reject a too-short query up front, before any file is created, so
        # the caller gets a clear error instead of a failure further down.
        if len(params['query']) <= 5:
            raise ValueError('Query sequence is too short: more than 5 '
                             'characters are required.')
        sequence = params['query']

        genome_id = 'Bifidobacterium_animalis_subsp._lactis_AD011'
        workspaceid = 'plane83:1436884411390'

        # Write the query as a single FASTA record named "input_seq".
        with open('tmp_seq', 'w') as query_file:
            query_file.write(">input_seq\n")
            query_file.write(sequence)

        try:
            # Pass the identifiers themselves, not the literal variable names.
            genome = script_util.get_genome(genome_id, workspaceid,
                                            ctx['token'])

            # Round-trip through JSON in memory (no temporary file) so the
            # genome is handled as plain JSON data.
            genome_data = json.loads(json.dumps(genome))

            # Start from an empty blast_db directory.
            if os.path.exists('blast_db'):
                for stale_file in glob.glob('blast_db/*'):
                    os.remove(stale_file)
            else:
                os.makedirs('blast_db')

            # Only features carrying a protein translation go into the
            # protein database.
            with open('blast_db/tmp_genome_fasta', 'w') as fasta:
                for gene in genome_data['data']['features']:
                    if 'protein_translation' in gene:
                        fasta.write(">" + gene['id'] + "\n" +
                                    gene['protein_translation'] + "\n")

            # Format the protein database, then run the search.
            os.system("formatdb -i blast_db/tmp_genome_fasta -p T")
            os.system(
                "blastall -p blastp -i tmp_seq -m 9 -o tmp_out "
                "-d blast_db/tmp_genome_fasta"
            )
        finally:
            # The query file is never needed after the search, even on error.
            os.remove('tmp_seq')

        print("test")
        res = script_util.extract_blast_output('tmp_out')
        # 'tmp_out' is a file name; "input_seq" is only the FASTA header and
        # has no file of its own to delete.
        os.remove('tmp_out')

        returnVal = res
        #END blast_against_genome

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method blast_against_genome return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
Exemplo n.º 3
0
    def blast_against_genome(self, ctx, params):
        """Run a blastp or blastn search of a query against a genome.

        The query is written to ``tmp_seq`` as a one-record FASTA file.  The
        genome is read from an existing ``tmp_data`` JSON file; depending on
        ``params['blast_program']`` either the ``protein_translation``
        (blastp) or the ``dna_sequence`` (blastn) of each feature is written
        to ``blast_db/tmp_genome_fasta``.  ``formatdb`` and ``blastall`` are
        then run, and the ``tmp_out`` report is parsed with
        ``script_util.extract_blast_output`` followed by ``json.loads``.  All
        files live in the current working directory.

        Args:
            ctx: context object (not read by the current implementation).
            params: mapping with the entries ``'query'`` (sequence longer than
                5 characters), ``'blast_program'`` (``'blastp'`` or
                ``'blastn'``) and ``'e-value'`` (numeric threshold passed to
                ``blastall -e``).

        Returns:
            A one-element list wrapping the decoded blast result list.

        Raises:
            ValueError: if the query is 5 characters or shorter, the blast
                program is unsupported, the e-value is not numeric, or the
                decoded result is not a list.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN blast_against_genome
        import subprocess

        print("start")

        # Validate every input before touching the file system so a bad
        # request leaves no stray files behind.
        if len(params['query']) <= 5:
            raise ValueError('Query sequence is too short: more than 5 '
                             'characters are required.')
        sequence = params['query']

        # blast program -> (formatdb "-p" flag, feature key with the sequence)
        program_settings = {
            'blastp': ('T', 'protein_translation'),
            'blastn': ('F', 'dna_sequence'),
        }
        blast_program = params['blast_program']
        if blast_program not in program_settings:
            raise ValueError("Unsupported blast_program %r: expected "
                             "'blastp' or 'blastn'." % (blast_program,))
        formatdb_type, sequence_key = program_settings[blast_program]

        # The e-value ends up on a command line, so insist it is a number.
        e_value = str(params['e-value'])
        try:
            float(e_value)
        except ValueError:
            raise ValueError("params['e-value'] must be numeric, got %r"
                             % (e_value,))

        # Write the query as a single FASTA record named "input_seq".
        with open('tmp_seq', 'w') as query_file:
            query_file.write(">input_seq\n")
            query_file.write(sequence)

        try:
            # NOTE: the workspace download is disabled here, so 'tmp_data'
            # must already exist in the working directory.  To re-enable it:
            #   genome = script_util.get_genome(
            #       'Bifidobacterium_animalis_subsp._lactis_AD011',
            #       'plane83:1436884411390', ctx['token'])
            #   with open('tmp_data', 'w') as outfile:
            #       json.dump(genome, outfile)

            # Start from an empty blast_db directory.
            if os.path.exists('blast_db'):
                for stale_file in glob.glob('blast_db/*'):
                    os.remove(stale_file)
            else:
                os.makedirs('blast_db')

            with open('tmp_data') as genome_file:
                genome_data = json.load(genome_file)

            # Only features carrying the relevant sequence type are indexed.
            with open('blast_db/tmp_genome_fasta', 'w') as fasta:
                for gene in genome_data['data']['features']:
                    if sequence_key in gene:
                        fasta.write(">" + gene['id'] + "\n" +
                                    gene[sequence_key] + "\n")

            # Argument lists (no shell) keep request values from being
            # interpreted as shell syntax.
            subprocess.call(["formatdb", "-i", "blast_db/tmp_genome_fasta",
                             "-p", formatdb_type])
            subprocess.call(["blastall", "-p", blast_program,
                             "-i", "tmp_seq", "-m", "9", "-o", "tmp_out",
                             "-d", "blast_db/tmp_genome_fasta",
                             "-e", e_value])
        finally:
            # The query file is never needed after the search, even on error.
            os.remove('tmp_seq')

        # extract the blast output
        res = script_util.extract_blast_output('tmp_out')
        os.remove('tmp_out')
        res1 = json.loads(res)

        print("finished")
        returnVal = res1
        #END blast_against_genome

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method blast_against_genome return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]