def setup():
    """Run the shared fixture setup and build the module-level API handles.

    After ``shared.setup()`` has run, six module globals are (re)bound, one
    per combination of implementation and genome reference:

    * ``t_new`` / ``t_old``: ``GenomeAnnotationAPI`` for ``genome_new`` /
      ``genome_old``.
    * ``t_new_e`` / ``t_old_e``: ``_GenomeAnnotation`` for ``genome_new`` and
      ``_KBaseGenomes_Genome`` for ``genome_old``.
    * ``t_client_new`` / ``t_client_old``: ``GenomeAnnotationClientAPI``
      pointed at the ``genome_annotation_service_url`` entry of the shared
      services mapping.
    """
    global t_new, t_new_e, t_old, t_old_e, t_client_new, t_client_old

    shared.setup()
    services, token = shared.services, shared.token

    # Objects built from the full services mapping.
    t_new = GenomeAnnotationAPI(services, token, genome_new)
    t_new_e = _GenomeAnnotation(services, token, genome_new)
    t_old = GenomeAnnotationAPI(services, token, genome_old)
    t_old_e = _KBaseGenomes_Genome(services, token, genome_old)

    # Client objects only take the URL of the genome annotation service.
    service_url = services["genome_annotation_service_url"]
    t_client_new = GenomeAnnotationClientAPI(service_url, token, genome_new)
    t_client_old = GenomeAnnotationClientAPI(service_url, token, genome_old)
def proteins_to_fasta(ws_url='https://ci.kbase.us/services/ws/'):
    """Fetch the proteins of a fixed reference genome and save them as FASTA.

    The genome reference and the output path are hard-coded; the
    authentication token is read from the ``KB_AUTH_TOKEN`` environment
    variable.

    Args:
      ws_url: Workspace service URL

    Returns:
      Full path to output file
    """
    # Alternative reference genome: "ReferenceGenomeAnnotations/kb|g.3899"
    ref = "ReferenceGenomeAnnotations/kb|g.166819"
    outfile = '/tmp/166819_prot.fasta'

    auth_token = os.environ.get('KB_AUTH_TOKEN')
    genome_annotation = GenomeAnnotationAPI(
        token=auth_token,
        services={'workspace_service_url': ws_url},
        ref=ref)

    # Fetch every protein through the Data API before touching the
    # output file, so a failed lookup does not leave an empty file behind.
    proteins = genome_annotation.get_proteins()

    # get_fasta() yields the pieces of FASTA text to write, in order.
    with open(outfile, 'w') as fasta_out:
        fasta_out.writelines(get_fasta(proteins))
    return outfile
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Print the core exons of the first locus of a KBase reference genome.

    The authentication token is read from the ``KB_AUTH_TOKEN`` environment
    variable; the genome reference and locus identifier are hard-coded.

    Args:
      ws_url: Workspace service URL
    """
    # A KBase reference genome and the first locus within it.
    genome_ref = "ReferenceGenomeAnnotations/kb|g.166819"
    locus_id = 'kb|g.166819.locus.1'

    # Instantiate a new genome annotation API for that genome.
    auth_token = os.environ.get('KB_AUTH_TOKEN')
    annotation_api = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=auth_token,
        ref=genome_ref)

    # Retrieve the core exons for the locus and report them.
    core_exons = get_core_exons(locus_id, annotation_api)
    print("Core exon(s) found in all mRNAs for gene " + locus_id)
    print(core_exons)
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Write the GFF data of one locus of a KBase reference genome to a file.

    The output file name is printed and the file is created in the current
    working directory. The authentication token is read from the
    ``KB_AUTH_TOKEN`` environment variable.

    Args:
      ws_url: Workspace service URL
    """
    # An example KBase reference genome, its first locus, and the output file.
    genome_ref = "ReferenceGenomeAnnotations/kb|g.166819"
    locus_id = 'kb|g.166819.locus.1'
    outfile = 'g.166819.locus.1.gff'

    # Instantiate a new genome annotation API for that genome.
    auth_token = os.environ.get('KB_AUTH_TOKEN')
    annotation_api = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=auth_token,
        ref=genome_ref)

    # Build the GFF text for the chosen locus.
    gff_text = get_gff(locus_id, annotation_api)

    print(outfile)
    with open(outfile, 'w') as gff_out:
        gff_out.write(gff_text)
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Save all proteins of a KBase reference genome as a FASTA file.

    The file ``166819_prot.fasta`` is written to the current working
    directory. The authentication token is read from the ``KB_AUTH_TOKEN``
    environment variable.

    Args:
      ws_url: Workspace service URL
    """
    # An example KBase reference genome and the file the FASTA goes to.
    genome_ref = "ReferenceGenomeAnnotations/kb|g.166819"
    outfile = '166819_prot.fasta'

    # Create a new GenomeAnnotation object for that genome.
    auth_token = os.environ.get('KB_AUTH_TOKEN')
    annotation_api = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=auth_token,
        ref=genome_ref)

    # Fetch all protein objects, then render them in FASTA format.
    fasta_text = get_protein_fasta(annotation_api.get_proteins())

    with open(outfile, 'w') as fasta_out:
        fasta_out.write(fasta_text)
def generate_gff(logger, internal_services, token, ref, output_dir, obj_name,
                 output_file):
    """Write the GFF of a GenomeAnnotation object to ``output_file``.

    The object is addressed as ``"<ref>/<obj_name>"``. The time taken to
    produce the file is reported through ``logger.info``.

    Args:
      logger: Object with an ``info`` method used for progress messages.
      internal_services: Services argument passed to ``GenomeAnnotationAPI``.
      token: Authentication token passed to ``GenomeAnnotationAPI``.
      ref: First component of the object reference.
      output_dir: Unused; kept so existing callers keep working
        (the destination is given by ``output_file``).
      obj_name: Second component of the object reference.
      output_file: Path of the GFF file to create (opened in text mode).

    Raises:
      Exception: If the ``GenomeAnnotationAPI`` object cannot be created;
        the message carries the text of the underlying error.
    """
    try:
        ga = GenomeAnnotationAPI(internal_services, token=token,
                                 ref="{}/{}".format(ref, obj_name))
    except Exception as e:
        # Bind the error explicitly: the message below interpolates it, and
        # an unbound name here would replace the real failure with NameError.
        raise Exception("Unable to Call GenomeAnnotationAPI : {0}".format(e))

    logger.info("Requesting GenomeAnnotation GFF for {}/{}".format(
        ref, obj_name))
    gff_start = datetime.datetime.utcnow()

    # The with-block closes the file; no explicit close() is needed.
    with open(output_file, 'w') as gff_file:
        ga.get_gff().to_file(gff_file)

    gff_end = datetime.datetime.utcnow()
    logger.info("Generating GFF for {} took {}".format(obj_name,
                                                       gff_end - gff_start))
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Print the contig GC result for a reference genome and save it as JSON.

    The result of ``contig_gc`` is printed and its JSON serialisation is
    written to ``166819_GC.txt`` in the current working directory. The
    authentication token is read from the ``KB_AUTH_TOKEN`` environment
    variable.

    Args:
      ws_url: Workspace service URL
    """
    # An example KBase reference genome and the file the result goes to.
    genome_ref = "ReferenceGenomeAnnotations/kb|g.166819"
    outfile = '166819_GC.txt'

    # Create a new GenomeAnnotation object for that genome.
    auth_token = os.environ.get('KB_AUTH_TOKEN')
    annotation_api = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=auth_token,
        ref=genome_ref)

    gc_result = contig_gc(annotation_api, genome_ref)
    print(gc_result)

    with open(outfile, 'w') as gc_out:
        serialised = json.dumps(gc_result)
        gc_out.write(serialised)
def generate_gff(logger, internal_services, token, ref, output_dir, obj_name,
                 output_file):
    """Write the GFF of the GenomeAnnotation object ``ref`` to ``output_file``.

    The time taken to produce the file is reported through ``logger.info``.

    Args:
      logger: Object with an ``info`` method used for progress messages.
      internal_services: Services argument passed to ``GenomeAnnotationAPI``.
      token: Authentication token passed to ``GenomeAnnotationAPI``.
      ref: Object reference passed to ``GenomeAnnotationAPI``.
      output_dir: Unused; kept so existing callers keep working
        (the destination is given by ``output_file``).
      obj_name: Object name, used only in log and error messages.
      output_file: Path of the GFF file to create (opened in binary mode).

    Raises:
      Exception: If the ``GenomeAnnotationAPI`` object cannot be created or
        the GFF cannot be written; the message embeds the formatted
        traceback of the underlying error.
    """
    try:
        ga = GenomeAnnotationAPI(internal_services, token=token, ref=ref)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not repackaged as a generic failure.
        raise Exception("Unable to Call GenomeAnnotationAPI : {0}".format(
            traceback.format_exc()))

    logger.info("Requesting GenomeAnnotation GFF for {}".format(obj_name))
    gff_start = datetime.datetime.utcnow()

    # The with-block closes the file whether or not writing succeeds.
    with io.open(output_file, 'wb') as gff_file:
        try:
            ga.get_gff().to_file(gff_file)
        except Exception:
            raise Exception(
                "Unable to Create GFF file from Genome Annotation : {0}: {1}"
                .format(obj_name, traceback.format_exc()))

    gff_end = datetime.datetime.utcnow()
    logger.info("Generating GFF for {} took {}".format(obj_name,
                                                       gff_end - gff_start))
def generate_fasta(logger, internal_services, token, ref, output_dir, obj_name):
    """Write the assembly FASTA of a GenomeAnnotation object to a file.

    The object is addressed as ``"<ref>/<obj_name>"`` and the output goes to
    ``<output_dir>/<obj_name>.fasta``. The time taken to produce the file is
    reported through ``logger.info``.

    Args:
      logger: Object with an ``info`` method used for progress messages.
      internal_services: Services argument passed to ``GenomeAnnotationAPI``.
      token: Authentication token passed to ``GenomeAnnotationAPI``.
      ref: First component of the object reference.
      output_dir: Directory in which the FASTA file is created.
      obj_name: Second component of the object reference; also the base
        name of the output file.

    Returns:
      Path of the FASTA file that was written.

    Raises:
      Exception: If the ``GenomeAnnotationAPI`` object cannot be created;
        the message names the object reference and the underlying error.
    """
    object_ref = "{}/{}".format(ref, obj_name)
    try:
        ga = GenomeAnnotationAPI(internal_services, token=token,
                                 ref=object_ref)
    except Exception as e:
        # The template has two placeholders, so supply both values; with a
        # single argument str.format itself fails with IndexError.
        raise Exception(
            "Unable to Call GenomeAnnotationAPI : {0}: {1}".format(
                object_ref, e))

    logger.info("Generating FASTA file from Assembly for {}/{}".format(
        ref, obj_name))
    fasta_start = datetime.datetime.utcnow()

    output_file = os.path.join(output_dir, '{}.fasta'.format(obj_name))
    # The with-block closes the file; no explicit close() is needed.
    with open(output_file, 'w') as fasta_file:
        ga.get_assembly().get_fasta().to_file(fasta_file)

    fasta_end = datetime.datetime.utcnow()
    logger.info("Generating FASTA for {} took {}".format(
        obj_name, fasta_end - fasta_start))
    return output_file
def run(ws_url='https://ci.kbase.us/services/ws/'):
    """Write the concatenated GFF data of three loci to a single file.

    The GFF text of each locus is fetched in order and joined without any
    separator. The output file name is printed and the file is created in
    the current working directory. The authentication token is read from
    the ``KB_AUTH_TOKEN`` environment variable.

    Args:
      ws_url: Workspace service URL
    """
    # An example KBase reference genome, the loci to export, and the target.
    genome_ref = "ReferenceGenomeAnnotations/kb|g.166819"
    locus_ids = [
        'kb|g.166819.locus.1',
        'kb|g.166819.locus.2',
        'kb|g.166819.locus.3',
    ]
    outfile = 'g.166819.locus.1_2_3.gff'

    # Instantiate a new genome annotation API for that genome.
    auth_token = os.environ.get('KB_AUTH_TOKEN')
    annotation_api = GenomeAnnotationAPI(
        services={"workspace_service_url": ws_url},
        token=auth_token,
        ref=genome_ref)

    gff_text = "".join(
        [get_gff(locus_id, annotation_api) for locus_id in locus_ids])

    print(outfile)
    with open(outfile, 'w') as gff_out:
        gff_out.write(gff_text)
def generate_fasta(logger, internal_services, token, ref, output_dir, obj_name):
    """Write the assembly FASTA of the GenomeAnnotation ``ref`` to a file.

    The output goes to ``<output_dir>/<obj_name>.fasta`` (opened in binary
    mode). The time taken to produce the file is reported through
    ``logger.info``.

    Args:
      logger: Object with an ``info`` method used for progress messages.
      internal_services: Services argument passed to ``GenomeAnnotationAPI``.
      token: Authentication token passed to ``GenomeAnnotationAPI``.
      ref: Object reference passed to ``GenomeAnnotationAPI``.
      output_dir: Directory in which the FASTA file is created.
      obj_name: Base name of the output file; also used in log messages.

    Returns:
      Path of the FASTA file that was written.

    Raises:
      Exception: If the ``GenomeAnnotationAPI`` object cannot be created or
        the FASTA cannot be written; the message embeds the formatted
        traceback of the underlying error.
    """
    try:
        annotation = GenomeAnnotationAPI(internal_services, token=token,
                                         ref=ref)
    except Exception:
        failure = traceback.format_exc()
        raise Exception(
            "Unable to Call GenomeAnnotationAPI : {0}".format(failure))

    logger.info("Generating FASTA file from Assembly for {}".format(obj_name))
    started = datetime.datetime.utcnow()

    output_file = os.path.join(output_dir, '{}.fasta'.format(obj_name))
    # The with-block closes the file whether or not writing succeeds.
    with io.open(output_file, 'wb') as fasta_out:
        try:
            annotation.get_assembly().get_fasta().to_file(fasta_out)
        except Exception:
            failure = traceback.format_exc()
            raise Exception(
                "Unable to Create FASTA file from Genome Annotation : {0}"
                .format(failure))

    finished = datetime.datetime.utcnow()
    logger.info("Generating FASTA for {} took {}".format(obj_name,
                                                         finished - started))
    return output_file