def test_is_poplar_gene_valid(self):
    # Exercises BARUtils.is_poplar_gene_valid with one accepted and one
    # rejected identifier.

    # A well-formed poplar gene id is reported as valid
    self.assertTrue(BARUtils.is_poplar_gene_valid('Potri.019G123900.1'))

    # An arbitrary string is reported as invalid
    self.assertFalse(BARUtils.is_poplar_gene_valid('abc'))
def get(self, fixed_pdb='', moving_pdb=''):
    """This end point returns the superimposition of the moving PDB onto the fixed PDB in PDB format

    Both ids must be valid Arabidopsis or poplar gene ids, otherwise a 400
    error is returned. If the superimposed model is not already being served
    under the phenix-pdbs link, ``phenix.superpose_pdbs`` is run to create
    it; a non-zero exit status of that command yields a 500 error. On
    success the client is redirected to the model file.
    """
    fixed_pdb = escape(fixed_pdb)
    moving_pdb = escape(moving_pdb)

    arabidopsis_pdb_path = '/var/www/html/eplant_legacy/java/Phyre2-Models/Phyre2_'
    poplar_pdb_path = '/var/www/html/eplant_poplar/pdb/'
    phenix_pdb_link = 'https://bar.utoronto.ca/phenix-pdbs/'
    phenix_pdb_path = '/var/www/html/phenix-pdbs/'

    # Check if genes ids are valid and resolve the input structure files
    if BARUtils.is_arabidopsis_gene_valid(fixed_pdb):
        fixed_pdb_path = arabidopsis_pdb_path + fixed_pdb.upper() + '.pdb'
    elif BARUtils.is_poplar_gene_valid(fixed_pdb):
        fixed_pdb_path = poplar_pdb_path + BARUtils.format_poplar(fixed_pdb) + '.pdb'
    else:
        return BARUtils.error_exit('Invalid fixed pdb gene id'), 400

    if BARUtils.is_arabidopsis_gene_valid(moving_pdb):
        moving_pdb_path = arabidopsis_pdb_path + moving_pdb.upper() + '.pdb'
    elif BARUtils.is_poplar_gene_valid(moving_pdb):
        moving_pdb_path = poplar_pdb_path + BARUtils.format_poplar(moving_pdb) + '.pdb'
    else:
        return BARUtils.error_exit('Invalid moving pdb gene id'), 400

    # Check if model already exists
    phenix_file_name = fixed_pdb.upper() + "-" + moving_pdb.upper() + "-phenix.pdb"
    response = requests.get(phenix_pdb_link + phenix_file_name)

    # If not, generate the model
    if response.status_code != 200:
        completed = subprocess.run([
            'phenix.superpose_pdbs',
            'file_name=' + phenix_pdb_path + phenix_file_name,
            fixed_pdb_path,
            moving_pdb_path,
        ])
        # Do not redirect to a file that was never written
        if completed.returncode != 0:
            return BARUtils.error_exit('An internal error has occurred'), 500

    return redirect(phenix_pdb_link + phenix_file_name)
def get(self, gene_id=''):
    """
    Endpoint returns annotated SNP poplar data in order of (to match A th API format):
    chr, AA pos (zero-indexed), sample id, 'missense_variant', 'MODERATE', 'MISSENSE',
    codon/DNA base change, AA change (DH), pro length, gene ID, 'protein_coding',
    'CODING', transcript id, biotype
    values with single quotes are fixed

    pro length and biotype are always returned as null. An invalid gene id
    gives a 400 error and a database OperationalError gives a 500 error.
    """
    results_json = []

    # Escape input
    gene_id = escape(gene_id)

    if not BARUtils.is_poplar_gene_valid(gene_id):
        return BARUtils.error_exit('Invalid gene id'), 400

    try:
        rows = db.session.query(ProteinReference, SnpsToProtein, SnpsReference). \
            select_from(ProteinReference). \
            join(SnpsToProtein). \
            join(SnpsReference). \
            filter(ProteinReference.gene_identifier == gene_id).all()

        # BAR A Th API format is chr, AA pos (zero-indexed), sample id, 'missense_variant',
        # 'MODERATE', 'MISSENSE', codon/DNA base change, AA change (DH),
        # pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype
        for protein, snpsjoin, snpstbl in rows:
            itm_lst = [
                snpstbl.chromosome,
                snpsjoin.aa_pos - 1,  # zero index-ed
                snpstbl.sample_id,
                'missense_variant',
                'MODERATE',
                'MISSENSE',
                str(snpsjoin.transcript_pos) + snpsjoin.ref_DNA + '>' + snpsjoin.alt_DNA,
                snpsjoin.ref_aa + snpsjoin.alt_aa,
                None,
                # Gene ID is the transcript id without its trailing ".<number>"
                # suffix; the dot is escaped and all digits are matched so ids
                # without a suffix, or with a multi-digit one, are not mangled
                re.sub(r"\.\d+$", '', protein.gene_identifier),
                'protein_coding',
                'CODING',
                protein.gene_identifier,
                None,
            ]
            results_json.append(itm_lst)
    except OperationalError:
        return BARUtils.error_exit('An internal error has occurred'), 500

    # Return results if there are data
    if results_json:
        return BARUtils.success_exit(results_json)
    return BARUtils.error_exit('There are no data found for the given gene')
def get(self, species="", gene_id=""):
    """This end point provides gene isoforms given a gene ID.
    Only genes/isoforms with pdb structures are returned

    Supported species are arabidopsis, poplar and tomato. An invalid gene id
    gives a 400 error and a database OperationalError gives a 500 error.
    """
    # Escape input
    species = escape(species)
    gene_id = escape(gene_id)

    # Set the database and check if genes are valid.
    # The model classes are used directly: ``query`` is reached through the
    # class, so no model instance needs to be created.
    if species == "arabidopsis":
        database = eplant2_isoforms

        if not BARUtils.is_arabidopsis_gene_valid(gene_id):
            return BARUtils.error_exit("Invalid gene id"), 400
    elif species == "poplar":
        database = eplant_poplar_isoforms

        if not BARUtils.is_poplar_gene_valid(gene_id):
            return BARUtils.error_exit("Invalid gene id"), 400

        # Format the gene first
        gene_id = BARUtils.format_poplar(gene_id)
    elif species == "tomato":
        database = eplant_tomato_isoforms

        if not BARUtils.is_tomato_gene_valid(gene_id, False):
            return BARUtils.error_exit("Invalid gene id"), 400
    else:
        return BARUtils.error_exit("No data for the given species")

    # Now get the data
    try:
        rows = database.query.filter_by(gene=gene_id).all()
    except OperationalError:
        return BARUtils.error_exit("An internal error has occurred"), 500

    gene_isoforms = [row.isoform for row in rows]

    # Found isoforms
    if gene_isoforms:
        return BARUtils.success_exit(gene_isoforms)
    return BARUtils.error_exit("There are no data found for the given gene")
def get(self, species='', gene_id=''):
    """Return the isoforms of one gene for the given species.
    Only genes/isoforms with pdb structures are returned

    Arabidopsis isoforms come from the isoforms table; for poplar the gene
    itself is returned when a matching pdb file exists on disk.
    """
    # Escape input
    species = escape(species)
    gene_id = escape(gene_id)

    if species == 'arabidopsis':
        if not BARUtils.is_arabidopsis_gene_valid(gene_id):
            return BARUtils.error_exit('Invalid gene id'), 400

        try:
            matches = isoforms.query.filter_by(gene=gene_id).all()
        except OperationalError:
            return BARUtils.error_exit('An internal error has occurred'), 500

        found = [match.isoform for match in matches]
        if found:
            return BARUtils.success_exit(found)

    elif species == 'poplar':
        if not BARUtils.is_poplar_gene_valid(gene_id):
            return BARUtils.error_exit('Invalid gene id'), 400

        # Directory holding the poplar pdb files
        if os.environ.get('BAR'):
            pdb_dir = '/DATA/ePlants_Data/eplant_poplar/protein_structures/'
        else:
            pdb_dir = os.getcwd() + '/data/gene_information/gene_isoforms/'

        pdb_name = gene_id + '.pdb'
        pdb_file = pdb_dir + pdb_name
        if os.path.exists(pdb_file) and os.path.isfile(pdb_file):
            return BARUtils.success_exit(gene_id)

    else:
        return BARUtils.error_exit('No data for the given species')

    # Valid gene of a supported species, but nothing was found for it
    return BARUtils.error_exit('There are no data found for the given gene')
def post(self):
    """Return isoforms for several genes of one species.
    Only genes/isoforms with pdb structures are returned

    The JSON body is validated with GeneIsoformsSchema and provides the
    'genes' and 'species' values. The result maps each gene that was found
    to a list of its isoforms.
    """
    json_data = request.get_json()
    data = {}

    # Validate json
    try:
        json_data = GeneIsoformsSchema().load(json_data)
    except ValidationError as err:
        return BARUtils.error_exit(err.messages), 400

    species = json_data['species']
    genes = json_data['genes']

    if species != 'arabidopsis' and species != 'poplar':
        return BARUtils.error_exit('Invalid species'), 400

    if species == 'arabidopsis':
        # Every gene id must be well formed before the database is queried
        if any(not BARUtils.is_arabidopsis_gene_valid(gene) for gene in genes):
            return BARUtils.error_exit('Invalid gene id'), 400

        # Query the database
        database = isoforms()
        try:
            rows = database.query.filter(isoforms.gene.in_(genes)).all()
        except OperationalError:
            return BARUtils.error_exit('An internal error has occurred.'), 500

        if len(rows) == 0:
            return BARUtils.error_exit('No data for the given species/genes'), 400

        # Group isoforms under their gene
        for row in rows:
            data.setdefault(row.gene, []).append(row.isoform)
        return BARUtils.success_exit(data)

    # species == 'poplar'
    if any(not BARUtils.is_poplar_gene_valid(gene) for gene in genes):
        return BARUtils.error_exit('Invalid gene id'), 400

    # Directory holding the poplar pdb files
    if os.environ.get('BAR'):
        pdb_dir = '/DATA/ePlants_Data/eplant_poplar/protein_structures/'
    else:
        pdb_dir = os.getcwd() + '/data/gene_information/gene_isoforms/'

    # A gene is reported only when its pdb file exists
    for gene in genes:
        pdb_file = pdb_dir + gene + '.pdb'
        if not (os.path.exists(pdb_file) and os.path.isfile(pdb_file)):
            continue
        data[gene] = [gene]

    if len(data) == 0:
        return BARUtils.error_exit('No data for the given species/genes'), 400
    return BARUtils.success_exit(data)
def post(self):
    """Return isoforms for several genes of one species.
    Only genes/isoforms with pdb structures are returned

    The JSON body is validated with GeneIsoformsSchema and provides the
    "genes" and "species" values. Supported species are arabidopsis, poplar
    and tomato. The result maps each gene that was found to a list of its
    isoforms.
    """
    json_data = request.get_json()
    data = {}

    # Validate json
    try:
        json_data = GeneIsoformsSchema().load(json_data)
    except ValidationError as err:
        return BARUtils.error_exit(err.messages), 400

    genes = json_data["genes"]
    species = json_data["species"]

    # Pick the isoform table and the gene id validator for the species
    if species == "arabidopsis":
        model = eplant2_isoforms
        is_valid = BARUtils.is_arabidopsis_gene_valid
    elif species == "poplar":
        model = eplant_poplar_isoforms
        is_valid = BARUtils.is_poplar_gene_valid
    elif species == "tomato":
        model = eplant_tomato_isoforms

        def is_valid(gene):
            return BARUtils.is_tomato_gene_valid(gene, False)
    else:
        return BARUtils.error_exit("Invalid species"), 400

    database = model()

    # Check if every gene is valid
    for gene in genes:
        if not is_valid(gene):
            return BARUtils.error_exit("Invalid gene id"), 400

    # Query the table of the selected species
    try:
        rows = database.query.filter(model.gene.in_(genes)).all()
    except OperationalError:
        return BARUtils.error_exit("An internal error has occurred."), 500

    if len(rows) == 0:
        return BARUtils.error_exit("No data for the given species/genes"), 400

    # If there any isoforms found, group them under their gene
    for row in rows:
        data.setdefault(row.gene, []).append(row.isoform)

    return BARUtils.success_exit(data)