def get_gene_experiments(self, gene):
    """
    Given a gene symbol, return the list of ISH experiment IDs for that gene.

    :param gene: str, gene symbol substituted into the experiments query URL
    :returns: list of experiment ids (ints), or None when the API reports
        no experiments for this gene
    """
    # self.gene_experiments_url is a template with a -GENE_SYMBOL- placeholder
    url = self.gene_experiments_url.replace("-GENE_SYMBOL-", gene)
    data = request(url).json()["msg"]

    # Empty "msg" list means the Allen API has no ISH experiments for this gene
    if not data:
        print(f"No experiment found for gene {gene}")
        return None
    return [d["id"] for d in data]
def download_and_cache(url, cachedir):
    """
    Given a url to download a gene's ISH experiment data, this function
    downloads and unzips the data into a local cache directory.

    :param url: str, url to download data
    :param cachedir: str, path to folder where data will be downloaded
    """
    # Get data
    req = request(url)

    # Create cache dir (makedirs also creates missing parents and is a
    # no-op if the directory already exists — os.mkdir would raise in
    # both of those cases)
    os.makedirs(cachedir, exist_ok=True)

    # Unzip the in-memory response payload into the cache dir;
    # the with-block guarantees the archive handle is closed
    with zipfile.ZipFile(io.BytesIO(req.content)) as z:
        z.extractall(cachedir)
def get_streamlines_data(eids, force_download=False):
    """
    Given a list of experimental IDs, download the streamline data from the
    https://neuroinformatics.nl cache and save them as json files.

    :param eids: list of integers with experiment IDs
    :param force_download: bool, when True re-download even if a cached
        json file already exists
    :returns: list of pandas DataFrames, one per experiment ID
    """
    data = []
    for eid in track(eids, total=len(eids), description="downloading"):
        url = f"https://neuroinformatics.nl/HBP/allen-connectivity-viewer/json/streamlines_{eid}.json.gz"

        jsonpath = streamlines_folder / f"{eid}.json"

        if not jsonpath.exists() or force_download:
            response = request(url)

            # Write the response content as a temporary compressed file.
            # The name includes the eid so concurrent invocations do not
            # clobber each other's temp file.
            temp_path = streamlines_folder / f"temp_{eid}.gz"
            with open(str(temp_path), "wb") as temp:
                temp.write(response.content)

            # Open in pandas and delete temp
            url_data = pd.read_json(
                str(temp_path), lines=True, compression="gzip"
            )
            temp_path.unlink()

            # save json
            url_data.to_json(str(jsonpath))

            # append to lists and return
            data.append(url_data)
        else:
            data.append(pd.read_json(str(jsonpath)))
    return data
def get_all_genes(self):
    """
    Fetch metadata about every gene available in the Allen gene
    expression dataset and return it as a DataFrame.
    """
    # Query the "all genes" endpoint and unwrap the "msg" payload
    payload = request(self.all_genes_url).json()
    return pd.DataFrame(payload["msg"])