def __init__(self, version: str):
    """Set up the on-disk reference layout for the TADB2 database.

    Scrapes the TADB2 download page for ``.fas`` links, buckets them by
    sequence type (nucleotide vs. protein, taken from the URL's parent
    directory name) and records the target FASTA paths.

    :param version: reference version string, forwarded to the describer.
    :raises ValueError: if any scraped link falls outside the two
        expected nucleotide/protein buckets.
    """
    _DL_PAGE_URL = "http://202.120.12.135/TADB2/download.html"
    self.describer = ReferenceDescriber()
    self.describer.VERSION = version
    self.describer.update_alias()
    self.reference_dir = os.path.join(
        "/data/reference", self.describer.NAME, self.describer.ALIAS)
    # Only FASTA files (.fas) on the download page are of interest
    links = [i for i in Utilities.scrap_links_from_web_page(_DL_PAGE_URL)
             if i.endswith(".fas")]
    self._fasta_types = ["nucleotide", "protein"]
    # Bucket each link by the parent directory component of its URL
    self._links_dict = {
        k: [i for i in links if i.split("/")[-2] == k]
        for k in self._fasta_types
    }
    # Fail loudly if any link escaped both buckets. The original used a
    # bare `assert`, which is stripped under `python -O` and would let
    # unexpected links pass silently.
    bucketed = sum(len(self._links_dict[k]) for k in self._fasta_types)
    if bucketed != len(links):
        raise ValueError(
            "Scraped links outside expected 'nucleotide'/'protein' buckets")
    self.nfasta = os.path.join(
        self.reference_dir, "{}.fasta".format(self.describer.ALIAS))
    self.pfasta = os.path.join(
        self.reference_dir, "{}_protein.fasta".format(self.describer.ALIAS))
    self.index_dir = ""
def __init__(self):
    """Prepare the download queue for the reference database.

    Versions the reference by the most recent Friday, scrapes the
    class-level download page for ``.gz`` archives and assigns each one
    a target directory: ``protein/`` for links containing ``_pro``,
    ``nucleotide/`` for links containing ``_nt``, otherwise the
    reference root. Also records the merged FASTA output paths.
    """
    self.describer = ReferenceDescriber()
    self.describer.VERSION = self._get_last_friday()
    self.reference_dir = os.path.join(
        "/data/reference", self.describer.NAME, self.describer.ALIAS)
    # Keep only gzip archives from the scraped page
    archive_links = [
        link
        for link in Utilities.scrap_links_from_web_page(self._DL_PAGE_URL)
        if link.endswith(".gz")
    ]
    self._dl_queue = []
    for link in archive_links:
        # "_pro" takes precedence over "_nt" when deciding the subdirectory
        if "_pro" in link:
            target_dir = os.path.join(self.reference_dir, "protein")
        elif "_nt" in link:
            target_dir = os.path.join(self.reference_dir, "nucleotide")
        else:
            target_dir = self.reference_dir
        self._dl_queue.append((link, target_dir))
    self.nfasta = os.path.join(
        self.reference_dir, "{}.fasta".format(self.describer.ALIAS))
    self.pfasta = os.path.join(
        self.reference_dir, "{}_protein.fasta".format(self.describer.ALIAS))