def human_genome():
    """Download the hg19 human chromosomes and save them as stripped sequences.

    For chromosomes 1-22, X and Y, the gzip-compressed ``chr<N>.fa.gz`` file
    is fetched from the UCSC goldenpath URL with ``fetch_bin``, decompressed,
    decoded to text, passed through ``utils.strip_fasta`` and written to
    ``human/chr<N>.dna``.

    The ``human`` directory is created relative to the current working
    directory if it does not exist yet. Progress is reported with ``print``.
    """
    import gzip

    base = "http://hgdownload.cse.ucsc.edu/goldenpath/hg19/chromosomes/chr{}.fa.gz"
    chromosomes = [*range(1, 23), "X", "Y"]
    # Maps output file name -> download URL, in chromosome order.
    links = {f"chr{name}.dna": base.format(name) for name in chromosomes}

    # exist_ok avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs("human", exist_ok=True)

    for fn, link in links.items():
        print(f"Downloading {fn}...")
        data = fetch_bin(link)
        print(f"Decompressing {fn}...")
        data = gzip.decompress(data)
        print(f"Stripping {fn}...")
        data = utils.strip_fasta(data.decode())
        with open(os.path.join("human", fn), "w") as file:
            file.write(data)
def strip_fasta():
    """Run ``utils.strip_fasta`` over one file and write the result to another.

    The input and output paths are the two values returned by
    ``require(2, "<input> <output>")``.
    NOTE(review): ``require`` is defined elsewhere; it presumably reads the
    two paths from the command line -- confirm.
    """
    source_path, target_path = require(2, "<input> <output>")

    with open(source_path, "r") as source:
        raw_text = source.read()

    stripped_text = utils.strip_fasta(raw_text)

    with open(target_path, "w") as target:
        target.write(stripped_text)
def cat_genome():
    """Download the cat chromosomes and save them as stripped sequences.

    Each entry of the table below maps a chromosome name to the identifier
    passed to ``fetch_nuccore``. The returned text is passed through
    ``utils.strip_fasta`` and written to ``cat/chr<NAME>.dna``, with the
    chromosome name upper-cased (e.g. ``cat/chrA1.dna``).

    The ``cat`` directory is created relative to the current working
    directory if it does not exist yet. Progress is reported with ``print``.
    """
    links = {
        "a1": "CM001378.3",
        "a2": "CM001379.3",
        "a3": "CM001380.3",
        "b1": "CM001381.3",
        "b2": "CM001382.3",
        "b3": "CM001383.3",
        "b4": "CM001384.3",
        "c1": "CM001385.3",
        "c2": "CM001386.3",
        "d1": "CM001387.3",
        "d2": "CM001388.3",
        "d3": "CM001389.3",
        "d4": "CM001390.3",
        "e1": "CM001391.3",
        "e2": "CM001392.3",
        "e3": "CM001393.3",
        "f1": "CM001394.3",
        "f2": "CM001395.3",
        "x": "CM001396.3",
    }

    # exist_ok avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs("cat", exist_ok=True)

    for fn, link in links.items():
        print(f"Downloading {fn}...")
        data = fetch_nuccore(link)
        print(f"Stripping {fn}...")
        data = utils.strip_fasta(data)
        with open(os.path.join("cat", f"chr{fn.upper()}.dna"), "w") as file:
            file.write(data)
def corona():
    """Download the sequences in the table below and save them stripped.

    Each entry maps an output name to the identifier passed to
    ``fetch_nuccore``; currently the only entry is ``sars_cov2`` ->
    ``NC_045512``. The returned text is passed through ``utils.strip_fasta``
    and written to ``corona/<name>.dna``.

    The ``corona`` directory is created relative to the current working
    directory if it does not exist yet. Progress is reported with ``print``.
    """
    links = {"sars_cov2": "NC_045512"}

    # exist_ok avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs("corona", exist_ok=True)

    for fn, link in links.items():
        print(f"Downloading {fn}...")
        data = fetch_nuccore(link)
        print(f"Stripping {fn}...")
        data = utils.strip_fasta(data)
        with open(os.path.join("corona", f"{fn}.dna"), "w") as file:
            file.write(data)
def dog_genome():
    """Download the dog chromosomes and save them as stripped sequences.

    Chromosomes 1-38, X, Y and MT are assigned consecutive identifiers of the
    form ``CM<6-digit number>.1``, starting at ``CM025100.1`` for chromosome 1.
    Each identifier is passed to ``fetch_nuccore``; the returned text is passed
    through ``utils.strip_fasta`` and written to ``dog/chr<NAME>.dna``.

    The ``dog`` directory is created relative to the current working
    directory if it does not exist yet. Progress is reported with ``print``.
    """
    chromosomes = [*range(1, 39), "X", "Y", "MT"]
    # enumerate() supplies the running identifier number, replacing a
    # hand-maintained counter; ":06d" zero-pads it to six digits.
    links = {
        str(name): f"CM{number:06d}.1"
        for number, name in enumerate(chromosomes, start=25100)
    }

    # exist_ok avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs("dog", exist_ok=True)

    for fn, link in links.items():
        print(f"Downloading {fn}...")
        data = fetch_nuccore(link)
        print(f"Stripping {fn}...")
        data = utils.strip_fasta(data)
        with open(os.path.join("dog", f"chr{fn.upper()}.dna"), "w") as file:
            file.write(data)
def mouse_genome():
    """Download the mouse chromosomes and save them as stripped sequences.

    Chromosomes 1-19, X and Y are assigned consecutive identifiers of the form
    ``CM<6-digit number>.3``, starting at ``CM000994.3`` for chromosome 1; the
    MT entry uses the fixed identifier ``AY172335.1``. Each identifier is
    passed to ``fetch_nuccore``; the returned text is passed through
    ``utils.strip_fasta`` and written to ``mouse/chr<NAME>.dna``.

    The ``mouse`` directory is created relative to the current working
    directory if it does not exist yet. Progress is reported with ``print``.
    """
    chromosomes = [*range(1, 20), "X", "Y"]
    # enumerate() supplies the running identifier number, replacing a
    # hand-maintained counter; ":06d" zero-pads it to six digits.
    links = {
        str(name): f"CM{number:06d}.3"
        for number, name in enumerate(chromosomes, start=994)
    }
    # MT does not follow the consecutive numbering, so it is added explicitly.
    links["MT"] = "AY172335.1"

    # exist_ok avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs("mouse", exist_ok=True)

    for fn, link in links.items():
        print(f"Downloading {fn}...")
        data = fetch_nuccore(link)
        print(f"Stripping {fn}...")
        data = utils.strip_fasta(data)
        with open(os.path.join("mouse", f"chr{fn.upper()}.dna"), "w") as file:
            file.write(data)