def planttfdb(MSUID):
    """Look up *MSUID* in the PlantTFDB transcription-factor table for
    Oryza sativa subsp. japonica.

    Downloads and caches the species file on first use, then searches it.

    Returns:
        - a DataFrame of matching rows when *MSUID* matches ``TF_ID``;
        - a dict ``{"Family": <family>}`` when it only matches ``Gene_ID``;
        - ``False`` when nothing matches or the file cannot be read.
    """
    # Locate the per-species download link on the PlantTFDB download page.
    url = 'http://planttfdb.cbi.pku.edu.cn/download.php'
    html_page = helper.connectionError(url)
    soup = BeautifulSoup(html_page.content, "lxml")
    link = None
    for search in soup.findAll('table', {"id": "oid_tfid"}):
        for linkfound in search.findAll('a'):
            if linkfound.contents[0] == "Oryza sativa subsp. japonica":
                link = 'http://planttfdb.cbi.pku.edu.cn/' + linkfound.get('href')
                break
        if link is not None:
            break
    if link is None:
        # FIX: previously an unbound `link` raised NameError when the
        # page layout changed or the species row was missing.
        return False
    # Full name of the remote file, with the .gz extension.
    filename = link.split("/")[-1]
    # Name of the local decompressed copy (.txt).
    uncompressName = filename[:-3] + ".txt"
    pathToFile = helper.formatPathToFile(uncompressName)
    if not helper.existFile(pathToFile):
        # Fetch the archive and decompress it into the cache file.
        r = helper.connectionError(link)
        decompressedFile = gzip.decompress(r.content)
        with open(pathToFile, "wb") as f:
            f.write(decompressedFile)
    # Read the tab-delimited cache file created above.
    with open(pathToFile, "rb") as file:
        try:
            array = pd.read_csv(file, sep="\t", header=None)
        except pd.errors.EmptyDataError:
            # FIX: the original caught pd.io.common.EmptyError, which does
            # not exist (AttributeError at catch time); an empty DataFrame
            # would also have crashed the columns assignment below.
            return False
    array.columns = ["TF_ID", "Gene_ID", "Family"]
    data = array.loc[array['TF_ID'] == MSUID]
    if not data.empty:
        return data
    data = array.loc[array['Gene_ID'] == MSUID]
    if data.empty:
        return False
    return {"Family": data["Family"].values[0]}
def oryzabaseRapId(RAPID):
    """Return the Oryzabase gene-list rows whose 'RAP ID' equals *RAPID*.

    Downloads the gene list once and caches it as OryzabaseGeneListEn.txt;
    subsequent calls reuse the cached file.

    Returns an (possibly empty) DataFrame of matching rows.
    """
    pathToFile = helper.formatPathToFile("OryzabaseGeneListEn.txt")
    if not helper.existFile(pathToFile):
        loadFileURL(
            pathToFile,
            "https://shigen.nig.ac.jp/rice/oryzabase/gene/download?classtag=GENE_EN_LIST"
        )
    else:
        print("File already exist")
    print("Find file OK")
    # Import the tab-delimited file.
    try:
        array = pd.read_csv(pathToFile, sep="\t", encoding='utf-8')
    except pd.errors.EmptyDataError:
        # FIX: the original caught NameError, which read_csv never raises;
        # an empty file raises EmptyDataError.  Returning here also avoids
        # the KeyError the empty-DataFrame fallback used to cause below.
        return pd.DataFrame()
    data = array.loc[array['RAP ID'] == RAPID]
    return data
def oryzabaseCGSNL(CGSNL):
    """Return the Oryzabase gene-list rows whose 'CGSNL Gene Name' equals
    *CGSNL*.

    Downloads the gene list once and caches it as OryzabaseGeneListEn.txt;
    subsequent calls reuse the cached file.

    Returns an (possibly empty) DataFrame of matching rows.
    """
    pathToFile = helper.formatPathToFile("OryzabaseGeneListEn.txt")
    if not helper.existFile(pathToFile):
        loadFileURL(
            pathToFile,
            "https://shigen.nig.ac.jp/rice/oryzabase/gene/download?classtag=GENE_EN_LIST"
        )
    else:
        print("File already exist")
    print("Find file OK")
    # Import the tab-delimited file.
    try:
        array = pd.read_csv(pathToFile, sep="\t", encoding='utf-8')
    except pd.errors.EmptyDataError:
        # FIX: the original caught NameError, which read_csv never raises;
        # an empty file raises EmptyDataError.  Returning here also avoids
        # the KeyError the empty-DataFrame fallback used to cause below.
        return pd.DataFrame()
    print(array)
    print("Find by CGSNL Gene Name")
    data = array.loc[array['CGSNL Gene Name'] == CGSNL]
    return data
def main(): pathScript = sys.argv[0] contig = sys.argv[1] if len(contig) < 2: contig = 'chr0' + contig # test if for 10 - 11 - 12 else: contig = 'chr' + contig start = sys.argv[2] end = sys.argv[3] db = sys.argv[4] dataSnp = snpSeek.snpSeek(contig, start, end) id = sys.argv[5] if (db == "1"): dataRapdb = rapdb.rapdb(id) print(dataRapdb) elif (db == "call_snpSeek"): for i in range(0, len(dataSnp)): print(dataSnp[i]) elif (db == "2"): dataGramene = gramene.gramene(id) print(dataGramene) elif (db == "3"): dataOryzabase = oryzabase.oryzabase(id) print(dataOryzabase) elif (db == "4"): ic4r.ic4r(id) elif (db == "5"): dataPlanttfdb = planttfdb.planttfdb(id) print(dataPlanttfdb) # LOC_xxxxxxxx elif (db == "6"): dataPlntfdb = plntfdb.plntfdb(id) print(dataPlntfdb) elif (db == "7"): dataFunricegenes = funricegenes.funricegenes(id) print(dataFunricegenes) elif (db == "8"): dataFunricegenes2 = funricegenes.funricegenes2(id) print(dataFunricegenes2) elif (db == "9"): dataFunricegenes3 = funricegenes.funricegenes3(id) print(dataFunricegenes3) elif (db == "10"): dataMsu = msu.msu(id) print(dataMsu) elif (db == "10.1"): dataMsu = msu.msu_orf(id) print(dataMsu) # Ecriture fichier a revoir !!!!!!!!! 
pour les id et hashmap[iricname] et hashmpap [raprepname] elif (db == "13"): url = "http://rapdb.dna.affrc.go.jp/download/archive/RAP-MSU_2017-04-14.txt.gz" filename = url.split("/")[-1] # Give the name of the file without .gz pathToFile = helper.formatPathToFile(filename[:-3]) if (not os.path.isfile(pathToFile)): # Fetch the file by the url and decompress it r = requests.get(url) decompressedFile = gzip.decompress(r.content) # Create the file .txt with open(pathToFile, "w") as f: f.write(decompressedFile) f.close() newFile = helper.formatPathToFile("geneID.txt") with open(newFile, "a") as f: # Import file tab-delimited try: array = pd.read_csv(pathToFile, sep="\t", header=None) except: array = pd.DataFrame() # Named columns array.columns = ["RAP", "LOC"] # Find the line corresponding to the entered RAP ID (Select LOC FROM LOC where RAP = RapID) data = array.loc[array['RAP'] == id] #data.loc[:, 'iricname'] = hashmap['iricname'] # Store the corresponding LOC ID and split the string print(data['iricname']) data.to_csv(f, sep='\t') f.close() # Plage chromosome # Cree le fichier fileID.txt elif (db == "11"): snpSeekAll.snpSeekAll("Os12:1..27,531,856") snpSeekAll.snpSeekAll("Os02:1..35,937,250") snpSeekAll.snpSeekAll("Os03:1..36,413,819") snpSeekAll.snpSeekAll("Os04:1..35,502,694") snpSeekAll.snpSeekAll("Os05:1..29,958,434") snpSeekAll.snpSeekAll("Os06:1..31,248,787") snpSeekAll.snpSeekAll("Os07:1..29,697,621") snpSeekAll.snpSeekAll("Os08:1..28,443,022") snpSeekAll.snpSeekAll("Os09:1..23,012,720") snpSeekAll.snpSeekAll("Os10:1..23,207,287") snpSeekAll.snpSeekAll("Os11:1..29,021,106") snpSeekAll.snpSeekAll("Os12:1..27,531,856") # Return the SnpSeek Call elif (db == "12"): print(dataSnp) """