Example #1
def planttfdb(MSUID):

    # Find the file
    url = 'http://planttfdb.cbi.pku.edu.cn/download.php'
    html_page = helper.connectionError(url)
    soup = BeautifulSoup(html_page.content, "lxml")
    # Find the download link for Oryza sativa subsp. japonica in the species table
    for search in soup.findAll('table', {"id": "oid_tfid"}):
        for linkfound in search.findAll('a'):
            if (linkfound.contents[0] == "Oryza sativa subsp. japonica"):
                link = 'http://planttfdb.cbi.pku.edu.cn/' + linkfound.get(
                    'href')
                break

    # Full name of the file, including the .gz extension
    filename = link.split("/")[-1]

    # Name for the decompressed file (strip .gz, append .txt)
    uncompressName = filename[:-3] + ".txt"
    pathToFile = helper.formatPathToFile(uncompressName)

    # Check whether the file already exists
    if (not helper.existFile(pathToFile)):
        # Fetch the file by the url and decompress it
        r = helper.connectionError(link)
        decompressedFile = gzip.decompress(r.content)

        # Create the .txt file
        with open(pathToFile, "wb") as f:
            f.write(decompressedFile)

    # Use the previously created .txt file
    with open(pathToFile, "rb") as file:

        # Import the tab-delimited file
        try:
            array = pd.read_csv(
                file, sep="\t", header=None,
                names=["TF_ID", "Gene_ID", "Family"])
        except pd.errors.EmptyDataError:
            # Empty file: keep the expected columns so the lookups below still work
            array = pd.DataFrame(columns=["TF_ID", "Gene_ID", "Family"])

        data = array.loc[array['TF_ID'] == MSUID]

    if not data.empty:
        return data

    data = array.loc[array['Gene_ID'] == MSUID]
    if data.empty:
        return False

    hashmap = {"Family": data["Family"].values[0]}
    return hashmap
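# A minimal usage sketch, not part of the original code: the MSU locus ID below
# is illustrative only, and the call assumes the helper module and network
# access to PlantTFDB are available.
if __name__ == "__main__":
    result = planttfdb("LOC_Os01g01010")
    if result is False:
        print("No PlantTFDB entry found for this ID")
    else:
        # Either the rows matching TF_ID or a {"Family": ...} dict
        print(result)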
Example #2
def oryzabaseRapId(RAPID):
    pathToFile = helper.formatPathToFile("OryzabaseGeneListEn.txt")
    if not helper.existFile(pathToFile):
        loadFileURL(
            pathToFile,
            "https://shigen.nig.ac.jp/rice/oryzabase/gene/download?classtag=GENE_EN_LIST"
        )
    else:
        print("File already exists")
    print("File found OK")

    # Import the tab-delimited file
    try:
        array = pd.read_csv(pathToFile, sep="\t", encoding='utf-8')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        array = pd.DataFrame()

    #array.columns = ['Trait Id', 'CGSNL Gene Symbol', 'Gene symbol synonym(s)', ' CGSNL Gene Name', 'Gene name synonym(s)', 'Protein Name', 'Allele', 'Chromosome No.', 'Explanation', 'Trait Class', 'RAP ID', 'GrameneId', 'Arm', 'Locate(cM)', 'Gene Ontology', 'Trait Ontology', 'Plant Ontology']

    data = array.loc[array['RAP ID'] == RAPID]

    return data
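# A minimal usage sketch, not part of the original code: the RAP ID below is
# illustrative only; the first call downloads OryzabaseGeneListEn.txt.
if __name__ == "__main__":
    rows = oryzabaseRapId("Os01g0100100")
    print(rows if not rows.empty else "No Oryzabase entry for this RAP ID")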
Example #3
def oryzabaseCGSNL(CGSNL):
    pathToFile = helper.formatPathToFile("OryzabaseGeneListEn.txt")
    if not helper.existFile(pathToFile):
        loadFileURL(
            pathToFile,
            "https://shigen.nig.ac.jp/rice/oryzabase/gene/download?classtag=GENE_EN_LIST"
        )
    else:
        print("File already exists")
    print("File found OK")

    # Import the tab-delimited file
    try:
        array = pd.read_csv(pathToFile, sep="\t", encoding='utf-8')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        array = pd.DataFrame()

    print(array)

    print("Find by CGSNL Gene Name")
    data = array.loc[array['CGSNL Gene Name'] == CGSNL]
    return data
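# A minimal usage sketch, not part of the original code: the gene name below is
# illustrative and must match the 'CGSNL Gene Name' column exactly.
if __name__ == "__main__":
    rows = oryzabaseCGSNL("SEMIDWARF 1")
    print(rows)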
Example #4
def main():
    pathScript = sys.argv[0]
    contig = sys.argv[1]
    if len(contig) < 2:
        contig = 'chr0' + contig  # pad single-digit chromosome numbers; 10-12 need no padding
    else:
        contig = 'chr' + contig
    start = sys.argv[2]
    end = sys.argv[3]
    db = sys.argv[4]

    dataSnp = snpSeek.snpSeek(contig, start, end)

    id = sys.argv[5]

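    # Dispatch on the db argument: 1=rapdb, 2=gramene, 3=oryzabase, 4=ic4r,
    # 5=planttfdb, 6=plntfdb, 7-9=funricegenes, 10/10.1=msu, 11=snpSeekAll over
    # every chromosome, 12=print the snpSeek call above, 13=RAP-MSU ID mapping,
    # "call_snpSeek"=print each SNP row.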
    if (db == "1"):
        dataRapdb = rapdb.rapdb(id)
        print(dataRapdb)

    elif (db == "call_snpSeek"):
        for i in range(0, len(dataSnp)):
            print(dataSnp[i])

    elif (db == "2"):
        dataGramene = gramene.gramene(id)
        print(dataGramene)

    elif (db == "3"):
        dataOryzabase = oryzabase.oryzabase(id)
        print(dataOryzabase)

    elif (db == "4"):
        ic4r.ic4r(id)

    elif (db == "5"):
        dataPlanttfdb = planttfdb.planttfdb(id)
        print(dataPlanttfdb)

    # LOC_xxxxxxxx
    elif (db == "6"):
        dataPlntfdb = plntfdb.plntfdb(id)
        print(dataPlntfdb)

    elif (db == "7"):
        dataFunricegenes = funricegenes.funricegenes(id)
        print(dataFunricegenes)

    elif (db == "8"):
        dataFunricegenes2 = funricegenes.funricegenes2(id)
        print(dataFunricegenes2)

    elif (db == "9"):
        dataFunricegenes3 = funricegenes.funricegenes3(id)
        print(dataFunricegenes3)

    elif (db == "10"):
        dataMsu = msu.msu(id)
        print(dataMsu)

    elif (db == "10.1"):
        dataMsu = msu.msu_orf(id)
        print(dataMsu)

        # File writing still to be reworked for the ids and hashmap['iricname'] / hashmap['raprepname']
    elif (db == "13"):
        url = "http://rapdb.dna.affrc.go.jp/download/archive/RAP-MSU_2017-04-14.txt.gz"
        filename = url.split("/")[-1]

        # Give the name of the file without .gz
        pathToFile = helper.formatPathToFile(filename[:-3])

        if (not os.path.isfile(pathToFile)):
            # Fetch the file by the url and decompress it
            r = requests.get(url)
            decompressedFile = gzip.decompress(r.content)
            # Create the .txt file (write bytes, so open in binary mode)
            with open(pathToFile, "wb") as f:
                f.write(decompressedFile)
        newFile = helper.formatPathToFile("geneID.txt")
        with open(newFile, "a") as f:
            # Import the tab-delimited file
            try:
                array = pd.read_csv(
                    pathToFile, sep="\t", header=None, names=["RAP", "LOC"])
            except pd.errors.EmptyDataError:
                array = pd.DataFrame(columns=["RAP", "LOC"])

            # Find the line corresponding to the entered RAP ID (Select LOC FROM LOC where RAP = RapID)
            data = array.loc[array['RAP'] == id]
            #data.loc[:, 'iricname'] = hashmap['iricname']

            # Print and store the corresponding LOC ID
            print(data['LOC'])
            data.to_csv(f, sep='\t')

    # Chromosome ranges
    # Creates the fileID.txt file
    elif (db == "11"):
        snpSeekAll.snpSeekAll("Os12:1..27,531,856")
        snpSeekAll.snpSeekAll("Os02:1..35,937,250")
        snpSeekAll.snpSeekAll("Os03:1..36,413,819")
        snpSeekAll.snpSeekAll("Os04:1..35,502,694")
        snpSeekAll.snpSeekAll("Os05:1..29,958,434")
        snpSeekAll.snpSeekAll("Os06:1..31,248,787")
        snpSeekAll.snpSeekAll("Os07:1..29,697,621")
        snpSeekAll.snpSeekAll("Os08:1..28,443,022")
        snpSeekAll.snpSeekAll("Os09:1..23,012,720")
        snpSeekAll.snpSeekAll("Os10:1..23,207,287")
        snpSeekAll.snpSeekAll("Os11:1..29,021,106")
        snpSeekAll.snpSeekAll("Os12:1..27,531,856")

    # Return the SnpSeek Call
    elif (db == "12"):
        print(dataSnp)
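# A minimal invocation sketch, not part of the original code: the script name
# and argument values are illustrative. argv layout: chromosome, start, end,
# db code, gene/locus ID, e.g.:
#   python query.py 1 2827900 2830000 1 Os01g0100100
if __name__ == "__main__":
    main()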
    """