Example No. 1
def execute_query(db, qfields=[], verbose=False):
    # Get query qfields list
    fields = db[0].findAll("field")
    # Prepare URL
    link = db[0].findAll("link")[0]["stern"]
    # Compile URL
    if link[:4]=='http':
        if db[0]["method"] == "POST":
            # Build the POST payload from every descriptor field
            data = {}
            i = 0
            for field in fields:
                data[field.text] = qfields[i]
                i += 1
            return helper.connectionError(link, data)
        elif db[0]["method"] == "GET":
            query_string = ""
            if db[0]["type"] != "text/csv":
                i = 0
                for field in fields:
                    # Optional per-field flag: report the lower-cased query value
                    if "lowercase" in field.attrs:
                        print(qfields[i].lower())
                    # Detect controller field (always the first field)
                    if field.text == "":
                        query_string += qfields[i] + "?"
                    # All other fields are query fields
                    else:
                        query_string += field.text + field["op"] + qfields[i] + "&"
                    i += 1
                query_string = query_string[:-1]
                link += query_string + \
                    db[0].findAll("link")[0]["aft"]
                if verbose: print(link)
            return helper.connectionError(link)
    else:
        return open(link)
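
A minimal usage sketch for execute_query, assuming the project's database-description.xml and helper module are available; the database name and query value below are illustrative placeholders, not entries guaranteed to exist in the descriptor file.

from bs4 import BeautifulSoup

# Hypothetical usage: load one <database> descriptor and query it.
# "oryzabase" and the gene ID are placeholders for illustration only.
db = BeautifulSoup(open("database-description.xml").read(), "xml").findAll(
    "database", dbname="oryzabase")
response = execute_query(db, qfields=["Os06g0654600"], verbose=True)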
Example No. 2
def planttfdb(MSUID):

    # Find the file
    url = 'http://planttfdb.cbi.pku.edu.cn/download.php'
    html_page = helper.connectionError(url)
    soup = BeautifulSoup(html_page.content, "lxml")
    # Find headers
    for search in soup.findAll('table', {"id": "oid_tfid"}):
        for linkfound in search.findAll('a'):
            if (linkfound.contents[0] == "Oryza sativa subsp. japonica"):
                link = 'http://planttfdb.cbi.pku.edu.cn/' + linkfound.get(
                    'href')
                break

    # Full name of the downloaded file, including the .gz extension
    filename = link.split("/")[-1]

    # Name of the decompressed file: strip .gz and append .txt
    uncompressName = filename[:-3] + ".txt"
    pathToFile = helper.formatPathToFile(uncompressName)

    # Download the file only if it does not already exist
    if not helper.existFile(pathToFile):
        # Fetch the file from the URL and decompress it
        r = helper.connectionError(link)
        decompressedFile = gzip.decompress(r.content)

        # Write the decompressed content to the .txt file
        with open(pathToFile, "wb") as f:
            f.write(decompressedFile)

    # Use the previously created .txt file
    with open(pathToFile, "r+b") as file:

        # Import file tab-delimited

        try:
            array = pd.read_csv(file, sep="\t", header=None)
        except pd.errors.EmptyDataError:
            # An empty file yields an empty frame with the expected columns
            array = pd.DataFrame(columns=["TF_ID", "Gene_ID", "Family"])
        # Name the columns
        array.columns = ["TF_ID", "Gene_ID", "Family"]

        data = array.loc[array['TF_ID'] == MSUID]

    # Match found on TF_ID: return the rows directly
    if not data.empty:
        return data

    # Otherwise retry the lookup on Gene_ID
    data = array.loc[array['Gene_ID'] == MSUID]
    if data.empty:
        return False

    hashmap = {"Family": data["Family"].values[0]}
    return hashmap
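
The download-and-decompress step above goes through the project's helper.connectionError wrapper. As a standalone sketch of the same technique, assuming plain requests in place of the wrapper:

import gzip
import requests


def fetch_and_decompress(url, out_path):
    # Download a gzipped file and write the decompressed bytes to disk.
    response = requests.get(url)
    response.raise_for_status()
    with open(out_path, "wb") as f:
        f.write(gzip.decompress(response.content))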
Example No. 3
def ic4r(RAPID):

    link = 'http://expression.ic4r.org/expression-api?term=' + RAPID + '#showtable'
    html_page = helper.connectionError(link)
    soup = BeautifulSoup(html_page.content, "lxml")
    # Find headers
    headers = []
    for head in soup.findAll('thead'):
        for row in head.findAll('tr'):
            for cell in row.findAll('th'):
                headers.append(cell.contents)
    content = []
    for body in soup.findAll('tbody'):
        for row in body.findAll('tr'):
            dict = {}
            i = 0
            for cell in row.findAll('td'):
                try:
                    dict[str(headers[i][0])] = cell.contents[0]
                except:
                    dict[str(headers[i][0])] = 'None'
                i = i + 1

            #content.append(dict)
            print(dict)
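
The nested loops above pair the <thead> headers with the <tbody> cells by index. A more compact sketch of the same idea, assuming lxml is installed for the parser; note that with zip, rows shorter than the header list simply omit the missing columns instead of filling 'None':

from bs4 import BeautifulSoup


def table_to_dicts(html):
    # Pair each <th> header with the <td> cells of every body row.
    soup = BeautifulSoup(html, "lxml")
    headers = [th.get_text(strip=True) for th in soup.select("thead th")]
    return [
        dict(zip(headers, (td.get_text(strip=True) for td in row.find_all("td"))))
        for row in soup.select("tbody tr")
    ]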
Example No. 4
def get_header(db, test_id):

    dict = {}
    # Database descriptor query
    database_descriptor = BeautifulSoup(
        open("database-description.xml").read(),
        "xml").findAll("database", {"type": "text/html", "dbname": db})
    if not database_descriptor:
        raise ValueError('Database Not Found')
    for database in database_descriptor:
        if database["method"]=="POST":
            link = database.findAll("link")[0]["stern"]
            data = {'rapId': test_id}
            res = helper.connectionErrorPost(link, data)
        elif database["method"]=="GET":
            link = database.findAll("link")[0]["stern"] + test_id + database.findAll("link")[0]["aft"]
            res = helper.connectionError(link)
        
        # Headers declaration
        headers = []
        for header in database.findAll("header"):
            headers.append(header.text)

        # Parse the response and locate the structure described by the descriptor
        ret = BeautifulSoup(res.content, "lxml")
        data_struct = database.findAll("data_struct")[0]
        data = ret.findAll(data_struct["indicator"],
                           {data_struct["identifier"]: data_struct["identification_string"]})[0]
        # Header detection
        for header in data.findAll('th'):
            header = header.text.replace('\r', '')
            header = header.replace('\n', '')
            header = header.replace('\t', '')
            headers.append(header)
        dict[database["dbname"]]=headers

    return dict
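
The attributes read above (dbname, method, the link stern/aft pair, header, and the data_struct fields) imply a descriptor layout roughly like the hypothetical entry below; this is an illustration only, not the project's actual database-description.xml.

from bs4 import BeautifulSoup

# Hypothetical descriptor entry; attribute names match what get_header() reads.
EXAMPLE_DESCRIPTOR = """
<databases>
  <database dbname="exampledb" method="GET" type="text/html">
    <link stern="http://example.org/search?id=" aft="&amp;format=html"/>
    <header>ID</header>
    <data_struct indicator="table" identifier="class"
                 identification_string="result"
                 line_separator="tr" cell_separator="td"/>
  </database>
</databases>
"""

descriptor = BeautifulSoup(EXAMPLE_DESCRIPTOR, "xml").findAll(
    "database", {"type": "text/html", "dbname": "exampledb"})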
Example No. 5
def gramene(RAPID):
    """
    Query the Gramene genes API for the given RAP identifier.

    :param RAPID: RAP-DB gene identifier
    :type RAPID: String
    """

    # Query the genes endpoint and return the decoded response body
    link = 'http://data.gramene.org/v53/genes?q=' + RAPID + '&bedFeature=gene&bedCombiner=canonical'
    html_page = helper.connectionError(link)
    return html_page.content.decode('UTF-8')
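
A minimal usage sketch; the identifier is the sample RAP ID that appears in the rapdb example further down this page.

# Hypothetical call with an illustrative RAP-DB identifier.
print(gramene("Os06g0654600"))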
Example No. 6
def loadFileURL(nameFile, url):
    """
    Download the file at the given RAP-DB download-page URL and save it to nameFile.
    """

    # Fetch the file from the URL
    html_page = helper.connectionError(url)

    # Write the downloaded content to the file
    with open(nameFile, "wb") as f:
        f.write(html_page.content)
        print("File created")
Example No. 7
File: msu.py Project: meta00/rRice
def msu(id):

    link = "http://rice.plantbiology.msu.edu/cgi-bin/sequence_display.cgi?orf=" + id
    html_page = helper.connectionError(link)
    soup = BeautifulSoup(html_page.content, "lxml")

    headers = ["Genomic Sequence", "CDS", "Protein"]
    dict = {}
    i = 0
    for search in soup.findAll('pre'):
        dataFormat = search.text.replace('>' + id, '')
        dataFormat = dataFormat.replace('\n', '')
        dict[headers[i]] = dataFormat
        i = i + 1

    return dict
Example No. 8
def snpSeek(contig, start, end):

    Log = open('log.txt', 'w')
    url = 'http://snp-seek.irri.org/ws/genomics/gene/osnippo/'
    u = ''
    model = '&model=rap\n'  #'&model=msu7\n'
    data = []

    Log.write(url + contig + '?' + 'start=' + start + '&end=' + end +
              '&model=msu7\n')
    try:
        #u = urllib.urlopen(url + contig + '?' + 'start='+ start + '&end='+ end+'&model=msu7\n')
        urlFind = url + contig + '?' + 'start=' + start + '&end=' + end + '&model=msu7'
        r = helper.connectionError(urlFind)
        # the response content is bytes, not str, hence the decode
        data = json.loads(r.content.decode('UTF-8'))
    except:
        # Log the failing request before returning the empty default
        Log.write(url + contig + '?' + 'start=' + start + '&end=' + end +
                  '&model=msu7\n')
    locus = contig + ':' + start + '-' + end
    Log.close()

    # Return the parsed result as a list
    return data
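
For comparison, a standalone sketch of the same SNP-Seek request using plain requests and its built-in JSON decoding, assuming the service is reachable and returns JSON:

import requests


def snpseek_genes(contig, start, end, model="msu7"):
    # Query the SNP-Seek gene service directly and parse the JSON response.
    url = ('http://snp-seek.irri.org/ws/genomics/gene/osnippo/' + contig
           + '?start=' + start + '&end=' + end + '&model=' + model)
    response = requests.get(url)
    response.raise_for_status()
    return response.json()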
Example No. 9
def rapdb(RAPID):
    #Parameters
    # RAPID_valide = "Os06g0654600"
    #End parameters
    link = "http://rapdb.dna.affrc.go.jp/tools/search/run?keyword=" + RAPID + "&submit=Search&id=on&size=10"

    html_page = helper.connectionError(link)

    soup = BeautifulSoup(html_page.content, "lxml")
    result = soup.find('tr', attrs={"class": "result"})
    hashmap = {}
    try:
        rapid = result.find('td', attrs={"class": "c01"}).a.contents[0]
    except:
        print("Error : empty ID")
        rapid = RAPID
    try:
        description = result.find('td', attrs={"class": "c02"}).contents[0]
    except:
        print("Error : empty description")
        description = ""
    try:
        position = result.find('td', attrs={"class": "c03"}).a.contents[0]
    except:
        print("Error : empty position")
        position = ""
    try:
        RAP_symbol = result.find('td', attrs={"class": "c04"}).contents[0]
    except:
        print("Error : empty RAP symbol")
        RAP_symbol = ""
    try:
        RAP_name = result.find('td', attrs={"class": "c05"}).contents[0]
    except:
        print("Error : empty RAP_name")
        RAP_name = ""
    try:
        CGSNL_symbol = result.find('td', attrs={"class": "c06"}).contents[0]
    except:
        print("Error : empty CGSNL_symbol")
        CGSNL_symbol = ""
    try:
        CGSNL_name = result.find('td', attrs={"class": "c07"}).contents[0]
    except:
        print("Error : empty CGSNL_name")
        CGSNL_name = ""
    try:
        Oryzabase_symbol = result.find('td', attrs={
            "class": "c08"
        }).contents[0]
    except:
        print("Error : empty Oryzabase_symbol")
        Oryzabase_symbol = ""
    try:
        Oryzabase_name = result.find('td', attrs={"class": "c09"}).contents[0]
    except:
        print("Error : empty Oryzabase_name")
        Oryzabase_name = ""

    hashmap = {
        "ID": rapid,
        "Description": description,
        "Position": position,
        "RAP-DB Gene Symbol Synonym(s)": RAP_symbol,
        "RAP-DB Gene Name Synonym(s)": RAP_name,
        "CGSNL Gene Symbol": CGSNL_symbol,
        "CGSNL Gene Name": CGSNL_name,
        "Oryzabase Gene Symbol Synonym(s)": Oryzabase_symbol,
        "Oryzabase Gene Name Synonym(s)": Oryzabase_name
    }

    return json.dumps(hashmap)
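
The nine near-identical try/except blocks above could be collapsed with a small helper. The sketch below is a hypothetical refactoring (not part of the project) that keeps the same fall-back-to-a-default behaviour:

def cell_text(row, css_class, default="", use_anchor=False):
    # Return the first content of the matching <td> (or of its <a> child),
    # falling back to the default when the cell or its contents are missing.
    try:
        cell = row.find('td', attrs={"class": css_class})
        node = cell.a if use_anchor else cell
        return node.contents[0]
    except (AttributeError, IndexError):
        return default

# Hypothetical usage against the result row parsed above:
# rapid = cell_text(result, "c01", default=RAPID, use_anchor=True)
# description = cell_text(result, "c02")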
Example No. 10
def query(db, qfields=[]):

    # Database descriptor query
    database_descriptor = BeautifulSoup(
        open("database-description.xml").read(),
        "xml").findAll("database", dbname=db.lower())
    if not database_descriptor:
        raise ValueError('Database Not Found')

    # Prepare URL
    link = database_descriptor[0].findAll("link")[0]["stern"]
    # Get Headers list
    headers = []
    for header in database_descriptor[0].findAll("header"):
        headers.append(header.text)
    # Get query qfields list
    fields = []
    for field in database_descriptor[0].findAll("field"):
        fields.append(field.text)

    if database_descriptor[0]["method"] == "POST":
        # Build the POST payload from every descriptor field
        data = {}
        i = 0
        for field in fields:
            data[field] = qfields[i]
            i += 1
        res = helper.connectionError(link, data)
    elif database_descriptor[0]["method"] == "GET":
        query_string = ""
        if database_descriptor[0]["type"] != "text/csv":
            i = 0
            for field in fields:
                # Detect controller field (always first field)
                if field == "":
                    query_string += qfields[i] + "?"
                # All other fields are query fields
                else:
                    query_string += field + "=" + qfields[i] + "&"
                i += 1
            query_string = query_string[:-1]
            link += query_string + \
                database_descriptor[0].findAll("link")[0]["aft"]
        res = helper.connectionError(link)

    # Handle HTML based query
    if (database_descriptor[0]["type"] == "text/html"):
        # Handling Connection
        ret = BeautifulSoup(res.content, "lxml")

        data_struct = database_descriptor[0].findAll("data_struct")[0]
        data = ret.findAll(data_struct["indicator"],
                           {data_struct["identifier"]: data_struct["identification_string"]})
        result = []
        count = 0
        if data != []:
            regex = re.compile(
                database_descriptor[0].findAll("prettify")[0].text,
                re.IGNORECASE)
            replaceBy = database_descriptor[0].findAll(
                "prettify")[0]['replaceBy']
            for dataLine in data[0].findAll(data_struct["line_separator"]):
                dict = {}
                i = 0
                for dataCell in dataLine.findAll(data_struct["cell_separator"]):
                    #dataNearly = re.sub(r'\xa0',' ', dataCell.text)
                    #dataFormat = regex.sub("", dataNearly)
                    dataFormat = regex.sub(replaceBy, dataCell.text)
                    dict[headers[i]] = dataFormat
                    i += 1
                if dict == {}:
                    continue
                result.append(dict)
        return result
    # Handle JSON based query
    elif (database_descriptor[0]["type"] == "text/JSON"):
        # Return as a List of Dictionary
        return json.loads(res.content.decode("UTF-8"))
    # Handle csv based DB
    if (database_descriptor[0]["type"] == "text/csv"):
        ret = csv.reader(res.content.decode(
            database_descriptor[0]["encoding"]).splitlines(),
                         delimiter=list(database_descriptor[0]["deli"])[0],
                         quoting=csv.QUOTE_NONE)
        data = []
        for row in ret:
            i = 0
            dict = {}
            for header in headers:
                dict[header] = row[i]
                i += 1
            f = 0
            for field in fields:
                if (dict[field] == qfields[f]) and (qfields[f] != ""):
                    data.append(dict)
                f += 1
        return data
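
Finally, a hypothetical call to query; the database name must match a dbname attribute declared in database-description.xml, and the identifier is the sample RAP ID from the rapdb example above.

# Hypothetical usage: assumes "rapdb" is declared in database-description.xml.
results = query("rapdb", qfields=["Os06g0654600"])
for row in results:
    print(row)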