Exemplo n.º 1
0
def _bing_api_ranking(url, keyword, server_info):
    """Find the position of ``url`` in the Bing results for ``keyword``.

    Queries the Azure Datamarket Bing Web endpoint for two pages of 50
    results and stops at the first result whose URL matches.

    Args:
        url: Prefix to look for. It is compiled as a regular expression and
            matched against the start of every result URL.
        keyword: Search phrase; it is UTF-8 encoded and URL-quoted into the
            query string.
        server_info: Mapping whose 'market' entry is sent as the ``Market``
            parameter.

    Returns:
        A dict. It is empty when no page contained results. Otherwise
        'total' is None, and 'rank' (1-based position) and 'url' are set
        only when a result matched.

    Raises:
        Exception: if a fetch returns a status code other than 200.
    """
    # NOTE(review): ``url`` is used as a pattern without re.escape(), so
    # characters such as '.', '?' or '+' act as regex operators -- confirm
    # whether callers rely on passing patterns before escaping it.
    url_regex = re.compile("^" + url)
    rank_infos = {}
    counter = 0

    # Base endpoint plus the parameters that stay the same for every page.
    base_api_url = 'https://api.datamarket.azure.com/Data.ashx/Bing/SearchWeb/v1/Web?'
    base_api_url += "$format=json&"
    # Market is the language-region code; %27 is a URL-encoded single quote.
    base_api_url += "Market=%27" + server_info['market'] + "%27&"
    base_api_url += "Query=%27" + urllib.quote_plus(
        keyword.encode("UTF-8")) + "%27&"

    # Basic authentication with an empty user name and the account key as
    # password. The header embeds the key, so it is deliberately not logged.
    headers = {
        "Authorization":
        "Basic %s" % base64.b64encode(":" + config.azure_account_id)
    }

    # Fetch 2x50 results and try to match url
    num = 50
    for i in range(0, 2):
        start = i * num
        api_url = base_api_url + "$skip=" + unicode(
            start) + "&$top=" + unicode(num)
        # 3600 and 600 are passed straight to the caching helper --
        # presumably cache lifetimes; confirm against urlfetch_with_cache.
        result = urlfetch_with_cache(api_url, 3600, 600, headers)
        if result['status_code'] != 200:
            raise Exception("Invalid response (url = <" + api_url +
                            ">, status code = " +
                            unicode(result['status_code']) +
                            " and content = <" + unicode(result['content']) +
                            ">)")
        response = json.loads(result['content'])
        logging.info(response)
        if not response.get('d'):
            continue
        results = response['d'].get('results')
        if not results:
            continue
        # Total number of results is not available anymore via the API
        rank_infos['total'] = None
        for r in results:
            logging.info(r)
            # The counter keeps running across pages, so the rank is global.
            counter += 1
            if url_regex.match(r['Url']):
                rank_infos['rank'] = counter
                rank_infos['url'] = r['Url']
                return rank_infos
    return rank_infos
Exemplo n.º 2
0
def _bing_api_ranking(url, keyword, server_info):
    """Return where ``url`` ranks in the Bing web results for ``keyword``.

    Two pages of 50 results are requested from the Azure Datamarket Bing Web
    endpoint; the search stops at the first result whose URL matches.

    Args:
        url: Prefix to look for. It is compiled as a regular expression and
            matched against the start of every result URL.
        keyword: Search phrase; it is UTF-8 encoded and URL-quoted into the
            query string.
        server_info: Mapping whose "market" entry is sent as the ``Market``
            parameter.

    Returns:
        A dict. It is empty when no page contained results. Otherwise
        "total" is None, and "rank" (1-based position) and "url" are set
        only when a result matched.

    Raises:
        Exception: if a fetch returns a status code other than 200.
    """
    # NOTE(review): ``url`` is used as a pattern without re.escape(), so
    # characters such as ".", "?" or "+" act as regex operators -- confirm
    # whether callers rely on passing patterns before escaping it.
    url_regex = re.compile("^" + url)
    rank_infos = {}
    counter = 0

    # Base endpoint plus the parameters that stay the same for every page.
    base_api_url = "https://api.datamarket.azure.com/Data.ashx/Bing/SearchWeb/v1/Web?"
    base_api_url += "$format=json&"
    # Market is the language-region code; %27 is a URL-encoded single quote.
    base_api_url += "Market=%27" + server_info["market"] + "%27&"
    base_api_url += "Query=%27" + urllib.quote_plus(keyword.encode("UTF-8")) + "%27&"

    # Basic authentication with an empty user name and the account key as
    # password. The header embeds the key, so it is deliberately not logged.
    headers = {"Authorization": "Basic %s" % base64.b64encode(":" + config.azure_account_id)}

    # Fetch 2x50 results and try to match url
    num = 50
    for i in range(0, 2):
        start = i * num
        api_url = base_api_url + "$skip=" + unicode(start) + "&$top=" + unicode(num)
        # 3600 and 600 are passed straight to the caching helper --
        # presumably cache lifetimes; confirm against urlfetch_with_cache.
        result = urlfetch_with_cache(api_url, 3600, 600, headers)
        if result["status_code"] != 200:
            raise Exception(
                "Invalid response (url = <"
                + api_url
                + ">, status code = "
                + unicode(result["status_code"])
                + " and content = <"
                + unicode(result["content"])
                + ">)"
            )
        response = json.loads(result["content"])
        logging.info(response)
        if not response.get("d"):
            continue
        results = response["d"].get("results")
        if not results:
            continue
        # Total number of results is not available anymore via the API
        rank_infos["total"] = None
        for r in results:
            logging.info(r)
            # The counter keeps running across pages, so the rank is global.
            counter += 1
            if url_regex.match(r["Url"]):
                rank_infos["rank"] = counter
                rank_infos["url"] = r["Url"]
                return rank_infos
    return rank_infos
Exemplo n.º 3
0
def _google_ajax_api_ranking(url, keyword, server_info):
    """Find the position of ``url`` in the Google AJAX results for ``keyword``.

    Requests up to 16 pages of 4 results from the Google AJAX Search API and
    stops at the first result whose unescaped URL matches.

    Args:
        url: Prefix to look for. It is compiled as a regular expression and
            matched against the start of every result URL.
        keyword: Search phrase; it is UTF-8 encoded and URL-quoted into the
            query string.
        server_info: Mapping whose 'gl' entry is sent as the ``gl`` (country)
            parameter.

    Returns:
        A dict. 'total' is set from the first page when it carries
        ``responseData`` (the estimated result count, or None when it cannot
        be read); 'rank' (1-based position) and 'url' are set only when a
        result matched.

    Raises:
        Exception: if a fetch returns a status code other than 200.
    """
    # NOTE(review): ``url`` is used as a pattern without re.escape(), so
    # characters such as '.', '?' or '+' act as regex operators -- confirm
    # whether callers rely on passing patterns before escaping it.
    url_regex = re.compile("^" + url)
    rank_infos = {}
    counter = 0

    base_api_url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&'

    # The API key is optional: any problem reading or quoting it (e.g. the
    # setting is not defined in config) just leaves the key out. Catching
    # Exception rather than everything lets SystemExit/KeyboardInterrupt
    # propagate.
    try:
        if config.google_ajax_api_key:
            base_api_url += "key=" + urllib.quote_plus(
                config.google_ajax_api_key) + "&"
    except Exception:
        pass

    # Set country param
    base_api_url += "gl=" + server_info['gl'] + "&"

    # Each call to google AJAX Search API returns 4 results.
    # We are allowed to fetch 64 results (16 pages)
    num = 4
    quoted_keyword = urllib.quote_plus(keyword.encode("UTF-8"))
    for i in range(0, 16):
        start = i * num
        api_url = base_api_url + "start=" + unicode(
            start) + "&q=" + quoted_keyword
        # 3600 and 600 are passed straight to the caching helper --
        # presumably cache lifetimes; confirm against urlfetch_with_cache.
        result = urlfetch_with_cache(api_url, 3600, 600)
        if result['status_code'] != 200:
            # Report the URL that was actually fetched; the previous code
            # referenced an undefined name here and raised NameError instead.
            raise Exception("Invalid response (url = <" + api_url +
                            ">, status code = " +
                            unicode(result['status_code']) +
                            " and content = <" + unicode(result['content']) +
                            ">)")
        response = json.loads(result['content'])
        if not response.get('responseData'):
            continue
        if i == 0:
            # Only the first page is used for the estimated total; a missing
            # or malformed cursor yields None instead of failing the lookup.
            try:
                rank_infos['total'] = long(
                    response['responseData']['cursor'].get(
                        'estimatedResultCount'))
            except Exception:
                rank_infos['total'] = None
        results = response['responseData'].get('results')
        if not results:
            continue
        for r in results:
            # The counter keeps running across pages, so the rank is global.
            counter += 1
            if url_regex.match(r['unescapedUrl']):
                rank_infos['rank'] = counter
                rank_infos['url'] = r['unescapedUrl']
                return rank_infos
    return rank_infos
Exemplo n.º 4
0
def _google_ajax_api_ranking(url, keyword, server_info):
    """Return where ``url`` ranks in the Google AJAX results for ``keyword``.

    Up to 16 pages of 4 results are requested from the Google AJAX Search
    API; the search stops at the first result whose unescaped URL matches.

    Args:
        url: Prefix to look for. It is compiled as a regular expression and
            matched against the start of every result URL.
        keyword: Search phrase; it is UTF-8 encoded and URL-quoted into the
            query string.
        server_info: Mapping whose "gl" entry is sent as the ``gl`` (country)
            parameter.

    Returns:
        A dict. "total" is set from the first page when it carries
        ``responseData`` (the estimated result count, or None when it cannot
        be read); "rank" (1-based position) and "url" are set only when a
        result matched.

    Raises:
        Exception: if a fetch returns a status code other than 200.
    """
    # NOTE(review): ``url`` is used as a pattern without re.escape(), so
    # characters such as ".", "?" or "+" act as regex operators -- confirm
    # whether callers rely on passing patterns before escaping it.
    url_regex = re.compile("^" + url)
    rank_infos = {}
    counter = 0

    base_api_url = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&"

    # The API key is optional: any problem reading or quoting it (e.g. the
    # setting is not defined in config) just leaves the key out. Catching
    # Exception rather than everything lets SystemExit/KeyboardInterrupt
    # propagate.
    try:
        if config.google_ajax_api_key:
            base_api_url += "key=" + urllib.quote_plus(config.google_ajax_api_key) + "&"
    except Exception:
        pass

    # Set country param
    base_api_url += "gl=" + server_info["gl"] + "&"

    # Each call to google AJAX Search API returns 4 results.
    # We are allowed to fetch 64 results (16 pages)
    num = 4
    quoted_keyword = urllib.quote_plus(keyword.encode("UTF-8"))
    for i in range(0, 16):
        start = i * num
        api_url = base_api_url + "start=" + unicode(start) + "&q=" + quoted_keyword
        # 3600 and 600 are passed straight to the caching helper --
        # presumably cache lifetimes; confirm against urlfetch_with_cache.
        result = urlfetch_with_cache(api_url, 3600, 600)
        if result["status_code"] != 200:
            # Report the URL that was actually fetched; the previous code
            # referenced an undefined name here and raised NameError instead.
            raise Exception(
                "Invalid response (url = <"
                + api_url
                + ">, status code = "
                + unicode(result["status_code"])
                + " and content = <"
                + unicode(result["content"])
                + ">)"
            )
        response = json.loads(result["content"])
        if not response.get("responseData"):
            continue
        if i == 0:
            # Only the first page is used for the estimated total; a missing
            # or malformed cursor yields None instead of failing the lookup.
            try:
                rank_infos["total"] = long(response["responseData"]["cursor"].get("estimatedResultCount"))
            except Exception:
                rank_infos["total"] = None
        results = response["responseData"].get("results")
        if not results:
            continue
        for r in results:
            # The counter keeps running across pages, so the rank is global.
            counter += 1
            if url_regex.match(r["unescapedUrl"]):
                rank_infos["rank"] = counter
                rank_infos["url"] = r["unescapedUrl"]
                return rank_infos
    return rank_infos