# Imports needed by these tests; module paths assumed from the Invenio 1.x
# layout of websearch_external_collections.
from invenio.websearch_external_collections_searcher import \
    external_collections_dictionary
from invenio.websearch_external_collections_getter import \
    HTTPAsyncPageGetter, async_download

def build_search_urls_test():
    """Build some classical URLs from basic_search_units."""
    print "Testing external_search_engines build_search_url functions."
    # Each basic_search_unit is [operator, pattern, field, matching_type].
    tests = [[['+', 'ellis', 'author', 'w'],
              ['+', 'unification', 'title', 'w'],
              ['-', 'Ross', 'author', 'w'],
              ['+', 'large', '', 'w'],
              ['-', 'helloworld', '', 'w']],
             [['+', 'ellis', 'author', 'w'],
              ['+', 'unification', 'title', 'w']],
             [['+', 'ellis', 'author', 'w']],
             [['-', 'Ross', 'author', 'w']]]
    for engine in external_collections_dictionary.values():
        print engine.name
        for test in tests:
            url = engine.build_search_url(test)
            print "    Url: " + str(url)
def download_and_parse():
    """Try to make a query that always return results on all search engines.
    Check that a page is well returned and that the result can be parsed.

    This test is not included in the general test suite.

    This test give false positive if any of the external server is non working or too slow.
    """
    test = [['+', 'ieee', '', 'w']]
    errors = []

    external_collections = external_collections_dictionary.values()
    urls = [engine.build_search_url(test) for engine in external_collections]
    pagegetters = [HTTPAsyncPageGetter(url) for url in urls]
    dummy = async_download(pagegetters, None, None, 30)  # 30-second timeout

    for (page, engine, url) in zip(pagegetters, external_collections, urls):
        if not url:
            errors.append("Unable to build url for : " + engine.name)
            continue
        if len(page.data) == 0:
            errors.append("Zero sized page with : " + engine.name)
            continue
        if engine.parser:
            results = engine.parser.parse_and_get_results(page.data)
            num_results = engine.parser.parse_num_results()
            if len(results) == 0:
                errors.append("Unable to parse results for : " + engine.name)
                continue
            if not num_results:
                errors.append(
                    "Unable to parse (None returned) number of results for : "
                    + engine.name)
                # Without this continue, int(None) below would raise and record
                # a second, redundant error for the same engine.
                continue
            try:
                num_results = int(num_results)
            except (ValueError, TypeError):
                errors.append(
                    "Unable to parse (not a number) number of results for : " +
                    engine.name)

    return errors
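# Illustrative driver, not part of the original module: run the check above
# and print a report of any failing engines.
def report_download_and_parse():
    """Run download_and_parse() and print the outcome (illustrative only)."""
    errors = download_and_parse()
    if not errors:
        print "All external search engines answered and parsed correctly."
    for error in errors:
        print "ERROR: " + error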