Пример #1
0
def search(query_string=None, cmd=None, search_engine='google',
           max_download=5):
    """Search for *query_string* and extract commands from the top hits.

    The engine named by *search_engine* builds the search request and
    parses the hits out of the downloaded result. At most *max_download*
    hit URLs are then fetched and passed to ``extract_commands``.

    :param query_string: query forwarded to the engine's request builder.
    :param cmd: forwarded to ``extract_commands`` as ``base_commands``.
    :param search_engine: name handed to ``get_engine``.
    :param max_download: upper bound on the number of hits downloaded.
    :returns: whatever ``extract_commands`` returns for the fetched docs.
    :raises SearchError: if the search download yields a ``DownloadError``.
    """
    backend = get_engine(search_engine)
    response = get(backend.get_search_request(query_string))

    # A failed download comes back as a DownloadError value rather than
    # being raised, so it has to be detected explicitly.
    if isinstance(response, DownloadError):
        message = 'Failed search on {} ({})'.format(
            search_engine, response.status_code)
        raise SearchError(message)

    top_hits = backend.get_hits(response)[:max_download]
    page_requests = [Request(hit.url) for hit in top_hits]
    return extract_commands(iter_get(page_requests), base_commands=cmd)
    def test_extract_commands(self):
        """Exercise ``extract_commands`` on the HTML fixtures and edge cases."""
        # No base command: the merged result must match MERGED_COMMANDS.
        merged = extract_commands(iter_html_docs(TEST_DATA_DIR))
        self.assertEqual(set(merged.commands.keys()), MERGED_COMMANDS)

        # Restricting to 'xargs' must keep only the xargs pipelines.
        expected_xargs = {
            'find /tmp -name "*.tmp" | xargs rm',
            u'find ./music -name "*.mp3" -print0 | xargs -0 ls',
            'find . -name "*.sh" | xargs grep "ksh"',
            'find /tmp -name "*.tmp" -print0 | xargs -0 rm',
            'find . -name "*.sh" -print0 | xargs -0 -I {} mv {} ~/back.scripts',
            u'find ./work -print | xargs grep "profit"',
        }
        xargs_only = extract_commands(iter_html_docs(TEST_DATA_DIR), 'xargs')
        self.assertEqual(set(xargs_only.commands.keys()), expected_xargs)

        # The stackoverflow.com fixture is expected to yield no xargs
        # commands.
        stackoverflow_doc = get_html_doc(TEST_DATA_DIR, 'stackoverflow.com')
        no_match = extract_commands(stackoverflow_doc, 'xargs')
        self.assertEqual(no_match.commands, {})

        # A document whose body is None must not count as processed.
        bodyless = HtmlDocument('http://stackoverflow.com', b'')
        bodyless.body = None
        skipped = extract_commands(bodyless)
        self.assertEqual(skipped.nr_docs, 0)
def search(query_string=None,
           cmd=None,
           search_engine='google',
           max_download=5):
    """Run a web search and extract commands from the downloaded hits.

    Steps: resolve the engine via ``get_engine``, download the search
    result for *query_string*, take the first *max_download* hits,
    download those pages and pass them to ``extract_commands``.

    :param query_string: text given to the engine's ``get_search_request``.
    :param cmd: passed through to ``extract_commands`` as ``base_commands``.
    :param search_engine: engine name resolved with ``get_engine``.
    :param max_download: maximum number of hit pages to download.
    :returns: the result of ``extract_commands`` on the downloaded pages.
    :raises SearchError: when the search request itself could not be
        downloaded (``get`` returned a ``DownloadError``).
    """
    engine = get_engine(search_engine)
    request = engine.get_search_request(query_string)
    result = get(request)

    # ``get`` reports a failure by returning a DownloadError object.
    failed = isinstance(result, DownloadError)
    if failed:
        raise SearchError(
            'Failed search on {} ({})'.format(search_engine,
                                              result.status_code))

    hits = engine.get_hits(result)
    to_fetch = [Request(hit.url) for hit in hits[:max_download]]
    fetched = iter_get(to_fetch)
    return extract_commands(fetched, base_commands=cmd)