Exemplo n.º 1
0
def extract_commands(html_docs, base_commands=None):
    """Extract all commands in the html documents.

    Every document is scanned with a ``CommandExtractor``.  A command string
    that shows up several times in the same document is only recorded once
    for that document.  If extraction raises for a document, that document
    is skipped and not counted in ``nr_docs``; commands already added from
    it before the failure stay in the collection.

    Args:
       html_docs (HtmlDocument or iterable of docs): The html documents.
       base_commands (str or iterable of str): If provided, limit the results
         to these commands.
    Returns:
       commands (Commands): Collection of found command examples.  When
         ``base_commands`` is not given, only commands whose name is shared
         by more than one collected command, or whose only command has more
         than one entry in ``lines``, are kept.
    """
    if isinstance(html_docs, HtmlDocument):
        html_docs = [html_docs]
    extractor = CommandExtractor(base_commands)
    commands = Commands()
    for doc in html_docs:
        seen = set()  # command strings already recorded for this document
        nr_cmds = 0   # running index of unique commands within this document
        try:
            for line_nr, cmd in extractor.iter_commands(doc):
                if cmd in seen:
                    continue
                seen.add(cmd)
                commands.add_command(Command(cmd, line_nr, nr_cmds, doc))
                nr_cmds += 1
        except Exception:
            # Best effort: one broken document must not abort the whole run.
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate so the caller can still be interrupted.
            continue
        else:
            # Only documents that were processed completely are counted.
            commands.nr_docs += 1

    if base_commands:
        # The caller asked for specific commands: return them unfiltered.
        return commands

    # Only keep command names with more than one occurrence
    commands_by_name = defaultdict(list)
    for command in commands:
        commands_by_name[command.name].append(command)
    keep = {}
    for coms in commands_by_name.values():
        if len(coms) > 1 or len(coms[0].lines) > 1:
            for com in coms:
                keep[com.cmd] = com
    return Commands(keep, commands.nr_docs)
def extract_commands(html_docs, base_commands=None):
    """Extract all commands in the html documents.

    Every document is scanned with a ``CommandExtractor``.  A command string
    that shows up several times in the same document is only recorded once
    for that document.  If extraction raises for a document, that document
    is skipped and not counted in ``nr_docs``; commands already added from
    it before the failure stay in the collection.

    Args:
       html_docs (HtmlDocument or iterable of docs): The html documents.
       base_commands (str or iterable of str): If provided, limit the results
         to these commands.
    Returns:
       commands (Commands): Collection of found command examples.  When
         ``base_commands`` is not given, only commands whose name is shared
         by more than one collected command, or whose only command has more
         than one entry in ``lines``, are kept.
    """
    if isinstance(html_docs, HtmlDocument):
        html_docs = [html_docs]
    extractor = CommandExtractor(base_commands)
    commands = Commands()
    for doc in html_docs:
        seen = set()  # command strings already recorded for this document
        nr_cmds = 0   # running index of unique commands within this document
        try:
            for line_nr, cmd in extractor.iter_commands(doc):
                if cmd in seen:
                    continue
                seen.add(cmd)
                commands.add_command(Command(cmd, line_nr, nr_cmds, doc))
                nr_cmds += 1
        except Exception:
            # Best effort: one broken document must not abort the whole run.
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate so the caller can still be interrupted.
            continue
        else:
            # Only documents that were processed completely are counted.
            commands.nr_docs += 1

    if base_commands:
        # The caller asked for specific commands: return them unfiltered.
        return commands

    # Only keep command names with more than one occurrence
    commands_by_name = defaultdict(list)
    for command in commands:
        commands_by_name[command.name].append(command)
    keep = {}
    for coms in commands_by_name.values():
        if len(coms) > 1 or len(coms[0].lines) > 1:
            for com in coms:
                keep[com.cmd] = com
    return Commands(keep, commands.nr_docs)
Exemplo n.º 3
0
    def test_commands(self):
        """Exercise ranking, echoing and dict round-tripping of commands."""
        collection = Commands()
        page = HtmlDocument('http://example.com', b'', 1)

        # Three commands but only two distinct command strings: 'ls' is
        # added twice, the grep command once.
        samples = (
            ('ls', 1, 1),
            (u'grep \u201ctest\u2033', 5, 2),
            ('ls', 22, 3),
        )
        for text, line_nr, nr in samples:
            collection.add_command(Command(text, line_nr, nr, page))

        collection.nr_docs = 1

        ranked = collection.rank_commands()

        # The ranking is expected to hold two entries with 'ls' first.
        self.assertEqual(len(ranked), 2)
        self.assertEqual(ranked[0].cmd, 'ls')

        # Output helpers are only called here; their output is not checked.
        for entry in ranked:
            entry.echo()
            entry.echo(verbose=True)
            print(repr(entry))

        # Round trip of the whole collection; the result is not inspected.
        collection = Commands.from_dict(collection.to_dict())
        # Round trip of each ranked command must compare equal to the original.
        for entry in ranked:
            restored = Command.from_dict(entry.to_dict())
            self.assertEqual(restored, entry)
Exemplo n.º 4
0
 def mock_search(query_string=None, cmd=None, **kwargs):
     """Return a fixed Commands collection, ignoring every argument.

     The collection holds a single git command whose text contains
     typographic (non-ASCII) dash and quote characters.
     """
     result = Commands()
     page = HtmlDocument(u'http://example.com', b'', 1)
     canned = Command(
         u'git commit \u2013amend -m \u2018new message\u2019', 1, 1, page)
     result.add_command(canned)
     return result
 def mock_search(query_string=None, cmd=None, **kwargs):
     """Return a fixed Commands collection, ignoring every argument.

     The collection holds a single git command whose text contains
     typographic (non-ASCII) dash and quote characters.
     """
     result = Commands()
     page = HtmlDocument(u'http://example.com', b'', 1)
     canned = Command(
         u'git commit \u2013amend -m \u2018new message\u2019', 1, 1, page)
     result.add_command(canned)
     return result