Пример #1
0
def collect_arguments():
    """Build the ``pycollect`` command line parser and parse ``sys.argv``.

    Exactly one of ``--value``, ``--plugin`` or ``--script`` has to be given;
    they form a required, mutually exclusive group. A fixed ``--value`` cannot
    be combined with ``--daemon``: that combination is rejected through
    ``parser.error``.

    Returns:
        The namespace returned by ``parser.parse_args()``.
    """
    parser = ArgumentParser()
    parser.prog = "pycollect"
    parser.description = 'collector: collects and sends stats to carbon'

    parser.add_argument('-m',
                        '--metric',
                        required=True,
                        help='metric path where to store data')
    parser.add_argument('-V',
                        '--verbose',
                        action='store_true',
                        help='print data to stdout before sending to server')
    parser.add_argument('-s',
                        '--server',
                        required=True,
                        help='carbon server address')
    # type=int keeps the parsed value consistent with the integer default;
    # without it a port given on the command line would arrive as a string.
    parser.add_argument('-p',
                        '--port',
                        type=int,
                        default=2003,
                        help='carbon server port, default 2003')
    parser.add_argument('-D',
                        '--daemon',
                        action='store_true',
                        help='run as daemon, sends data at regular intervals')
    parser.add_argument('-i',
                        '--interval',
                        type=float,
                        default=5.0,
                        help='interval to send data in daemon mode, \
                        defaults 5s')
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + VERSION)

    # The three data sources default to False so that "not given" can be told
    # apart from any parsed value with an identity check below.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-c',
                       '--value',
                       default=False,
                       type=int_or_float,
                       help='metric value to send, must be int or \
                       float ')
    group.add_argument('-P',
                       '--plugin',
                       default=False,
                       help='call plugin to collect metric data')
    group.add_argument('-S',
                       '--script',
                       default=False,
                       help='get value from script outpout')
    parser.epilog = 'metric must be in standard collectd format e.g.  hostname.stats.command.[time|data]'
    args = parser.parse_args()
    # "is not False" rather than truthiness: a metric value of 0 is valid.
    if args.value is not False and args.daemon is True:
        parser.error(
            'Can not run in Daemon mode with fixed value, use Script or Plugin mode'
        )
    return args
Пример #2
0
 def run(self):
     """Build the docutils nodes documenting the configured argparse parser.

     The parser is located through the ``:module:``/``:func:`` options or
     through a dotted ``:ref:`` option. The located attribute may be a
     parser instance, a callable returning one, or (with ``:passparser:``)
     a callable that populates a parser handed to it.

     Returns:
         A list of nodes, or the result of
         ``_construct_manpage_specific_structure`` when the ``:manpage:``
         option is present.

     Raises:
         The exception built by ``self.error`` when the options do not name
         a target, or when the module lacks the requested attribute.
     """
     opts = self.options

     # Work out which module attribute holds (or builds) the parser.
     if 'module' in opts and 'func' in opts:
         module_name, attr_name = opts['module'], opts['func']
     elif 'ref' in opts:
         # Everything before the last dot is the module path.
         module_name, _, attr_name = opts['ref'].rpartition('.')
     else:
         raise self.error(
             ':module: and :func: should be specified, or :ref:')

     mod = __import__(module_name, globals(), locals(), [attr_name])
     if not hasattr(mod, attr_name):
         message = ('Module "%s" has no attribute "%s"\n'
                    'Incorrect argparse :module: or :func: values?')
         raise self.error(message % (module_name, attr_name))
     target = getattr(mod, attr_name)

     # Turn the target into an actual parser instance.
     if isinstance(target, ArgumentParser):
         parser = target
     elif 'passparser' in opts:
         parser = ArgumentParser()
         target(parser)
     else:
         parser = target()

     # The (possibly defaulted) path is stored back on the options.
     path = str(opts.setdefault('path', ''))
     if 'prog' in opts:
         parser.prog = opts['prog']

     parsed = parse_parser(parser, skip_default_values='nodefault' in opts)
     parsed = parser_navigate(parsed, path)
     if 'manpage' in opts:
         return self._construct_manpage_specific_structure(parsed)

     container = nodes.paragraph()
     self.state.nested_parse(self.content, self.content_offset, container)
     nested_content = container.children

     # Carry the directive's own content over, leaving definition lists out;
     # the full nested content is still handed to the print_* helpers below.
     items = [child for child in nested_content
              if not isinstance(child, nodes.definition_list)]
     if 'description' in parsed:
         items.append(self._nested_parse_paragraph(parsed['description']))
     items.append(nodes.literal_block(text=parsed['usage']))

     arg_list = print_arg_list(parsed, nested_content)
     opt_list = print_opt_list(parsed, nested_content)
     subcommand_list = print_subcommand_list(parsed, nested_content)
     items.append(
         print_command_args_and_opts(arg_list, opt_list, subcommand_list))

     if 'epilog' in parsed:
         items.append(self._nested_parse_paragraph(parsed['epilog']))
     return items
Пример #3
0
 def run(self):
     """Translate the referenced argparse parser into docutils nodes.

     The target is named either by ``:module:`` plus ``:func:`` or by a
     dotted ``:ref:``. It may already be an ``ArgumentParser``, be a
     factory returning one, or (when ``:passparser:`` is set) be a callable
     that receives a fresh parser to fill in.

     Returns:
         The list of generated nodes; with ``:manpage:`` set, whatever
         ``_construct_manpage_specific_structure`` returns instead.

     Raises:
         The exception created by ``self.error`` if no target is named or
         the imported module has no such attribute.
     """
     options = self.options

     if 'module' in options and 'func' in options:
         module_name = options['module']
         attr_name = options['func']
     elif 'ref' in options:
         # Last dotted component is the attribute, the rest is the module.
         pieces = options['ref'].split('.')
         attr_name = pieces.pop()
         module_name = '.'.join(pieces)
     else:
         raise self.error(
             ':module: and :func: should be specified, or :ref:')

     module = __import__(module_name, globals(), locals(), [attr_name])
     if not hasattr(module, attr_name):
         template = (
             'Module "%s" has no attribute "%s"\n'
             'Incorrect argparse :module: or :func: values?'
         )
         raise self.error(template % (module_name, attr_name))
     candidate = getattr(module, attr_name)

     if isinstance(candidate, ArgumentParser):
         parser = candidate
     elif 'passparser' in options:
         parser = ArgumentParser()
         candidate(parser)
     else:
         parser = candidate()

     # An absent :path: is recorded on the options as the empty string.
     if 'path' not in options:
         options['path'] = ''
     path = str(options['path'])

     if 'prog' in options:
         parser.prog = options['prog']

     skip_defaults = 'nodefault' in options
     info = parser_navigate(
         parse_parser(parser, skip_default_values=skip_defaults), path)
     if 'manpage' in options:
         return self._construct_manpage_specific_structure(info)

     holder = nodes.paragraph()
     self.state.nested_parse(self.content, self.content_offset, holder)
     nested_content = holder.children

     # Start with the directive's own content, minus definition lists.
     items = []
     for node in nested_content:
         if isinstance(node, nodes.definition_list):
             continue
         items.append(node)

     if 'description' in info:
         items.append(self._nested_parse_paragraph(info['description']))
     items.append(nodes.literal_block(text=info['usage']))

     sections = print_command_args_and_opts(
         print_arg_list(info, nested_content),
         print_opt_list(info, nested_content),
         print_subcommand_list(info, nested_content),
     )
     items.append(sections)

     if 'epilog' in info:
         items.append(self._nested_parse_paragraph(info['epilog']))
     return items
Пример #4
0
def main():
    """Parse the command line and dispatch to the chosen subcommand.

    Without a subcommand the help text is printed and ``SystemExit`` is
    raised. ``watch`` and ``save`` are forwarded to the matching function in
    ``commands`` together with the group id and access token.
    """
    parser = ArgumentParser()
    parser.prog = 'python3 app.py'
    parser.description = 'CLI-helper for VK group administation.'

    # Global options shared by every subcommand.
    parser.add_argument('--id', dest='vk_id', type=int,
                        default=MY_PUBLIC_ID, help='VK group or public id.')
    parser.add_argument('--token', dest='vk_token', type=str,
                        default=None, help='VK access token.')

    subcommands = parser.add_subparsers(help='Command', dest='command')
    subcommands.add_parser('watch',
                           help='Get new subscribers (without saving).')
    subcommands.add_parser('save', help='Save subscribers state.')

    args = parser.parse_args()
    chosen = args.command

    if not chosen:
        parser.print_help()
        raise SystemExit()

    if chosen == 'watch':
        commands.watch(args.vk_id, args.vk_token)
    elif chosen == 'save':
        commands.save(args.vk_id, args.vk_token)
Пример #5
0
    def configure(self, parser: ArgumentParser):
        """Register the ``spotty exec`` options and patch the usage text.

        After the parent class has configured ``parser``, the command's own
        options are added and the usage line is extended with the trailing
        ``-- COMMAND [args...]`` part.
        """
        super().configure(parser)

        parser.add_argument(
            '-i', '--interactive',
            action='store_true',
            help='Pass STDIN to the container',
        )
        parser.add_argument(
            '-t', '--tty',
            action='store_true',
            help='Allocate a pseudo-TTY',
        )
        parser.add_argument(
            '-u', '--user',
            type=str,
            default=None,
            help='Container username or UID (format: <name|uid>[:<group|gid>])',
        )
        parser.add_argument(
            '--no-sync',
            action='store_true',
            help="Don't sync the project before running the script",
        )

        # Append the "double-dash" part to the usage message. The generated
        # usage is sliced to drop its first 7 characters (the default
        # "usage: " prefix) and its final newline before the suffix is added.
        parser.prog = 'spotty exec'
        generated_usage = parser.format_usage()
        parser.usage = generated_usage[7:-1] + ' -- COMMAND [args...]\n'
        parser.epilog = (
            'The double dash (--) separates the command that you want to execute inside the container '
            'from the Spotty arguments.'
        )
Пример #6
0
def addargs(parser: ArgumentParser) -> None:
    """Register the command line options on ``parser``.

    Every option goes through the module's ``add_argument`` helper, in the
    order listed below. The program name becomes this file's base name up to
    its first dot, and ``@`` is set as the prefix character for reading
    arguments from a file.
    """
    script_name = os.path.basename(__file__)
    parser.prog = script_name.partition(".")[0]

    # (flags, keyword arguments) for each option, in registration order.
    option_table = (
        (("-v", "--version"),
         dict(help="Current version number", action="store_true")),
        (("-u", "--uribase"),
         dict(help="Base URI for RDF identifiers", default=DEFAULT_FHIR_URI)),
        (("-mv", "--metadatavoc"),
         dict(help="FHIR metadata vocabulary", default=DEFAULT_FHIR_MV)),
        (("-no", "--noontology"),
         dict(help="Omit owl ontology header", action="store_true")),
        (("-nn", "--nonarrative"),
         dict(help="Omit narrative text on output", action="store_true")),
        (("-nc", "--nocontinuation"),
         dict(help="Don't follow URL continuations", action="store_true")),
        (("--nocache",),
         dict(help="Do not use FMV Cache", action="store_true")),
        (("--fmvcache",),
         dict(help="Metadata vocabluary cache directory",
              default=DEFAULT_FMV_CACHE_DIR)),
        (("--maxsize",),
         dict(help="Maximum sensible file size in KB.  0 means no size check",
              type=int, default=800)),
        (("-sd", "--skipdirs"),
         dict(help="Skip directories", nargs='*')),
        (("-sf", "--skipfns"),
         dict(help="Skip file names containing text", nargs='*')),
        (("--format",),
         dict(help="Output format", choices=output_formats, default="turtle")),
    )
    for flags, settings in option_table:
        add_argument(parser, *flags, **settings)

    parser.fromfile_prefix_chars = "@"
Пример #7
0
def setup(parser: ArgumentParser) -> ArgumentParser:
    """Configure ``parser`` as the ``steinloss`` command line interface.

    Sets the program name and description and registers the ``--version``,
    ``--server``/``--probe`` (required, mutually exclusive), ``--ip-address``,
    ``--port`` and ``--speed`` options.

    Args:
        parser: The parser to configure; it is modified in place.

    Returns:
        The same ``parser`` object.
    """
    parser.prog = "steinloss"
    # Adjacent literals are concatenated verbatim, so every fragment has to
    # carry its own trailing space to keep the sentences apart.
    parser.description = \
        "A tool for measuring a package loss, between two endpoints. The way it works, is by spinning up a server " \
        "endpoint, that waits for a incoming connection. When the server-side gets pinged by a probe, it will start " \
        "sending packages to it, and the probe will respond on each packages. " \
        "The package loss is calculated by keeping track of the id of the packet. " \
        "Data is shown on the a website on port 8080, on the server side"
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version='%(prog)s ' + __version__)

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-s",
                       "--server",
                       action="store_true",
                       help="Determines if you're on the server side")
    group.add_argument(
        "-p",
        "--probe",
        action="store_true",
        help="Determines if you're on the probe side. "
        "You have to specify a ip address for the probe to target")
    # --ip-address is only mandatory in probe mode. That is decided by looking
    # for the probe flag in the raw command line at parser-construction time.
    # NOTE(review): a combined short flag such as "-pi" would not match this
    # check - confirm whether that form needs to be supported.
    parser.add_argument(
        "-i",
        "--ip-address",
        action="store",
        required='-p' in sys.argv or '--probe' in sys.argv,
        help=
        "REQUIRED for probe: the ip the probe pings. Server what it listens on",
        metavar='')  # Removes caps var name.

    parser.add_argument(
        "-P",
        "--port",
        type=int,
        default=9090,
        help=
        "Which port to use. Have to be the same, as the servers port. Default is 9090",
        metavar='')  # Removes caps var name.
    parser.add_argument("--speed", action=SpeedConverter,
                        default=4194304)  # 4mb

    return parser
Пример #8
0
    def configure(self, parser: ArgumentParser):
        """Register the ``spotty run`` arguments and patch the usage text.

        The parent class configures ``parser`` first; afterwards the script
        name, the command's options and the trailing ``[-- args...]`` usage
        suffix are added.
        """
        super().configure(parser)

        parser.add_argument(
            'script_name', metavar='SCRIPT_NAME', type=str, help='Script name')
        parser.add_argument(
            '-u', '--user', type=str, default=None,
            help='Container username or UID (format: <name|uid>[:<group|gid>])')
        parser.add_argument(
            '-s', '--session-name', type=str, default=None,
            help='tmux session name')
        parser.add_argument(
            '-l', '--logging', action='store_true',
            help='Log the script outputs to a file')

        parameter_help = (
            'Set a value for the script parameter (format: PARAMETER=VALUE). This '
            'argument can be used multiple times to set several parameters. Parameters can be '
            'used in the script as Mustache variables (for example: {{PARAMETER}}).'
        )
        parser.add_argument(
            '-p', '--parameter', metavar='PARAMETER=VALUE', action='append',
            type=str, default=[], help=parameter_help)
        parser.add_argument(
            '--no-sync', action='store_true',
            help="Don't sync the project before running the script")

        # Append the "double-dash" part to the usage message. The generated
        # usage is sliced to drop its first 7 characters (the default
        # "usage: " prefix) and its final newline before the suffix is added.
        parser.prog = 'spotty run'
        generated_usage = parser.format_usage()
        parser.usage = generated_usage[7:-1] + ' [-- args...]\n'
        parser.epilog = (
            'The double dash (--) separates custom arguments that you can pass to the script '
            'from the Spotty arguments.'
        )
Пример #9
0
def _register_generate_config_options(parser: argparse.ArgumentParser) -> None:
    """Add the ``pylint-config`` help option and subcommands to ``parser``."""
    parser.prog = "pylint-config"

    # Register -h/--help with the custom _HelpAction, which also receives the
    # parser itself.
    # NOTE(review): presumably the parser was created without the built-in
    # help option, otherwise this registration would conflict - confirm.
    help_settings = {
        "action": _HelpAction,
        "default": argparse.SUPPRESS,
        "help": "show this help message and exit",
        "parser": parser,
    }
    parser.add_argument("-h", "--help", **help_settings)

    # Each subcommand of 'pylint-config' gets its own sub-parser; the chosen
    # one is stored under "config_subcommand".
    subcommands = parser.add_subparsers(
        dest="config_subcommand",
        title="Subcommands",
    )

    # The "generate" subcommand.
    generate = subcommands.add_parser(
        "generate",
        help="Generate a pylint configuration",
    )
    generate.add_argument("--interactive", action="store_true")
Пример #10
0
def _set_parser_prog(parser: argparse.ArgumentParser, prog: str):
    """
    Recursively set prog attribute of a parser and all of its subparsers so that the root command
    is a command name and not sys.argv[0].
    :param parser: the parser being edited
    :param prog: value for the current parsers prog attribute
    """
    # Set the prog value for this parser
    parser.prog = prog

    # Set the prog value for the parser's subcommands
    for action in parser._actions:
        if not isinstance(action, argparse._SubParsersAction):
            continue

        # Keep the action's own prefix in sync, so that a subcommand added
        # through add_parser() after this call gets the new prog value too.
        action._prog_prefix = parser.prog

        # action.choices maps subcommand names AND their aliases to the same
        # parser object. argparse inserts the real name before its aliases,
        # so only the first key seen for each parser is used; otherwise the
        # parser's prog would end up carrying the last alias.
        handled = set()

        # Set the prog value for each subcommand
        for sub_cmd, sub_cmd_parser in action.choices.items():
            if id(sub_cmd_parser) in handled:
                continue
            handled.add(id(sub_cmd_parser))

            sub_cmd_prog = parser.prog + ' ' + sub_cmd
            _set_parser_prog(sub_cmd_parser, sub_cmd_prog)

        # We can break since argparse only allows 1 group of subcommands per level
        break
Пример #11
0
def _set_parser_prog(parser: argparse.ArgumentParser, prog: str) -> None:
    """
    Give a parser, and recursively every subcommand parser below it, a prog value rooted at ``prog``
    instead of sys.argv[0].

    :param parser: the parser being edited
    :param prog: new value for the parser's prog attribute
    """
    parser.prog = prog

    # argparse only allows one group of subcommands per level, so the first
    # subparsers action found (if any) is the only one that needs handling.
    subcommands = next(
        (act for act in parser._actions if isinstance(act, argparse._SubParsersAction)),
        None,
    )
    if subcommands is None:
        return

    # Updating _prog_prefix makes a later add_parser() call on this action
    # build the new parser's prog from the new value.
    subcommands._prog_prefix = parser.prog

    # The choices dictionary contains the subcommand names and their aliases,
    # with aliases mapping to the same parser object as the real name, and
    # nothing in an entry marks it as an alias. Because argparse inserts the
    # name before its aliases and dictionaries keep insertion order, the
    # first key met for a given parser is taken to be the subcommand name;
    # later keys for the same parser are skipped so that no alias ends up in
    # a prog value.
    already_done = set()
    for name, child_parser in subcommands.choices.items():
        marker = id(child_parser)
        if marker in already_done:
            continue

        _set_parser_prog(child_parser, parser.prog + ' ' + name)
        already_done.add(marker)
Пример #12
0
    return output.strip() + "\n"


if __name__ == "__main__":

    if sys.version_info[0] == 2 and sys.version_info[1] < 7:
        sys.stderr.write("Python versions below 2.7 are not supported.\n")
        sys.stderr.write("Your Python version:\n")
        sys.stderr.write(".".join([str(v)
                                   for v in sys.version_info[:3]]) + "\n")
        sys.exit(0)

    from argparse import ArgumentParser, RawDescriptionHelpFormatter

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.prog = "Coptic NLP Pipeline"
    parser.usage = "python coptic_nlp.py [OPTIONS] files"
    parser.epilog = """Example usage:
--------------
Add norm, lemma, parse, tag, unary tags, find multiword expressions and do language recognition:
> python coptic_nlp.py -penmult infile.txt        

Just tokenize a file using pipes and dashes:
> python coptic_nlp.py -o pipes infile.txt       

Tokenize with pipes and mark up line breaks, conservatively detokenize bound groups, assume seg boundary at merge site:
> python coptic_nlp.py -b -d 1 --segment_merged -o pipes infile.txt

Normalize, tag, lemmatize, find multiword expressions and parse, splitting sentences by <verse> tags:
> python coptic_nlp.py -pnltm -s verse infile.txt       
Пример #13
0
    def run(self):
        """Build the docutils nodes documenting the configured argparse parser.

        The parser (or a callable producing it) is located in one of three
        ways, tried in this order:

        * ``:module:`` and ``:func:`` - attribute of an importable module;
        * ``:ref:`` - dotted path whose last component is the attribute;
        * ``:filename:`` and ``:func:`` - name defined by executing a local
          source file.

        Returns:
            A list of nodes, or the result of
            ``_construct_manpage_specific_structure`` when the ``:manpage:``
            option is present.

        Raises:
            The exception built by ``self.error`` when no target is named,
            when the import fails, or when the module lacks the attribute.
        """
        # Remembers which branch supplied the target, so that the import step
        # below is skipped only when the file was actually executed.
        loaded_from_file = False

        if 'module' in self.options and 'func' in self.options:
            module_name = self.options['module']
            attr_name = self.options['func']
        elif 'ref' in self.options:
            _parts = self.options['ref'].split('.')
            module_name = '.'.join(_parts[0:-1])
            attr_name = _parts[-1]
        elif 'filename' in self.options and 'func' in self.options:
            mod = {}
            try:
                f = open(self.options['filename'])
            except IOError:
                # try open with abspath
                f = open(os.path.abspath(self.options['filename']))
            # Release the file handle as soon as the source has been read.
            with f:
                source = f.read()
            code = compile(source, self.options['filename'], 'exec')
            # NOTE(review): this executes the whole file named by the
            # directive option; it must only ever point at trusted sources.
            exec(code, mod)
            attr_name = self.options['func']
            func = mod[attr_name]
            loaded_from_file = True
        else:
            raise self.error(
                ':module: and :func: should be specified, or :ref:, or :filename: and :func:'
            )

        # Skip this if we're dealing with a local file, since it obviously can't be imported
        if not loaded_from_file:
            try:
                mod = __import__(module_name, globals(), locals(), [attr_name])
            except:
                # NOTE(review): the bare except also traps SystemExit and
                # KeyboardInterrupt raised while importing; left unchanged
                # here - confirm whether that is intended before narrowing.
                raise self.error('Failed to import "%s" from "%s".\n%s' %
                                 (attr_name, module_name, sys.exc_info()[1]))

            if not hasattr(mod, attr_name):
                raise self.error(
                    ('Module "%s" has no attribute "%s"\n'
                     'Incorrect argparse :module: or :func: values?') %
                    (module_name, attr_name))
            func = getattr(mod, attr_name)

        # The target may be a parser, a callable that fills in a parser it is
        # given (:passparser:), or a callable that returns a parser.
        if isinstance(func, ArgumentParser):
            parser = func
        elif 'passparser' in self.options:
            parser = ArgumentParser()
            func(parser)
        else:
            parser = func()
        if 'path' not in self.options:
            self.options['path'] = ''
        path = str(self.options['path'])
        if 'prog' in self.options:
            parser.prog = self.options['prog']
        result = parse_parser(parser,
                              skip_default_values='nodefault' in self.options,
                              skip_default_const_values='nodefaultconst'
                              in self.options)
        result = parser_navigate(result, path)
        if 'manpage' in self.options:
            return self._construct_manpage_specific_structure(result)

        # Handle nested content, where markdown needs to be preprocessed
        items = []
        nested_content = nodes.paragraph()
        if 'markdown' in self.options:
            from sphinxarg.markdown import parseMarkDownBlock
            items.extend(parseMarkDownBlock('\n'.join(self.content) + '\n'))
        else:
            self.state.nested_parse(self.content, self.content_offset,
                                    nested_content)
            nested_content = nested_content.children
        # add common content between
        for item in nested_content:
            if not isinstance(item, nodes.definition_list):
                items.append(item)

        markDownHelp = 'markdownhelp' in self.options
        if 'description' in result and 'nodescription' not in self.options:
            if markDownHelp:
                items.extend(renderList([result['description']], True))
            else:
                items.append(
                    self._nested_parse_paragraph(result['description']))
        items.append(nodes.literal_block(text=result['usage']))
        items.extend(
            print_action_groups(result,
                                nested_content,
                                markDownHelp,
                                settings=self.state.document.settings))
        if 'nosubcommands' not in self.options:
            items.extend(
                print_subcommands(result,
                                  nested_content,
                                  markDownHelp,
                                  settings=self.state.document.settings))
        if 'epilog' in result and 'noepilog' not in self.options:
            items.append(self._nested_parse_paragraph(result['epilog']))

        # Traverse the returned nodes, modifying the title IDs as necessary to avoid repeats
        ensureUniqueIDs(items)

        return items
Пример #14
0
def run_hebpipe():
    """Command line entry point for HebPipe.

    Parses the command line, loads once the components that the requested
    steps need, then runs ``nlp`` on every file matching the ``files``
    pattern. With more than one input file each result is written to a file
    under ``--dirout``; with a single file the result goes to stdout.

    The process is terminated through ``sys.exit`` on unsupported Python
    versions, on ``--version``, and when the user declines the download of
    missing model files.
    """

    if sys.version_info[0] == 2 and sys.version_info[1] < 7:
        sys.stderr.write("Python versions below 2.7 are not supported.\n")
        sys.stderr.write("Your Python version:\n")
        sys.stderr.write(".".join([str(v) for v in sys.version_info[:3]]) + "\n")
        # NOTE(review): this error path exits with status 0 - confirm whether
        # a non-zero status is wanted.
        sys.exit(0)

    from argparse import ArgumentParser, RawDescriptionHelpFormatter

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.prog = "HebPipe - NLP Pipeline for Hebrew"
    parser.usage = "python heb_pipe.py [OPTIONS] files"
    parser.epilog = """Example usage:
--------------
Whitespace tokenize, tokenize morphemes, add pos, lemma, morph, dep parse with automatic sentence splitting, 
entity recognition and coref for one text file, output in default conllu format:
> python heb_pipe.py -wtplmdec example_in.txt        

OR specify no processing options (automatically assumes you want all steps)
> python heb_pipe.py example_in.txt        

Just tokenize a file using pipes:
> python heb_pipe.py -wt -o pipes example_in.txt     

Pos tag, lemmatize, add morphology and parse a pre-tokenized file, splitting sentences by existing <sent> tags:
> python heb_pipe.py -plmd -s sent example_in.txt  

Add full analyses to a whole directory of *.txt files, output to a specified directory:    
> python heb_pipe.py -wtplmdec --dirout /home/heb/out/ *.txt

Parse a tagged TT SGML file into CoNLL tabular format for treebanking, use existing tag <sent> to recognize sentence borders:
> python heb_pipe.py -d -s sent example_in.tt
"""
    parser.add_argument("files", help="File name or pattern of files to process (e.g. *.txt)")

    g1 = parser.add_argument_group("standard module options")
    g1.add_argument("-w","--whitespace", action="store_true", help='Perform white-space based tokenization of large word forms')
    g1.add_argument("-t","--tokenize", action="store_true", help='Tokenize large word forms into smaller morphological segments')
    g1.add_argument("-p","--pos", action="store_true", help='Do POS tagging')
    g1.add_argument("-l","--lemma", action="store_true", help='Do lemmatization')
    g1.add_argument("-m","--morph", action="store_true", help='Do morphological tagging')
    g1.add_argument("-d","--dependencies", action="store_true", help='Parse with dependency parser')
    g1.add_argument("-e","--entities", action="store_true", help='Add entity spans and types')
    g1.add_argument("-c","--coref", action="store_true", help='Add coreference annotations')
    g1.add_argument("-s","--sent", action="store", default="auto", help='XML tag to split sentences, e.g. sent for <sent ..> or none for no splitting (otherwise automatic sentence splitting)')
    g1.add_argument("-o","--out", action="store", choices=["pipes","conllu","sgml"], default="conllu", help='Output CoNLL format, SGML or just tokenize with pipes')

    g2 = parser.add_argument_group("less common options")
    g2.add_argument("-q","--quiet", action="store_true", help='Suppress verbose messages')
    g2.add_argument("-x","--extension", action="store", default='conllu', help='Extension for output files (default: .conllu)')
    g2.add_argument("--cpu", action="store_true", help='Use CPU instead of GPU (slower)')
    g2.add_argument("--disable_lex", action="store_true", help='Do not use lexicon during lemmatization')
    g2.add_argument("--dirout", action="store", default=".", help='Optional output directory (default: this dir)')
    g2.add_argument("--punct_sentencer", action="store_true", help='Only use punctuation (.?!) to split sentences (deprecated)')
    g2.add_argument("--from_pipes", action="store_true", help='Input contains subtoken segmentation with the pipe character (no automatic tokenization is performed)')
    g2.add_argument("--version", action="store_true", help='Print version number and quit')

    # --version is handled before parse_args() so that it works without the
    # otherwise mandatory "files" argument.
    if "--version" in sys.argv:
        sys.stdout.write("HebPipe V" + __version__)
        # NOTE(review): a successful --version exits with status 1 - confirm
        # whether status 0 is wanted.
        sys.exit(1)

    opts = parser.parse_args()
    opts = diagnose_opts(opts)

    if opts.cpu:
        import flair
        flair.device = torch.device('cpu')
        # Make every later CUDA availability check report "no GPU".
        torch.cuda.is_available = lambda: False

    dotok = opts.tokenize

    if not opts.quiet:
        # The imported name is not used below; presumably the module is
        # imported for its import-time side effects - TODO confirm.
        try:
            from .lib import timing
        except ImportError:  # direct script usage
            from lib import timing

    files = glob(opts.files)

    if not opts.quiet:
        log_tasks(opts)

    # Check if models, Marmot and Malt Parser are available
    if opts.pos or opts.lemma or opts.morph or opts.dependencies or opts.tokenize or opts.entities:
        models_OK = check_requirements()
        if not models_OK:
            sys.stderr.write("! You are missing required software:\n")
            if (opts.pos or opts.lemma or opts.morph):
                sys.stderr.write(" - Tagging, lemmatization and morphological analysis require models\n")
            sys.stderr.write(" - Model files in models/ are missing\n")
            response = inp("Attempt to download missing files? [Y/N]\n")
            if response.upper().strip() == "Y":
                download_requirements(models_OK)
            else:
                sys.stderr.write("Aborting\n")
                sys.exit(0)
        tagger = FlairTagger()
        morpher = FlairTagger(morph=True)
        lemmatizer = init_lemmatizer(cpu=opts.cpu, no_post_process=opts.disable_lex)
    else:
        tagger = None
        morpher = None
        lemmatizer = None

    # Everything below is loaded once here and handed to nlp() for each file.
    if dotok:  # Pre-load stacked tokenizer for entire batch
        rf_tok = RFTokenizer(model=model_dir + "heb.sm" + str(sys.version_info[0]))
    else:
        rf_tok = None
    if opts.entities:  # Pre-load entity recognizer for entire batch
        xrenner = Xrenner(model=model_dir + "heb.xrm")
    else:
        xrenner = None
    flair_sent_splitter = FlairSentSplitter() if opts.sent == "auto" and not opts.punct_sentencer else None
    dep_parser = Parser.load(model_dir+"heb.diaparser") if opts.dependencies else None

    for infile in files:
        # Derive the output file name from the input's base name.
        base = os.path.basename(infile)
        if infile.endswith("." + opts.extension):
            # Input already carries the output extension: insert ".out".
            outfile = base.replace("." + opts.extension,".out." + opts.extension)
        elif len(infile) > 4 and infile[-4] == ".":
            # Three-character extension: swap it for the output extension.
            outfile = base[:-4] + "." + opts.extension
        else:
            outfile = base + "." + opts.extension

        if not opts.quiet:
            sys.stderr.write("Processing " + base + "\n")

        # Read through context managers so the handle is closed even when
        # decoding fails and the fallback encoding has to be tried.
        try:
            with io.open(infile, encoding="utf8") as fin:
                input_text = fin.read().replace("\r","")
        except UnicodeDecodeError:  # Fallback to support Windows Hebrew encoding
            with io.open(infile, encoding="cp1255") as fin:
                input_text = fin.read().replace("\r","")

        processed = nlp(input_text, do_whitespace=opts.whitespace, do_tok=dotok, do_tag=opts.pos, do_lemma=opts.lemma,
                        do_parse=opts.dependencies, do_entity=opts.entities, out_mode=opts.out,
                        sent_tag=opts.sent, preloaded=(rf_tok,xrenner,flair_sent_splitter,dep_parser, tagger, morpher, lemmatizer),
                        punct_sentencer=opts.punct_sentencer,from_pipes=opts.from_pipes, filecount=len(files))

        if len(files) > 1:
            with io.open(opts.dirout + os.sep + outfile, 'w', encoding="utf8", newline="\n") as f:
                if not PY3:
                    processed = unicode(processed)
                f.write((processed.strip() + "\n"))
        else:  # Single file, print to stdout
            if PY3:
                sys.stdout.buffer.write(processed.encode("utf8"))
            else:
                print(processed.encode("utf8"))

    fileword = " files\n\n" if len(files) > 1 else " file\n\n"
    sys.stderr.write("\nFinished processing " + str(len(files)) + fileword)
Пример #15
0
	return output.strip() + "\n"


if __name__ == "__main__":

	if sys.version_info[0] == 2 and sys.version_info[1] < 7:
		sys.stderr.write("Python versions below 2.7 are not supported.\n")
		sys.stderr.write("Your Python version:\n")
		sys.stderr.write(".".join([str(v) for v in sys.version_info[:3]]) + "\n")
		sys.exit(0)

	from argparse import ArgumentParser, RawDescriptionHelpFormatter

	parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
	parser.prog = "Coptic NLP Pipeline"
	parser.usage = "python coptic_nlp.py [OPTIONS] files"
	parser.epilog = """Example usage:
--------------
Add norm, lemma, parse, tag, unary tags, find multiword expressions and do language recognition:
> python coptic_nlp.py -penmult infile.txt        

Just tokenize a file using pipes and dashes:
> python coptic_nlp.py -o pipes infile.txt       

Tokenize with pipes and mark up line breaks, conservatively detokenize bound groups, assume seg boundary at merge site:
> python coptic_nlp.py -b -d 1 --segment_merged -o pipes infile.txt

Normalize, tag, lemmatize, find multiword expressions and parse, splitting sentences by <verse> tags:
> python coptic_nlp.py -pnltm -s verse infile.txt       
Пример #16
0
            await cursor.execute(INITIAL_MIGRATION)
            print('ok')

        print('Migrating...')

        for name in get_migrations_list():
            await migrate(conn, name)

        await conn.close()


if __name__ == "__main__":
    PARSER = ArgumentParser()

    PARSER.description = 'CLI for applying migration'
    PARSER.prog = 'python3 cli.py'

    PARSER.add_argument('--postgres-pass',
                        dest='postgres_pass',
                        type=str,
                        help='PostgreSQL user password',
                        default=None)
    PARSER.add_argument('--postgres-user',
                        dest='postgres_user',
                        type=str,
                        help='PostgreSQL user name',
                        default='postgres')
    PARSER.add_argument('--postgres-host',
                        dest='postgres_host',
                        type=str,
                        help='PostgreSQL database host',
Пример #17
0
def main(argv=None):  # IGNORE:C0111
    """Command-line entry point: fetch data from the Sphinx homepage and emit it.

    Args:
        argv: Optional extra command-line argument(s). A list is appended to
            ``sys.argv`` element-wise; any other non-None value is appended
            as a single item. Parsing then reads ``sys.argv``.

    Returns:
        0 on success, after listing element paths, or on Ctrl-C;
        1 when a ``CLIError`` is raised;
        2 on any other exception (re-raised instead when ``DEBUG`` or
        ``TESTRUN`` is truthy).
    """
    if isinstance(argv, list):
        sys.argv.extend(argv)
    elif argv is not None:
        sys.argv.append(argv)

    program_name = "sphinxhp-data-extractor"  # IGNORE:W0612 @UnusedVariable
    program_version = "v%s" % __versionstr__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_shortdesc = '''%s -- extract Sphinx markup data from the sphinx homepage ''' % program_name
    program_license = u'''%s
    
  Created by André Berg on %s.
  Copyright %s Berg Media. All rights reserved.
  
  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0
  
  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
''' % (program_shortdesc, str(__date__), time.strftime('%Y'))

    valid_formats = SphinxDatabase.VALID_FORMATS

    # Bound before the try block so the KeyboardInterrupt handler below can
    # always read it, even if the interrupt arrives during argument parsing.
    verbose = 0

    try:
        # Setup argument parser
        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
        # default=0: a "count" action otherwise yields None when -v is absent,
        # and comparing None with an int fails on Python 3.
        parser.add_argument("-v", "--verbose", dest="verbose", action="count", default=0, help="set verbosity level [default: %(default)s]")
        parser.add_argument("-l", "--list-epaths", dest="listepaths", action="store_true", help="list element paths available for querying the database and exit")
        parser.add_argument("-o", "--outdir", dest="outdir", help="default output directory. [default: %(default)s]", metavar="path")
        parser.add_argument("-f", "--force", dest="force", action="store_true", help="force creation of outdir if it doesn't exist. [default: %(default)s]")
        parser.add_argument("-s", "--siteurl", dest="siteurl", help="default url of the Sphinx homepage. can be a local file url [default: %(default)s]", metavar="url")
        parser.add_argument("-F", "--format", dest="format", help=("output format. One of %r or 'all'. " % (valid_formats)) + "You can specify multiple formats by separating with a colon, e.g. 'format1:format2' [default: %(default)s]")
        parser.add_argument('-V', '--version', action='version', version=program_version_message)
        parser.add_argument(dest="epaths", help="element paths of the data units to fetch. if None all that is considered 'data' will be emitted by the Database. [default: %(default)s]", metavar="epath", nargs='*')

        parser.set_defaults(siteurl=constants.DEFAULT_REMOTE_SITE_URL, outdir=os.curdir, epaths=None, force=False)

        parser.prog = program_name

        # Process arguments
        args = parser.parse_args()

        listepaths = args.listepaths
        epaths = args.epaths
        verbose = args.verbose
        formatstr = args.format
        siteurl = args.siteurl
        outdir = os.path.realpath(args.outdir)
        force = args.force

        if listepaths:
            print_epaths(siteurl)
            return 0

        if formatstr is None:
            formats = ['stdout']
        elif 'all' in formatstr:
            formats = valid_formats
        else:
            formats = formatstr.split(":")
            for fmt in formats:
                if fmt not in valid_formats:
                    raise CLIError("format '%s' not recognized" % fmt)

        if verbose > 0:
            print("Verbose mode on")
            print("format(s): %s" % ', '.join(formats))
            print("url: %s" % siteurl)
            print("outdir: %s" % outdir)
            print("force: %s" % force)
            print("epaths: %s" % epaths)

        # Probe remote URLs up front so an unreachable site fails early.
        urlcomps = urlsplit(siteurl)
        if not is_local_url(siteurl):
            response = urlrequest(urlcomps.netloc, urlcomps.path)
            if response.status != 200:
                raise ValueError("E: siteurl may be malformed.")

        if 'stdout' not in formats and not os.path.exists(outdir):
            if not force:
                raise CLIError("outdir %r doesn't exist.\nPass -f/--force if you want to have it created anway." % outdir)
            if verbose > 0:
                print("Creating path to outdir...")
            try:
                os.makedirs(outdir, 0o755)
            except OSError as e:
                # The error must be raised; merely constructing it would let
                # the run continue with a missing output directory.
                raise CLIError("outdir %r doesn't exist and couldn't be created. An exception occurred: %s" % (outdir, e))

        db = SphinxDatabase(siteurl)
        if verbose > 0:
            print("Initializing SphinxDatabase %d..." % id(db))
        db.initialize()

        for fmt in formats:
            # Each file-based format writes into its own sub-directory.
            _outdir = os.path.join(outdir, fmt)
            if fmt == "html":
                if verbose > 0:
                    print("Writing HTML data to '%s'" % _outdir)
                writer = HTMLWriter(db, _outdir)
                writer.write()
            elif fmt == 'csv':
                if verbose > 0:
                    print("Writing CSV data to '%s'" % _outdir)
                writer = CSVWriter(db, _outdir)
                # could just specify semicolon as colsep to get CSV seen
                # valid in German Excel, but we need to convert float values
                # from 0.n to 0,n as well so we use the callback function
                writer.value_callback = to_german_csv
                writer.write()
            elif fmt == 'tmprefs':
                if verbose > 0:
                    print("Writing TMPrefs data to '%s'" % _outdir)
                writer = TextMateWriter(db, _outdir)
                writer.write()
            elif fmt == 'list':
                if verbose > 0:
                    print("Writing List data to '%s'" % _outdir)
                writer = ListWriter(db, _outdir)
                writer.write(include_comments=True)
            elif fmt == 'listplain':
                if verbose > 0:
                    print("Writing List (plain) data to '%s'" % _outdir)
                writer = ListWriter(db, _outdir)
                writer.write()
            else:  # mode == 'stdout'
                if not epaths:
                    db.print_data(func=pprint)
                else:
                    db.print_data(epaths=epaths, func=pprint)
        return 0
    except KeyboardInterrupt:
        if verbose > 0:
            print("Aborted")
        return 0
    except CLIError as e:
        print(e)
        return 1
    except Exception as e:
        if DEBUG or TESTRUN:
            raise
        print(sys.argv[0].split("/")[-1] + ": " + str(e), file=sys.stderr)
        print("\t for help use --help", file=sys.stderr)
        return 2
Пример #18
0
def main(argv=None):  # IGNORE:C0111
    """Command-line entry point: fetch data from the Sphinx homepage and emit it.

    Args:
        argv: Optional extra command-line argument(s). A list is appended to
            ``sys.argv`` element-wise; any other non-None value is appended
            as a single item. Parsing then reads ``sys.argv``.

    Returns:
        0 on success, after listing element paths, or on Ctrl-C;
        1 when a ``CLIError`` is raised;
        2 on any other exception (re-raised instead when ``DEBUG`` or
        ``TESTRUN`` is truthy).
    """
    if isinstance(argv, list):
        sys.argv.extend(argv)
    elif argv is not None:
        sys.argv.append(argv)

    program_name = "sphinxhp-data-extractor"  # IGNORE:W0612 @UnusedVariable
    program_version = "v%s" % __versionstr__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version,
                                                     program_build_date)
    program_shortdesc = '''%s -- extract Sphinx markup data from the sphinx homepage ''' % program_name
    program_license = u'''%s
    
  Created by André Berg on %s.
  Copyright %s Berg Media. All rights reserved.
  
  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0
  
  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
''' % (program_shortdesc, str(__date__), time.strftime('%Y'))

    valid_formats = SphinxDatabase.VALID_FORMATS

    # Bound before the try block so the KeyboardInterrupt handler below can
    # always read it, even if the interrupt arrives during argument parsing.
    verbose = 0

    try:
        # Setup argument parser
        parser = ArgumentParser(description=program_license,
                                formatter_class=RawDescriptionHelpFormatter)
        # default=0: a "count" action otherwise yields None when -v is absent,
        # and comparing None with an int fails on Python 3.
        parser.add_argument("-v",
                            "--verbose",
                            dest="verbose",
                            action="count",
                            default=0,
                            help="set verbosity level [default: %(default)s]")
        parser.add_argument(
            "-l",
            "--list-epaths",
            dest="listepaths",
            action="store_true",
            help=
            "list element paths available for querying the database and exit")
        parser.add_argument(
            "-o",
            "--outdir",
            dest="outdir",
            help="default output directory. [default: %(default)s]",
            metavar="path")
        parser.add_argument(
            "-f",
            "--force",
            dest="force",
            action="store_true",
            help=
            "force creation of outdir if it doesn't exist. [default: %(default)s]"
        )
        parser.add_argument(
            "-s",
            "--siteurl",
            dest="siteurl",
            help=
            "default url of the Sphinx homepage. can be a local file url [default: %(default)s]",
            metavar="url")
        parser.add_argument(
            "-F",
            "--format",
            dest="format",
            help=("output format. One of %r or 'all'. " % (valid_formats)) +
            "You can specify multiple formats by separating with a colon, e.g. 'format1:format2' [default: %(default)s]"
        )
        parser.add_argument('-V',
                            '--version',
                            action='version',
                            version=program_version_message)
        parser.add_argument(
            dest="epaths",
            help=
            "element paths of the data units to fetch. if None all that is considered 'data' will be emitted by the Database. [default: %(default)s]",
            metavar="epath",
            nargs='*')

        parser.set_defaults(siteurl=constants.DEFAULT_REMOTE_SITE_URL,
                            outdir=os.curdir,
                            epaths=None,
                            force=False)

        parser.prog = program_name

        # Process arguments
        args = parser.parse_args()

        listepaths = args.listepaths
        epaths = args.epaths
        verbose = args.verbose
        formatstr = args.format
        siteurl = args.siteurl
        outdir = os.path.realpath(args.outdir)
        force = args.force

        if listepaths:
            print_epaths(siteurl)
            return 0

        if formatstr is None:
            formats = ['stdout']
        elif 'all' in formatstr:
            formats = valid_formats
        else:
            formats = formatstr.split(":")
            for fmt in formats:
                if fmt not in valid_formats:
                    raise CLIError("format '%s' not recognized" % fmt)

        if verbose > 0:
            print("Verbose mode on")
            print("format(s): %s" % ', '.join(formats))
            print("url: %s" % siteurl)
            print("outdir: %s" % outdir)
            print("force: %s" % force)
            print("epaths: %s" % epaths)

        # Probe remote URLs up front so an unreachable site fails early.
        urlcomps = urlsplit(siteurl)
        if not is_local_url(siteurl):
            response = urlrequest(urlcomps.netloc, urlcomps.path)
            if response.status != 200:
                raise ValueError("E: siteurl may be malformed.")

        if 'stdout' not in formats and not os.path.exists(outdir):
            if not force:
                raise CLIError(
                    "outdir %r doesn't exist.\nPass -f/--force if you want to have it created anway."
                    % outdir)
            if verbose > 0:
                print("Creating path to outdir...")
            try:
                os.makedirs(outdir, 0o755)
            except OSError as e:
                # The error must be raised; merely constructing it would let
                # the run continue with a missing output directory.
                raise CLIError(
                    "outdir %r doesn't exist and couldn't be created. An exception occurred: %s"
                    % (outdir, e))

        db = SphinxDatabase(siteurl)
        if verbose > 0:
            print("Initializing SphinxDatabase %d..." % id(db))
        db.initialize()

        for fmt in formats:
            # Each file-based format writes into its own sub-directory.
            _outdir = os.path.join(outdir, fmt)
            if fmt == "html":
                if verbose > 0:
                    print("Writing HTML data to '%s'" % _outdir)
                writer = HTMLWriter(db, _outdir)
                writer.write()
            elif fmt == 'csv':
                if verbose > 0:
                    print("Writing CSV data to '%s'" % _outdir)
                writer = CSVWriter(db, _outdir)
                # could just specify semicolon as colsep to get CSV seen
                # valid in German Excel, but we need to convert float values
                # from 0.n to 0,n as well so we use the callback function
                writer.value_callback = to_german_csv
                writer.write()
            elif fmt == 'tmprefs':
                if verbose > 0:
                    print("Writing TMPrefs data to '%s'" % _outdir)
                writer = TextMateWriter(db, _outdir)
                writer.write()
            elif fmt == 'list':
                if verbose > 0:
                    print("Writing List data to '%s'" % _outdir)
                writer = ListWriter(db, _outdir)
                writer.write(include_comments=True)
            elif fmt == 'listplain':
                if verbose > 0:
                    print("Writing List (plain) data to '%s'" % _outdir)
                writer = ListWriter(db, _outdir)
                writer.write()
            else:  # mode == 'stdout'
                if not epaths:
                    db.print_data(func=pprint)
                else:
                    db.print_data(epaths=epaths, func=pprint)
        return 0
    except KeyboardInterrupt:
        if verbose > 0:
            print("Aborted")
        return 0
    except CLIError as e:
        print(e)
        return 1
    except Exception as e:
        if DEBUG or TESTRUN:
            raise
        print(sys.argv[0].split("/")[-1] + ": " + str(e), file=sys.stderr)
        print("\t for help use --help", file=sys.stderr)
        return 2
Пример #19
0
    def run(self):
        """Build the documentation nodes for an argparse parser.

        The parser source is chosen from the directive options:
        ``module`` + ``func``, a dotted ``ref``, or ``filename`` + ``func``
        (the file is compiled and executed, and ``func`` is looked up in the
        resulting namespace). The located object may already be an
        ``ArgumentParser``, may be called with a fresh parser when
        ``passparser`` is set, or is otherwise called to obtain the parser.

        Returns:
            The manpage-specific structure when the ``manpage`` option is
            set; otherwise a list of nodes (nested content, description,
            usage block, action groups, subcommands, epilog).

        Raises:
            The directive error built by ``self.error`` when no valid option
            combination is given, when the import fails, or when the module
            lacks the requested attribute.
        """
        options = self.options
        # Tracks which branch supplied ``func``; deciding on the mere presence
        # of 'filename' would skip the import when module/ref was used too.
        loaded_from_file = False

        if 'module' in options and 'func' in options:
            module_name = options['module']
            attr_name = options['func']
        elif 'ref' in options:
            _parts = options['ref'].split('.')
            module_name = '.'.join(_parts[0:-1])
            attr_name = _parts[-1]
        elif 'filename' in options and 'func' in options:
            mod = {}
            filename = options['filename']
            try:
                f = open(filename)
            except IOError:
                # try open with abspath
                f = open(os.path.abspath(filename))
            # Close the handle as soon as the source has been read.
            with f:
                code = compile(f.read(), filename, 'exec')
            exec(code, mod)
            attr_name = options['func']
            func = mod[attr_name]
            loaded_from_file = True
        else:
            raise self.error(
                ':module: and :func: should be specified, or :ref:, or :filename: and :func:')

        # A local file cannot be imported; everything else is resolved here.
        if not loaded_from_file:
            try:
                mod = __import__(module_name, globals(), locals(), [attr_name])
            except (Exception, SystemExit) as exc:
                # SystemExit is caught too so that a module exiting during
                # import is reported as a directive error; KeyboardInterrupt
                # is deliberately left to propagate.
                raise self.error('Failed to import "%s" from "%s".\n%s' % (attr_name, module_name, exc))

            if not hasattr(mod, attr_name):
                raise self.error((
                    'Module "%s" has no attribute "%s"\n'
                    'Incorrect argparse :module: or :func: values?'
                ) % (module_name, attr_name))
            func = getattr(mod, attr_name)

        if isinstance(func, ArgumentParser):
            parser = func
        elif 'passparser' in options:
            parser = ArgumentParser()
            func(parser)
        else:
            parser = func()
        if 'path' not in options:
            options['path'] = ''
        path = str(options['path'])
        if 'prog' in options:
            parser.prog = options['prog']
        result = parse_parser(
            parser, skip_default_values='nodefault' in options, skip_default_const_values='nodefaultconst' in options)
        result = parser_navigate(result, path)
        if 'manpage' in options:
            return self._construct_manpage_specific_structure(result)

        # Handle nested content, where markdown needs to be preprocessed
        items = []
        nested_content = nodes.paragraph()
        if 'markdown' in options:
            from sphinxarg.markdown import parseMarkDownBlock
            items.extend(parseMarkDownBlock('\n'.join(self.content) + '\n'))
        else:
            self.state.nested_parse(
                self.content, self.content_offset, nested_content)
            nested_content = nested_content.children
        # add common content between
        items.extend(item for item in nested_content
                     if not isinstance(item, nodes.definition_list))

        markDownHelp = 'markdownhelp' in options
        if 'description' in result and 'nodescription' not in options:
            if markDownHelp:
                items.extend(renderList([result['description']], True))
            else:
                items.append(self._nested_parse_paragraph(result['description']))
        items.append(nodes.literal_block(text=result['usage']))
        items.extend(print_action_groups(result, nested_content, markDownHelp,
                                         settings=self.state.document.settings))
        if 'nosubcommands' not in options:
            items.extend(print_subcommands(result, nested_content, markDownHelp,
                                           settings=self.state.document.settings))
        if 'epilog' in result and 'noepilog' not in options:
            items.append(self._nested_parse_paragraph(result['epilog']))

        # Traverse the returned nodes, modifying the title IDs as necessary to avoid repeats
        ensureUniqueIDs(items)

        return items
Пример #20
0
from orangecloud_client.commands.shell.parser import parse_line, InvalidSynthax


class StorePositional(Action):
    """argparse action that records parsed values in the order encountered.

    Each call appends a ``(dest, values)`` tuple to a list kept on the
    namespace under ``ORDER_ARGS_ATTRIBUTE_NAME``; the list is created on
    first use. The value is not stored under ``dest`` itself.
    """

    # Namespace attribute holding the ordered list of (dest, values) tuples.
    ORDER_ARGS_ATTRIBUTE_NAME = 'ordered_args'

    def __call__(self, _, namespace, values, option_string=None):
        # Read and write through the same constant (looked up via self) so the
        # attribute name is defined in exactly one place and can be overridden.
        attr_name = self.ORDER_ARGS_ATTRIBUTE_NAME
        if attr_name not in namespace:
            setattr(namespace, attr_name, [])
        getattr(namespace, attr_name).append((self.dest, values))


# Root parser for the shell commands; prog is blanked after construction.
parser = ArgumentParser(add_help=True)
parser.prog = ''
# The selected sub-command name is stored on the namespace as ``action``.
subparsers = parser.add_subparsers(dest='action', help='commands')

# cd <path>
sub_parser = subparsers.add_parser('cd', help='Change directory')
sub_parser.add_argument(
    'path',
    type=str,
    action=StorePositional,
    help='The path composed of names and ..',
)

# mkdir <name>
sub_parser = subparsers.add_parser('mkdir', help='Create a directory')
sub_parser.add_argument(
    'name',
    type=str,
    action=StorePositional,
    help='The folder name',
)

# ls [name]
sub_parser = subparsers.add_parser('ls', help='List file or directory')
sub_parser.add_argument(
    'name',
    nargs='?',
    type=str,
    metavar='name',
    action=StorePositional,
    help='The sub-folder/file name',
)

# rm <name>
sub_parser = subparsers.add_parser('rm', help='Remove a file/folder')
sub_parser.add_argument(
    'name',
    action=StorePositional,
    help='The sub-folder/file name',
)
Пример #21
0
from orangecloud_client.commands.shell.parser import parse_line, InvalidSynthax


class StorePositional(Action):
    """argparse action that records parsed values in the order encountered.

    Each call appends a ``(dest, values)`` tuple to a list kept on the
    namespace under ``ORDER_ARGS_ATTRIBUTE_NAME``; the list is created on
    first use. The value is not stored under ``dest`` itself.
    """

    # Namespace attribute holding the ordered list of (dest, values) tuples.
    ORDER_ARGS_ATTRIBUTE_NAME = 'ordered_args'

    def __call__(self, _, namespace, values, option_string=None):
        # Read and write through the same constant (looked up via self) so the
        # attribute name is defined in exactly one place and can be overridden.
        attr_name = self.ORDER_ARGS_ATTRIBUTE_NAME
        if attr_name not in namespace:
            setattr(namespace, attr_name, [])
        getattr(namespace, attr_name).append((self.dest, values))


# Root parser for the shell commands; prog is blanked after construction.
parser = ArgumentParser(add_help=True)
parser.prog = ''
# The selected sub-command name is stored on the namespace as ``action``.
subparsers = parser.add_subparsers(dest='action', help='commands')

# cd <path>
sub_parser = subparsers.add_parser('cd', help='Change directory')
sub_parser.add_argument(
    'path', type=str, action=StorePositional,
    help='The path composed of names and ..')

# mkdir <name>
sub_parser = subparsers.add_parser('mkdir', help='Create a directory')
sub_parser.add_argument(
    'name', type=str, action=StorePositional,
    help='The folder name')

sub_parser = subparsers.add_parser('ls', help='List file or directory')