Пример #1
0
import mwcli

# Router for the "mwtext" script.  The dict passed as the last argument maps
# each subcommand name to the description string handed to mwcli.Router.
router = mwcli.Router(
    "mwtext",
    "This script provides access to a set of utilities for text processing",
    {
        "preprocess_text": (
            "Converts an XML dump to preprocessed plaintext. "
            "One line per chunk."
        ),
        "learn_vectors": (
            "Learn a set of word vectors from preprocessed "
            "plaintext"
        ),
        "word2vec2gensim": (
            "Converts word2vec format to gensim KeyedVector "
            "binaries"
        ),
    },
)

# Re-export the router's `main` attribute under the module-level name `main`.
# NOTE(review): presumably this is the console-script entry point -- confirm
# against the package's setup configuration.
main = router.main
Пример #2
0
from __future__ import absolute_import
import mwcli

# Router for the "mwxml" script.  The dict passed as the last argument maps
# each subcommand name to the description string handed to mwcli.Router.
# The explicit u"" prefixes are kept so the values stay unicode on Python 2.
router = mwcli.Router(
    u"mwxml",
    (
        u"This script provides access to a set of utilities for extracting "
        u"content from MediaWiki XML dumps."
    ),
    {
        u"dump2revdocs": (
            u"Converts XML dumps to revision documents (XML --> JSON)"
        ),
        u"validate": (
            u"Compares a stream of revision documents against a schema"
        ),
        u"normalize": (
            u"Converts a stream of old revision documents to documents "
            u"that validate against the current schema"
        ),
        u"inflate": (
            u"Converts a stream of flat revision documents to standard "
            u"revision documents"
        ),
    },
)

# Re-export the router's `main` attribute under the module-level name `main`.
# NOTE(review): presumably this is the console-script entry point -- confirm
# against the package's setup configuration.
main = router.main
Пример #3
0
import mwcli

# Router for the "mwrefs" script.  The dict passed as the last argument maps
# each subcommand name to the description string handed to mwcli.Router.
#
# The description strings are user-facing help text, so they are kept free of
# stray whitespace and duplicated articles.
router = mwcli.Router(
    "mwrefs",
    "A set of utilities for extracting and processing <ref>s in "
    "MediaWiki projects.",
    {
        'diffs': "Extracts changes to <ref>s from XML dumps",
        'extract': "Extracts all <ref>s from XML dumps",
        'fetch_references': (
            "Gets the reference documents for a revision from "
            "a MediaWiki API"
        ),
    },
)

# Re-export the router's `main` attribute under the module-level name `main`.
# NOTE(review): presumably this is the console-script entry point -- confirm
# against the package's setup configuration.
main = router.main
Пример #4
0
import mwcli

# Router for the "mwviews" script.  The dict passed as the last argument maps
# each subcommand name to the description string handed to mwcli.Router.
router = mwcli.Router(
    "mwviews",
    (
        "This script provides access to a set of utilities for "
        "processing view counts."
    ),
    {
        "aggregate": "Aggregate view counts from hourly view files",
        "fetch_global_namespaces": (
            "Fetches a dataset of namespace names for all wikis"
        ),
    },
)

# Re-export the router's `main` attribute under the module-level name `main`.
# NOTE(review): presumably this is the console-script entry point -- confirm
# against the package's setup configuration.
main = router.main
Пример #5
0
import mwcli

# Router for the "mwtext" script.  The dict passed as the last argument maps
# each subcommand name to the description string handed to mwcli.Router.
router = mwcli.Router(
    "mwtext",
    "This script provides access to a set of utilities for text processing",
    {
        "transform_content": "Transforms an XML dump using a transformer",
        "words2plaintext": (
            "Converts a 'words' type transformation into "
            "plaintext -- optionally with labels"
        ),
        "learn_vectors": (
            "Learn a set of word vectors from preprocessed "
            "plaintext"
        ),
        "word2vec2gensim": (
            "Converts word2vec format to gensim KeyedVector "
            "binaries"
        ),
    },
)

# Re-export the router's `main` attribute under the module-level name `main`.
# NOTE(review): presumably this is the console-script entry point -- confirm
# against the package's setup configuration.
main = router.main