# Command-line router for the "mwtext" package.
#
# Builds an mwcli.Router from the package name, a one-line description and a
# table of utilities, then re-exports the router's ``main`` callable.
import mwcli

# Summary text passed to the router as its second argument.
_DESCRIPTION = (
    "This script provides access to a set of utilities for text processing"
)

# Utility name -> one-line description.
_UTILITIES = {
    'preprocess_text': (
        "Converts an XML dump to preprocessed plaintext. "
        "One line per chunk."
    ),
    'learn_vectors': (
        "Learn a set of word vectors from preprocessed "
        "plaintext"
    ),
    'word2vec2gensim': (
        "Converts word2vec format to gensim KeyedVector "
        "binaries"
    ),
}

router = mwcli.Router("mwtext", _DESCRIPTION, _UTILITIES)

# NOTE(review): presumably referenced as the console-script entry point --
# confirm against the packaging configuration.
main = router.main
# Command-line router for the "mwxml" package.
#
# Builds an mwcli.Router from the package name, a one-line description and a
# table of utilities, then re-exports the router's ``main`` callable.
#
# NOTE(review): the ``u""`` prefixes and the ``__future__`` import are
# presumably kept for Python 2 compatibility -- confirm before removing.
from __future__ import absolute_import

import mwcli

# Summary text passed to the router as its second argument.
_DESCRIPTION = (
    u"This script provides access to a set of utilities for extracting "
    u"content from MediaWiki XML dumps."
)

# Utility name -> one-line description.
_UTILITIES = {
    u'dump2revdocs': (
        u"Converts XML dumps to revision documents (XML --> JSON)"
    ),
    u'validate': (
        u"Compares a stream of revision documents against a schema"
    ),
    u'normalize': (
        u"Converts a stream of old revision documents to documents "
        u"that validate against the current schema"
    ),
    u'inflate': (
        u"Converts a stream of flat revision documents to standard "
        u"revision documents"
    ),
}

router = mwcli.Router(u"mwxml", _DESCRIPTION, _UTILITIES)

# NOTE(review): presumably referenced as the console-script entry point --
# confirm against the packaging configuration.
main = router.main
# Command-line router for the "mwrefs" package.
#
# Builds an mwcli.Router from the package name, a one-line description and a
# table of utilities, then re-exports the router's ``main`` callable.
import mwcli

router = mwcli.Router(
    "mwrefs",
    # Single space between "in" and "MediaWiki": the two halves of this
    # string previously each contributed one, yielding "in  MediaWiki".
    "A set of utilities for extracting and processing <ref>s in "
    "MediaWiki projects.",
    # Utility name -> one-line description.
    {
        'diffs': "Extracts changes to <ref>s from XML dumps",
        'extract': "Extracts all <ref>s from XML dumps",
        # Help text previously read "from the a MediaWiki API".
        'fetch_references': (
            "Gets the reference documents for a revision from "
            "a MediaWiki API"
        ),
    },
)

# NOTE(review): presumably referenced as the console-script entry point --
# confirm against the packaging configuration.
main = router.main
# Command-line router for the "mwviews" package.
#
# Builds an mwcli.Router from the package name, a one-line description and a
# table of utilities, then re-exports the router's ``main`` callable.
import mwcli

# Summary text passed to the router as its second argument.
_DESCRIPTION = (
    "This script provides access to a set of utilities for processing "
    "view counts."
)

# Utility name -> one-line description.
_UTILITIES = {
    'aggregate': "Aggregate view counts from hourly view files",
    'fetch_global_namespaces': (
        "Fetches a dataset of namespace names for all wikis"
    ),
}

router = mwcli.Router("mwviews", _DESCRIPTION, _UTILITIES)

# NOTE(review): presumably referenced as the console-script entry point --
# confirm against the packaging configuration.
main = router.main
# Command-line router for the "mwtext" package.
#
# Builds an mwcli.Router from the package name, a one-line description and a
# table of utilities, then re-exports the router's ``main`` callable.
import mwcli

# Summary text passed to the router as its second argument.
_DESCRIPTION = (
    "This script provides access to a set of utilities for text processing"
)

# Utility name -> one-line description.
_UTILITIES = {
    'transform_content': "Transforms an XML dump using a transformer",
    'words2plaintext': (
        "Converts a 'words' type transformation into "
        "plaintext -- optionally with labels"
    ),
    'learn_vectors': (
        "Learn a set of word vectors from preprocessed "
        "plaintext"
    ),
    'word2vec2gensim': (
        "Converts word2vec format to gensim KeyedVector "
        "binaries"
    ),
}

router = mwcli.Router("mwtext", _DESCRIPTION, _UTILITIES)

# NOTE(review): presumably referenced as the console-script entry point --
# confirm against the packaging configuration.
main = router.main