Пример #1
0
def configure_command_line_arguments():
    """Define the Naive Bayes classifier CLI, parse the command line and set the log level.

    Returns:
        dict: ``vars()`` of the parsed namespace.  Keys are derived from the
        long option names (``verbose``, ``veryVerbose``, ``abc``, ``genesis``,
        ``gutenberg``, ``inaugural``, ``stateUnion``, ``webtext``, ``train``,
        ``classify``, ``stemming``, ``printProbabilities``) plus whatever
        ``fs.add_filesystem_path_args`` registers for ``--custom``.

    Note:
        ``parser.parse_args()`` reads ``sys.argv`` and terminates the process
        on invalid input or ``-h``.
    """
    parser = argparse.ArgumentParser(description='Naive Bayes Classifier')

    # Verbosity switches.  Verbose shows some logs, veryVerbose shows more;
    # the two cannot be combined.
    logging_group = parser.add_mutually_exclusive_group(required=False)
    logging_group.add_argument("-v",
                               "--verbose",
                               help="Set the log level verbose.",
                               action='store_true')
    # The help text used to be a copy of the -v text, which made the two
    # switches indistinguishable in --help.
    logging_group.add_argument("-vv",
                               "--veryVerbose",
                               help="Set the log level very verbose.",
                               action='store_true')

    # Built-in NLTK plaintext corpora.  They are registered directly on the
    # parser (not in an exclusive group), so several may be passed together.
    # Registration order is kept because it is the order shown by --help.
    corpus_flags = (
        ('-abc', '--abc', "ABC news corpus"),
        ('-gen', '--genesis', "The book of Genesis from the Bible."),
        ('-gut', '--gutenberg', "Text from Project Gutenberg."),
        ('-in', '--inaugural', "Text from inaugural addresses."),
        ('-su', '--stateUnion', "Text from State of the Union Addresses."),
        ('-web', '--webtext', "Text taken from the web."),
    )
    for short_name, long_name, help_text in corpus_flags:
        parser.add_argument(short_name,
                            long_name,
                            help=help_text,
                            action='store_true')

    # Optional user-provided corpus.  The directory can contain multiple
    # files and directories (if the user also passes --recursive).
    fs.add_filesystem_path_args(parser,
                                '-c',
                                '--custom',
                                help='Directory of files to include in a custom corpus.',
                                required=False)

    # Train the classifier.
    parser.add_argument('-t',
                        '--train',
                        help="Train the classifier using the NLTK tokens",
                        action='store_true')

    # Unlike the switches around it, --classify uses the default 'store'
    # action and therefore expects a value on the command line.
    parser.add_argument('-cl',
                        '--classify',
                        help="Classify the contents of classify.txt",
                        required=False)

    # Enable stemming.
    parser.add_argument('-s',
                        '--stemming',
                        help="Stem in the classifier or trainer.",
                        action='store_true')

    # Print the per-word probabilities.
    parser.add_argument('-lp',
                        '--printProbabilities',
                        help="Print each word probability.",
                        action='store_true')

    # Parse the passed commandline args and turn them into a dictionary.
    args = vars(parser.parse_args())

    # Configure the log level based on passed in args to be one of DEBUG, INFO, WARN, ERROR, CRITICAL
    log.set_log_level_from_args(args)

    return args
Пример #2
0
def configure_command_line_arguments():
    """Define the NLTK word-play CLI, parse the command line and set the log level.

    Exactly one option of the corpus group must be supplied (the group is
    created with ``required=True``); ``--stem`` and ``--lemma`` exclude each
    other; the calculation switches can be combined freely.

    Returns:
        dict: ``vars()`` of the parsed namespace, keyed by the long option
        names (``verbose``, ``veryVerbose``, ``abc``, ..., ``stemVsLemma``,
        ``stem``, ``lemma``, ``vocabularySize``, ``termPresence``,
        ``termFrequency``, ``logNormalize``, ``frequencyFrequency``) plus
        whatever ``fs.add_filesystem_path_args`` registers for ``--custom``.

    Note:
        ``parser.parse_args()`` reads ``sys.argv`` and terminates the process
        on invalid input or ``-h``.
    """
    parser = argparse.ArgumentParser(description='Play with words using NLTK.')

    # Verbosity switches.  Verbose shows some logs, veryVerbose shows more;
    # the two cannot be combined.
    logging_group = parser.add_mutually_exclusive_group(required=False)
    logging_group.add_argument("-v",
                               "--verbose",
                               help="Set the log level verbose.",
                               action='store_true')
    # The help text used to be a copy of the -v text, which made the two
    # switches indistinguishable in --help.
    logging_group.add_argument("-vv",
                               "--veryVerbose",
                               help="Set the log level very verbose.",
                               action='store_true')

    # The user picks exactly one entry from this group: one of the six
    # built-in NLTK plaintext corpora, the stem-vs-lemma chart, or a custom
    # corpus directory.
    corpora_group = parser.add_mutually_exclusive_group(required=True)
    corpus_flags = (
        ('-abc', '--abc', "ABC news corpus"),
        ('-gen', '--genesis', "The book of Genesis from the Bible."),
        ('-gut', '--gutenberg', "Text from Project Gutenberg."),
        ('-in', '--inaugural', "Text from inaugural addresses."),
        ('-su', '--stateUnion', "Text from State of the Union Addresses."),
        ('-web', '--webtext', "Text taken from the web."),
        ('-svl', '--stemVsLemma',
         "Generate chart of corpus length of original, stemmed and lemmatized word"),
    )
    for short_name, long_name, help_text in corpus_flags:
        corpora_group.add_argument(short_name,
                                   long_name,
                                   help=help_text,
                                   action='store_true')

    # User-provided corpus.  The directory can contain multiple files and
    # directories (if the user also passes --recursive).  The corpus group is
    # handed to the helper, presumably so --custom joins the exclusive set.
    fs.add_filesystem_path_args(
        parser,
        '-c',
        '--custom',
        help='Directory of files to include in a custom corpus.',
        required=False,
        group=corpora_group)

    # Optionally, the user is able to stem or lemmatize the input (not both).
    preprocessing_group = parser.add_mutually_exclusive_group(required=False)
    preprocessing_group.add_argument('-s',
                                     '--stem',
                                     help="Stem the input.",
                                     action='store_true')
    preprocessing_group.add_argument('-l',
                                     '--lemma',
                                     help="Lemmatize the input.",
                                     action='store_true')

    # What do you want to know?  One or more of these calculations can be
    # requested for the selected input.
    calculation_flags = (
        # Vocabulary size of the selected corpus
        ('-vs', '--vocabularySize', "Calculate the vocabulary size."),
        # All terms found in the corpus
        ('-tp', '--termPresence', "List all words that are present."),
        # Frequency of terms in the corpus
        ('-tf', '--termFrequency', "Calculate the frequency of each word."),
        # Log-normalized term frequencies
        ('-ln', '--logNormalize', "Calculate the log of the frequency."),
        # Frequency of each frequency of terms
        ('-ff', '--frequencyFrequency',
         "Calculate the frequency of each frequency.  For example, 7 words appear once, 5 appear twice, etc."),
    )
    for short_name, long_name, help_text in calculation_flags:
        parser.add_argument(short_name,
                            long_name,
                            help=help_text,
                            action='store_true')

    # Parse the passed commandline args and turn them into a dictionary.
    args = vars(parser.parse_args())

    # Configure the log level based on passed in args to be one of DEBUG, INFO, WARN, ERROR, CRITICAL
    log.set_log_level_from_args(args)

    return args
Пример #3
0
def configure_command_line_arguments():
    """Define the probability-playground CLI, parse the command line and set the log level.

    The numeric options declare an explicit ``type`` so that a value given on
    the command line has the same Python type as the option's default
    (previously user-supplied values arrived as ``str`` while defaults were
    ``int``/``float``).

    Returns:
        dict: ``vars()`` of the parsed namespace, keyed by the long option
        names: ``verbose``, ``veryVerbose``, ``coinFlip``,
        ``coinFlipMultiplier``, ``diceRoll``, ``numDice``, ``numTrials``,
        ``uniformDistribution``, ``gaussianDistribution``, ``mean``,
        ``standardDeviation``, ``poissonDistribution``, ``lambda``, ``jars``.

    Note:
        ``parser.parse_args()`` reads ``sys.argv`` and terminates the process
        on invalid input (including non-numeric values for the numeric
        options) or ``-h``.
    """
    parser = argparse.ArgumentParser(description='Play with probabilities')

    # Verbosity switches.  Verbose shows some logs, veryVerbose shows more;
    # the two cannot be combined.
    logging_group = parser.add_mutually_exclusive_group(required=False)
    logging_group.add_argument("-v",
                               "--verbose",
                               help="Set the log level verbose.",
                               action='store_true')
    # The help text used to be a copy of the -v text, which made the two
    # switches indistinguishable in --help.
    logging_group.add_argument("-vv",
                               "--veryVerbose",
                               help="Set the log level very verbose.",
                               action='store_true')

    switch = {'action': 'store_true'}

    # (short name, long name, help text, extra add_argument keywords), in the
    # order the options should appear in --help.
    option_table = (
        # Run the coin flip simulation and plot the output
        ('-cf', '--coinFlip',
         "Generate the coin flip offset chart",
         switch),
        # How big of a multiplier to use when stepping the coin flipper
        ('-cfm', '--coinFlipMultiplier',
         "Each iteration of the coin flip test runs more times than the previous, where current = cfm*previous.",
         {'type': float, 'default': 1.2}),
        # Run the dice roll simulation and plot the output
        ('-d', '--diceRoll',
         "Generate the dice roll distribution",
         switch),
        # Number of dice to use in the dice roller
        ('-nd', '--numDice',
         "How many dice to use in the dice roll distribution",
         {'type': int, 'default': 1}),
        # Number of trials to use for any of the simulations
        ('-nt', '--numTrials',
         "How many trials to use for generating the distribution",
         {'type': int, 'default': 1000000}),
        # Generate a uniform distribution
        ('-ud', '--uniformDistribution',
         "Generate a Uniform distribution",
         switch),
        # Generate a gaussian distribution
        ('-gd', '--gaussianDistribution',
         "Generate a Gaussian distribution",
         switch),
        # Mean for the gaussian distribution
        ('-m', '--mean',
         "Set the mean for the Gaussian distribution",
         {'type': float, 'default': 0}),
        # Standard deviation for the gaussian distribution
        ('-sd', '--standardDeviation',
         "Set the standard deviation for the Gaussian distribution",
         {'type': float, 'default': 1}),
        # Generate a poisson distribution
        ('-pd', '--poissonDistribution',
         "Generate a Poisson distribution",
         switch),
        # Lambda for the poisson distribution.  The resulting key is
        # 'lambda', a Python keyword, so it is only reachable through the
        # returned dict.
        ('-l', '--lambda',
         "Set lambda (the expected arrival rate) for the Poisson distribution",
         {'type': float, 'default': 3}),
        # Run marble/jar simulation
        ('-j', '--jars',
         "Calculate probability distribution of marble choices.",
         switch),
    )
    for short_name, long_name, help_text, extra in option_table:
        parser.add_argument(short_name,
                            long_name,
                            help=help_text,
                            required=False,
                            **extra)

    # Parse the passed commandline args and turn them into a dictionary.
    args = vars(parser.parse_args())

    # Configure the log level based on passed in args to be one of DEBUG, INFO, WARN, ERROR, CRITICAL
    log.set_log_level_from_args(args)

    return args
Пример #4
0
def configure_command_line_arguments():
    """Define the NLTK word-play CLI, parse the command line and set the log level.

    Exactly one option of the corpus group must be supplied (the group is
    created with ``required=True``); ``--stem`` and ``--lemma`` exclude each
    other; the calculation switches can be combined freely.

    Returns:
        dict: ``vars()`` of the parsed namespace, keyed by the long option
        names (``verbose``, ``veryVerbose``, ``abc``, ..., ``stemVsLemma``,
        ``stem``, ``lemma``, ``vocabularySize``, ``termPresence``,
        ``termFrequency``, ``logNormalize``, ``frequencyFrequency``) plus
        whatever ``fs.add_filesystem_path_args`` registers for ``--custom``.

    Note:
        ``parser.parse_args()`` reads ``sys.argv`` and terminates the process
        on invalid input or ``-h``.
    """

    def add_switches(target, switches):
        # Register each (short, long, help) triple on ``target`` as a boolean
        # store_true option, keeping the given order for --help.
        for short_name, long_name, help_text in switches:
            target.add_argument(short_name, long_name, help=help_text, action="store_true")

    parser = argparse.ArgumentParser(description="Play with words using NLTK.")

    # Verbosity switches.  Verbose shows some logs, veryVerbose shows more;
    # the two cannot be combined.  The -vv help text used to be a copy of the
    # -v text, which made the two indistinguishable in --help.
    logging_group = parser.add_mutually_exclusive_group(required=False)
    add_switches(
        logging_group,
        [
            ("-v", "--verbose", "Set the log level verbose."),
            ("-vv", "--veryVerbose", "Set the log level very verbose."),
        ],
    )

    # The user picks exactly one entry from this group: one of the six
    # built-in NLTK plaintext corpora, the stem-vs-lemma chart, or a custom
    # corpus directory.
    corpora_group = parser.add_mutually_exclusive_group(required=True)
    add_switches(
        corpora_group,
        [
            ("-abc", "--abc", "ABC news corpus"),
            ("-gen", "--genesis", "The book of Genesis from the Bible."),
            ("-gut", "--gutenberg", "Text from Project Gutenberg."),
            ("-in", "--inaugural", "Text from inaugural addresses."),
            ("-su", "--stateUnion", "Text from State of the Union Addresses."),
            ("-web", "--webtext", "Text taken from the web."),
            (
                "-svl",
                "--stemVsLemma",
                "Generate chart of corpus length of original, stemmed and lemmatized word",
            ),
        ],
    )

    # User-provided corpus.  The directory can contain multiple files and
    # directories (if the user also passes --recursive).  The corpus group is
    # handed to the helper, presumably so --custom joins the exclusive set.
    fs.add_filesystem_path_args(
        parser,
        "-c",
        "--custom",
        help="Directory of files to include in a custom corpus.",
        required=False,
        group=corpora_group,
    )

    # Optionally, the user is able to stem or lemmatize the input (not both).
    preprocessing_group = parser.add_mutually_exclusive_group(required=False)
    add_switches(
        preprocessing_group,
        [
            ("-s", "--stem", "Stem the input."),
            ("-l", "--lemma", "Lemmatize the input."),
        ],
    )

    # What do you want to know?  One or more of these calculations can be
    # requested for the selected input.
    add_switches(
        parser,
        [
            # Vocabulary size of the selected corpus
            ("-vs", "--vocabularySize", "Calculate the vocabulary size."),
            # All terms found in the corpus
            ("-tp", "--termPresence", "List all words that are present."),
            # Frequency of terms in the corpus
            ("-tf", "--termFrequency", "Calculate the frequency of each word."),
            # Log-normalized term frequencies
            ("-ln", "--logNormalize", "Calculate the log of the frequency."),
            # Frequency of each frequency of terms
            (
                "-ff",
                "--frequencyFrequency",
                "Calculate the frequency of each frequency.  For example, 7 words appear once, 5 appear twice, etc.",
            ),
        ],
    )

    # Parse the passed commandline args and turn them into a dictionary.
    args = vars(parser.parse_args())

    # Configure the log level based on passed in args to be one of DEBUG, INFO, WARN, ERROR, CRITICAL
    log.set_log_level_from_args(args)

    return args
Пример #5
0
def configure_command_line_arguments():
    """Define the Naive Bayes classifier CLI, parse the command line and set the log level.

    Returns:
        dict: ``vars()`` of the parsed namespace.  Keys are derived from the
        long option names (``verbose``, ``veryVerbose``, ``abc``, ``genesis``,
        ``gutenberg``, ``inaugural``, ``stateUnion``, ``webtext``, ``train``,
        ``classify``, ``stemming``, ``printProbabilities``) plus whatever
        ``fs.add_filesystem_path_args`` registers for ``--custom``.

    Note:
        ``parser.parse_args()`` reads ``sys.argv`` and terminates the process
        on invalid input or ``-h``.
    """

    def add_switch(target, short_name, long_name, help_text):
        # Register one boolean store_true option on ``target``.
        target.add_argument(short_name,
                            long_name,
                            help=help_text,
                            action='store_true')

    parser = argparse.ArgumentParser(description='Naive Bayes Classifier')

    # Verbosity switches.  Verbose shows some logs, veryVerbose shows more;
    # the two cannot be combined.  The -vv help text used to be a copy of the
    # -v text, which made the two indistinguishable in --help.
    logging_group = parser.add_mutually_exclusive_group(required=False)
    add_switch(logging_group, "-v", "--verbose",
               "Set the log level verbose.")
    add_switch(logging_group, "-vv", "--veryVerbose",
               "Set the log level very verbose.")

    # Built-in NLTK plaintext corpora.  They are registered directly on the
    # parser (not in an exclusive group), so several may be passed together.
    # The first is a corpus taken from ABC news.
    add_switch(parser, '-abc', '--abc', "ABC news corpus")
    # The second corpus is the book of Genesis
    add_switch(parser, '-gen', '--genesis',
               "The book of Genesis from the Bible.")
    # Third is a collection of text from project Gutenberg
    add_switch(parser, '-gut', '--gutenberg', "Text from Project Gutenberg.")
    # Fourth is text from presidential inaugural addresses
    add_switch(parser, '-in', '--inaugural', "Text from inaugural addresses.")
    # Fifth is text from the State of the Union
    add_switch(parser, '-su', '--stateUnion',
               "Text from State of the Union Addresses.")
    # The final NLTK provided corpus is text from the web
    add_switch(parser, '-web', '--webtext', "Text taken from the web.")

    # Optional user-provided corpus.  The directory can contain multiple
    # files and directories (if the user also passes --recursive).
    fs.add_filesystem_path_args(
        parser,
        '-c',
        '--custom',
        help='Directory of files to include in a custom corpus.',
        required=False)

    # Train the classifier.
    add_switch(parser, '-t', '--train',
               "Train the classifier using the NLTK tokens")

    # Unlike the switches around it, --classify uses the default 'store'
    # action and therefore expects a value on the command line.
    parser.add_argument('-cl',
                        '--classify',
                        help="Classify the contents of classify.txt",
                        required=False)

    # Enable stemming.
    add_switch(parser, '-s', '--stemming',
               "Stem in the classifier or trainer.")

    # Print the per-word probabilities.
    add_switch(parser, '-lp', '--printProbabilities',
               "Print each word probability.")

    # Parse the passed commandline args and turn them into a dictionary.
    args = vars(parser.parse_args())

    # Configure the log level based on passed in args to be one of DEBUG, INFO, WARN, ERROR, CRITICAL
    log.set_log_level_from_args(args)

    return args
Пример #6
0
def configure_command_line_arguments():
    """Define the probability-playground CLI, parse the command line and set the log level.

    The numeric options declare an explicit ``type`` so that a value given on
    the command line has the same Python type as the option's default
    (previously user-supplied values arrived as ``str`` while defaults were
    ``int``/``float``).

    Returns:
        dict: ``vars()`` of the parsed namespace, keyed by the long option
        names: ``verbose``, ``veryVerbose``, ``coinFlip``,
        ``coinFlipMultiplier``, ``diceRoll``, ``numDice``, ``numTrials``,
        ``uniformDistribution``, ``gaussianDistribution``, ``mean``,
        ``standardDeviation``, ``poissonDistribution``, ``lambda``, ``jars``.

    Note:
        ``parser.parse_args()`` reads ``sys.argv`` and terminates the process
        on invalid input (including non-numeric values for the numeric
        options) or ``-h``.
    """
    parser = argparse.ArgumentParser(description='Play with probabilities')

    def add_switch(short_name, long_name, help_text):
        # Boolean option: present -> True, absent -> False.
        parser.add_argument(short_name,
                            long_name,
                            help=help_text,
                            required=False,
                            action='store_true')

    def add_number(short_name, long_name, help_text, converter, fallback):
        # Value option whose command-line text is converted with
        # ``converter``; ``fallback`` is used when the option is omitted.
        parser.add_argument(short_name,
                            long_name,
                            help=help_text,
                            required=False,
                            type=converter,
                            default=fallback)

    # Verbosity switches.  Verbose shows some logs, veryVerbose shows more;
    # the two cannot be combined.
    logging_group = parser.add_mutually_exclusive_group(required=False)
    logging_group.add_argument("-v",
                               "--verbose",
                               help="Set the log level verbose.",
                               action='store_true')
    # The help text used to be a copy of the -v text, which made the two
    # switches indistinguishable in --help.
    logging_group.add_argument("-vv",
                               "--veryVerbose",
                               help="Set the log level very verbose.",
                               action='store_true')

    # Run the coin flip simulation and plot the output
    add_switch('-cf', '--coinFlip', "Generate the coin flip offset chart")

    # configure how big of a multiplier to use when stepping the coin flipper
    add_number(
        '-cfm', '--coinFlipMultiplier',
        "Each iteration of the coin flip test runs more times than the previous, where current = cfm*previous.",
        float, 1.2)

    # Run the dice roll simulation and plot the output
    add_switch('-d', '--diceRoll', "Generate the dice roll distribution")

    # configure the number of dice to use in the dice roller
    add_number('-nd', '--numDice',
               "How many dice to use in the dice roll distribution",
               int, 1)

    # configure the number of trials to use for any of the simulations
    add_number('-nt', '--numTrials',
               "How many trials to use for generating the distribution",
               int, 1000000)

    # generate a uniform distribution
    add_switch('-ud', '--uniformDistribution',
               "Generate a Uniform distribution")

    # generate a gaussian distribution
    add_switch('-gd', '--gaussianDistribution',
               "Generate a Gaussian distribution")

    # set the mean for a gaussian distribution
    add_number('-m', '--mean',
               "Set the mean for the Gaussian distribution",
               float, 0)

    # set the standard deviation for the gaussian distribution
    add_number('-sd', '--standardDeviation',
               "Set the standard deviation for the Gaussian distribution",
               float, 1)

    # generate a poisson distribution
    add_switch('-pd', '--poissonDistribution',
               "Generate a Poisson distribution")

    # set lambda for the poisson distribution.  The resulting key is
    # 'lambda', a Python keyword, so it is only reachable through the
    # returned dict.
    add_number(
        '-l', '--lambda',
        "Set lambda (the expected arrival rate) for the Poisson distribution",
        float, 3)

    # run marble/jar simulation
    add_switch('-j', '--jars',
               "Calculate probability distribution of marble choices.")

    # Parse the passed commandline args and turn them into a dictionary.
    args = vars(parser.parse_args())

    # Configure the log level based on passed in args to be one of DEBUG, INFO, WARN, ERROR, CRITICAL
    log.set_log_level_from_args(args)

    return args