Exemplo n.º 1
0
def load_training_data():
    """Load the per-class term probabilities written during training.

    Reads "bayes_training.csv" through ``fs.read_csv``. Every row must have
    exactly three columns: the class (corpus) name, the term (stemmed or not,
    depending on how training was run) and the probability of the term
    occurring within that class. The first row is treated as a header and is
    skipped.

    Returns:
        A ``(training_data, class_names)`` tuple where ``training_data`` maps
        class name -> {term: probability} and ``class_names`` is a list of the
        class names found in the file. Probabilities are stored exactly as
        ``fs.read_csv`` returned them; no conversion is applied here.
    """
    rows = fs.read_csv("bayes_training.csv")

    training_data = {}
    for index, (category_name, term, probability) in enumerate(rows):
        # Skip the header
        if index == 0:
            continue

        # setdefault creates the per-class dict on first sight AND lets the
        # same row's term be stored. Previously the first term of every class
        # was dropped because the "new class" branch only created the dict.
        training_data.setdefault(category_name, {})[term] = probability

    # Every data row registers its class in training_data, so its keys are
    # exactly the class names.
    return training_data, list(training_data)
Exemplo n.º 2
0
def load_training_data():
    """Load the per-class term probabilities written during training.

    Reads "bayes_training.csv" through ``fs.read_csv``. Every row must have
    exactly three columns: the class (corpus) name, the term (stemmed or not,
    depending on how training was run) and the probability of the term
    occurring within that class. The first row is treated as a header and is
    skipped.

    Returns:
        A ``(training_data, class_names)`` tuple where ``training_data`` maps
        class name -> {term: probability} and ``class_names`` is a list of the
        class names found in the file. Probabilities are stored exactly as
        ``fs.read_csv`` returned them; no conversion is applied here.
    """
    rows = fs.read_csv("bayes_training.csv")

    training_data = {}
    for index, (category_name, term, probability) in enumerate(rows):
        # Skip the header
        if index == 0:
            continue

        # setdefault creates the per-class dict on first sight AND lets the
        # same row's term be stored. Previously the first term of every class
        # was dropped because the "new class" branch only created the dict.
        training_data.setdefault(category_name, {})[term] = probability

    # Every data row registers its class in training_data, so its keys are
    # exactly the class names.
    return training_data, list(training_data)
Exemplo n.º 3
0
def marbles_and_jars(num_trials):
    """Simulate drawing marbles from randomly chosen jars and chart the result.

    Reads "marbles.csv" through ``fs.read_csv``. The first row is the header:
    its first column labels the jar name and each remaining column is a marble
    color. Every following row describes one jar: the jar name followed by
    the number of marbles of each color (a blank cell counts as 0).

    Each trial picks a jar uniformly at random, ignoring how many marbles it
    holds, then draws one marble uniformly at random from that jar. The
    fraction of trials that produced each color is plotted as a bar chart
    saved to "marbles.png" via ``charting.bar_chart``.

    Args:
        num_trials: Number of draws to simulate.

    Raises:
        ValueError: If ``num_trials`` is positive but the file defines no jars.
    """
    # read in the csv file of jars
    rows = fs.read_csv("marbles.csv")
    logging.debug("Read rows: " + str(rows))

    jars = {}
    headers = []
    marble_picks = {}

    # go through the rows and build a dictionary of jar_name => list of colors
    for index, row in enumerate(rows):
        # first row is just header data
        if index == 0:
            headers = row
            continue

        for column_index, header in enumerate(headers):
            # the first column is the name of the jar - start it out empty
            if column_index == 0:
                jars[row[0]] = []
                continue

            # every other column is a marble count; the color is the header
            marble_picks[header] = 0

            # blank cells mean "no marbles of this color"
            cell = row[column_index]
            num_marbles = int(cell) if cell else 0

            # expand into one list element per marble
            jars[row[0]].extend([header] * num_marbles)

    logging.info("Jars: " + str(jars))

    # Materialize the jar names once: the set of jars does not change between
    # trials, and a real list can be sampled on both Python 2 and Python 3
    # (a Python 3 dict view cannot be indexed).
    jar_names = list(jars)
    if num_trials > 0 and not jar_names:
        raise ValueError("marbles.csv does not define any jars to draw from")

    for _ in range(num_trials):
        # pick a random jar w/out taking its marbles into consideration
        marbles = jars[random.choice(jar_names)]

        # An empty jar yields no marble. The trial still counts toward
        # num_trials, so the charted fractions then sum to less than 1
        # (previously this case raised ValueError from randint(0, -1)).
        if not marbles:
            continue

        # draw a single marble given the selected jar
        marble_picks[random.choice(marbles)] += 1

    logging.info("Marble picks : " + str(marble_picks))

    # prepare the data for plotting
    keys = []
    data = []
    for key, value in marble_picks.items():
        keys.append(key + " (" + str(value) + ")")
        data.append(value / float(num_trials))

    description = ", ".join(
        [jar_name + "(" + str(len(jar_marbles)) + ")"
         for jar_name, jar_marbles in jars.items()])

    # plot the data
    charting.bar_chart("marbles.png",
                       [data],
                       "Marbles in Jars (" + str(num_trials) + ") - " + description,
                       keys,
                       "Probabilities",
                       None,
                       ['#59799e'])
Exemplo n.º 4
0
def marbles_and_jars(num_trials):
    """Simulate drawing marbles from randomly chosen jars and chart the result.

    Reads "marbles.csv" through ``fs.read_csv``. The first row is the header:
    its first column labels the jar name and each remaining column is a marble
    color. Every following row describes one jar: the jar name followed by
    the number of marbles of each color (a blank cell counts as 0).

    Each trial picks a jar uniformly at random, ignoring how many marbles it
    holds, then draws one marble uniformly at random from that jar. The
    fraction of trials that produced each color is plotted as a bar chart
    saved to "marbles.png" via ``charting.bar_chart``.

    Args:
        num_trials: Number of draws to simulate.

    Raises:
        ValueError: If ``num_trials`` is positive but the file defines no jars.
    """
    # read in the csv file of jars
    rows = fs.read_csv("marbles.csv")
    logging.debug("Read rows: " + str(rows))

    jars = {}
    headers = []
    marble_picks = {}

    # go through the rows and build a dictionary of jar_name => list of colors
    for index, row in enumerate(rows):
        # first row is just header data
        if index == 0:
            headers = row
            continue

        for column_index, header in enumerate(headers):
            # the first column is the name of the jar - start it out empty
            if column_index == 0:
                jars[row[0]] = []
                continue

            # every other column is a marble count; the color is the header
            marble_picks[header] = 0

            # blank cells mean "no marbles of this color"
            cell = row[column_index]
            num_marbles = int(cell) if cell else 0

            # expand into one list element per marble
            jars[row[0]].extend([header] * num_marbles)

    logging.info("Jars: " + str(jars))

    # Materialize the jar names once: the set of jars does not change between
    # trials, and a real list can be sampled on both Python 2 and Python 3
    # (a Python 3 dict view cannot be indexed).
    jar_names = list(jars)
    if num_trials > 0 and not jar_names:
        raise ValueError("marbles.csv does not define any jars to draw from")

    for _ in range(num_trials):
        # pick a random jar w/out taking its marbles into consideration
        marbles = jars[random.choice(jar_names)]

        # An empty jar yields no marble. The trial still counts toward
        # num_trials, so the charted fractions then sum to less than 1
        # (previously this case raised ValueError from randint(0, -1)).
        if not marbles:
            continue

        # draw a single marble given the selected jar
        marble_picks[random.choice(marbles)] += 1

    logging.info("Marble picks : " + str(marble_picks))

    # prepare the data for plotting
    keys = []
    data = []
    for key, value in marble_picks.items():
        keys.append(key + " (" + str(value) + ")")
        data.append(value / float(num_trials))

    description = ", ".join(
        [jar_name + "(" + str(len(jar_marbles)) + ")"
         for jar_name, jar_marbles in jars.items()])

    # plot the data
    charting.bar_chart(
        "marbles.png", [data],
        "Marbles in Jars (" + str(num_trials) + ") - " + description, keys,
        "Probabilities", None, ['#59799e'])