Example #1
def write_results(log, results, iterations, pr_number=None, use_details=False):
    log("## All results ##\n")
    if use_details:
        log("<details>\n")
        log("<summary>%i %s ran</summary>\n\n" % (len(results),
                                                  "tests" if len(results) > 1
                                                  else "test"))

    for test_name, test in results.items():
        baseurl = "http://w3c-test.org/submissions"
        if "https" in os.path.splitext(test_name)[0].split(".")[1:]:
            baseurl = "https://w3c-test.org/submissions"
        title = test_name
        if use_details:
            log("<details>\n")
            if pr_number:
                title = "<a href=\"%s/%s%s\">%s</a>" % (baseurl, pr_number, test_name, title)
            log('<summary>%s</summary>\n\n' % title)
        else:
            log("### %s ###" % title)
        strings = [("", err_string(test["status"], iterations), "")]

        strings.extend(((
            ("`%s`" % markdown_adjust(subtest_name)) if subtest else "",
            err_string(subtest["status"], iterations),
            ("`%s`" % markdown_adjust(';'.join(subtest["messages"]))) if len(subtest["messages"]) else "")
            for subtest_name, subtest in test["subtests"].items()))
        table(["Subtest", "Results", "Messages"], strings, log)
        if use_details:
            log("</details>\n")

    if use_details:
        log("</details>\n")
Example #2
def write_results(log, results, iterations, pr_number=None, use_details=False):
    log("## All results ##\n")
    if use_details:
        log("<details>\n")
        log("<summary>%i %s ran</summary>\n\n" %
            (len(results), "tests" if len(results) > 1 else "test"))

    for test_name, test in results.items():
        baseurl = "http://w3c-test.org/submissions"
        if "https" in os.path.splitext(test_name)[0].split(".")[1:]:
            baseurl = "https://w3c-test.org/submissions"
        title = test_name
        if use_details:
            log("<details>\n")
            if pr_number:
                title = "<a href=\"%s/%s%s\">%s</a>" % (baseurl, pr_number,
                                                        test_name, title)
            log('<summary>%s</summary>\n\n' % title)
        else:
            log("### %s ###" % title)
        strings = [("", err_string(test["status"], iterations), "")]

        strings.extend(
            ((("`%s`" % markdown_adjust(subtest_name)) if subtest else "",
              err_string(subtest["status"], iterations),
              ("`%s`" % markdown_adjust(';'.join(subtest["messages"])))
              if len(subtest["messages"]) else "")
             for subtest_name, subtest in test["subtests"].items()))
        table(["Subtest", "Results", "Messages"], strings, log)
        if use_details:
            log("</details>\n")

    if use_details:
        log("</details>\n")
Example #3
def write_inconsistent(log, inconsistent, iterations):
    """Output inconsistent tests to logger.error."""
    log("## Unstable results ##\n")
    strings = [(
        "`%s`" % markdown_adjust(test),
        ("`%s`" % markdown_adjust(subtest)) if subtest else "",
        err_string(results, iterations),
        ("`%s`" % markdown_adjust(";".join(messages))) if len(messages) else "")
        for test, subtest, results, messages in inconsistent]
    table(["Test", "Subtest", "Results", "Messages"], strings, log)
Example #4
def write_inconsistent(log, inconsistent, iterations):
    """Output inconsistent tests to logger.error."""
    log("## Unstable results ##\n")
    strings = [("`%s`" % markdown_adjust(test),
                ("`%s`" % markdown_adjust(subtest)) if subtest else "",
                err_string(results, iterations),
                ("`%s`" %
                 markdown_adjust(";".join(messages))) if len(messages) else "")
               for test, subtest, results, messages in inconsistent]
    table(["Test", "Subtest", "Results", "Messages"], strings, log)
Example #5
def main():
    display_plot_of_temperatures()
    plt.savefig("images/testing_training_graph")
    plt.gcf().clear()

    learn(number_of_iterations=100)

    display_plot_of_temperatures()
    mySepLinePlot = build_sep_line_plot()
    mySepLinePlot.title(
        "Plots of temperatures and learning unit activation function")
    plt.savefig("images/activation_line")
    plt.gcf().clear()

    plt.plot(testing_errors)
    plt.ylabel("Euclidean Distance")
    plt.xlabel("Iteration #")
    plt.title("Testing error over iterations")
    plt.savefig("images/testing_error")
    plt.gcf().clear()

    file = open(reportFileName, "w")

    save_markdown_report(file, [
        md.h1("Project 3 Report"),
        md.h2("CMSC 409 - Artificial Intelligence"),
        md.h2("Steven Hernandez"),
        md.p("""1. There would be two input and one output for our unit.
Inputs would be the hour and a bias input while output would be the estimated
temperature at that hour of the day.
In fact, because we have weights for x (hour of the day) and a bias,
we can create the formula net = ax+b which means our unit can simply return net * 1
or the identity."""),
        md.p("""2. The activation function would be some linear function.
Or unit would not have a threshold however.
Whatever the outcome from the linear activation function is
would be the exact result from the learning unit.
If we look at the graph of temperatures for our training
(and testing) data, we can see that the values are basically
just a linear function."""),
        md.image("./images/testing_training_graph.png",
                 "Testing training graph"),
        md.p("3. Outcome of training with days 1-3:"),
        md.p("Euclidean distance comes down from %f to %f" %
             (testing_errors[0], testing_errors[len(testing_errors) - 1])),
        md.image("./images/testing_error.png", "Testing Error"),
        md.p("resulting in an activation as so:"),
        md.image("./images/activation_line.png", "Testing Error"),
        md.p("4."),
        md.table([
            [
                "input", "expected output", "actual output",
                "Euclidean distance"
            ],
            [5, 59.5, output(5), -59.5 + output(5)],
            [6, 64, output(6), -64 + output(6)],
            [7, 68.7, output(7), -68.7 + output(7)],
            [8, 73.65, output(8), -73.65 + output(8)],
            [9, 78.43, output(9), -78.43 + output(9)],
            [10, 82, output(10), -82 + output(10)],
            [11, 85.2, output(11), -85.2 + output(11)],
            [12, 87, output(12), -87 + output(12)],
            [13, 90.67, output(13), -90.67 + output(13)],
        ]),
        md.p("5. The learning rate was 0.0005 to keep the learning from going too quickly, "
             "while we went through 100 iterations."),
        md.p("Notice from the graph of Euclidean distances above that we reach our peak "
             "around the 20th iteration mark."),
        md.p("6. As such, after the 20th iteration, we reach a plateau of improvement "
             "with our current system."),
        md.p("7. Using a more complex network with more than one unit would allow for more "
             "complex output, which would ultimately help us with this problem."),
        md.p("Currently, we are stuck with a linear output because the single unit can only "
             "learn as such."),
    ])

    file.close()

    print("Markdown Report generated in ./report.md")
    print("Converting Markdown file to PDF with ")
    print(
        "`pandoc --latex-engine=xelatex -V geometry=margin=1in -s -o FINAL_REPORT.pdf "
        + reportFileName + "`")

    os.system(
        "pandoc --latex-engine=xelatex -V geometry=margin=1in -s -o FINAL_REPORT.pdf "
        + reportFileName)
    print("Report created")
Example #6
def main():
    minimum_occurrences = 2
    encountered_words = get_encountered_words(
        minimum_occurrences=minimum_occurrences)
    feature_vector = create_feature_vector(
        minimum_occurrences=minimum_occurrences)

    table = [get_encountered_words(minimum_occurrences=minimum_occurrences)
             ] + feature_vector

    normalized_feature_vector = normalize_feature_vector(feature_vector)

    result = learn_wta(normalized_feature_vector, cluster_count=20)

    clustered_sentences = split_sentences_into_clusters(
        result, normalized_feature_vector)

    clustered_sentences = list(filter(lambda x: x, clustered_sentences))

    def sentence_tuple_to_str(tuple):
        return str(tuple[0]) + ") " + tuple[1]

    clustered_sentence_strings = list(
        map(lambda cluster: list(map(sentence_tuple_to_str, cluster)),
            clustered_sentences))

    file = open(reportFileName, "w")
    md.save_markdown_report(file, [
        md.meta_data("Project 4 Report - CMSC 409 - Artificial Intelligence",
                     "Steven Hernandez"),
        md.p("In total, there are " + str(len(get_encountered_words())) +
             " unique root words found. "),
        md.p(
            str(len(get_encountered_words(minimum_occurrences=2))) +
            " words that are encountered at least 2 times. "),
        md.p("And then only " +
             str(len(get_encountered_words(minimum_occurrences=3))) +
             " words that are encountered at least 3 times. "),
        md.
        p("These statistics are calculated based on processing the documents in the following ways:"
          ),
        md.ol([
            "Tokenizing the sentences, which splits each sentence on the spaces to only produce a list of word/numeric "
            "tokens. This allows us to begin processing each word individually without requiring the context of the "
            "entire sentence. ",
            "Removing punctuation is required because in general, punctuation does not provide us textual context. "
            "Again, we are only looking at the similarity of sentence based on the number of occurrences of common "
            "words between the sentences. We are not trying to decifer the intent or the sentiment behind the "
            "sentence, so we do not require punctuation or even placement of words within the sentence. Just that the "
            "word exists "
            "within the sentence. ",
            "Removing numbers because numbers do not provide context about what the sentence is talking about. "
            "A book might cost $20 as would a basic microcontroller like an Arduino, but they are not related. "
            "Additional since, we removed punctuation in the previous step, we wouldn't be able to differentiate "
            "$20 from 20 miles or 20 participants, etc. ",
            "Converting upper to lower case prevents words appearing at the beginning of a sentence (with a required "
            "capital letter) from being considered a different word if it also appears in the middle of a sentence "
            "(which would be written in all lower case) ",
            "Removing stop words shrinks the total number of words that we find. More importantly though, it removes "
            "overly common words that do not provide us useful insights into the similarity of sentences. The word "
            "'the' is very likely to appear in most sentences, thus is not a useful indicator. ",
            "Stemming takes a word in past tense/future tense or plural/singular and takes the 'stem' or 'root' word. "
            "This further shrinks the overall number of words or dimensions that we must analyze. An example: run and "
            "running have the same root word, thus are very similar. ",
            "Combining stemmed words takes these common stemmed root words and combines them so that we can get a "
            "total count of the occurances of the word throughout all sentence documents."
        ],
              alpha=True),
        md.p("On the following page is a table listing all of these root words along with the number of occurrences "
             "of each word throughout the documents (the feature vector)."),
        md.page_break(),
        md.table(split_table_rows(["Root Word", "\# of instances"],
                                  list(count_encountered_words().items()), 49),
                 width=50),
        md.page_break(),
        md.p("The following lists the root words with greater than " +
             str(minimum_occurrences) + "occurrences:"),
        md.table(split_table_rows(
            ["Root Word", "\# of instances"],
            list(({
                k: v
                for k, v in count_encountered_words().items()
                if v > minimum_occurrences
            }).items()), 49),
                 width=50),
        md.page_break(),
        md.
        p("The following 2 tables show the distribution of root words which appear at least "
          + str(minimum_occurrences) + " times across each "
          "document (with each row indicating one sentence) (This is the Term Document Matrix **TDM**)"
          ),
        md.table(split_table(table, 0, math.floor(len(table[0]) / 2)),
                 width=20),
        md.page_break(),
        md.table(split_table(table,
                             math.floor(len(table[0]) / 2) + 1, len(table[0])),
                 width=20),
        md.page_break(),
        md.h2("Learning"),
        md.p("We begin learning by using the 'Winner Takes All' (WTA) method, which means that we begin with `n` "
             "clusters; then, iterating over each document, we find the closest cluster using Euclidean "
             "distance. Depending on which cluster's center (based on weight) is closest to the new document, "
             "that cluster's center's weights are changed to better match the resulting pattern. Code below: "),
        md.code(function=learn_wta),
        md.code(function=get_closest_cluster),
        md.code(function=calculate_change_in_weight),
        md.page_break(),
        md.h3("Learned clusters:"),
    ])

    # Show resulting clusters
    for i in range(len(clustered_sentence_strings)):
        md.save_markdown_report(file, [
            md.p("Cluster " + str(i + 1) + ":"),
            md.li(clustered_sentence_strings[i]),
        ])

    # Show bit representation of sentence vectors
    md.save_markdown_report(file, [
        md.
        p("If we look at the feature vectors as a bit map showing whether a sentence has or does not have "
          "a specific word, we can begin to see the pattern of the clustering method."
          ),
    ])

    def sentence_tuple_to_bit_string(tuple):
        return str(tuple[0]) + ") " + feature_vector_to_bit_string(
            feature_vector[tuple[0]])

    def feature_vector_to_bit_string(vector):
        return ''.join(map(str, vector))

    for i in range(len(clustered_sentences)):
        md.save_markdown_report(file, [
            md.p("Cluster " + str(i + 1) + ":"),
            md.li(
                list(map(sentence_tuple_to_bit_string,
                         clustered_sentences[i]))),
        ])

    md.save_markdown_report(file, [
        md.p("From these bit maps, we can see that each cluster has relatively distinct columns which match "
             "across the documents of the cluster."),
        md.p("Of course, this clustering does split some groups of documents into more clusters than expected. "
             "Some clusters seem as if they could be combined from a human's point of view. Having additional sample "
             "documents would very likely help with this issue. With this small number of documents, for example, "
             "sentence 12, 'Three parking spaces in back, pets are possible with approval from the owner.', does not "
             "mention being about a 'home' or many other words which are used in other documents that truly identify "
             "it as being about a home. With more documents, we would begin to have more overlap, which could "
             "aid in finding which words provide us the most importance. Sentence 10 as well does not share enough "
             "words to be able to identify it with the provided documents."),
        md.p("Below, we can see which words these sentences share in common."),
    ])

    def sentence_tuple_to_formatted_sentence(tuple):
        formatted_sentence = []

        sentence_vector = feature_vector[tuple[0]]

        for i, v in enumerate(sentence_vector):
            if v:
                formatted_sentence.append(encountered_words[i])
        return str(tuple[0]) + ") " + ", ".join(formatted_sentence)

    for i in range(len(clustered_sentence_strings)):
        md.save_markdown_report(file, [
            md.p("Cluster " + str(i + 1) + ":"),
            md.li(
                list(
                    map(sentence_tuple_to_formatted_sentence,
                        clustered_sentences[i]))),
        ])

    md.save_markdown_report(file, [
        md.p("One problem of this method, compared to a method where clusters are created as needed, was that if the "
             "random initial weights for a cluster landed in a bad spot, it is likely "
             "the cluster would never contain any sentences, because (as the name implies) the Winner Takes All method "
             "would often find one cluster taking over most of the documents, while other clusters remained empty."),
        md.p("The solution taken here was to learn on many randomly placed clusters. Learning "
             "began with 20 clusters. From these 20 clusters, however, we only end up with "
             + str(len(clustered_sentences)) +
             " clusters. Additionally, (during testing) it would sometimes "
             "result in clusters with only a single result, when that result would have fit better "
             "in some other already defined cluster."),
        md.p("With fewer clusters (for example 4), we occasionally ended up with good results, but often would end "
             "up with most documents stuck in one single cluster."),
        md.
        p("In addition to having more documents to sample, having clusters only as needed would likely improve this "
          "situation. With clusters-as-needed, clusters would only be able to contain documents within some radius "
          "of the cluster's center. If a document is found outside of this radius, then a new cluster would be "
          "formed in this place.")
    ])

    file.close()

    print("Markdown Report generated in ./report4.md")
    print("Converting Markdown file to PDF with ")
    print(
        "`pandoc --latex-engine=xelatex -V geometry=margin=1in -s -o FINAL_REPORT.pdf "
        + reportFileName + "`")

    os.system(
        "pandoc --latex-engine=xelatex -V geometry=margin=1in -s -o FINAL_REPORT.pdf "
        + reportFileName)
    print("Report created")
Example #7
def build_report():
    file = open(reportFileName, "w")
    project1.save_markdown_report(file, [
        md.meta_data("Project 2 Report - CMSC 409 - Artificial Intelligence",
                     "Steven Hernandez"),
        md.
        p("You will notice for each scenario (on the following pages), there are 4 graphs. These graphs are described in the table below."
          ),
        md.table([
            [
                "Final sep_line after learning",
                "Graph of all sep_lines during learning"
            ],
            [
                "Graph of errors (blue: training set error, gray: testing set error)",
                "Change of weights over time. (red: x_weight, green: y_weight, blue: bias)"
            ]
        ],
                 width=40),
    ])

    final_training_errors = {
        "hard": [],
        "soft": [],
    }

    final_testing_errors = {
        "hard": [],
        "soft": [],
    }

    for activation_type in ("hard", "soft"):
        for sample_size in ("25", "50", "75"):
            # Calculate errors
            train_error_df = pd.read_csv("./data/project2/" + activation_type +
                                         "/" + sample_size + "_errors.txt",
                                         header=None)
            test_error_df = pd.read_csv("./data/project2/" + activation_type +
                                        "/" + sample_size +
                                        "_total_errors.txt",
                                        header=None)

            final_training_errors[activation_type].append(
                str(train_error_df[0].iloc[-1]))
            final_testing_errors[activation_type].append(
                str(test_error_df[0].iloc[-1]))

    project1.save_markdown_report(file, [
        md.h4("Error for training set across each different scenario."),
        md.table([
            ["", "25%", "50%", "75%"],
            [
                "Hard", final_training_errors["hard"][0],
                final_training_errors["hard"][1],
                final_training_errors["hard"][2]
            ],
            [
                "Soft", final_training_errors["soft"][0],
                final_training_errors["soft"][1],
                final_training_errors["soft"][2]
            ],
        ],
                 width=15),
        md.h4("Error for testing set across each different scenario."),
        md.table([
            ["", "25%", "50%", "75%"],
            [
                "Hard", final_testing_errors["hard"][0],
                final_testing_errors["hard"][1],
                final_testing_errors["hard"][2]
            ],
            [
                "Soft", final_testing_errors["soft"][0],
                final_testing_errors["soft"][1],
                final_testing_errors["soft"][2]
            ],
        ],
                 width=15),
        md.
        p("""As we can see, **soft** activation results in the lowest error compared to **hard** activation.
             We can see that while soft activation with 75% training data results in the lowest error for the training set,
             soft activation with 50% training data actually does better for the testing set."""
          ),
        md.p("""Based on the graphs for `% error over iterations`,
             we can see that error jumps around quite extremely for **hard** activation. 
             As a result, it seems we do not actually end up with the best error. 
             For example, you will see in the *Errors* table for Hard activation with sample size 75%, 
             the final error was 0.152% while the best error had actually been 0.073% 
             (which happened to have happened quite early on iteration 39). 
             It might be the case that we need to lower alpha for these graphs. 
             **Soft** activation on the other hand smoothly moves towards it's best value"""
             ),
        md.
        p("""On that point, it seems to be the case that **soft** activation reaches just about its best accuracy 
             after the first iteration (after going through each item in the training set once)."""
          ),
        md.
        p("""Surprisingly, Hard activation with 75% training results in the best overall error of 0.073%.
             Unfortunately, this error was not the final output from training and as such was lost. 
             """),
        md.page_break(),
    ])

    for activation_type in ("hard", "soft"):
        for sample_size in ("25", "50", "75"):
            # Calculate errors
            train_error_df = pd.read_csv("./data/project2/" + activation_type +
                                         "/" + sample_size + "_errors.txt",
                                         header=None)
            test_error_df = pd.read_csv("./data/project2/" + activation_type +
                                        "/" + sample_size +
                                        "_total_errors.txt",
                                        header=None)
            weights_df = pd.read_csv("./data/project2/" + activation_type +
                                     "/" + sample_size + "_weights.txt",
                                     header=None)

            project1.save_markdown_report(file, [
                md.h3(
                    str.title(activation_type +
                              " activation with a sample size of " +
                              sample_size + "%")),
                md.images([
                    [
                        "./images/project2/" + activation_type + "/" +
                        sample_size + "_start_end_lines.png", ""
                    ],
                    [
                        "./images/project2/" + activation_type + "/" +
                        sample_size + "_all_sep_lines.png", ""
                    ],
                ]),
                md.images([
                    [
                        "./images/project2/" + activation_type + "/" +
                        sample_size + "_error.png", "errors"
                    ],
                    [
                        "./images/project2/" + activation_type + "/" +
                        sample_size + "_weights.png", "weights"
                    ],
                ]),
                md.h4("Errors"),
                md.table([
                    ["", "Training Set Error", "Test Set Error"],
                    [
                        "Start",
                        str(train_error_df[0].iloc[0]),
                        str(test_error_df[0].iloc[0])
                    ],
                    [
                        "End",
                        str(train_error_df[0].iloc[-1]),
                        str(test_error_df[0].iloc[-1])
                    ],
                    [
                        "Best",
                        str(train_error_df[0].min()),
                        str(test_error_df[0].min())
                    ],
                ],
                         width=15),
                md.h4("Weights"),
                md.table([
                    ["", "x_weight", "y_weight", "bias"],
                    [
                        "Random initial",
                        str(weights_df[0].iloc[0]),
                        str(weights_df[1].iloc[0]),
                        str(weights_df[2].iloc[0])
                    ],
                    [
                        "Final",
                        str(weights_df[0].iloc[-1]),
                        str(weights_df[1].iloc[-1]),
                        str(weights_df[2].iloc[-1])
                    ],
                ],
                         width=10),
                md.page_break(),
            ])

    project1.save_markdown_report(file, [
        md.
        h3("This project uses code from `project1.py` from last time as well as new code from `project2.py`"
           ),
        md.code(file="project2.py"),
        md.code(file="project1.py"),
    ])

    file.close()

    os.system(
        "pandoc --latex-engine=xelatex -V geometry=margin=1in -s -o FINAL_REPORT_2.pdf report2.md"
    )
    print("Report created")
Example #8
def main():
    # Data has been generated, so we don't want to regenerate the data.
    # generate_random_data()

    df = pd.read_csv(dataFileName, header=None)
    sepLineA = pd.read_csv(sepLineAFileName, header=None)
    sepLineB = pd.read_csv(sepLineBFileName, header=None)
    #
    errorMatrix1 = get_confusion_matrix(df, sepLineA)
    errorMatrix2 = get_confusion_matrix(df, sepLineB)

    myPlt = build_height_plot(df, sepLineA)
    myPlt.savefig("images/1d")
    myPlt.gcf().clear()

    myPlt = build_height_weight_plot(df, sepLineB)
    myPlt.savefig("images/2d")
    myPlt.gcf().clear()

    file = open(reportFileName, "w")

    save_markdown_report(file, [
        md.h1("Project 1 Report"),
        md.h2("CMSC 409 - Artificial Intelligence"),
        md.h2("Steven Hernandez"),
        md.p("Fully generated data can be found in `./Project1_data/data.txt"),
        md.h3("*Scenerio 1:* using only height."),
        md.table([["", "Weights"], ["x", sepLineA[0][0]],
                  ["bias", sepLineA[0][1]]]),
        md.p("Assuming the following"),
        md.image("./images/net.png"),
        md.p("Or in this situation: "),
        md.p("1 if 0 <= -a(Height) + bias, otherwise 0"),
        md.p("where *a* is some weight and *1* is male and *0* is female."),
        md.p("In this situation a=" + str(sepLineA[0][0]) + " and bias=" +
             str(sepLineA[0][1])),
        md.image("./images/1d.png"),
        md.table([["", "Predicted Male", "Predicted Female"],
                  ["Actual Male", errorMatrix1[1], errorMatrix1[2]],
                  ["Actual Female", errorMatrix1[3], errorMatrix1[0]]]),
        md.p("**Confusion Matrix**"),
        md.table([
            ["", ""],
            ["Error", 1 - ((errorMatrix1[1] + errorMatrix1[0]) / 4000)],
            ["Accuracy", (errorMatrix1[1] + errorMatrix1[0]) / 4000],
            ["True Positive Rate", errorMatrix1[1] / 2000],
            ["True Negative Rate", errorMatrix1[0] / 2000],
            ["False Positive Rate", errorMatrix1[3] / 2000],
            ["False Negative Rate", errorMatrix1[2] / 2000],
        ]),
        md.h3("*Scenerio 2:* heights and weights."),
        md.table([["", "Weights"], ["x", sepLineB[0][0]],
                  ["y", sepLineB[0][1]], ["bias", sepLineB[0][2]]]),
        md.p("Assuming the following"),
        md.image("./images/net.png"),
        md.p("Or in this situation:"),
        md.p("1 if 0 <= a(Height) - b(Weight) + bias, otherwise 0"),
        md.
        p("where *a* and *b* are some weights and *1* is male and *0* is female."
          ),
        md.p("In this situation a=" + str(sepLineB[0][0]) + " and b=" +
             str(sepLineB[0][1]) + " and bias=" + str(sepLineB[0][2])),
        md.image("./images/2d.png"),
        md.p("Notice, Male and Female are on slightly different levels in this graph "
             "so that one does not completely cover up the other."),
        md.p("**Confusion Matrix**"),
        md.table([["", "Predicted Male", "Predicted Female"],
                  ["Actual Male", errorMatrix2[1], errorMatrix2[2]],
                  ["Actual Female", errorMatrix2[3], errorMatrix2[0]]]),
        md.table([
            ["", ""],
            ["Error", 1 - ((errorMatrix2[1] + errorMatrix2[0]) / 4000)],
            ["Accuracy", (errorMatrix2[1] + errorMatrix2[0]) / 4000],
            ["True Positive Rate", errorMatrix2[1] / 2000],
            ["True Negative Rate", errorMatrix2[0] / 2000],
            ["False Positive Rate", errorMatrix2[3] / 2000],
            ["False Negative Rate", errorMatrix2[2] / 2000],
        ]),
        md.h3("Libraries Used"),
        md.p("matplotlib, numpy, pandas, pandoc"),
        md.h3("Selected Code Functions"),
        md.p("Functions used to generate this data and calculations."),
        md.p("The full code can be found in `./project1.py`"),
        md.code(function=generate_random_data),
        md.code(function=plot_male_and_females),
        md.code(function=plot_male_and_females),
        md.code(function=get_confusion_matrix),
    ])

    file.close()

    print("Markdown Report generated in ./report.md")
    print("Convert Markdown file to PDF with ")
    print(
        "`pandoc --latex-engine=xelatex -V geometry=margin=1in -s -o FINAL_REPORT.pdf report.md`"
    )
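The report in this example states the decision rule "1 if 0 <= a(Height) - b(Weight) + bias, otherwise 0" and then derives accuracy, error, and the true/false positive and negative rates from the confusion matrix. A small sketch under those assumptions follows; the helper names are hypothetical, and this is not project1.py's get_confusion_matrix().

def predict_male(height, weight, a, b, bias):
    # 1 (male) when 0 <= a*height - b*weight + bias, otherwise 0 (female).
    return 1 if a * height - b * weight + bias >= 0 else 0


def confusion_metrics(samples, a, b, bias):
    # samples: iterable of (height, weight, is_male) with is_male in {0, 1}.
    tp = tn = fp = fn = 0
    for height, weight, is_male in samples:
        predicted = predict_male(height, weight, a, b, bias)
        if predicted and is_male:
            tp += 1
        elif predicted and not is_male:
            fp += 1
        elif not predicted and is_male:
            fn += 1
        else:
            tn += 1
    total = tp + tn + fp + fn
    return {
        "accuracy": (tp + tn) / total,
        "error": (fp + fn) / total,
        "true_positive_rate": tp / (tp + fn),
        "true_negative_rate": tn / (tn + fp),
        "false_positive_rate": fp / (fp + tn),
        "false_negative_rate": fn / (fn + tp),
    }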