Exemplo n.º 1
0
def test_amazon_pulling():
    """Check the frame produced by ``pulling_amazon`` for the bundled sample.

    Soft failures are collected and reported together at the end; the two
    column-name checks whose keys are used later are asserted immediately.
    """
    sample_path = resource_filename(
        "vadertester", "json/Amazon_githubdata.json.gz")
    frame = pulling_logic.pulling_amazon(sample_path)
    problems = []
    expected_rows = 37  # the bundled github sample contains 37 lines

    # The data should have been shaped into exactly three columns.
    column_count = len(frame.columns)
    if column_count != 3:
        problems.append("incorrect column amount")

    # Hard assert: the 'ID' key is indexed further down in this test.
    assert frame.columns[0] == "ID", "ID incorrectly labeled"

    if frame.columns[1] != "ReviewText":
        problems.append("ReviewText Column incorrectly labeled")

    # Hard assert: the 'ReviewScore' key is indexed further down in this test.
    third_label = frame.columns[2]
    assert third_label == "ReviewScore", "ReviewScore Column incorrectly labeled"

    # IDs must not repeat (this may be slow on large data).
    ids_unique = frame['ID'].is_unique
    if not ids_unique:
        problems.append("ID column values are not unique")

    # ReviewScore is expected to be stored as float64.
    score_is_float = frame['ReviewScore'].dtype == "float64"
    if not score_is_float:
        problems.append("Rating column not numeric")

    # Every line of the sample file should have been imported.
    row_count = len(frame.index)
    if row_count != expected_rows:
        problems.append("Not All data is imported correctly")

    report = "\n".join(problems)
    assert not problems, "errors occurred:\n{}".format(report)
Exemplo n.º 2
0
def main():
    """Run the Vader tester command-line workflow.

    Parses the command-line arguments, loads the dataset named by
    ``args.file_input``, draws ``args.amount`` random rows from it, runs the
    tests on that sample and generates the report.

    While the workflow runs, ``sys.stdout`` is replaced by
    ``writer(<original stdout>, fout)`` where ``fout`` is ``output.txt``
    opened for writing (presumably so output reaches both targets -- confirm
    against ``writer``).  The original ``sys.stdout`` is restored even when
    the workflow raises.

    Raises:
        Exception: if the requested sample size is larger than the number of
            rows in the loaded dataset.
    """
    saved = sys.stdout
    with open('output.txt', 'w') as fout:
        sys.stdout = writer(saved, fout)
        try:
            print("Vader tester started")
            parser = create_parser()
            args = parser.parse_args()
            print("Loading test data from '" + args.file_input + "'")
            all_df = pulling_amazon(args.file_input)
            print("Getting " + str(args.amount) + " random rows from data")
            if len(all_df.index) < args.amount:
                raise Exception(
                    "Sample cannot be bigger than dataset amount which is " +
                    str(len(all_df.index)) + " rows")
            random_sample_df = random_sample(all_df, args.amount)
            print("Finished loading test data")
            print("Running tests")
            run_tests(random_sample_df)
            print("Finished running tests")
            print("Displaying output")
            report_generation(random_sample_df)
            print("Vader tester finished")
        finally:
            # Restore stdout before the file is closed so a failure never
            # leaves sys.stdout pointing at a writer over a closed file.
            sys.stdout = saved
Exemplo n.º 3
0
def tested_data_frame():
    """Load the bundled Amazon sample, run the tests on it and return it."""
    sample_path = resource_filename(
        "vadertester", "json/Amazon_githubdata.json.gz")
    frame = pulling_amazon(sample_path)
    # The return value of run_tests is not used; the frame itself is returned.
    testing_logic.run_tests(frame)
    return frame
Exemplo n.º 4
0
def amazon_data_frame_tested():
    """Return a 30-row random sample of the bundled data after running tests.

    The sample is taken with ``random_sample(df, 30)`` and then passed to
    ``run_tests`` before being returned.
    """
    sample_path = resource_filename(
        "vadertester", "json/Amazon_githubdata.json.gz")
    sampled = random_sample(pulling_amazon(sample_path), 30)
    run_tests(sampled)
    return sampled
Exemplo n.º 5
0
def amazon_data_frame():
    """Return the result of ``pulling_amazon`` for the bundled sample file."""
    sample_path = resource_filename(
        "vadertester", "json/Amazon_githubdata.json.gz")
    return pulling_amazon(sample_path)