예제 #1
0
# Keys of the per-room price tables, in the order their rows are written.
_PRICE_KEYS = ('price_bar', 'price_summer', 'price_adv')


def _append_room_rows(csv_name, start, end, room_type, room_data):
    """Append one merged CSV row per accommodation type of a single room type.

    The price tables named in ``_PRICE_KEYS`` are walked in order and, within
    each table, the accommodation types are visited in sorted order.

    NOTE(review): an accommodation type present in more than one price table
    is written once per table, so the CSV can contain repeated rows. This
    mirrors the previous behaviour; confirm whether de-duplication is wanted.
    """
    for price_key in _PRICE_KEYS:
        for accom_type in sorted(room_data[price_key]):
            m_price = merge(room_data, accom_type)
            util.append_row(csv_name, [
                start, end,
                room_type,
                accom_type,
                m_price['p_bar'],
                m_price['p_sum'],
                m_price['p_adv'],
            ])


def run_scraper(debug=False):
    """Crawl every date pair and append the results to a dated CSV file.

    A CSV named ``<date>_data.csv`` is created first. For each pair from
    ``util.get_date_pairs()`` a ``scraper.Scraper`` is built and crawled,
    logging to ``<date>_log.txt``. When the crawl yields nothing, a single
    placeholder row carrying only the two dates is written; otherwise one
    row is written per room type / accommodation type combination.

    Args:
        debug: Forwarded unchanged to ``scraper.Scraper``.
    """
    date = util.get_current_date()

    csv_name = '{}_data.csv'.format(date)
    log_file = '{}_log.txt'.format(date)
    util.create_csv(csv_name)

    for date_pair in util.get_date_pairs():
        start, end = date_pair[0], date_pair[1]
        worker = scraper.Scraper(start, end, debug)
        try:
            data = worker.crawl(log_file)

            if not data:
                # Placeholder padded to the same seven columns as a data row
                # (dates, room type, accommodation type, three prices).
                util.append_row(
                    csv_name,
                    [start, end, None, None, None, None, None])
                continue

            for room_type in sorted(data):
                _append_room_rows(
                    csv_name, start, end, room_type, data[room_type])
        finally:
            # Always give the worker its clean-up call, even when crawling
            # or writing a row raised.
            worker.clean()

# Paths below are built relative to the directory containing this script;
# when __file__ carries no directory component, the current directory is used.
base_path, filename = os.path.split(__file__)
base_path = base_path or '.'

# --- Training: derive association rules from the training set ---
# Optional step, kept disabled: regenerate random training data.
# util.generate_random_data('data.csv', 80000)
training_path = base_path + '/../data_base/trainning_data.csv'
trainning_data = util.get_data_from_file(training_path)

print("Generating association rules...")
nar = NetworkAssociationRules(trainning_data)
association_rules = nar.calculate_association_rules()

# Persist the rules, passing the column titles and the row parser.
print("Saving association rules into file...")
rules_path = base_path + "/../results/association_rules.csv"
rules_columns = ["Semester", "Hour", "level", "Conf", "Sup"]
util.create_csv(rules_path, association_rules, rules_columns,
                parse_association_rule)

# --- Testing: validate the rules against the held-out test set ---
# Optional step, kept disabled: regenerate random test data.
# util.generate_random_data('test_data.csv', 20000)
testing_path = base_path + '/../data_base/testing_data.csv'
test_data = util.get_data_from_file(testing_path)

print("Testing....")
accuracy = nar.validate_association_rules(test_data)

# Persist the validation results, again with titles and a row parser.
print("Saving test results...")
accuracy_path = base_path + "/../results/accuracy.csv"
accuracy_columns = [
    "Semester", "Hour", "level", "Conf[rule]", "Conf[test]", "Result",
]
util.create_csv(accuracy_path, accuracy, accuracy_columns, parse_accuracy)
예제 #3
0
]

# Negative (non-contradiction) instance templates for the "_pt" variant
# (presumably Portuguese -- confirm against the template definitions).
negative_instances_list_pt = [
    non_contradiction_instance_1_pt,
    non_contradiction_instance_2_pt,
]

if __name__ == '__main__':
    # Run this script from the main folder, i.e.
    #     python clcd/text_generation/counting.py

    # English data sets. Both splits share the min_n / n settings and the
    # English instance templates; they differ in output path, size, and the
    # name and place lists they draw from.
    shared_options = dict(
        min_n=1,
        n=30,
        positive_instances_list=positive_instances_list_en,
        negative_instances_list=negative_instances_list_en,
    )

    # Training split.
    create_csv(
        out_path="text_gen_output/counting_train.csv",
        size=10000,
        person_list=male_names,
        place_list=countries,
        **shared_options)

    # Test split.
    create_csv(
        out_path="text_gen_output/counting_test.csv",
        size=1000,
        person_list=female_names,
        place_list=cities_and_states,
        **shared_options)
예제 #4
0
# DATA_SPLIT selects which splitter divides the CSV rows; RATIOS is only
# handed to s_partition (the 'spartition' choice).
DATA_SPLIT = 'rpartition'
RATIOS = [0.5, 0.3, 0.2]

start_time = time.time()

# One Chrome session per peer.
drivers = [
    webdriver.Chrome(ChromeDriverManager().install())
    for _peer in range(NUM_PEERS)
]

data = read_csv(CSV_FILE_PATH)
header = data[0]

# Each splitter is wrapped in a lambda so that only the selected one runs.
split_strategies = {
    'partition': lambda: partition(data, NUM_PEERS),
    'rpartition': lambda: r_partition(data, NUM_PEERS),
    'spartition': lambda: s_partition(data, RATIOS),
}

# An unrecognised DATA_SPLIT value writes no partition files at all.
selected_split = split_strategies.get(DATA_SPLIT)
if selected_split is not None:
    res = selected_split()
    for index, part in enumerate(res):
        create_csv(header, part, f"{index}_partition.csv")

for index, driver in enumerate(drivers):
    # Click 'Start Building' on home page
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)

    # Upload files on Task Training
    time.sleep(6)