def run_scraper(debug=False):
    """Crawl price data for every date pair and append it to a dated CSV.

    One data CSV and one log file are created per run, both stamped with
    the current date. For each date pair a Scraper is created, crawled,
    and cleaned up; rows are appended per room type and accommodation
    type, or a placeholder row when the crawl returned nothing.

    Args:
        debug: passed straight through to scraper.Scraper — presumably a
            verbose/debug flag; TODO confirm against the Scraper class.
    """
    date = util.get_current_date()
    # create the csv file (one data file and one log file per run)
    csv_name = '{}_data.csv'.format(date)
    log_file = '{}_log.txt'.format(date)
    util.create_csv(csv_name)
    for date_pair in util.get_date_pairs():
        worker = scraper.Scraper(date_pair[0], date_pair[1], debug)
        data = worker.crawl(log_file)
        if not data:
            # Nothing crawled for this date pair: record a placeholder row
            # so the date pair still appears in the output.
            data_row = [date_pair[0], date_pair[1], None, None, None, None]
            util.append_row(csv_name, data_row)
        else:
            for room_type in sorted(data):
                room_data = data[room_type]
                # Merge the accommodation data. The original code repeated
                # this loop body verbatim for each of the three price
                # categories; iterating the keys in the same order keeps
                # the output identical (an accom type present in several
                # categories is still appended once per category).
                for price_key in ('price_bar', 'price_summer', 'price_adv'):
                    for accom_type in sorted(room_data[price_key]):
                        m_price = merge(room_data, accom_type)
                        data_row = [date_pair[0], date_pair[1], room_type,
                                    accom_type,
                                    m_price['p_bar'],
                                    m_price['p_sum'],
                                    m_price['p_adv']]
                        util.append_row(csv_name, data_row)
        # NOTE(review): clean() placed at date-pair level (after each
        # worker's rows are written) — indentation was lost in the source;
        # confirm it should not sit inside the else branch.
        worker.clean()
# util.generate_random_data('data.csv', 80000)

# Resolve the directory this script lives in; os.path.split returns an
# empty head when __file__ is a bare filename, so fall back to ".".
base_path, filename = os.path.split(__file__)
base_path = base_path or '.'

# --- Training phase: learn association rules from the training set. ---
trainning_data = util.get_data_from_file(
    base_path + '/../data_base/trainning_data.csv')
print("Generating association rules...")
nar = NetworkAssociationRules(trainning_data)
association_rules = nar.calculate_association_rules()

# Persist the learned rules.
print("Saving association rules into file...")
util.create_csv(
    base_path + "/../results/association_rules.csv",
    association_rules,
    ["Semester", "Hour", "level", "Conf", "Sup"],
    parse_association_rule)

# --- Testing phase: validate the rules against held-out data. ---
# util.generate_random_data('test_data.csv', 20000)
test_data = util.get_data_from_file(
    base_path + '/../data_base/testing_data.csv')
print("Testing....")
accuracy = nar.validate_association_rules(test_data)

# Persist the accuracy report.
print("Saving test results...")
util.create_csv(
    base_path + "/../results/accuracy.csv",
    accuracy,
    ["Semester", "Hour", "level", "Conf[rule]", "Conf[test]", "Result"],
    parse_accuracy)
]
# Portuguese negative (non-contradiction) instances.
# NOTE(review): the _pt lists are defined but not used by the calls below,
# which pass the _en lists only — presumably the Portuguese variant is
# generated elsewhere; confirm.
negative_instances_list_pt = [
    non_contradiction_instance_1_pt,
    non_contradiction_instance_2_pt
]

if __name__ == '__main__':
    # call this script in the main folder, i.e., type
    # python clcd/text_generation/counting.py

    # english
    # Training split: 10k examples built from male names and countries.
    create_csv(
        out_path="text_gen_output/counting_train.csv",  # noqa
        size=10000,
        person_list=male_names,
        place_list=countries,
        min_n=1,
        n=30,
        positive_instances_list=positive_instances_list_en,
        negative_instances_list=negative_instances_list_en)  # noqa
    # Test split: 1k examples built from female names and cities/states,
    # so test entities are disjoint from the training entities.
    create_csv(
        out_path="text_gen_output/counting_test.csv",  # noqa
        size=1000,
        person_list=female_names,
        place_list=cities_and_states,
        min_n=1,
        n=30,
        positive_instances_list=positive_instances_list_en,
        negative_instances_list=negative_instances_list_en)  # noqa
DATA_SPLIT = 'rpartition'  # one of: 'partition', 'rpartition', 'spartition'
RATIOS = [0.5, 0.3, 0.2]   # split ratios; only read by the 'spartition' strategy

start_time = time.time()

# One Chrome instance per simulated peer.
drivers = [
    webdriver.Chrome(ChromeDriverManager().install()) for _ in range(NUM_PEERS)
]

data = read_csv(CSV_FILE_PATH)
header = data[0]

# Choose the split strategy, then write every share to its own CSV.
# (The three branches previously duplicated the identical write loop;
# it is hoisted below the strategy selection.)
if DATA_SPLIT == 'partition':
    res = partition(data, NUM_PEERS)
elif DATA_SPLIT == 'rpartition':
    res = r_partition(data, NUM_PEERS)
elif DATA_SPLIT == 'spartition':
    res = s_partition(data, RATIOS)
else:
    res = []  # unknown strategy: write no partition files, as before
for index, r in enumerate(res):
    create_csv(header, r, f"{index}_partition.csv")

for index, driver in enumerate(drivers):
    # Click 'Start Building' on home page
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)

    # Upload files on Task Training
    time.sleep(6)