def import_perpetual_access_files():
    results = []
    my_files = glob.glob("/Users/hpiwowar/Downloads/wvu_perpetual_access.csv")
    my_files.reverse()

    for my_file in my_files:
        print my_file

        if False:
            # xlsx branch, currently disabled
            xlsx_file = open(my_file, "rb")
            workbook = openpyxl.load_workbook(xlsx_file, read_only=True)
            sheetnames = list(workbook.sheetnames)

            for sheetname in sheetnames:
                sheet = workbook[sheetname]

                # map header names to column indexes from the first row
                column_names = {}
                for i, column in enumerate(list(sheet.iter_rows(min_row=1, max_row=1))[0]):
                    column_names[column.value] = i

                for row_cells in sheet.iter_rows(min_row=1):
                    username = row_cells[column_names["Account Name"]].value
                    issn = row_cells[column_names["ISSN (FS split)"]].value
                    start_date = row_cells[column_names["Content Start Date"]].value
                    end_date = row_cells[column_names["Content End Date"]].value
                    if is_issn(issn):
                        new_dict = {
                            "username": username,
                            "issn": issn,
                            "start_date": start_date,
                            "end_date": end_date
                        }
                        results.append(new_dict)
                        # print new_dict
                        print ".",
        else:
            rows = read_csv_file(my_file)
            for row in rows:
                print row
                new_dict = {
                    "username": "******",
                    "issn": row["issn"],
                    "start_date": row["start_date"],
                    "end_date": row["end_date"]
                }
                results.append(new_dict)
                # print new_dict
                print ".",

    with open("/Users/hpiwowar/Downloads/perpetual_access_cleaned.csv", "w") as csv_file:
        # note: the encoding kwarg implies a unicodecsv-style writer here;
        # the stdlib csv.writer takes no encoding argument
        csv_writer = csv.writer(csv_file, encoding="utf-8")
        header = ["username", "issn", "start_date", "end_date"]
        csv_writer.writerow(header)
        for my_dict in results:
            csv_writer.writerow([my_dict[k] for k in header])

    print "/Users/hpiwowar/Downloads/perpetual_access_cleaned.csv"
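# Hedged sketch (assumption, not the project's actual validator): is_issn above is
# used to drop non-ISSN values before collecting rows; a minimal format-only check
# could look like this hypothetical helper.
import re

def is_issn_sketch(value):
    # Accept strings shaped like an ISSN: four digits, a hyphen, then three digits
    # plus a final digit or X check character. Format check only, no checksum.
    if not value:
        return False
    return bool(re.match(r"^\d{4}-\d{3}[\dX]$", str(value).strip().upper()))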
def main_csv_reader(args):
    path_to_coffee = args.path_to_coffee
    path_to_matched = args.matched_json

    all_people_list = flat_list(list(read_csv_file(path_to_coffee)))
    matched_in_this_session = []
    error = False

    if path_to_matched:
        try:
            matched_people_json = read_json_file(path_to_matched)
            tuple_list = create_tuple_list(all_people_list, matched_people_json)
            sorted_people_list = sort_tuple_list(tuple_list)
        except:
            raise ValueError('Only use the program-generated matched_people.json file')
    else:
        write_json_file()
        matched_people_json = read_json_file('matched_people.json')
        sorted_people_list = all_people_list

    unmatched_people = []
    for person in sorted_people_list:
        if person not in matched_in_this_session:
            individual_match_list = invidual_preproc(person, all_people_list,
                                                     matched_people_json,
                                                     matched_in_this_session)
            if individual_match_list:
                matched_pair = coffee_roulette(person, individual_match_list)
                if matched_pair is not None:
                    for person in matched_pair:
                        matched_in_this_session.append(person)
                else:
                    error = True
                    break
            else:
                unmatched_people.append(person)
        else:
            pass

    if error is False:
        create_today_matched(matched_in_this_session)
        if unmatched_people:
            create_today_unmatched(unmatched_people)
        updated_json = update_current_json(matched_people_json, matched_in_this_session)

        summary = "\n{} Matches".format(date.today())
        summary = create_matched_people_string(matched_in_this_session, summary)
        summary_message, alone = make_summary(matched_in_this_session, unmatched_people, summary, "")
        summary += alone

        write_json_file(updated_json)
        write_txt_file(summary)
        print(summary_message)
def log_file():
    analysis = request.args.get('analysis')
    start_date = request.args.get('start-date')
    end_date = request.args.get('end-date')

    # Parameter parsing
    try:
        start_date = parse_date(start_date)
    except:
        return ("ERROR: Start date can't be parsed by YYYY-MM-DD format.", 400)
    try:
        end_date = parse_date(end_date)
    except:
        return ("ERROR: End date can't be parsed by YYYY-MM-DD format.", 400)

    # Validate
    if start_date > end_date:
        return ("ERROR: Start date can't be ahead of the end date.", 400)

    # Logic
    log_file = cli.generate_log_file(start_date, end_date)
    if analysis is None or analysis == 'summary':
        return jsonify(read_csv_file(cli.generate_summary_file(log_file)))
    elif analysis == 'revision':
        return jsonify(read_csv_file(cli.generate_revision_file(log_file)))
    elif analysis == 'coupling':
        return jsonify(read_csv_file(cli.generate_coupling_file(log_file)))
    elif analysis == 'age':
        return jsonify(read_csv_file(cli.generate_age_file(log_file)))
    elif analysis == 'abs-churn':
        return jsonify(read_csv_file(cli.generate_absolute_churn_file(log_file)))
    elif analysis == 'author-churn':
        return jsonify(read_csv_file(cli.generate_author_churn_file(log_file)))
    elif analysis == 'entity-churn':
        return jsonify(read_csv_file(cli.generate_entity_churn_file(log_file)))
    elif analysis == 'entity-ownership':
        return jsonify(read_csv_file(cli.generate_entity_ownership_file(log_file)))
    elif analysis == 'entity-effort':
        return jsonify(read_csv_file(cli.generate_entity_effort_file(log_file)))
    else:
        return ("ERROR: Analysis type not in selection.", 400)
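# Hedged usage sketch (not part of the original source): the route decorator is not
# shown in this excerpt, so the endpoint path "/log-file" and the port below are
# assumptions. Assuming the Flask app is running, a client could exercise the query
# parameters handled above like this:
import requests

resp = requests.get(
    "http://localhost:5000/log-file",
    params={"analysis": "coupling", "start-date": "2020-01-01", "end-date": "2020-06-30"},
)
print(resp.status_code)
print(resp.json())  # the CSV rows returned by jsonify(read_csv_file(...))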
def main():
    if not util.is_there_csvfile():
        print("A CSV of the Alexa list does not exist!")
        if not util.fetch_alexa_list():
            print("Could not fetch the file!")
            exit(1)

    website_dict = util.read_csv_file(10)
    print(website_dict['de'])
    util.write_dict_to_json(website_dict)
    util.dict_to_bookmark(website_dict)
# preprocess
# train
dfTrain['clickTime_day_gap'] = dfTrain['clickTime'].apply(util.get_train_time_day)
dfTrain['clickTime_hour'] = dfTrain['clickTime'].apply(util.get_time_hour)

# test
dfTest['clickTime_day_gap'] = dfTest['clickTime'].apply(util.get_test_time_day)
dfTest['clickTime_hour'] = dfTest['clickTime'].apply(util.get_time_hour)

# ad
ad = util.read_csv_file(data_root + '/ad.csv', logging=True)

# app
app_categories = util.read_csv_file(data_root + '/app_categories.csv', logging=True)
app_categories["app_categories_first_class"] = app_categories['appCategory'].apply(util.categories_process_first_class)
app_categories["app_categories_second_class"] = app_categories['appCategory'].apply(util.categories_process_second_class)

# user
user = util.read_csv_file(data_root + '/user.csv', logging=True)
user['age_process'] = user['age'].apply(util.age_process)
user["hometown_province"] = user['hometown'].apply(util.hometown_process_province)
user["hometown_city"] = user['hometown'].apply(util.hometown_process_city)
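# Hedged sketch (assumption, not from the original util module): the helpers passed to
# .apply above are not shown here. As an illustration of the kind of per-value
# transformation being applied, a hypothetical age-bucketing function might look like
# this; the boundaries are invented for illustration only.
def age_process_example(age):
    # Bucket raw ages into coarse groups, keeping 0 as "unknown".
    if age == 0:
        return 0
    elif age <= 18:
        return 1
    elif age <= 25:
        return 2
    elif age <= 35:
        return 3
    elif age <= 45:
        return 4
    else:
        return 5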
    # (tail of a divergence-style score over two feature samples; the enclosing def,
    # the initialisation of s, and max_sample are not shown in this excerpt)
    p_feature_map = get_feature_map(p_features)
    q_feature_map = get_feature_map(q_features)

    for feature_p, feature_q in zip(p_features, q_features):
        p_x = float(p_feature_map[tuple(feature_p)]) / max_sample
        q_x = float(q_feature_map[tuple(feature_q)]) / max_sample
        s += log(p_x / q_x)

    s = s / max_sample
    return s


if __name__ == "__main__":
    # Extract all the features from the csv file
    (total_feat_printing_1st_to_2nd_1st,
     total_feat_printing_1st_to_2nd_2nd,
     total_feat_printing_2nd_to_3rd_2nd,
     total_feat_cursive_2nd_to_3rd_3rd,
     total_feat_printing_2nd_to_3rd_3rd,
     total_feat_cursive_3rd_to_4th_3rd,
     total_feat_printing_3rd_to_4th_3rd,
     total_feat_cursive_3rd_to_4th_4th,
     total_feat_printing_3rd_to_4th_4th,
     total_feat_cursive_4th_to_5th_4th,
     total_feat_printing_4th_to_5th_4th,
     total_feat_cursive_4th_to_5th_5th,
     total_feat_printing_4th_to_5th_5th) = read_csv_file(SETTINGS.file_path_temporal_data)

    # Grade 1st to 2nd
    total_feat_printing_1st_to_2nd_1st = get_parsed_data(total_feat_printing_1st_to_2nd_1st)
    total_feat_printing_1st_to_2nd_2nd = get_parsed_data(total_feat_printing_1st_to_2nd_2nd)

    # Grade 2nd to 3rd
    total_feat_printing_2nd_to_3rd_2nd = get_parsed_data(total_feat_printing_2nd_to_3rd_2nd)
    total_feat_cursive_2nd_to_3rd_3rd = get_parsed_data(total_feat_cursive_2nd_to_3rd_3rd)
    total_feat_printing_2nd_to_3rd_3rd = get_parsed_data(total_feat_printing_2nd_to_3rd_3rd)
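# Hedged sketch (assumption, not from the original source): get_feature_map is used
# above as a lookup from a feature tuple to how often that tuple occurs, so a minimal
# stand-in could be a counter over the feature rows.
from collections import Counter

def get_feature_map_sketch(features):
    # Count occurrences of each feature vector, keyed by its tuple form, so that
    # float(count) / max_sample gives an empirical probability estimate.
    return Counter(tuple(feature) for feature in features)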
# coding=utf-8
import pandas as pd
import sys
from util import read_csv_file, get_time_day, get_time_hour, categories_process_first_class, categories_process_second_class, \
    age_process, hometown_process_city, hometown_process_province

ad = read_csv_file("../data/ad.csv")
app_categories = read_csv_file("../data/app_categories.csv").head(100)
position = read_csv_file("../data/position.csv").head(100)
test = read_csv_file("../data/test.csv").head(100)
train = read_csv_file("../data/train.csv")
# test.to_csv("result.csv")
# sys.exit()
user = read_csv_file("../data/user.csv").head(100)
user_app_actions = read_csv_file("../data/user_app_actions.csv").head(100)
user_installedapps = read_csv_file("../data/user_installedapps.csv").head(100)

'''
ad.csv preprocess
before: ['creativeID' 'adID' 'camgaignID' 'advertiserID' 'appID' 'appPlatform']
after:  ['creativeID' 'adID' 'camgaignID' 'advertiserID' 'appID' 'appPlatform_1' 'appPlatform_2']
'''
ad_columns = ad.columns.values
print ad_columns
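# Hedged sketch (assumption): the docstring above describes appPlatform being replaced
# by appPlatform_1 / appPlatform_2 columns. One standard way to get that layout with
# pandas is a one-hot encoding; whether the original code does exactly this is not
# shown in the excerpt.
ad_with_dummies = pd.concat(
    [ad.drop("appPlatform", axis=1),
     pd.get_dummies(ad["appPlatform"], prefix="appPlatform")],
    axis=1)
print ad_with_dummies.columns.values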
                    type=str, default=None, help="input file to parse")
parser.add_argument("--username", type=str, default=None, help="username to input")

parsed_args = parser.parse_args()
parsed_vars = vars(parsed_args)

# create_accounts(parsed_vars["filename"])
# build_counter_import_file(filename=parsed_vars["filename"], username=parsed_vars["username"])

crkn_ids = read_csv_file(
    u"/Users/hpiwowar/Documents/Projects/tiv2/jump-api/data/crkn_lookup.csv"
)

institution_for_all_these_packages = "institution-fVnPvXK9iBYA"

# report_name = "trj2"
# all_in_one_data_rows = read_csv_file(u"/Users/hpiwowar/Documents/Projects/tiv2/jump-api/data/counter5_crkn/TR_J2 SUSHI Harvester CRKN_Wiley-2019_incl-non-participants.csv")

# report_name = "trj3"
# all_in_one_data_rows = read_csv_file(u"/Users/hpiwowar/Documents/Projects/tiv2/jump-api/data/counter5_crkn/TR_J3 SUSHI Harvester CRKN_Wiley-2019.csv")

# report_name = "trj4"
# all_in_one_data_rows = read_csv_file(u"/Users/hpiwowar/Documents/Projects/tiv2/jump-api/data/counter5_crkn/1 TR_J4 SUSHI Harvester CRKN_Wiley-2019.csv")
# all_in_one_data_rows = read_csv_file(u"/Users/hpiwowar/Documents/Projects/tiv2/jump-api/data/counter5_crkn/2 TR_J4 SUSHI Harvester CRKN_Wiley-2019.csv")

report_name = "trj4"
all_in_one_data_rows = read_csv_file(
import simplejson as json
import re

from util import read_csv_file

error_rows = read_csv_file("/Users/hpiwowar/Downloads/jump_file_import_error_rows.csv")
rows = [d for d in error_rows if d["file"] == "price"]

each_error = []
i = 0
for row in rows:
    hits = re.findall('{"ri(.*?)wrong_publisher(.*?)}', row["errors"])
    if hits:
        print hits[0]
        print len(hits)
    print i
    i = i + 1

# unknown_issn
# wrong_publisher
def __init__(self):
    self.results = []
    rows, first_header = read_csv_file(
        'data/LeaderBoardData/TADPOLE_Submission_Leaderboard_TeamName.csv')
    self.results.append(first_header)
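# Hedged sketch (assumption, not the project's actual helper): the call above unpacks
# two values, so the read_csv_file used here presumably returns the data rows plus the
# header row. A minimal stand-in with that shape could look like this:
import csv

def read_csv_file_sketch(path):
    # Return (rows, header): header is the first row, rows are the remaining rows.
    with open(path) as f:
        reader = csv.reader(f)
        header = next(reader)
        rows = list(reader)
    return rows, header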