Example #1
import glob

import openpyxl
import unicodecsv as csv  # assumed: the encoding= kwarg passed to csv.writer below needs unicodecsv rather than the stdlib csv

# read_csv_file and is_issn are project-local helpers (not shown here).
def import_perpetual_access_files():
    results = []
    my_files = glob.glob("/Users/hpiwowar/Downloads/wvu_perpetual_access.csv")
    my_files.reverse()
    for my_file in my_files:
        print my_file
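        # The xlsx-parsing branch below is disabled (if False:); only the CSV branch in the else clause runs.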
        if False:
            xlsx_file = open(my_file, "rb")
            workbook = openpyxl.load_workbook(xlsx_file, read_only=True)
            sheetnames = list(workbook.sheetnames)

            for sheetname in sheetnames:
                sheet = workbook[sheetname]

                column_names = {}
                for i, column in enumerate(
                        list(sheet.iter_rows(min_row=1, max_row=1))[0]):
                    column_names[column.value] = i

                for row_cells in sheet.iter_rows(min_row=1):
                    username = row_cells[column_names["Account Name"]].value
                    issn = row_cells[column_names["ISSN (FS split)"]].value
                    start_date = row_cells[
                        column_names["Content Start Date"]].value
                    end_date = row_cells[
                        column_names["Content End Date"]].value
                    if is_issn(issn):
                        new_dict = {
                            "username": username,
                            "issn": issn,
                            "start_date": start_date,
                            "end_date": end_date
                        }
                        results.append(new_dict)
                        # print new_dict
                        print ".",
        else:
            rows = read_csv_file(my_file)
            for row in rows:
                print row
                new_dict = {
                    "username": "******",
                    "issn": row["issn"],
                    "start_date": row["start_date"],
                    "end_date": row["end_date"]
                }
                results.append(new_dict)
                # print new_dict
                print ".",

    with open("/Users/hpiwowar/Downloads/perpetual_access_cleaned.csv",
              "w") as csv_file:
        csv_writer = csv.writer(csv_file, encoding="utf-8")
        header = ["username", "issn", "start_date", "end_date"]
        csv_writer.writerow(header)
        for my_dict in results:
            csv_writer.writerow([my_dict[k] for k in header])
    print "/Users/hpiwowar/Downloads/perpetual_access_cleaned.csv"
Example #2
from datetime import date  # date.today() is used below; the other helpers are project-local functions

def main_csv_reader(args):
    path_to_coffee = args.path_to_coffee
    path_to_matched = args.matched_json
    all_people_list = flat_list(list(read_csv_file(path_to_coffee)))
    matched_in_this_session = []
    error = False

    if path_to_matched:
        try:
            matched_people_json = read_json_file(path_to_matched)
            tuple_list = create_tuple_list(all_people_list,
                                           matched_people_json)
            sorted_people_list = sort_tuple_list(tuple_list)
        except:
            raise ValueError(
                'Only use the program-generated matched_people.json file')
    else:
        write_json_file()
        matched_people_json = read_json_file('matched_people.json')
        sorted_people_list = all_people_list

    unmatched_people = []

    for person in sorted_people_list:
        if person not in matched_in_this_session:
            individual_match_list = invidual_preproc(person, all_people_list,
                                                     matched_people_json,
                                                     matched_in_this_session)
            if individual_match_list:
                matched_pair = coffee_roulette(person, individual_match_list)
                if matched_pair is not None:
                    for matched_person in matched_pair:
                        matched_in_this_session.append(matched_person)
                else:
                    error = True
                    break
            else:
                unmatched_people.append(person)
        else:
            pass

    if error is False:
        create_today_matched(matched_in_this_session)
        if unmatched_people:
            create_today_unmatched(unmatched_people)

        updated_json = update_current_json(matched_people_json,
                                           matched_in_this_session)
        summary = "\n{} Matches".format(date.today())
        summary = create_matched_people_string(matched_in_this_session,
                                               summary)
        summary_message, alone = make_summary(matched_in_this_session,
                                               unmatched_people, summary, "")
        summary += alone
        write_json_file(updated_json)
        write_txt_file(summary)
        print(summary_message)
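main_csv_reader only reads two attributes from its args object, path_to_coffee and matched_json. A minimal argparse setup that could supply them (option names are an assumption inferred from those attribute names, not taken from the project) might be:

import argparse

def parse_args():
    # Hypothetical CLI sketch; the project's real entry point may differ.
    parser = argparse.ArgumentParser(description="coffee roulette matcher")
    parser.add_argument("path_to_coffee", help="CSV listing the people to match")
    parser.add_argument("--matched_json", default=None,
                        help="path to a previously generated matched_people.json")
    return parser.parse_args()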
Example #3
from flask import jsonify, request  # Flask view; parse_date and cli are project-local helpers

def log_file():
    analysis = request.args.get('analysis')
    start_date = request.args.get('start-date')
    end_date = request.args.get('end-date')

    # Parameter
    try:
        start_date = parse_date(start_date)
    except:
        return ("ERROR: Start date can't be parsed by YYYY-MM-DD format.", 400)

    try:
        end_date = parse_date(end_date)
    except:
        return ("ERROR: End date can't be parsed by YYYY-MM-DD format.", 400)

    # Validate
    if start_date > end_date:
        return ("ERROR: Start date can't be ahead of the end date.", 400)

    # Logic
    log_file = cli.generate_log_file(start_date, end_date)

    if analysis is None or analysis == 'summary':
        return jsonify(read_csv_file(cli.generate_summary_file(log_file)))
    elif analysis == 'revision':
        return jsonify(read_csv_file(cli.generate_revision_file(log_file)))
    elif analysis == 'coupling':
        return jsonify(read_csv_file(cli.generate_coupling_file(log_file)))
    elif analysis == 'age':
        return jsonify(read_csv_file(cli.generate_age_file(log_file)))
    elif analysis == 'abs-churn':
        return jsonify(
            read_csv_file(cli.generate_absolute_churn_file(log_file)))
    elif analysis == 'author-churn':
        return jsonify(read_csv_file(cli.generate_author_churn_file(log_file)))
    elif analysis == 'entity-churn':
        return jsonify(read_csv_file(cli.generate_entity_churn_file(log_file)))
    elif analysis == 'entity-ownership':
        return jsonify(
            read_csv_file(cli.generate_entity_ownership_file(log_file)))
    elif analysis == 'entity-effort':
        return jsonify(read_csv_file(
            cli.generate_entity_effort_file(log_file)))
    else:
        return ("ERROR: Analysis type not in selection.", 400)
Example #4
import util  # project-local helper module (not shown)

def main():

    if not util.is_there_csvfile():
        print("The Alexa list CSV does not exist!")
        if not util.fetch_alexa_list():
            print("Could not fetch the file!")
            exit(1)

    website_dict = util.read_csv_file(10)
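    # website_dict appears to be keyed by country code (e.g. 'de'); the argument 10 presumably limits how many sites are read.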
    print(website_dict['de'])
    util.write_dict_to_json(website_dict)
    util.dict_to_bookmark(website_dict)
Example #5
File: lr_0.1065.py Project: zhangscth/CTR

#preprocess

#train
dfTrain['clickTime_day_gap'] = dfTrain['clickTime'].apply(util.get_train_time_day)
dfTrain['clickTime_hour'] = dfTrain['clickTime'].apply(util.get_time_hour)

#test

dfTest['clickTime_day_gap'] = dfTest['clickTime'].apply(util.get_test_time_day)
dfTest['clickTime_hour'] = dfTest['clickTime'].apply(util.get_time_hour)


#ad
ad = util.read_csv_file(data_root+'/ad.csv',logging=True)

#app
app_categories = util.read_csv_file(data_root+'/app_categories.csv',logging=True)
app_categories["app_categories_first_class"] = app_categories['appCategory'].apply(util.categories_process_first_class)
app_categories["app_categories_second_class"] = app_categories['appCategory'].apply(util.categories_process_second_class)



#user

user = util.read_csv_file(data_root+'/user.csv',logging=True)
user['age_process'] = user['age'].apply(util.age_process)

user["hometown_province"] = user['hometown'].apply(util.hometown_process_province)
user["hometown_city"] = user['hometown'].apply(util.hometown_process_city)
Example #6
    p_feature_map = get_feature_map(p_features)
    q_feature_map = get_feature_map(q_features)

    for feature_p, feature_q in zip(p_features, q_features):
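        # p_x and q_x are the relative frequencies of each feature tuple (map count / max_sample);
        # the loop accumulates their log-ratio and s is then averaged over max_sample.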
        p_x = float(p_feature_map[tuple(feature_p)]) / max_sample
        q_x = float(q_feature_map[tuple(feature_q)]) / max_sample

        s += (log(float(p_x / q_x)))

    s = s / max_sample
    return s


if __name__ == "__main__":
    # Extract all the features from the csv file
    (total_feat_printing_1st_to_2nd_1st, total_feat_printing_1st_to_2nd_2nd,
     total_feat_printing_2nd_to_3rd_2nd, total_feat_cursive_2nd_to_3rd_3rd,
     total_feat_printing_2nd_to_3rd_3rd, total_feat_cursive_3rd_to_4th_3rd,
     total_feat_printing_3rd_to_4th_3rd, total_feat_cursive_3rd_to_4th_4th,
     total_feat_printing_3rd_to_4th_4th, total_feat_cursive_4th_to_5th_4th,
     total_feat_printing_4th_to_5th_4th, total_feat_cursive_4th_to_5th_5th,
     total_feat_printing_4th_to_5th_5th) = read_csv_file(
         SETTINGS.file_path_temporal_data)

    #Grade 1st to 2nd
    total_feat_printing_1st_to_2nd_1st = get_parsed_data(
        total_feat_printing_1st_to_2nd_1st)
    total_feat_printing_1st_to_2nd_2nd = get_parsed_data(
        total_feat_printing_1st_to_2nd_2nd)

    #Grade 2nd to 3rd
    total_feat_printing_2nd_to_3rd_2nd = get_parsed_data(
        total_feat_printing_2nd_to_3rd_2nd)

    total_feat_cursive_2nd_to_3rd_3rd = get_parsed_data(
        total_feat_cursive_2nd_to_3rd_3rd)
    total_feat_printing_2nd_to_3rd_3rd = get_parsed_data(
        total_feat_printing_2nd_to_3rd_3rd)
Example #7
#coding=utf-8
import pandas as pd
import sys


from util import (read_csv_file, get_time_day, get_time_hour,
                  categories_process_first_class,
                  categories_process_second_class, age_process,
                  hometown_process_city, hometown_process_province)

ad = read_csv_file("../data/ad.csv")
app_categories = read_csv_file("../data/app_categories.csv").head(100)
position = read_csv_file("../data/position.csv").head(100)
test = read_csv_file("../data/test.csv").head(100)
train = read_csv_file("../data/train.csv")

# test.to_csv("result.csv")

# sys.exit()

user = read_csv_file("../data/user.csv").head(100)
user_app_actions = read_csv_file("../data/user_app_actions.csv").head(100)
user_installedapps = read_csv_file("../data/user_installedapps.csv").head(100)
'''
 ad.csv preprocess
 ['creativeID' 'adID' 'camgaignID' 'advertiserID' 'appID' 'appPlatform']

 ['creativeID' 'adID' 'camgaignID' 'advertiserID' 'appID' 'appPlatform_1'
 'appPlatform_2']
'''
ad_columns = ad.columns.values
print ad_columns
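The docstring above shows appPlatform being replaced by appPlatform_1 and appPlatform_2, but the excerpt stops before that step. Assuming appPlatform holds the values 1 and 2 and that a pandas one-hot encoding is what is intended, one way to produce those columns would be:

app_platform_dummies = pd.get_dummies(ad['appPlatform'], prefix='appPlatform')
ad = pd.concat([ad.drop('appPlatform', axis=1), app_platform_dummies], axis=1)
print ad.columns.values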
Example #8
                        type=str,
                        default=None,
                        help="input file to parse")
    parser.add_argument("--username",
                        type=str,
                        default=None,
                        help="username to input")

    parsed_args = parser.parse_args()
    parsed_vars = vars(parsed_args)

    # create_accounts(parsed_vars["filename"])
    # build_counter_import_file(filename=parsed_vars["filename"], username=parsed_vars["username"])

    crkn_ids = read_csv_file(
        u"/Users/hpiwowar/Documents/Projects/tiv2/jump-api/data/crkn_lookup.csv"
    )
    institution_for_all_these_packages = "institution-fVnPvXK9iBYA"

    # report_name = "trj2"
    # all_in_one_data_rows = read_csv_file(u"/Users/hpiwowar/Documents/Projects/tiv2/jump-api/data/counter5_crkn/TR_J2 SUSHI Harvester CRKN_Wiley-2019_incl-non-participants.csv")

    # report_name = "trj3"
    # all_in_one_data_rows = read_csv_file(u"/Users/hpiwowar/Documents/Projects/tiv2/jump-api/data/counter5_crkn/TR_J3 SUSHI Harvester CRKN_Wiley-2019.csv")

    # report_name = "trj4"
    # all_in_one_data_rows = read_csv_file(u"/Users/hpiwowar/Documents/Projects/tiv2/jump-api/data/counter5_crkn/1 TR_J4 SUSHI Harvester CRKN_Wiley-2019.csv")
    # all_in_one_data_rows = read_csv_file(u"/Users/hpiwowar/Documents/Projects/tiv2/jump-api/data/counter5_crkn/2 TR_J4 SUSHI Harvester CRKN_Wiley-2019.csv")

    report_name = "trj4"
    all_in_one_data_rows = read_csv_file(
Example #9
import simplejson as json
import re

from util import read_csv_file

error_rows = read_csv_file(
    "/Users/hpiwowar/Downloads/jump_file_import_error_rows.csv")

rows = [d for d in error_rows if d["file"] == "price"]
each_error = []
i = 0
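# Scan each price-file error row for JSON fragments that mention wrong_publisher and report how many were found.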
for row in rows:
    hits = re.findall('{"ri(.*?)wrong_publisher(.*?)}', row["errors"])
    if hits:
        print hits[0]
    print len(hits)
    print i
    i = i + 1

# unknown_issn
# wrong_publisher
Example #10
    def __init__(self):
        self.results = []
        rows, first_header = read_csv_file(
            'data/LeaderBoardData/TADPOLE_Submission_Leaderboard_TeamName.csv')
        self.results.append(first_header)