Example #1
    def run(enrich_data_params, update_dataset_params):

        enrich_out_dir = enrich_data_params["enrich_out_dir"]
        enrich_out_archive_dir = enrich_data_params["enrich_out_archive_dir"]
        update_in_dir = update_dataset_params["update_in_dir"]
        update_in_archive_dir = update_dataset_params["update_in_archive_dir"]
        dataset_file = update_dataset_params["dataset_file"]

        # copy enrich out to update in
        # move enrich out to enrich out archive
        FileUtil.copy_and_move_files(enrich_out_dir, update_in_dir,
                                     enrich_out_archive_dir, "*.csv")

        print("\nLoading Data to be added to Dataset from Filesystem...")
        df_update = FileUtil.get_df_from_csv_dir(update_in_dir, "*.csv")
        print("Complete. Count: " + str(df_update.shape[0]))

        # get header
        header = ','.join(df_update.columns) + '\n'
        # print(header)

        # add header if file does not exist
        if not FileUtil.file_exists(dataset_file):
            FileUtil.write_to_file(dataset_file, header)

        # append to file
        FileUtil.add_df_to_csv_file(df_update, dataset_file)

        # move update in to update in archive
        FileUtil.move_files(update_in_dir, update_in_archive_dir, "*.csv")
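
FileUtil is not shown in any of these examples. Below is a minimal sketch of the helpers they call, assuming plain glob/shutil file handling and pandas for the CSV loading; the whole class is inferred from the call sites, not the project's real implementation:

    import glob
    import os
    import shutil

    import pandas as pd


    class FileUtil:

        @staticmethod
        def copy_and_move_files(src_dir, copy_to_dir, archive_dir, pattern):
            # copy each matching file forward, then archive the original
            for path in glob.glob(os.path.join(src_dir, pattern)):
                shutil.copy(path, copy_to_dir)
                shutil.move(path,
                            os.path.join(archive_dir, os.path.basename(path)))

        @staticmethod
        def move_files(src_dir, dest_dir, pattern):
            for path in glob.glob(os.path.join(src_dir, pattern)):
                shutil.move(path,
                            os.path.join(dest_dir, os.path.basename(path)))

        @staticmethod
        def get_df_from_csv_dir(csv_dir, pattern):
            # concatenate every matching CSV into a single DataFrame
            paths = glob.glob(os.path.join(csv_dir, pattern))
            return pd.concat((pd.read_csv(p) for p in paths),
                             ignore_index=True)

        @staticmethod
        def get_df_from_csv_dirs(dir_a, dir_b, pattern):
            # same, but across two directories (current and previous cycle)
            paths = (glob.glob(os.path.join(dir_a, pattern)) +
                     glob.glob(os.path.join(dir_b, pattern)))
            return pd.concat((pd.read_csv(p) for p in paths),
                             ignore_index=True)

        @staticmethod
        def file_exists(path):
            return os.path.isfile(path)

        @staticmethod
        def write_to_file(path, text):
            with open(path, 'w') as f:
                f.write(text)

        @staticmethod
        def append_to_file(path, text):
            with open(path, 'a') as f:
                f.write(text)

        @staticmethod
        def add_df_to_csv_file(df, path):
            # append data rows only; the caller writes the header once
            df.to_csv(path, mode='a', header=False, index=False)

Note that the later examples build output paths by plain concatenation (e.g. enrich_out_dir + enrich_filename), so the *_dir values are evidently expected to end with a path separator.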
Example #2
    def run(debug, env, username, password, correlate_data_params,
            enrich_data_params):

        correlate_out_dir = correlate_data_params["correlate_out_dir"]
        correlate_out_archive_dir = correlate_data_params[
            "correlate_out_archive_dir"]
        enrich_in_dir = enrich_data_params["enrich_in_dir"]
        enrich_in_archive_dir = enrich_data_params["enrich_in_archive_dir"]
        enrich_out_dir = enrich_data_params["enrich_out_dir"]

        # copy correlate data out to enrich in
        # move correlate out to correlate out archive
        FileUtil.copy_and_move_files(correlate_out_dir, enrich_in_dir,
                                     correlate_out_archive_dir, "*.csv")

        wj_api = WorkjamAPI(debug, env, username, password)

        # current time in milliseconds, used to build a unique filename
        now_timestamp = TimeUtil.get_current_milli_time()
        enrich_filename = 'Enrich_' + str(now_timestamp) + ".csv"

        print("\nLoading Data to be enriched from Filesystem...")
        df_enrich = FileUtil.get_df_from_csv_dir(enrich_in_dir, "*.csv")
        print("Complete. Count: " + str(df_enrich.shape[0]))

        # fetch the per-call CSV headers, then write the full header line

        response_user_header = wj_api.get_user_details(True, '', '')
        response_event_header = wj_api.get_event_details(True, '', '', '')

        FileUtil.write_to_file(
            enrich_out_dir + enrich_filename,
            'loggedin_user,company_id,query_datetime,apply_datetime,number_of_open_shifts,location_id,event_id,'
            + response_user_header + ',' + response_event_header +
            ',applied\n')

        print("\nEnriching User and Event info...")

        num_records_written_to_file = 0

        for index, row in df_enrich.iterrows():
            loggedinuser = row['loggedinuser']
            companyid = row['companyid']
            query_datetime = row['query_datetime']
            apply_datetime = row['apply_datetime']
            numberofopenshifts = row['numberofopenshifts']
            locationid = row['locationid']
            eventid = row['eventid']
            applied = row['applied']

            try:
                # Get Info for the Event in context
                response_event_csv = wj_api.get_event_details(
                    False, companyid, locationid, eventid)

                # Get Info for the User in context
                response_user_csv = wj_api.get_user_details(
                    False, companyid, loggedinuser)

                # write enriched data to the out dir, tagged with the run timestamp
                FileUtil.append_to_file(
                    enrich_out_dir + enrich_filename,
                    str(loggedinuser) + ',' + str(companyid) + ',' +
                    str(query_datetime) + ',' + str(apply_datetime) + ',' +
                    str(numberofopenshifts) + ',' + str(locationid) + ',' +
                    str(eventid) + ',' + response_user_csv + ',' +
                    response_event_csv + ',' + str(applied) + '\n')

                num_records_written_to_file += 1

            except Exception as e:
                # log and skip rows the API rejects; keep enriching the rest
                print(e)

        print("Complete. Found: {} Written: {}\n".format(
            str(df_enrich.shape[0]), num_records_written_to_file))

        # move enrich in to enrich in archive
        FileUtil.move_files(enrich_in_dir, enrich_in_archive_dir, "*.csv")
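
WorkjamAPI and TimeUtil are external too. The convention worth calling out is the leading boolean on the two detail calls: True asks for just the CSV header of that call's columns (the id arguments are ignored, which is why empty strings are passed when building the file header), while False returns one CSV data row. A sketch under that assumption, with invented placeholder columns and canned rows instead of real HTTP calls:

    import time


    class TimeUtil:

        @staticmethod
        def get_current_milli_time():
            # epoch time in milliseconds, used to build unique filenames
            return int(round(time.time() * 1000))


    class WorkjamAPI:

        def __init__(self, debug, env, username, password):
            # a real client would authenticate against the given env here
            self.debug = debug
            self.env = env
            self.auth = (username, password)

        def get_user_details(self, header_only, companyid, userid):
            if header_only:
                return 'user_role,user_startdate'  # placeholder columns
            # a real client would issue an HTTP request for this user;
            # a canned row keeps the sketch self-contained
            return 'employee,2019-01-01'

        def get_event_details(self, header_only, companyid, locationid,
                              eventid):
            if header_only:
                return 'event_type,event_startdate'  # placeholder columns
            return 'OPEN_SHIFT,2019-06-01'

Fetching both headers up front is what guarantees that the header line written once by write_to_file lines up column for column with the rows appended inside the loop.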
Example #3
    def run(debug, get_data_params, correlate_data_params):

        sumologic_out_dir = get_data_params["sumologic_out_dir"]
        sumologic_out_archive_dir = get_data_params["sumologic_out_archive_dir"]
        correlate_in_current_cycle_dir = correlate_data_params["correlate_in_current_cycle_dir"]
        correlate_in_previous_cycle_dir = correlate_data_params["correlate_in_previous_cycle_dir"]
        correlate_in_archive_dir = correlate_data_params["correlate_in_archive_dir"]
        correlate_out_dir = correlate_data_params["correlate_out_dir"]

        # copy sumologic out to correlate in current cycle
        # move sumologic out to sumologic out archive
        FileUtil.copy_and_move_files(sumologic_out_dir,
                                     correlate_in_current_cycle_dir,
                                     sumologic_out_archive_dir, "*.csv")

        # current time in milliseconds, used to build a unique filename
        now_timestamp = TimeUtil.get_current_milli_time()
        correlate_filename = 'Correlate_' + str(now_timestamp) + ".csv"

        print("\nLoading Open Shift Requests from Filesystem...")
        # correlate Apply events with Requests from the current and previous cycles
        df_requests = FileUtil.get_df_from_csv_dirs(correlate_in_current_cycle_dir,
                                                    correlate_in_previous_cycle_dir,
                                                    "Requests*")
        print("Complete. Count: " + str(df_requests.shape[0]))
        if debug:
            for index, row in df_requests.iterrows():
                print(row)

        print("\nLoading Apply to Open Shifts from Filesystem...")
        df_apply = FileUtil.get_df_from_csv_dir(correlate_in_current_cycle_dir,
                                                "Apply*")
        print("Complete. Count: " + str(df_apply.shape[0]))

        print("\nCorrelating Apply Open Shifts with Open Shifts Requests... ")

        fields = ['loggedinuser', 'companyid',
                  'query_datetime', 'apply_datetime', 'numberofopenshifts',
                  'locationid', 'eventid', 'applied']

        CorrelateData.add_header(correlate_out_dir + correlate_filename, fields)

        for index, row in df_apply.iterrows():

            apply_datetime = row['datetime']
            loggedinuser = row['loggedinuser']
            companyid = row['companyid']
            locationid = row['locationid']
            eventid = row['eventid']

            # most recent earlier request by the same user and company that
            # offered this event at this location
            df_filtered = df_requests.loc[
                    (df_requests['loggedinuser'] == loggedinuser) &
                    (df_requests['companyid'] == companyid) &
                    (df_requests['datetime'] < apply_datetime) &
                    (df_requests['eventandlocationids'].str.contains(str(eventid)+","+str(locationid)))
                    ].drop_duplicates().sort_values(by=['datetime'], ascending=False).head(1)

            if df_filtered.shape[0] > 0:

                # let's first replace ', ' with '|' and then split on '|'
                # Example text: (3714cb1e-4839-4d8c-818e-9d01c655cd86,328038), (d87a2bb7-05e0-465e-8b6c-aa18d89a9c9f,328038), (e7bee5c5-8f4e-457f-95e7-b1ec82e8ab21,328038), (f04d14c1-68c3-4dda-8698-3d95eb3a4b9d,328038)
                events_and_locations = df_filtered.iloc[0]['eventandlocationids'].replace(', ', '|').split('|')

                for event_location in events_and_locations:

                    # let's strip the parentheses and split the text on ','
                    # Example text: (3714cb1e-4839-4d8c-818e-9d01c655cd86,328038)
                    eventid_in_request, locationid_in_request = event_location.replace('(', '').replace(')', '').split(',')

                    applied = False
                    if str(eventid) == str(eventid_in_request) and str(locationid) == str(locationid_in_request):
                        applied = True

                    row = {'loggedinuser': loggedinuser,
                           'companyid': companyid,
                           'query_datetime': df_filtered.iloc[0]['datetime'],
                           'apply_datetime': apply_datetime,
                           'numberofopenshifts': df_filtered.iloc[0]['numberofopenshifts'],
                           'locationid': locationid_in_request,
                           'eventid': eventid_in_request,
                           'applied': applied}

                    CorrelateData.add_row(correlate_out_dir + correlate_filename, fields, row)

        print("Complete. Results written to: {} \n".format(correlate_out_dir+correlate_filename))

        # move correlate in previous cycle to correlate in archive
        FileUtil.move_files(correlate_in_previous_cycle_dir,
                            correlate_in_archive_dir, "*.csv")

        # move correlate in current cycle (Apply) to
        # correlate in archive cycle
        FileUtil.move_files(correlate_in_current_cycle_dir,
                            correlate_in_archive_dir, "Apply*")

        # move correlate in current cycle (Requests) to
        # correlate in previous cycle
        FileUtil.move_files(correlate_in_current_cycle_dir,
                            correlate_in_previous_cycle_dir, "Requests*")
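
CorrelateData only needs the two calls used above. A minimal sketch with csv.DictWriter (an assumption; the original may assemble the lines by hand), followed by the eventandlocationids parsing run on the example text from the comments:

    import csv


    class CorrelateData:

        @staticmethod
        def add_header(path, fields):
            # start the output file with the column names
            with open(path, 'w', newline='') as f:
                csv.DictWriter(f, fieldnames=fields).writeheader()

        @staticmethod
        def add_row(path, fields, row):
            # append one correlated record; row keys must match fields
            with open(path, 'a', newline='') as f:
                csv.DictWriter(f, fieldnames=fields).writerow(row)


    # the same replace/split parsing used in the loop above
    text = ('(3714cb1e-4839-4d8c-818e-9d01c655cd86,328038), '
            '(d87a2bb7-05e0-465e-8b6c-aa18d89a9c9f,328038)')
    for pair in text.replace(', ', '|').split('|'):
        eventid, locationid = pair.replace('(', '').replace(')', '').split(',')
        print(eventid, locationid)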