示例#1
0
def get_non_eval_clip_data_d():
    """Return a {postId: clip_path} mapping built from the non-eval clips CSV."""
    row_dl = logger.readCSV(NON_EVAL_CLIPS_DATA_CSV_PATH)
    return {row_d['postId']: row_d['clip_path'] for row_d in row_dl}
示例#2
0
def correct_txt_file(input_txt_file_path, output_txt_file_path,
                     corrected_chars_csv_path, font_path):
    """Rewrite a text file, replacing every char the font cannot render.

    Chars missing from `font_path` are looked up (by unicode) in the
    corrected-chars CSV and substituted; chars the font knows are copied
    through unchanged. The corrected lines are written to
    `output_txt_file_path`.

    Raises:
        TypeError: if an unknown char has no entry in the corrected-chars
            CSV (i.e. unkown_chars.csv was not fully filled in).
    """
    input_lines_t = tools.read_text_file(input_txt_file_path)
    corrected_lines_l = []
    font = TTFont(font_path)

    corrected_chars_dl = logger.readCSV(corrected_chars_csv_path)
    unknown_char_equiv_d = build_unknown_char_equiv_d(corrected_chars_dl)

    for input_line in input_lines_t:
        new_corrected_line = ''
        for char in input_line:
            if not tools.char_in_font(char, font):
                unknown_char_unicode = tools.char_2_unicode(char)
                try:
                    # only the dict lookup can legitimately fail here, so
                    # catch KeyError specifically instead of bare Exception
                    new_corrected_line += unknown_char_equiv_d[
                        unknown_char_unicode]
                except KeyError as e:
                    raise TypeError(
                        '''ERROR:  The input text file contains a char that is not recognized by char_in_font(), most likely this is because
                                               you have not loaded the correct and/or completed unkown_chars.csv, the unkown char unicode is:  '''
                        + str(e))
            else:
                new_corrected_line += char
        corrected_lines_l.append(new_corrected_line)

    tools.write_text_file(output_txt_file_path, corrected_lines_l)
示例#3
0
    def _prune_non_eval_clips():
        """Delete oldest non-eval clips until the dir is under the size cap,
        drop their rows from the data CSV, then renumber what remains."""
        # oldest clips come first, so they are deleted first
        clip_path_l = file_system_utils.get_file_paths_in_dir_by_age(
            NON_EVAL_CLIPS_DIR_PATH)

        removed_abs_path_l = []
        while (file_system_utils.get_size(NON_EVAL_CLIPS_DIR_PATH) >
               MAX_NON_EVAL_CLIPS_DIR_SIZE):
            next_path = clip_path_l[len(removed_abs_path_l)]
            os.remove(next_path)
            removed_abs_path_l.append(os.path.abspath(next_path))

        # drop every CSV row whose clip was just deleted
        row_dl = logger.readCSV(NON_EVAL_CLIPS_DATA_CSV_PATH)
        kept_row_dl = [
            row_d for row_d in row_dl
            if os.path.abspath(row_d['clip_path']) not in removed_abs_path_l
        ]

        # renumber so the csv shows non_eval_0, 1, 2, ... — without this,
        # clips would get overwritten on the next run
        rename_clips_for_order(kept_row_dl)
def write_to_current(header, value):
    """Set `header` = `value` on the row marked 'current' in the pool clips
    CSV, then write the whole CSV back out."""
    pool_row_dl = logger.readCSV(POOL_CLIPS_DATA_CSV_PATH)
    current_row_d = pool_row_dl[utils.get_cur_row_num(pool_row_dl)]
    current_row_d[header] = value

    logger.logList(pool_row_dl, POOL_CLIPS_DATA_CSV_PATH, False, HEADER_LIST,
                   'overwrite')
def build_log_event_l(input_csv_path):
    """Read a CSV and wrap each row dict in a Log_Event instance."""
    return [
        Log_Event.Log_Event(row_d)
        for row_d in logger.readCSV(input_csv_path)
    ]
示例#6
0
def load_unknown_chars_csv(unknown_chars_csv_path, corrected_chars_csv_path):
    """Merge filled-in unknown-char corrections into the corrected-chars CSV.

    Reads `unknown_chars_csv_path`, verifies every row has its
    'correct_char' entered, and appends any corrections not already present
    in `corrected_chars_csv_path`.

    Raises:
        TypeError: if any 'correct_char' value is still missing.
    """
    unknown_chars_dl = logger.readCSV(unknown_chars_csv_path)

    # every unknown char must have its correction entered before loading
    if not all_correct_chars_entered(unknown_chars_dl):
        raise TypeError('ERROR:  You must enter all values for "correct_char" in unknown_chars')

    # read the existing corrected chars csv if it exists
    if os.path.isfile(corrected_chars_csv_path):
        og_corrected_chars_dl = logger.readCSV(corrected_chars_csv_path)
    else:
        og_corrected_chars_dl = []

    new_corrected_chars_dl = find_new_corrected_chars(og_corrected_chars_dl, unknown_chars_dl)

    header_order_list = ['correct_char', 'unknown_char_unicode', 'example']

    # only touch the csv when there is something new to append
    if new_corrected_chars_dl:
        logger.logList(new_corrected_chars_dl, corrected_chars_csv_path, WANT_BACKUP, header_order_list, 'append')
示例#7
0
        def __make_og_non_eval_post_id_clip_path_dl():
            """Collect {'postId', 'clip_path'} dicts for every pool row whose
            status is empty (clip was never evaluated)."""
            pool_row_dl = logger.readCSV(CURRENT_DATA_DIR_PATH +
                                         '/pool_clips_data.csv')
            return [{
                'postId': row_d['postId'],
                'clip_path': row_d['clip_path']
            } for row_d in pool_row_dl if row_d['status'] == '']
示例#8
0
def pull_clip(clip_path, dest_path):
    """Move a clip out of the non-eval pool, drop its CSV row, and renumber
    the remaining clips."""
    os.rename(clip_path, dest_path)

    row_dl = logger.readCSV(NON_EVAL_CLIPS_DATA_CSV_PATH)

    # drop the (first) row that points at the clip we just moved
    for pos, row_d in enumerate(row_dl):
        if row_d['clip_path'] == clip_path:
            del row_dl[pos]
            break

    rename_clips_for_order(row_dl)
def build_row_dict_list(export_filename):
    """Parse an export CSV whose real columns are packed (delimited by
    `export_delim`) inside a single csv column; return a list of row dicts."""
    raw_row_dl = logger.readCSV(export_filename)

    # the real header list is packed into the first row's only key
    headers_str = next(iter(raw_row_dl[0].keys()))
    header_list = headers_str.split(export_delim)

    return [
        make_row_dict(raw_row_d, header_list) for raw_row_d in raw_row_dl
    ]
def move_current(move_amount):
    """Shift the 'current' marker by `move_amount` rows in the pool clips
    CSV, wrapping past either end, then write the CSV back out."""
    row_dl = logger.readCSV(POOL_CLIPS_DATA_CSV_PATH)

    # clear the old marker
    old_row_num = utils.get_cur_row_num(row_dl)
    row_dl[old_row_num]['current'] = ''

    new_row_num = old_row_num + move_amount
    # wrap: past the end -> first row; before the start -> last row
    if not 0 <= new_row_num < len(row_dl):
        new_row_num = 0 if move_amount > 0 else len(row_dl) - 1

    row_dl[new_row_num]['current'] = '1'

    logger.logList(row_dl, POOL_CLIPS_DATA_CSV_PATH, False, HEADER_LIST,
                   'overwrite')
示例#11
0
    def _log_small_historical_data():
        """Snapshot the current run's data into a timestamped dir under
        historical_data and persist the updated list of evaluated postIds."""
        file_system_utils.make_dir_if_not_exist(HISTORICAL_DATA_DIR_PATH)

        # timestamped destination, e.g. .../log__2020-01-31__23_59
        stamp = datetime.datetime.now().strftime("%Y-%m-%d__%H_%M")
        new_log_dir_path = HISTORICAL_DATA_DIR_PATH + '/log__' + stamp

        # start from a clean dir even if this minute was already logged
        file_system_utils.delete_if_exists(new_log_dir_path)
        os.mkdir(new_log_dir_path)

        # copy data from current_data into the new historical dir
        file_system_utils.copy_objects_to_dest([
            CURRENT_DATA_DIR_PATH + '/download_log.csv',
            CURRENT_DATA_DIR_PATH + '/pool_clips_data.csv',
            CURRENT_DATA_DIR_PATH + '/LOG_FILES'
        ], new_log_dir_path)

        # postIds whose pool row has a non-empty status have been evaluated
        pool_row_dl = logger.readCSV(CURRENT_DATA_DIR_PATH +
                                     '/pool_clips_data.csv')
        pool_evaluated_post_id_l = [
            row_d['postId'] for row_d in pool_row_dl if row_d['status'] != ''
        ]

        # prepend this run's evaluated ids to the existing list and persist
        json_logger.write(pool_evaluated_post_id_l + get_evaluated_post_id_l(),
                          EVALUATED_POST_IDS_JSON_PATH)
示例#12
0
    def _log_non_eval_clips():
        """Archive all not-yet-evaluated pool clips into NON_EVAL_CLIPS_DIR_PATH.

        Copies each unevaluated clip to a sequentially-named file
        (non_eval_<N>.mp4), appends a {postId, clip_path} row per new clip to
        the non-eval clips data CSV, and overwrites that CSV. postIds already
        present in the CSV are skipped.
        """
        def __make_og_non_eval_post_id_clip_path_dl():
            # Collect {'postId', 'clip_path'} for every pool row whose
            # status is empty (i.e. the clip was never evaluated).
            new_row_dl = []
            pool_row_dl = logger.readCSV(CURRENT_DATA_DIR_PATH +
                                         '/pool_clips_data.csv')

            for pool_row_d in pool_row_dl:
                if pool_row_d['status'] == '':
                    new_row_dl.append({
                        'postId': pool_row_d['postId'],
                        'clip_path': pool_row_d['clip_path']
                    })
            return new_row_dl

        def __get_post_id_l(non_eval_clips_row_dl):
            # Extract just the postIds from a list of row dicts.
            post_id_l = []
            for row_dl in non_eval_clips_row_dl:
                post_id_l.append(row_dl['postId'])
            return post_id_l

        file_system_utils.make_dir_if_not_exist(NON_EVAL_CLIPS_DIR_PATH)
        # First run: the data CSV does not exist yet, so start empty.
        try:
            non_eval_clips_row_dl = logger.readCSV(
                NON_EVAL_CLIPS_DATA_CSV_PATH)
        except FileNotFoundError:
            non_eval_clips_row_dl = []

        # make row_dl of postIDs and original clip paths
        og_non_eval_post_id_clip_path_dl = __make_og_non_eval_post_id_clip_path_dl(
        )

        # build final_non_eval_post_id_clip_path_dl - contains postId and new clip path that clip is about to be saved to
        # also will not include any postIds that are already logged
        final_non_eval_post_id_clip_path_dl = []
        existing_post_id_l = __get_post_id_l(non_eval_clips_row_dl)

        # New clips are numbered continuing from the existing CSV row count,
        # so names never collide with clips archived on earlier runs.
        clips_added = 0
        for d in og_non_eval_post_id_clip_path_dl:
            if d['postId'] not in existing_post_id_l:
                new_save_name = 'non_eval_' + str(
                    len(non_eval_clips_row_dl) + clips_added) + '.mp4'
                final_non_eval_post_id_clip_path_dl.append({
                    'postId':
                    d['postId'],
                    'clip_path':
                    NON_EVAL_CLIPS_DIR_PATH + '/' + new_save_name
                })
                clips_added += 1

        # copy all non-evaluated clips to thier new home in non_eval_clips
        # could just rename, but this is nicer for testing
        # NOTE: og_pos is never reset — this relies on final_* preserving the
        # relative order of og_* (true because final_* is a filtered copy), so
        # the while-loop can advance monotonically to find each match.
        og_pos = 0
        for d in final_non_eval_post_id_clip_path_dl:
            while (d['postId'] !=
                   og_non_eval_post_id_clip_path_dl[og_pos]['postId']):
                og_pos += 1
            og_clip_path = og_non_eval_post_id_clip_path_dl[og_pos][
                'clip_path']
            file_system_utils.copy_files_to_dest([og_clip_path],
                                                 NON_EVAL_CLIPS_DIR_PATH)
            # copy keeps the original basename, so rename to non_eval_<N>.mp4
            just_copied_clip_path = NON_EVAL_CLIPS_DIR_PATH + '/' + ntpath.basename(
                og_clip_path)
            os.rename(just_copied_clip_path, d['clip_path'])

        # add info from final_non_eval_post_id_clip_path_dl to non_eval_clips_row_dl
        for row_d in final_non_eval_post_id_clip_path_dl:
            non_eval_clips_row_dl.append(row_d)

        logger.logList(non_eval_clips_row_dl, NON_EVAL_CLIPS_DATA_CSV_PATH,
                       False, NON_EVAL_CLIPS_DATA_CSV_HEADER_LIST, 'overwrite')
示例#13
0
def print_dl_report():
    """Print a summary report of the clip download log CSV.

    Reports total attempts, successes, success ratio, a youtube/reddit/other
    URL breakdown, and per-fail-reason occurrence percentages.
    """
    def _num_dl_success(row_dl):
        # the CSV stores booleans as strings, hence the 'True' comparison
        return sum(1 for row_d in row_dl
                   if row_d['download_success'] == 'True')

    def _youtube_reddit_other_cnt(row_dl):
        def __youtube_reddit_or_other_url(url):
            # crude substring match: 'youtu' covers youtube.com / youtu.be,
            # 'redd' covers reddit.com / v.redd.it
            if 'youtu' in url:
                return 'youtube'
            elif 'redd' in url:
                return 'reddit'
            else:
                return 'other'

        yt_cnt = 0
        r_cnt = 0
        other_cnt = 0
        for row_d in row_dl:
            # classify once per row (original called the helper twice)
            url_type = __youtube_reddit_or_other_url(row_d['postURL'])
            if url_type == 'youtube':
                yt_cnt += 1
            elif url_type == 'reddit':
                r_cnt += 1
            else:
                other_cnt += 1
        return yt_cnt, r_cnt, other_cnt

    def _print_fail_reason_occ(row_dl):
        def __fail_reason_occ_d(row_dl):
            # map fail_reason -> occurrence count (empty reasons skipped)
            fail_reason_occ_d = {}
            for row_d in row_dl:
                if row_d['fail_reason'] != '':
                    if row_d['fail_reason'] in fail_reason_occ_d:
                        fail_reason_occ_d[row_d['fail_reason']] += 1
                    else:
                        fail_reason_occ_d[row_d['fail_reason']] = 1
            return fail_reason_occ_d

        overall_fail_reason_occ_d = __fail_reason_occ_d(row_dl)

        num_attempts = len(row_dl)
        total_fails = sum(overall_fail_reason_occ_d.values())

        print('')
        print('Total Fails: ', total_fails, '  %',
              int((total_fails / num_attempts) * 100))
        print('')
        print('% Fails Of     % Fails Of     # Fails:     Fail Reason:')
        print('Attempts:      Fails:')
        for fail_reason, num_occ in overall_fail_reason_occ_d.items():
            percent_of_attempts = int((num_occ / num_attempts) * 100)
            percent_of_fails = int((num_occ / total_fails) * 100)
            print('%', percent_of_attempts, '           %', percent_of_fails,
                  '            ', num_occ, '            ', fail_reason)

    row_dl = logger.readCSV(CLIP_DOWNLOAD_LOG_CSV_PATH)

    # NOTE(review): an empty log raises ZeroDivisionError below — confirm
    # whether this function can ever run before any downloads are attempted
    num_attempts = len(row_dl)
    num_dl_success = _num_dl_success(row_dl)
    dl_success_ratio = num_dl_success / num_attempts

    yt_cnt, r_cnt, other_cnt = _youtube_reddit_other_cnt(row_dl)

    print('num_attempts: ', num_attempts)
    print('num_dl_success: ', num_dl_success)
    print('dl_success_ratio: ', dl_success_ratio)
    print('num youtube videos: ', yt_cnt, '  %', (yt_cnt / num_attempts) * 100)
    print('num reddit videos:  ', r_cnt, '  %', (r_cnt / num_attempts) * 100)
    print('num other videos:   ', other_cnt, '  %',
          (other_cnt / num_attempts) * 100)

    _print_fail_reason_occ(row_dl)
示例#14
0
    def get_confirmed_code_dl__and_is_complete(store_name, value, quantity):
        """Collect up to `quantity` unused codes for `store_name` matching `value`.

        Scans the store's unused-codes CSV, re-confirming codes whose last
        confirmation is too old; codes whose real value is below the
        advertised value are moved to the failed-codes CSV. Returns
        (confirmed_code_dl, is_complete) where is_complete is True when the
        requested quantity was gathered.
        """
        def get_datetime_from_dt_csv_str(datetime_csv_str):
            # Parse a "YYYY-MM-DD HH:MM:SS"-style CSV timestamp (possibly
            # quoted) into a datetime by splitting on all the separators.
            ss = str_utils.multi_dim_split(['-', ' ', ':', "'"],
                                           datetime_csv_str)
            return datetime(int(ss[0]), int(ss[1]), int(ss[2]), int(ss[3]),
                            int(ss[4]), int(ss[5]))

        def add_to_code_d_if_exists_in_row_d(code_d, row_d, key_):
            # Copy row_d[key_] into code_d only when the column exists.
            if key_ in row_d.keys():
                code_d[key_] = row_d[key_]
            return code_d

        def build_code_d(row_d):
            # Assemble the code dict handed back to the caller.
            code_d = {}

            header = 'main_code'
            if header in row_d.keys():
                # NOTE(review): last char of main_code is dropped here —
                # presumably a trailing delimiter/quote; confirm against the
                # CSV writer.
                code_d[header] = row_d[header][:-1]

            code_d = add_to_code_d_if_exists_in_row_d(code_d, row_d, 'pin')
            code_d = add_to_code_d_if_exists_in_row_d(
                code_d, row_d, 'biz_id'
            )  # eventually remove !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            return code_d

        confirmed_code_dl = []

        # NOTE(review): uses code_req_d['store_name'] (an outer-scope name)
        # rather than the store_name parameter — verify these always match.
        unused_code_csv_path = get__store_unused_codes_csv_path(
            code_req_d['store_name'])

        # return empty if code csv does not exist
        if not fsu.is_file(unused_code_csv_path):
            return confirmed_code_dl

        row_dl = logger.readCSV(unused_code_csv_path)
        store = STORE_D[
            store_name]  # will eventually be replaced with Store(store_name) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        header_l = store.csv_header_l  # will eventually get this from config !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

        # Walk rows until enough codes are gathered or rows run out.
        row_num = 0
        while (len(confirmed_code_dl) < quantity and row_num < len(row_dl)):
            row_d = row_dl[row_num]
            if float(row_d['adv_value']) == float(value):

                code_d = build_code_d(row_d)

                last_confirm_datetime = get_datetime_from_dt_csv_str(
                    row_d['last_confirmed'])
                datetime_since_last_confirm = datetime.now(
                ) - last_confirm_datetime
                sec_since_last_confirm = datetime_since_last_confirm.total_seconds(
                )

                # if it has been too long since last check, re-check code
                # NOTE(review): DAYS * 3600 converts to HOURS in seconds —
                # for days the factor should be 86400; confirm intent.
                if sec_since_last_confirm > MAX_CONFIRMED_CODE_AGE_DAYS * 3600:

                    #                     real_value = store.get_code_value(code_d) # put back !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    real_value = 50  # remove, just for testing !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    print('using ', real_value,
                          ' as test #, should check code for real, PUT BACK'
                          )  #`````````````````````````````````````````

                    # if after checking, the real value is less than the  value,
                    # remove the code from unused_codes and put it in failed_codes
                    if real_value < float(row_d['adv_value']):
                        logger.removeRowByHeaderVal('og_code_str',
                                                    row_d['og_code_str'],
                                                    unused_code_csv_path,
                                                    errorIfHeaderNotExist=True)

                        failed_codes_csv_path = get__store_failed_codes_csv_path(
                            store_name)
                        # NOTE(review): this appends the ENTIRE row_dl to the
                        # failed-codes csv, not just the failed row_d —
                        # confirm this isn't meant to be [row_d].
                        logger.logList(row_dl,
                                       failed_codes_csv_path,
                                       wantBackup=True,
                                       headerList=header_l,
                                       overwriteAction='append')
                        break

                # if code not old, or if you just checked and confirmed the code
                # NOTE(review): on the break path above, code_d was already
                # appended is NOT true — but when the failed branch does not
                # break, the failed code still gets appended here; verify.
                confirmed_code_dl.append(code_d)
            row_num += 1

        return confirmed_code_dl, len(confirmed_code_dl) == quantity
def get_csv_row_dl():
    """Return the pool clips data CSV as a list of row dicts."""
    row_dl = logger.readCSV(POOL_CLIPS_DATA_CSV_PATH)
    return row_dl