def check_for_header(file, remove_if_present, interactive=True):
    """Detect (and optionally strip) a header line in a delimited text file.

    Heuristic: a header line is assumed to have a different character count
    than the first data line.  In interactive mode the user confirms via
    ``ui``; otherwise the header is assumed and a notice is printed.
    Returns True when a header was detected.
    """
    rows = hfh.get_lines(file)
    header_found = False
    if len(rows) > 1 and len(rows[0]) != len(rows[1]):
        if interactive:
            header_found = ui.get_yes_no_response(
                "Remove Header?\n", str("Is this a header?" + rows[0] + "?"))
        else:
            header_found = True
            print(
                file, len(rows[0]), '\n',
                "header automatically removed. Confirm that it had a header")
    # todo: when this fails fix it
    if remove_if_present and header_found:
        hfh.write_lines_to_text(rows[1:], file)
    return bool(header_found)
def convert_xCalibre_matrix_for_PCI(matrix_file, corresponding_control_file=False, id_length=8, include_id=False):
    """Convert an xCalibre score-matrix file into comma-separated and transposed csvs.

    Writes ``<stem>__c.csv`` (one comma between score characters, with an
    optional first row of item ids pulled from column 0 of the control file),
    a debug copy ``pickme.csv`` in the working directory, and a transposed
    ``<stem>_T_c.csv``.  Returns the re-transposed DataFrame.
    """
    #similar file in h_stats
    ret = []
    first_row = ""
    if corresponding_control_file:
        # optional header row: item ids from the control file's first column
        df = pd.read_csv(corresponding_control_file, header=None)
        item_ids = df.loc[:, 0]
        for id in item_ids:
            first_row += id + ","
        first_row = first_row[:-1]  # drop trailing comma
        ret = [first_row]
    lines = hfh.get_lines(matrix_file)
    for line in lines:
        ret_line = ""
        if include_id:
            ret_line = line[:id_length]
        answer_string = line[id_length:]
        for c in answer_string:
            ret_line += c + ','
        # NOTE(review): [:-3] appears to strip the ",\n," produced when the
        # line's trailing newline is treated as an answer character — assumes
        # lines keep their newline; on a newline-less line this drops the
        # last real answer.  TODO confirm against a sample matrix file.
        ret_line = ret_line[:-3]
        ret_line += '\n'
        ret.append(ret_line)
    name = hfh.get_stem(matrix_file) + "__c.csv"
    hfh.write_lines_to_text(ret, name)
    translated_name = hfh.get_stem(matrix_file) + "_T_c.csv"
    df = pd.read_csv(name, header=None)
    df.to_csv("pickme.csv")  # debug artifact written to CWD
    df = df.T
    df.to_csv(translated_name, index=False)
    return df.T
def create_c_from_LXR_Test(file_path, destination_path=None):
    """Build a default control csv (``<stem>_c.csv``) from an LXR test printout.

    Lines starting with a digit are item headers; the key is read from the
    following line's "Key: " marker.  Each record is
    ``[bank_id, key, '4', '1', 'Y', 'M']``.  Writes the csv into
    ``destination_path`` (defaults to the file's parent folder) and returns
    the DataFrame.
    """
    if destination_path is None:
        destination_path = hfh.get_parent_folder(file_path)
    lines = hfh.get_lines(file_path)
    ret = []
    counter = 0
    for line in lines:
        counter += 1
        if line[0].isnumeric():
            entry = line.split()
            test_name = hfh.get_stem(file_path)  # NOTE(review): unused local
            test_id = line[:line.index('.')]     # NOTE(review): unused local
            entry[1]  # NOTE(review): statement has no effect — dead code
            bank_id = entry[1] + '_' + entry[2]
            if len(entry) == 4:
                subject = entry[1] + "_" + entry[2]
                bank_id = subject + entry[3]
            # the key lives on the line AFTER the item header; counter is
            # already 1-based here, so lines[counter] is the next line
            key_line = lines[counter]
            key_i = key_line.find('Key: ')
            if key_i > -1:
                key = key_line[key_i + len("Key: ")]  # single key character
            else:
                # NOTE(review): when "Key: " is missing, ``key`` keeps its
                # value from the previous item (or is unbound for the first
                # item) — likely a latent bug; the print looks like leftover
                # debugging output.
                print("hello")
            record = [bank_id, key, '4', '1', 'Y', 'M']
            ret.append(record)
    df = pd.DataFrame(ret)
    name = hfh.get_stem(file_path) + "_c.csv"
    # df.sort_values(df[1])
    df.to_csv(destination_path + "/" + name, index=False, header=False)
    return df
def convert_response_string_to_csv_and_get_df(file, id_length, number_of_spaces, create_csv=False):
    """Split fixed-width response strings into one-character DataFrame columns.

    Each line starts with an id of ``id_length`` characters followed by
    ``number_of_spaces`` spaces; everything after that — up to a trailing
    'F'/'R' repeat marker when one is present — becomes one column per
    character.  Optionally writes the frame to a "_d_" csv and returns it.
    """
    data_start = id_length + number_of_spaces
    rows = []
    for raw in hfh.get_lines(file):
        marker = raw.rfind('F')
        if marker == -1:
            marker = raw.rfind('R')
        payload = raw[data_start:marker] if marker > -1 else raw[data_start:]
        rows.append(list(payload))
    frame = pd.DataFrame(rows)
    if create_csv:
        frame.to_csv(hfh.create_name(file, modificaiton="_d_"))
    return frame
def get_f_df_repeat_status(f_path):
    """Collect ``<id>_<F|R>`` first-time/repeat labels for every examinee.

    Returns the list (or pandas Series for type-K files) of labels.
    NOTE(review): ``f_df`` is built and re-indexed at the end but never
    returned or stored — looks like dead code or an unfinished refactor;
    confirm intent before removing.
    """
    # places that repeat information lives...
    # end of string
    # pearson file type thrid column
    # ... other things I have not come across
    ids_with_repeat_status = []
    lines = hfh.get_lines(f_path)
    if is_type_K(lines):
        # type-K files carry the attempt number in an 'Attempt' column:
        # 1 -> F (first attempt), 2 -> R (repeat)
        df = hfh.get_df(f_path, header=0)
        df = df.drop(0)
        df['Attempt'] = df['Attempt'].replace(['1'], 'F')
        df['Attempt'] = df['Attempt'].replace(['2'], 'R')
        ids_with_repeat_status = df['ClientID'] + '_' + df['Attempt']
    else:
        # otherwise the status is the final character of each line (F or R)
        for line in lines:
            ending_character = line.strip()[-1]
            if ending_character in ['F', 'R']:
                repeat_status = ending_character
                line = line.strip()
                # id is the first whitespace-separated token, falling back
                # to the first comma-separated token
                split_line = line.split()
                _id = None
                if len(split_line) > 1:
                    _id = split_line[0]
                else:
                    split_line = line.split(',')
                    if len(split_line) > 1:
                        _id = split_line[0]
                if _id is None:
                    assert False, "can not assign repeat status to file " + f_path
                ids_with_repeat_status.append(_id + '_' + repeat_status)
    ret = process_response_string_file(f_path, create_c=False)
    f_df = ret
    # NOTE(review): assumes len(ids_with_repeat_status) == len(f_df);
    # lines without a trailing F/R are skipped above, which would break this.
    f_df = f_df.set_index(ids_with_repeat_status)
    return ids_with_repeat_status
def convert_delimited_to_iteman(file, destination_path, delimiter=','):
    """Convert a delimiter-separated response file into an iteman ``_f.txt``.

    Each input line is expected to look like ``id<delim>ans<delim>ans...``.
    The id field gets a trailing space; 'Y'/'M' marker fields are dropped
    from the response string.  Writes ``<stem>_f.txt`` under
    ``destination_path``.

    Returns True when the file was converted, False when the file is empty
    (a non-empty file whose first line lacks the delimiter falls through and
    returns None, as before).

    Bug fix: ``non_answer_characters`` was initialised to False and never
    updated, so the "removed" warning printed once per input line even when
    nothing was stripped.  The flag is now set when a 'Y'/'M' field is
    actually dropped, and the warning prints once per file.
    """
    #verify is CSV
    ret = []
    lines = hfh.get_lines(file)
    if len(lines) > 0:
        if len(lines[0].split(delimiter)) > 1:  # looks delimited
            non_answer_characters = False
            for line in lines:
                new_line = ""
                fields = line.split(delimiter)
                id_handled = False
                for field in fields:
                    if not id_handled:
                        # first field is the id; pad it from the responses
                        field += ' '
                        id_handled = True
                    if field == 'Y' or field == 'M':
                        # marker field, not an answer — drop it
                        non_answer_characters = True
                    else:
                        new_line += field
                ret.append(new_line)
            if non_answer_characters:
                print(
                    file,
                    "non answer character in data response string. It was removed."
                )
            name = hfh.create_name(hfh.get_stem(file), destination_path, 'txt', '_f')
            hfh.write_lines_to_text(ret, name)
            return True
    else:
        print(file, "is empty")
        return False
def convert_2016_format(file_name, destination_path="", pretest_cutoff=False):
    """Convert a 2016-era response file into ``_f.txt`` data + default control.

    The first line ends with the answer key (the text after the last space);
    data lines start at line 3.  Trailing repeat markers are stripped before
    the data is written; when the result passes ``is_valid_data`` a default
    control file is generated from the key.  Returns True on success.
    """
    # of form:
    # PT1 PT116MAR BB... correct
    # answers
    # todo: rename this
    lines = hfh.get_lines(file_name)
    start_of_answers = lines[0].rfind(' ')
    answers = lines[0][start_of_answers + 1:]
    ret = []
    for line in lines[2:]:
        # remove R or F at end
        # NOTE(review): checks the second-to-last character (before the
        # newline) but cuts FIVE characters — presumably the marker field is
        # padded; confirm against a sample file.
        last_entry = line[len(line) - 2]
        if last_entry == 'R' or last_entry == 'F':
            line = line[:-5] + '\n'
        ret.append(line)
    name = hfh.get_stem(file_name)
    # NOTE(review): get_stem likely strips the directory component, so the
    # output may land in the CWD rather than destination_path — verify.
    new = hfh.get_stem(destination_path + "/" + name) + "_f.txt"
    hfh.write_lines_to_text(ret, new)
    if is_valid_data(new):
        convert_answers_to_default_control(name, answers, destination_path, pretest_cutoff)
        return True
    return False
def get_first_line_of_stats(path_to_stats):
    """Return the 1-based line number of the 'Sequence' header row.

    Scans the stats file for the first line whose leading comma-separated
    field is exactly "Sequence"; returns None when no such line exists.
    """
    for line_number, row in enumerate(get_lines(path_to_stats), start=1):
        if row.split(',')[0] == "Sequence":
            return line_number
def process_stats_files(stats_files, reports_path, report_name):
    """Aggregate xCalibre per-form stats csvs into one per-item report.

    Concatenates the 'Sequence'-headed section of each stats file into
    ``<reports_path>/<report_name>_aggregate_.csv``, then derives per-item
    descriptives (grouped on 'Item ID'), K/La/Lb flag indicators, and
    Caution/Good classifications, writing the grouped result to
    ``<reports_path>/<report_name>_complete_.csv``.
    """
    r_entries = []  # list of tuples constant, message
    # NOTE(review): ``r_entries`` is never used in this function.
    header = False
    ret = []
    header_index = 0
    for file in stats_files:
        lines = hfh.get_lines(file)
        header_index = hfh.get_index_of_line_that_starts_with_word_based_on_delimiter(lines,'Sequence')
        # NOTE(review): blank_index is used below as an offset relative to
        # header_index — confirm that is what the helper returns.
        blank_index = hfh.get_next_blank_line_after_index_from_lines(lines, header_index)
        if not header:
            # first file keeps its header row
            ret_lines = lines[header_index:blank_index+header_index]
            header = True
        else:
            # subsequent files: skip the header row
            ret_lines = lines[header_index+1:blank_index+header_index]
        for line in ret_lines:
            ret.append(line)
    # use ret to make master df
    aggregate_report_path = reports_path+"/"+report_name+"_aggregate_.csv"
    hfh.write_lines_to_text(ret, aggregate_report_path)
    df = pd.read_csv(aggregate_report_path)
    df = df[['Item ID', 'S-Rpbis', 'T-Rpbis','P','a','b','Flags']]
    # indicator columns derived from the Flags text
    df['K'] = 0
    df['La'] = 0
    df['Lb'] = 0
    mask_K = df['Flags'].str.contains(r'K', na=False)
    mask_Lb = df['Flags'].str.contains(r'Lb', na=False)
    mask_La = df['Flags'].str.contains(r'La', na=False)
    df.loc[mask_K, 'K'] = 1
    df.loc[mask_La, 'La'] = 1
    df.loc[mask_Lb, 'Lb'] = 1
    # how many administrations each item appears in
    df['count'] = df['Item ID'].map(df['Item ID'].value_counts())
    df = add_descriptives_to_df_by_group(df, 'Item ID', 'a')
    df = add_descriptives_to_df_by_group(df, 'Item ID', 'b')
    df = add_descriptives_to_df_by_group(df, 'Item ID', 'T-Rpbis')
    df = add_descriptives_to_df_by_group(df, 'Item ID', 'S-Rpbis')
    df = add_descriptives_to_df_by_group(df, 'Item ID', 'P')
    df = df.sort_values('Item ID')
    df.to_csv(aggregate_report_path)
    df_new = df['Item ID'].apply(get_id_from_item_id)
    df = pd.concat([df, df_new], axis=1)
    # collapse to one row per item
    df = df.sort_values('Item ID').groupby(['Item ID']).first()
    df['Caution'] = 0
    df['Good'] = 0
    # NOTE(review): 'K' is only ever set to 0/1 per row above, and groupby
    # .first() does not sum — confirm ``K == 2`` is actually reachable.
    mask1 = (df['K'] == 2) | (df['Lb'] == 1)
    df.loc[mask1, 'Caution'] = 1
    mask2 = (df['Caution'] == 0) & (df['a_mean'] > 1) & (df['T-Rpbis_mean'] > .3)
    df.loc[mask2, 'Good'] = 1
    df = df.drop(['S-Rpbis','T-Rpbis','P','a','b','Flags'], axis = 1)
    df.to_csv(reports_path + "/" + report_name + "_complete_.csv")
def convert_control_to_include_pretest(control_file):
    """Rewrite a control csv forcing column 4 (include flag) to 'Y'.

    Keeps columns 0-3 and 5 unchanged and writes the result next to the
    input as ``<name>_cf.csv``.
    """
    rewritten = []
    for row in hfh.get_lines(control_file):
        fields = row.split(',')
        rewritten.append(",".join(fields[:4]) + ",Y," + fields[5])
    base = control_file[:control_file.rfind(".")]
    hfh.write_lines_to_text(rewritten, base + "_cf.csv")
def create_aggregate_report(path, destination_path, target_string, header_txt, sort_by):
    """Concatenate every matching report under ``path`` into one sorted csv.

    Writes ``header_txt`` first, appends each matching file minus its first
    line, then re-reads the aggregate and rewrites it sorted descending on
    ``sort_by``.
    """
    aggregate = destination_path + "/_aggregate_" + target_string + ".csv"
    write_lines_to_text([header_txt + "\n"], aggregate)
    for report in get_all_file_names_in_folder(path, target_string=target_string):
        write_lines_to_text(get_lines(report)[1:], aggregate, 'a')
    df = pd.read_csv(aggregate, header=0)
    df.sort_values(by=sort_by, ascending=False).to_csv(aggregate, index=False)
def is_valid_data(file):
    # id responses
    """Check that every data line has its id/response boundary in the same column.

    Uses the position of the first space in the second line as the reference
    id width and verifies all subsequent lines agree.  Returns a falsy value
    for unreadable/too-short files, True when the spacing is consistent.

    Bug fixes: the original indexed ``lines[1]`` after only checking the file
    was non-empty, so a one-line file raised IndexError; the unused
    ``length`` local was removed; and the mixed 0/False/1 returns were
    normalised to booleans (False == 0 and True == 1, so existing callers
    are unaffected).
    """
    lines = hfh.get_lines(file)
    if not lines or len(lines) < 2:
        return False
    end_of_id = lines[1].find(" ")
    for line in lines[1:]:
        if line.find(" ") != end_of_id:
            print(file + " is invalid because of spacing issues")
            return False
    return True
def convert_csv_response_string_to_raw_with_bogus_header(file, id_spaces=3):
    """Turn a comma-delimited response file into raw fixed-width format.

    Prepends a throw-away "BOGUS" header line, separates each id from its
    responses with ``id_spaces`` spaces, strips the commas, and writes the
    result to ``<file>_m``.
    """
    out = ["BOGUS \n\n"]
    for row in hfh.get_lines(file):
        comma_at = row.find(',')
        examinee_id = row[:comma_at]
        out.append(examinee_id + ' ' * id_spaces + row[comma_at:].replace(',', ''))
    hfh.write_lines_to_text(out, file + "_m")
def convert_first_line_answers_to_default_control_and_data(
        file_name, comma_delimited=False, id_length=8, id_spaces=3):
    """Split a file whose first line is the answer key into control + data files.

    Writes ``<stem>_c.csv`` (sequence, key char, "4,1,y,M" per answer) and
    ``<stem>_f.txt`` (data lines with ids left-padded with '_' to
    ``id_length``).  Comma-delimited inputs start data at line 2, fixed-width
    inputs at line 3.  NOTE(review): output paths are relative to the CWD,
    not to the input file's folder — confirm that is intended.
    """
    #todo: handle cutoff for not included
    lines = hfh.get_lines(file_name)
    correct = lines[0]
    new = []
    counter = 0
    if comma_delimited:
        correct = correct.replace(',', '')
    for a in correct:
        counter += 1
        if not a == '\n':
            include = 'y'
            new.append(str(counter) + "," + a + ",4,1," + include + ",M\n")
    name = hfh.get_stem(file_name)
    hfh.write_lines_to_text(new, name + "_c.csv")
    # contains a random F at the end will test to see if it matters
    formatted = []
    if comma_delimited:
        for line in lines[1:]:
            split_line = line.split(',')
            id = split_line[0]
            new_id = ""
            if len(id) < id_length:
                # left-pad short ids with underscores
                short = id_length - len(id)
                for i in range(short):
                    new_id += "_"
            new_id += id
            # strip delimiters and the trailing two chars (marker + newline)
            response_string = line[len(id):-2].replace(',', '') + '\n'
            ret_line = new_id + " " + response_string
            formatted.append(ret_line)
    else:
        for line in lines[2:]:
            # todo: could be problematic
            id_end = line.find(',')
            id = line[:id_end]
            new_id = ""
            characters_short = id_length - len(id)
            for c in range(characters_short):
                new_id += "_"
            new_id += id
            for i in range(id_spaces):
                new_id += " "
            new_line = new_id + line
            formatted.append(new_line)  #no clue why : is here perhaps I will remove it.
    hfh.write_lines_to_text(formatted, name + "_f.txt")
def set_standard_id_length_in_data_files(path_to_files, target_length, spaces_between_id_and_data=3):
    """Normalise examinee-id width in every ``_f.txt`` data file in a folder.

    Each matching file is rewritten in place; assumes the data portion is
    space-separated from the id.
    """
    data_files = hfh.get_all_file_names_in_folder(path_to_files, target_string='_f.txt')
    for data_file in data_files:
        adjusted = [set_standard_id_length_for_line(row, target_length)
                    for row in hfh.get_lines(data_file)]
        hfh.write_lines_to_text(adjusted, data_file)
def get_data_df(data_file, id_length=8, spaces=3):
    """Load a fixed-width response file into a DataFrame.

    Column 0 holds the examinee id (first ``id_length`` characters of each
    line); the remaining columns hold one response character each, taken
    from after the id and the ``spaces`` separator up to (but excluding) the
    last two characters (trailing marker + newline), with surrounding
    whitespace stripped.

    Bug fix: the id slice was hard-coded to ``line[:8]`` and ignored the
    ``id_length`` parameter; it now honours the parameter (behaviour is
    unchanged for the default ``id_length=8``).
    """
    ret = []
    for line in hfh.get_lines(data_file):
        row = [line[:id_length]]
        response_string = line[id_length + spaces:-2].strip()
        row.extend(response_string)  # one column per response character
        ret.append(row)
    return pd.DataFrame(ret)
def remove_header_and_blank_lines(control_path):
    """Strip blank lines and non-data rows from a control csv, in place.

    A row is dropped when it is a bare newline or when its third
    comma-separated field is not numeric (i.e. a header row).
    """
    kept = []
    for row in hfh.get_lines(control_path):
        drop = row == '\n'
        fields = row.split(',')
        if len(fields) > 2 and not fields[2].isnumeric():
            drop = True
        if not drop:
            kept.append(row)
    hfh.write_lines_to_text(kept, control_path)
def validate_c_file_header(c_file, debug=True):
    #first line should be AccNum...
    """Ensure a control csv starts with the standard header row.

    Returns True once the file is verified to carry ``E.C_HEADER_S`` as its
    first line, rewriting the file with the standard column names
    (``E.C_HEADER_L``) when the header is missing.  Asserts on empty files
    and on a duplicated header.

    Bug fix: the duplicate-header probe read ``lines[1]`` unconditionally,
    raising IndexError on a one-line header-less file; it is now guarded.
    """
    lines = hfh.get_lines(c_file)
    if debug:
        print("validating: " + c_file)
    assert len(lines) > 0, "validate_c_file was fed an empty file"
    if lines[0] == E.C_HEADER_S + '\n':
        return True
    if len(lines) > 1 and lines[1] == E.C_HEADER_S + '\n':
        assert False, "validate_c_file detected 2 headers"
    # header not present — rewrite with the standard column names
    df = hfh.get_df(c_file)
    df.columns = E.C_HEADER_L
    df.to_csv(c_file, index=False)
    return True
def convert_default_data_to_iteman(file_name, processed_data_path, new_name=False):
    """Strip the two header lines from a default data file and save as iteman.

    Ids are normalised to 8 characters and the result is written as
    ``<stem>_f.txt`` under ``processed_data_path`` (stem taken from
    ``new_name`` when one is given).
    """
    if processed_data_path[-1] == '/':
        processed_data_path = processed_data_path[:-1]
    normalized = [set_standard_id_length_for_line(row, 8)
                  for row in hfh.get_lines(file_name)[2:]]
    stem_source = new_name if new_name else file_name
    target = processed_data_path + "/" + hfh.get_stem(stem_source) + "_f.txt"
    hfh.write_lines_to_text(normalized, target)
def process_LXR_key(key_file, get_c_df_AS_0=False, get_L_df_AS_1=False, destination_path_c=None, destination_path_L=None):
    """Parse an LXR key printout into control (c) and lookup (L) DataFrames.

    Lines containing '.' yield item ids (text after the first dot, spaces
    replaced by underscores); lines containing ':' yield keys (text after
    the colon).  The c frame is ``[id, key, 4, 1, 'Y', 'M']`` per item; the
    L frame maps the 1-based sequence number to the id.  When either
    ``get_*`` flag is set the pair ``(c_df, L_df)`` is returned; otherwise
    the frames are written to the destination folders as ``<stem>_c.csv`` /
    ``<stem>_L.csv``.

    NOTE(review): ids and keys are collected independently — a malformed
    line can leave the two lists different lengths, padding the frame with
    NaN.  Confirm inputs are strictly alternating id/key lines.
    """
    c_df = None
    L_df = None
    name = hfh.get_stem(key_file)
    lines = hfh.get_lines(key_file)
    ids = []
    keys = []
    for line in lines:
        split = line.split('.')
        if len(split) > 1:
            # id line: "<n>. <id text>"
            id = split[1]
            id = id.strip()
            id = id.replace(' ', '_')
            ids.append(id)
        else:
            split = line.split(':')
            if len(split) > 1:
                # key line: "Key: <letter>"
                key = split[1]
                key = key.strip()
                keys.append(key)
    # columns: 0=id, 1=key, then the default control constants
    df = hfh.pd.DataFrame([ids, keys]).T
    df[2] = 4
    df[3] = 1
    df[4] = 'Y'
    df[5] = 'M'
    df[6] = df.index.values + 1  # 1-based sequence number
    c_df = df[[0, 1, 2, 3, 4, 5]]
    L_df = df[[6, 0]]
    if get_c_df_AS_0 or get_L_df_AS_1:
        return c_df, L_df
    if destination_path_c:
        c_df.to_csv(destination_path_c + '/' + name + '_c.csv', index=False, header=False)
    if destination_path_L:
        L_df.to_csv(destination_path_L + '/' + name + '_L.csv', index=False, header=False)
def create_mapping_from_Karen_test_data(file_path, destination_path="", create_csv=False, add_underscore=False): lines = hfh.get_lines(file_path) # assumes files are of format # number. Name NN # .... where no lines will start with a number and then a dot other than my target lines # number. Name NN # name of file is stem(file_path)+_L.csv ret = [] if lines: for line in lines: if line[0].isnumeric(): entry = line.split() test_name = hfh.get_stem(file_path) test_id = line[:line.index('.')] subject = entry[1] bank_id = entry[2] underscore = "" if add_underscore: underscore = "_" record = [ test_name, test_id, subject, bank_id, subject + underscore + str(bank_id) ] ret.append(record) df = pd.DataFrame(ret) df.columns = [ 'form', 'test_id', 'subject', 'bank_id_number', 'bank_id' ] else: print(file_path + "does not contain lines.") if create_csv: name = hfh.get_stem(file_path)[:8] #df.sort_values(df[1]) file_name = destination_path + "/" + name + "_L.csv" df.to_csv(file_name, index=False, header=0) return df
def clean_stats_csv(path, create_csv=True, get_df=False): lines = hfh.get_lines(path) #assumes report starts with Sequence i = -1 cont = True beginning = -1 ret_lines = [] while cont: for line in lines: i += 1 split_line = line.split(',') if split_line[0] == 'Sequence': cont = False beginning = i if beginning > -1: if line == '\n': cont = False else: ret_lines.append(line.split(',')) df = pd.DataFrame(ret_lines[1:]) df.columns = ret_lines[0][:-1] df = df.drop(columns='4 SD') if create_csv: new_path = hfh.get_parent_folder(path) name = new_path + "/" + hfh.get_stem(path)[:-6] + ".cleaned_stats" df.to_csv(name) if get_df: return df #set_standard_id_length_in_data_files("PT_IRT/PT_processed_data", 8) #convert_xCalibre_matrix_for_PCI("PT_data/score_matrices/PT1_18_m.txt") #process_karen_data("LCLE_IRT","LCLE_IRT","LCLE_IRT/processed_data/") #merge_control_and_bank_info(a,b) #path_to_files = "data_files" #convertOldFormatToNew("LCLE_IRT/LCLEApr2019FullAdmin.txt") #processNewFileFormat("LCLE_IRT/lcea1_18c.csv","LCLE_IRT/lcea1_18.txt") #convert_first_line_answers_to_default_control_and_data(path_to_files+"/pt1_16_n.txt") #create_control_files(path_to_files) #update_control_files_with_item_bank_key("data_files/item_map.csv", "data_files") #convert_2016_format("data_files/pt3_16.txt")
def has_acceptable_correct_percentage(xCalibre_report_path, id_length=8, debug=True):
    """Check every xCalibre "Matrix" file for a plausible correct rate.

    For each matrix file the 0/1 score characters after the id (and before
    the trailing newline) are summed; a file fails when under 50% correct.
    Returns False at the first failing file, True when all files pass.

    Bug fixes: a file with no scorable characters (empty lines or id-only
    lines) raised ZeroDivisionError — such files are now reported and
    treated as failing; the bare ``except`` was narrowed to ValueError
    (the only exception ``int`` raises on a non-numeric character).
    """
    files = hfh.get_all_files(xCalibre_report_path, "Matrix")
    for file in files:
        total = 0
        correct = 0
        lines = hfh.get_lines(file)
        for line in lines:
            scores = line[id_length:-1]
            for x in scores:
                total += 1
                try:
                    correct += int(x)
                except ValueError:
                    # non-numeric mark: counted toward total, not scored
                    pass
        if total == 0:
            print(file, "has no scorable responses.")
            return False
        percent_correct = round(correct / total * 100, 4)
        if debug:
            print(file, percent_correct)
        if percent_correct < 50:
            print(file, "has low correct rate.")
            return False
    return True
def process_response_strings_for_IRT(path_to_raw_data, processed=None, bank=None, verbose=False, get_f_df=False):
    """Locate project folders for a raw response file and process it.

    When the path contains 'raw_data', the project's ``processed_data/`` and
    ``bank_files/`` siblings are derived from it (overriding the
    ``processed`` and ``bank`` arguments).  Prompts interactively until the
    file name matches the CCCYYMON convention, then delegates to
    ``process_response_string_file``.

    NOTE(review): ``verbose`` and ``get_f_df`` are accepted but unused, and
    the re-entered ``name`` from the prompt is validated but never used —
    ``path`` is passed on unchanged.  Confirm before relying on the prompt.
    """
    #todo edited while tired confirm it works later
    path = path_to_raw_data
    if path is not False:
        lines = hfh.get_lines(path)
        r = path.find('raw_data')
        #assumes that raw_data exists in IRT model
        name = path
        if r > -1:
            # derive the sibling project folders from the raw_data location
            project_directory = path[:r]
            name = project_directory + "/processed_data/" + hfh.get_stem(path)
            processed = project_directory + '/processed_data/'
            bank = project_directory + '/bank_files/'
        valid = is_valid_name(path)
        while valid is False:
            print(
                path + " is a raw data name which does not conform to convention of CCCYYMON."
            )
            name = input("enter an appropriate name here")
            valid = is_valid_name(name)
        if lines is False:
            print(
                "Error in determine response string.\n Path request error in path " + path)
        else:
            process_response_string_file(path, bank, write_csv=True, destination_path=processed)
def is_valid_control(file):
    """Sanity-check a control csv, attempting an in-place repair when invalid.

    Returns the validity verdict.

    NOTE(review): ``valid`` is re-initialised to False at the top of every
    iteration, so only the LAST line actually determines the verdict, and
    the only branch that ever sets it True is ``len(split_line[0]) < 4`` —
    almost certainly not the intended logic.  Treat the return value with
    suspicion and confirm against callers before refactoring.
    """
    lines = hfh.get_lines(file)
    valid = True
    for line in lines:
        valid = False
        # check for header line and remove
        split_line = line.split(',')
        if not len(split_line) == 4:
            valid = False
        else:
            if len(split_line) > 2:
                if not split_line[2].isnumeric():
                    valid = False
        # check for empty line and remove
        # NOTE(review): str.split always returns >= 1 element, so this
        # condition can never be True.
        if len(split_line) == 0:
            valid = False
        # check for bank id
        if len(split_line[0]) < 4:
            valid = True
    # check for entries in four points
    if not valid:
        print("invalid control file attempting repair", file)
        remove_header_and_blank_lines(file)
    return valid
def convert_delimited_control(control_file, destination_path, delimiter='\t', remove_version=False, remove_header=True):
    """Re-write a delimited control file as comma-separated.

    Optionally strips a header (via ``check_for_header``) and a trailing
    'V…' version suffix from the bank id.  Lines already comma-delimited are
    only rewritten when ``remove_version`` is set; anything else is copied
    through unchanged.  Returns True when a file was written under
    ``destination_path``, False for files with fewer than two lines.
    """
    lines = hfh.get_lines(control_file)
    ret = []
    changed = False
    csv_detected = False
    if len(lines) > 1:
        if remove_header:
            check_for_header(control_file, True)
        for line in lines:
            # check for delimiter
            split_line = line.split(delimiter)
            if len(split_line) > 1:
                changed = True
                ret_line = split_line[0] + ","
                if remove_version:
                    # bank id of form XXXXVn — cut at the version marker
                    version_location = ret_line.rfind('V')
                    if version_location > 0:
                        ret_line = ret_line[:version_location] + ','
                for i in split_line[1:]:
                    ret_line += i + ','
                ret_line = ret_line[:-1]  # drop trailing comma
                ret.append(ret_line)
            else:
                # check to see if file is comma delimited
                split_line = line.split(',')
                # if comma delimited and removing version get bank id and remove version
                if len(split_line) > 1 and remove_version:
                    csv_detected = True
                    ret_line = split_line[0]
                    version_location = ret_line.rfind('V')
                    if version_location > 0:
                        ret_line = ret_line[:version_location] + ','
                    for i in split_line[1:]:
                        ret_line += i + ','
                    ret_line = ret_line[:-1]
                    ret.append(ret_line)
                else:
                    # neither delimiter matched (or not removing version):
                    # pass the line through untouched
                    ret.append(line)
        if csv_detected:
            print(
                "Comma delimited identified. ID_BANK version identifier removed"
            )
        if not changed:
            print(
                control_file + " asked to be delimiter converted but did not contain target delimiter"
            )
        hfh.write_lines_to_text(
            ret, destination_path + '/' + hfh.get_stem(control_file))
        return True
    else:
        print(control_file, " is an empty file and asked to be converted")
        return False
def parse_LXR_control(file_path):
    """Extract an AccNum (bank id) from each line of an LXR control printout.

    Tries several observed layouts in order — "n. TOPIC nnn",
    "... Key: X TOPIC nnn", two space-separated tokens, two comma-separated
    tokens, a >20-field csv row — with later matches overwriting earlier
    ones; a non-empty line matching nothing is kept verbatim.  Returns a
    pandas Series of AccNums, or False when the file could not be read.
    """
    lines = hfh.get_lines(file_path)
    ret = []
    if lines is not False:
        for line in lines:
            line = line.strip()
            AccNum = ""
            period_i = line.find('.')
            if period_i > -1:
                # look for a period, if present is of form:
                # 5. HERBAL 851
                s = line.split()
                content = s[1].strip()
                number = s[2].strip()
                # zero-pad the item number to 3 digits
                zeroes_needed = 3 - len(number)
                number_string = ""
                for z in range(zeroes_needed):
                    number_string += '0'
                number_string += str(number)
                AccNum = content[:5] + number_string
            target_string = 'Key: '
            key_i = line.find(target_string)
            if key_i > -1:
                # look for Key: , if present is of form:
                # 183 Form #: Feb 10 Key: A ASSES367
                # (the +2 skips the key letter and its trailing space)
                new_line = line[key_i + len(target_string) + 2:].strip()
                s = new_line.split()
                # should only have 2 options
                if len(s) > 2:
                    print("issue with parse_LXR_KEY")
                else:
                    if len(s) == 0:
                        print("problem")
                    if len(s) == 1:
                        AccNum = s[0].strip()
                    if len(s) == 2:
                        number = s[1].strip()
                        zeroes_needed = 3 - len(number)
                        number_string = ""
                        for z in range(zeroes_needed):
                            number_string += '0'
                        number_string += str(number)
                        AccNum = s[0].strip() + number_string
            # fallback layouts: two space- or comma-separated tokens
            s = line.split()
            if len(s) == 2:
                topic = s[0].strip()
                number = s[1].strip()
                AccNum = topic[:5] + number
            s = line.split(',')
            if len(s) == 2:
                topic = s[0].strip()
                number = s[1].strip()
                AccNum = topic[:5] + number
            s = line.split(',')
            if len(s) > 20:
                # wide csv row: first two fields form the id
                topic = s[0]
                number = s[1]
                result = topic + str(number)
                AccNum = result
            if len(line) > 1 and AccNum == "":
                # nothing matched — keep the raw line as the id
                AccNum = line
            if len(AccNum) > 0:
                ret.append(AccNum)
        ret = hfh.pd.Series(ret)
        return ret
    else:
        return False
def remove_header_from_files(files):
    """Drop the first line of each file and write the remainder back as csv.

    Note the rewrite goes through a one-column DataFrame, so pandas adds an
    index column to each output file.
    """
    for path in files:
        body = hfh.get_lines(path)[1:]
        pd.DataFrame(body).to_csv(path)
def process_response_string_file(f_path, bank_path=None, destination_path=None, write_csv=False, get_df=True, create_c=True, paired_bank_xlsx=None):
    """Dispatch a raw response-string file to its type-specific processor.

    Probes the file against the known formats (types A-K) to obtain the
    data frame ``f_df`` (and, for some types, a control frame ``c_df``);
    files matching no type are assumed already formatted and loaded
    directly.  When a bank path is available the control frame is (re)built
    from the matching bank workbook.  Optionally writes ``<stem>_f.csv`` /
    ``<stem>_c.csv`` to ``destination_path`` and returns ``f_df`` when
    ``get_df`` is set.

    NOTE(review): ``paired_bank_xlsx`` is accepted but unused.
    """
    if create_c:
        assert destination_path is not None, "process response string needs to know where to put the processed data"
    name = hfh.get_stem(f_path)
    lines = hfh.get_lines(f_path)
    assert len(lines) > 0, "asked to process empty file:" + f_path
    c_df = None
    f_df = None
    # format-detection chain: first matching type wins
    if is_type_K(lines):
        processed_lines = processK(lines)
        f_df = processed_lines
    elif is_type_A(lines):
        processed_lines = processA(lines)
        c_df = processed_lines[0]
        f_df = processed_lines[1]
    elif is_type_B(lines):
        processed_lines = processB(lines)
        f_df = processed_lines
    elif is_type_C(lines):
        processed_lines = processC(lines)
        c_df = processed_lines[0]
        f_df = processed_lines[1]
    elif is_type_D(lines):
        processed_lines = processD(lines)
        f_df = processed_lines
    elif is_type_E(lines):
        processed_lines = processE(lines)
        c_df = processed_lines[0]
        f_df = processed_lines[1]
    elif is_type_F(lines):
        processed_lines = processF(lines)
        f_df = processed_lines
    elif is_type_G(lines):
        processed_lines = processG(lines)
        c_df = processed_lines[0]
        f_df = processed_lines[1]
    elif is_type_H(lines):
        processed_lines = processH(lines)
        f_df = processed_lines
    elif is_type_I(lines):
        processed_lines = processI(lines)
        f_df = processed_lines
    elif is_type_J(lines):
        processed_lines = processJ(lines)
        f_df = processed_lines
    else:
        print(f_path + " is already formatted")
        is_formatteed(lines)
        f_df = hfh.get_df(f_path)
    if c_df is not None and bank_path:
        # add AccNum instead of sequence
        b_df = create_c_df_from_bank(bank_path)
        b_df['Key'] = c_df['Key']
        c_df = b_df
    if c_df is None and bank_path is not None and create_c:
        #todo: consider respecting the correct answer at the time vs the bank or just destroy it
        bank_files = hfh.get_all_files(bank_path, extension='xlsx')
        pair = hfh.pair_files([f_path], bank_files)
        if len(pair) == 0:
            print(
                "could not find matching bank file and no default control information present."
            )
        if len(pair) == 1:
            # todo: may evaluate differences between bank and response string if desired
            c_df = create_c_df_from_bank(pair[0][1])
        if len(pair) > 1:
            print("more than one file matched for bank", f_path)
    #confirm_id_as_index
    if 0 in f_df.columns or '0' in f_df.columns:
        # promote the id column to the index
        f_df = f_df.set_index(f_df[0], drop=True)
        f_df = f_df.drop(columns=0)
    if write_csv:
        #todo changed index... need to make sure all processed items spit out the same... in this case they are pre-strict.
        f_df.to_csv(destination_path + '/' + name + '_f.csv', index=True, header=False)
        if c_df is not None:
            c_df.to_csv(destination_path + '/' + name + '_c.csv', index=None, header=False)
    if get_df:
        return f_df
def get_header_argument(c_path):
    """Return the pandas ``header`` argument for reading a control csv.

    Returns 0 when the file's first line starts with the 'AccNum' header
    (row 0 is a header), otherwise None.

    Bug fix: an empty file (or a falsy failed read from ``hfh.get_lines``)
    made ``lines[0]`` raise; it now returns None in that case.
    """
    lines = hfh.get_lines(c_path)
    if not lines:
        return None
    if lines[0].split(',')[0] == 'AccNum':
        return 0
    return None