Example #1
def main():
    try:
        snac_query = QuerySnac()
        fileobject, csvoutfile = u.opencsvout('/Users/aliciadetelich/Dropbox/git/chit_archives_scripts/data/snac_uris_outfile.csv')
        csvoutfile.writerow(snac_query.header_row)
        snac_query.q_snac(csvoutfile)
    except Exception:
        print(traceback.format_exc())
    finally:
        #fileobject only exists if opencsvout succeeded, so guard the close
        if 'fileobject' in vars():
            fileobject.close()
        print('All Done!')
def write_output(data, enum_value, output_file):
    header_row = [
        'tc_uri', 'tc_barcode', 'tc_data', 'collection_id', 'collection_title',
        'collection_uri', 'location_data'
    ]
    fileobject, csvoutfile = u.opencsvout(output_file)
    #if there is more than one barcode to search, this line prevents the header row from being written each time.
    if enum_value == 0:
        csvoutfile.writerow(header_row)
    csvoutfile.writerows(data)
    fileobject.close()
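
All of these examples lean on the project's utilities module for CSV handling. Judging only from how the calls are used (u.opencsv() returns a header row plus a reader, u.opencsvout() returns a file object plus a writer, and both are sometimes called without a path), a minimal sketch of the assumed helpers might look like the following; the real implementations may differ.

import csv

def opencsv(input_csv=None):
    '''Assumed behavior: open a CSV for reading and return the header row
    plus a reader positioned after it.'''
    if input_csv is None:
        input_csv = input('Please enter path to input CSV: ')
    fileobject = open(input_csv, 'r', encoding='utf-8', newline='')
    csvfile = csv.reader(fileobject)
    header_row = next(csvfile)
    return header_row, csvfile

def opencsvout(output_csv=None):
    '''Assumed behavior: open a CSV for writing and return the file object
    (so the caller can close it) plus a csv.writer.'''
    if output_csv is None:
        output_csv = input('Please enter path to output CSV: ')
    fileobject = open(output_csv, 'w', encoding='utf-8', newline='')
    csvoutfile = csv.writer(fileobject)
    return fileobject, csvoutfile
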
Example #3
def main():
    #1: Get a list of distinct creators for each collection, write output to file
    #cfg_fp = input('Please enter path to config file: ')
    list_of_parent_ids = input('Please enter path to list of parent IDs: ')
    try:
        header_row, parent_id_list = u.opencsv(list_of_parent_ids)
        #materialize the reader into a list so it can be iterated more than once
        parent_id_list = [row for row in parent_id_list]
        #set the configuration file here??
        dbconn = dbssh.DBConn()
        print('Running queries')
        creator_data = aspace_run.run_db_queries(dbconn, parent_id_list,
                                                 queries.get_distinct_creators)
        composition_data = aspace_run.run_db_queries(dbconn, parent_id_list,
                                                     queries.get_music_data)
        outfile_path = input('Please enter path to outfile: ')
        fileobject, csvoutfile = u.opencsvout(outfile_path)
        write_outfile(creator_data, csvoutfile)
        fileobject.close()
        #2: Review manually and remediate any issues with agent records or duplicate agents
        to_continue = input(
            'After reviewing file please enter CONTINUE to continue: ')
        if to_continue == 'CONTINUE':
            #3: Create subseries for each agent record, save new URI
            agent_data = u.opencsvdict(outfile_path)
            #do the config here - need to fix utilities again
            api_url, headers = u.login()
            print('Creating subseries')
            rows_w_uris = aspace_run.call_api(api_url,
                                              headers,
                                              agent_data,
                                              crud=c.create_data,
                                              json_data=jd.create_subseries)
            #but if I'm just going to put it in a list anyway?? I guess for other implementations it makes more sense?
            #4: Match new subseries URIs with all children
            combined_data = match_uris(composition_data, rows_w_uris)
            #5: Run data munging functions to get appropriate position
            enumerated_data = add_positions(combined_data)
            #NOW need to flatten this data as I did before...
            flattened_data = flatten_data(enumerated_data)
            #6: Use update ao position action to make change
            dirpath = u.setdirectory()
            aspace_run.call_api(api_url,
                                headers,
                                flattened_data,
                                dirpath=dirpath,
                                crud=c.update_parent)
    except Exception:
        print('Error: ')
        print(traceback.format_exc())
    finally:
        #dbconn only exists if the database connection succeeded, so guard the close
        if 'dbconn' in vars():
            dbconn.close_conn()
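
The helpers match_uris, add_positions, and flatten_data referenced above are defined elsewhere in the script. As an illustration of step #4 only, here is a hypothetical sketch of match_uris that assumes both datasets share a creator/collection key in their first column and that call_api appends each new subseries URI to the end of its row; the real helper may join on different columns entirely.

def match_uris(composition_data, rows_w_uris):
    '''Hypothetical sketch: pair each child record with the URI of its newly
    created subseries by joining on an assumed shared key.'''
    #assumed shape: each rows_w_uris row ends with the new subseries URI
    uri_lookup = {row[0]: row[-1] for row in rows_w_uris}
    combined_data = []
    for child in composition_data:
        #assumed: the child's creator/collection key is in the first column
        key = child[0]
        if key in uri_lookup:
            combined_data.append(list(child) + [uri_lookup[key]])
    return combined_data
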
def parse_dates():
    try:
        #starttime = time.time()
        command = find_timetwister()
        #header_row, csvfile = utilities.opencsv()
        fileobject, csvoutfile = utilities.opencsvout(
            output_csv='parsed_date_expressions.csv')
        q_data = run_db_query()
        yes_to_continue = input(
            'Enter "Y" to split output into multiple spreadsheets by date type, or any other key to continue: '
        )
        headers = [
            'date_id', 'uri', 'expression', 'original_string', 'date_start',
            'date_end'
        ]
        csvoutfile.writerow(headers)
        for row_number, row in enumerate(q_data, 1):
            try:
                #log progress every 1000 rows, rather than rebuilding and scanning a lookup list on every iteration
                if row_number % 1000 == 0:
                    logging.debug('Row: ' + str(row_number))
                date_id = row[0]
                uri = row[1]
                date_expression = row[2]
                #runs timetwister against each date expression
                process = Popen([command, str(date_expression)],
                                stdout=PIPE,
                                encoding='utf-8')
                #first reads the output and then converts the list items into JSON
                result_list = json.loads(process.stdout.read())
                '''Output is stored in a list with one or more JSON items (timetwister can parse a
                    single expression field into multiple dates, each with its own JSON bit); this
                    comprehension loops through each JSON bit in the list (usually just the one),
                    and then each key/value in the JSON bit, and appends the original, begin, and end
                    values to the row of input data'''
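                #assumed shape of result_list, for illustration only:
                #[{'original_string': '1912-1918', 'date_start': '1912', 'date_end': '1918', ...}]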
                parse_json_into_list = [
                    str(json_value) for json_bit in result_list
                    for json_key, json_value in json_bit.items() if json_key in
                    ['original_string', 'date_start', 'date_end']
                ]
                row.extend(parse_json_into_list)
                if yes_to_continue == 'Y':
                    proc = process_output(row, datadict)
                else:
                    continue
            except Exception:
                print(traceback.format_exc())
                row.extend(['ERROR'])
                #date_id and uri may not be set if the failure happened before they were unpacked
                if 'date_id' in vars():
                    logging.debug(str(date_id) + ' ' + str(uri))
                logging.exception('Error: ')
            finally:
                csvoutfile.writerow(row)
    finally:
        '''If the user chose to split the output by date type, write one CSV per key in datadict,
        in addition to the single combined file created above'''
        if 'proc' in vars():
            for key, value in datadict.items():
                fob, outfile = utilities.opencsvout(output_csv=key + '.csv')
                outfile.writerow(headers + ['date_type_id'])
                outfile.writerows(value)
                fob.close()
                logging.debug('Outfile closed: ' + key + '.csv')
        '''checks if these variables exist; if so does cleanup work no matter what else happens; if variables don't 
        exist there's something wrong with the input or output files'''
        if 'row_number' in vars() and 'date_id' in vars():
            logging.debug('Last row: ' + str(row_number) + ' ' +
                          str(date_id) + ' ' + str(uri))
        if 'fileobject' in vars():
            fileobject.close()
            logging.debug('Outfile closed')
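
parse_dates also relies on two names defined elsewhere in the script: datadict, an accumulator keyed by date type, and process_output, which files each parsed row under the right key. Inferring only from how the finally block consumes datadict (one spreadsheet per key, each row carrying an extra date_type_id column), a hypothetical sketch might look like this; the real helper may classify dates quite differently.

from collections import defaultdict

#accumulator: date type label -> list of parsed rows (hypothetical structure)
datadict = defaultdict(list)

def process_output(row, datadict):
    '''Hypothetical sketch: bucket a parsed row by date type so that one
    spreadsheet per type can be written later. The classification rule is a guess.'''
    date_start, date_end = row[-2], row[-1]
    if date_start in (None, 'None') and date_end in (None, 'None'):
        date_type = 'unparsed'
    elif date_end in (None, 'None'):
        date_type = 'single'
    else:
        date_type = 'inclusive'
    #append the row plus the date_type_id column expected by the output headers
    datadict[date_type].append(row + [date_type])
    return date_type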
Example #5
#!/usr/bin/python3

from collections import Counter
from tqdm import tqdm
from utilities import utilities as u

header_row, csvfile = u.opencsv()
fileobject, csvoutfile = u.opencsvout()

headers = header_row + ['count']

csvoutfile.writerow(headers)

record_links = [row for row in csvfile]
agent_uris = [row[2] for row in record_links]
agent_uri_count = Counter(agent_uris)

#every agent URI in record_links is already a key in the Counter, so no membership filter is needed
output = [row + [agent_uri_count[row[2]]] for row in tqdm(record_links)]

csvoutfile.writerows(output)

fileobject.close()