def main():
    sheet_id = "1GEeNpKBhfjOCJGx1zJfi6XgZ4OWhGhzWsOHRT9DkmpY"
    # list_sheet = dataSheet(sheet_id, 'Test!A:Z')  # test
    list_sheet = dataSheet(sheet_id, "batch!A:Z")
    report_sheet = dataSheet(sheet_id, "output!A:Z")

    the_uris = list_sheet.getDataColumns()[0]

    output_data = []
    for uri in the_uris:
        asid = uri.split("/")[3]
        x = fix_agent(asid, "families")
        pprint(x["display_name"])
        res = asf.postAgent(asid, json.dumps(x), agent_type="families")
        print(res)
        row = [SERVER, uri, str(res)]
        output_data.append(row)

    print(output_data)
    report_sheet.appendData(output_data)

    quit()

def main():
    my_name = __file__

    # This makes sure the script can be run from any working directory and still find related files.
    my_path = os.path.dirname(__file__)

    sheet_id = '1tYOXSDFlkbX_revB_ULvhmCdvKkyzpipBTkYqYXcM38'
    bibids_sheet = dataSheet(sheet_id, 'MARC-exported!A:Z')
    holding_counts_sheet = dataSheet(sheet_id, 'holding_counts!A:Z')

    the_bibids = [r[0] for r in bibids_sheet.getData()]

    ### MARC ###

    # Read the MARC
    the_heads = ['bibid', 'holdings_count']
    the_rows = [the_heads]
    for abib in the_bibids:
        print('Getting MARC for ' + str(abib))
        marc_path = os.path.join(my_path, 'output/marc/' + str(abib) + '.marc')
        if os.path.exists(marc_path):
            try:
                with open(marc_path, 'rb') as fh:
                    reader = MARCReader(fh)
                    for record in reader:
                        # Find out if there is more than one holding; if there is,
                        # we cannot use it to automatically match top containers
                        # by name and will skip.
                        the_852s = record.get_fields('852')
                        count_852s = len(the_852s)
            except Exception as error:
                count_852s = 'ERROR: ' + str(error)
            the_rows.append([abib, str(count_852s)])
        else:
            print("Could not find " + marc_path + "... skipping...")

    # print(the_rows)

    holding_counts_sheet.clear()
    x = holding_counts_sheet.appendData(the_rows)
    print(x)

    quit()

    # Write results to google sheet
    marc_sheet.clear()
    x = marc_sheet.appendData(the_rows)
    print(x)

    quit()

def main():
    # SERVER = "Test"  # test
    SERVER = "Prod"
    asf.setServer(SERVER)

    LOOKUP = '/Users/dwh2128/Documents/git/dcps-utils/archivesspace/as_reports/id_lookup_prod.csv'

    sheet_id = '1Jbdhda0HbmHKJ7COOJ3CBzdMwpSeIbYHyXzr179ETpI'
    read_sheet = dataSheet(sheet_id, 'TEST!A:Z')  # Test
    write_sheet = dataSheet(sheet_id, 'Output!A:Z')

    the_data = read_sheet.getData()
    the_data.pop(0)

    # print(the_refs)

    the_output = []
    for r in the_data:
        bibid = r[0]
        repo = r[1]
        ref = r[2]
        extref_old = r[3]
        extref_new = r[5]

        the_res = json.loads(asf.getResourceByBibID(bibid, LOOKUP))
        # pprint(the_res)

        asid = the_res['uri'].split('/')[4]
        print("repo: " + str(repo) + "; asid: " + str(asid))

        the_notes = json.dumps(the_res['notes'])
        # print(the_notes)
        print(" ")
        the_new_notes = replace_notes(
            the_notes,
            [
                # fix problem of leading space in href
                {'find': 'xlink:href=\\" http',
                 'replace': 'xlink:href=\\"http'},
                # replace old url with new one
                {'find': extref_old,
                 'replace': extref_new}])
        # print(the_new_notes)

        the_res['notes'] = json.loads(the_new_notes)

        x = asf.postResource(repo, asid, json.dumps(the_res))
        out_row = [SERVER, repo, asid, ref, extref_old, extref_new, str(x)]
        print(out_row)
        the_output.append(out_row)

    # write_sheet.clear()
    write_sheet.appendData(the_output)

    quit()

def main():
    # Test sheet with sample data
    sheet_id = '19zHqOJt9XUGfrfzAXzOcr4uARgCGbyYiOtoaOCAMP7s'
    sheet_range = 'Sheet1!A:Z'

    # Data from sheetFeeder dataSheet
    print("1. dataSheet data array:")
    ds = dataSheet(sheet_id, sheet_range).getData()
    print(ds)
    print("")

    # ds to df example
    df = datasheet_to_dataframe(sheet_id, sheet_range)
    print("2. Converted to DataFrame:")
    print(df)
    print("")
    print("DataFrame shape:")
    print(df.shape)
    print("")
    print("Data types:")
    print(df.dtypes)
    print("")
    print("Column averages:")
    print(df.mean())

    df['mean'] = df.mean(numeric_only=True, axis=1)
    print(df)

    df.assign(mean_a=df.a.mean(), mean_b=df.b.mean())

    # ds = dataframe_to_datasheet(df)
    # print(ds)
    # dataSheet(sheet_id, sheet_range).appendData(ds)

    quit()

    print("")

    # df back to ds
    ds = dataframe_to_datasheet(df)
    print("3. Converted back to dataSheet array:")
    print(ds)
    print("")

    # Get sheetFeeder data as series, and convert to Pandas df
    ds = dataSheet(sheet_id, sheet_range)
    ds_series = ds.getDataSeries()
    print("4. Data as series:")
    print(ds_series)
    print("")

    df = pd.DataFrame(ds_series)
    print("5. Series converted to dataframe:")
    print(df)

def main():
    """Get IA collection specific to serial (Columbia Library Columns)."""
    sheet_id = '1yTDyd5GQFEsVBiKOnt5T1ejBdXhxhmXVUn6jQ-dg_5I'
    sheet_tab = 'ColumbiaColumns'
    the_in_sheet = dataSheet(sheet_id, sheet_tab + '!A:Z')
    the_out_sheet = dataSheet(sheet_id, 'extract-errors!A:Z')
    the_err_sheet = dataSheet(sheet_id, 'errors!A:Z')

    output_folder = 'output/ia/'

    feed_stem = 'ia_clc_feed'
    collection_title = 'Columbia Library Columns'
    abbr = 'clc'

    pickle_path = output_folder + feed_stem + '.pickle'

    the_input = the_in_sheet.getData()
    heads = the_input.pop(0)

    the_records = [{'bibid': r[0], 'id': r[2], 'label': r[3]}
                   for r in the_input]

    feed_data = ia.extract_data(the_records, feed_stem, collection_title)

    feed_data_new = {'errors': feed_data['errors'], 'data': []}
    for e in feed_data['data']:
        new_entry = e
        des = new_entry['description']
        des_new = []
        for d in des:
            if '<a' not in d:
                des_new.append(d)
        new_entry['description'] = des_new
        feed_data_new['data'].append(new_entry)

    # pprint(feed_data_new)

    # Save to pickle.
    print('Saving ' + str(len(feed_data_new['data'])) +
          ' records to ' + pickle_path)
    # util.pickle_it(feed_data_new['data'], pickle_path)
    util.pickle_it(feed_data_new, pickle_path)

    # Report any extraction errors
    the_out_sheet.appendData(feed_data['errors'])

    # Generate XML
    x = ia.build_feed(output_folder + feed_stem + '.pickle', abbr)

    # report any build errors/warnings
    the_err_sheet.appendData(x)

def get_collection(sheet_id, sheet_tab, feed_stem, collection_title, multipart=False):
    """Get Internet Archive collection and save to pickle.

    Args:
        sheet_id (str): Google sheet id
        sheet_tab (str): Google sheet tab name
        feed_stem (str): abbreviation to be used in file naming and feed identification
        collection_title (str): Title of collection (e.g., Medical Heritage Library)
        multipart (bool, optional): Incl/exclude multi-volume works. Defaults to False.
    """
    the_in_sheet = dataSheet(sheet_id, sheet_tab + '!A:Z')
    the_out_sheet = dataSheet(sheet_id, 'extract-errors!A:Z')

    pickle_path = OUT_PATH + feed_stem + '.pickle'

    # get a list of bibids and ia ids to process
    the_inputs = the_in_sheet.getData()
    the_inputs.pop(0)  # remove head row
    print(str(len(the_inputs)) + ' records in ' + collection_title + '...')

    the_records = []
    for i in the_inputs:
        # the_920s = i[6:]  # get arbitrary number of 920s for this row
        the_920s = i[4].split(';')  # get arbitrary number of 920s for this row
        rl = []
        for r in the_920s:
            if 'archive.org' in r:
                rp = ia.parse_920(r)
                # Only add if id != None.
                if bool(rp['id']):
                    rl.append({
                        'bibid': i[0],
                        'id': rp['id'],
                        'label': rp['label']
                    })
        # If we are allowing multi-volume works, add all;
        # otherwise, only add to list if it is a monograph.
        if len(rl) == 1 or multipart is True:
            the_records += rl

    feed_data = ia.extract_data(the_records, feed_stem, collection_title)

    print('Saving ' + str(len(feed_data['data'])) +
          ' records to ' + pickle_path)
    util.pickle_it(feed_data, pickle_path)

    the_out_sheet.appendData(feed_data['errors'])

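# Usage sketch (hedged): a call might look like the commented line below. The
# sheet id and tab name are placeholders; the feed stem and title follow the
# Medical Heritage Library naming used elsewhere in these scripts.
#
#   get_collection('<sheet-id>', 'MedicalHeritage', 'ia_med_feed',
#                  'Medical Heritage Library', multipart=False)
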
def main():
    sheet_id = '1yTDyd5GQFEsVBiKOnt5T1ejBdXhxhmXVUn6jQ-dg_5I'
    # the_voyager_range = 'Durst!A:Z'
    the_voyager_range = 'MWM!A:Z'
    the_ingest_range = 'ingested!A:Z'
    the_output_range = 'test!A:Z'

    the_data = dataSheet(sheet_id, the_voyager_range).getData()
    the_ingested = [
        x[0] for x in dataSheet(sheet_id, the_ingest_range).getData()
    ]  # list of IA ids that are in the IA collection.

    the_output_sheet = dataSheet(sheet_id, the_output_range)

    the_heads = ['bibid', 'id', 'label', 'url', 'composed', 'in collection?']
    the_new_data = [the_heads]

    for a_row in the_data:
        bibid = a_row[0]
        the_920 = a_row[4]
        if the_920:
            # print(parse_920(the_920))
            parsed = parse_920(the_920)
            for d in parsed:
                if 'archive.org' in d['url']:
                    # id = d['url'].split('/')[-1]
                    id = (d['url'][:-1] if d['url'].endswith('/')
                          else d['url']).split('/')[-1]
                    match_flag = "Y" if id in the_ingested else "N"
                    url = 'https://ebooksbeta.lyrasistechnology.org/columbia/book/URI%2Furn%3Ax-internet-archive%3Aebooks-app%3Aitem%3A' + id
                    if 'label' in d:
                        label = 'Read on mobile (' + d['label'] + ')'
                    else:
                        label = "Read on mobile"
                    composed = '$3' + label + '$u' + url
                    the_new_data.append(
                        [bibid, id, label, url, composed, match_flag])

    print(the_new_data)

    the_output_sheet.clear()
    post = the_output_sheet.appendData(the_new_data)
    print(post)

def get_collection(sheet_id, sheet_tab, feed_stem, collection_title, multipart=False):
    the_in_sheet = dataSheet(sheet_id, sheet_tab + '!A:Z')
    the_out_sheet = dataSheet(sheet_id, 'extract-errors!A:Z')

    pickle_path = output_dir + '/' + feed_stem + '.pickle'

    # get a list of bibids and ia ids to process
    the_inputs = the_in_sheet.getData()
    the_inputs.pop(0)  # remove head row

    the_records = []
    for i in the_inputs:
        the_920s = i[4].split(';')  # get arbitrary number of 920s for this row
        rl = []
        for r in the_920s:
            # if 'oapen.org/record' in r:
            if 'library.oapen.org/handle/20.500.12657/' in r:
                rp = parse_920(r)
                rl.append({
                    'bibid': i[0],
                    'id': rp['id'],
                    'label': rp['label']
                })
        # If we are allowing multi-volume works, add all;
        # otherwise, only add to list if it is a monograph.
        if len(rl) == 1 or multipart is True:
            the_records += rl
        elif len(rl) > 1:
            print("WARNING: " + str(i[0]) +
                  " has multiple volumes. Skipping!")
        else:
            print("WARNING: could not find OAPEN record in " +
                  str(i[0]) + ". Skipping!")

    feed_data = extract_data(the_records, feed_stem, collection_title)

    print('Saving ' + str(len(feed_data['data'])) +
          ' records to ' + pickle_path)
    util.pickle_it(feed_data, pickle_path)

    # print(feed_data['data'])
    pprint(feed_data['errors'])

    the_out_sheet.appendData(feed_data['errors'])

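# Usage sketch (hedged): the sheet id and tab below match the OAPEN reporting
# script in this set, and the feed stem matches its pickle name; the collection
# title is an illustrative placeholder.
#
#   get_collection('1OG0UgqHCdAzx326JNy7akx9-MOwR9A_MSf-MEv9k3Ms', 'OAPEN',
#                  'oapen_clio', 'OAPEN Books', multipart=False)
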
def main():
    # x = util.unpickle_it('output/oapen/oapen_clio.pickle')
    # from pprint import pprint
    # pprint(x[1])

    report_metadata('1kLI8x1whzSNqeKL5xVysopgKYWE9-D9H_PHX2RkW4wQ',
                    'output!A:Z',
                    'output/oapen/oapen_clio.pickle')

    quit()

    out_sheet = dataSheet('1OG0UgqHCdAzx326JNy7akx9-MOwR9A_MSf-MEv9k3Ms',
                          'OAPEN!A:Z')

    the_pickles = ['output/oapen/oapen_clio.pickle']

    item_url_base = 'https://ebooks.lyrasistechnology.org/190150/book/https%3A%2F%2Fcolumbia.lyrasistechnology.org%2F190150%2Fworks%2FURI%2Fhttp%3A%2F%2Flibrary.oapen.org%2Fhandle%2F20.500.12657%2F'

    the_output = [['COLL', 'ID', 'BIBID', 'HREF']]

    # Add rows for items within each collection
    for p in the_pickles:
        the_output += [[
            r['collection'], r['id'], r['bibid'], item_url_base + r['id']
        ] for r in get_bibs_and_ids(p)]

    out_sheet.clear()
    out_sheet.appendData(the_output)

    quit()

def main():
    asf.setServer('Prod')

    now1 = datetime.now()
    start_time = str(now1)
    end_time = ''  # set later

    # today_str = str(date.today().strftime("%Y%m%d"))
    yest_str = str((date.today() - timedelta(days=1)).strftime("%Y-%m-%d"))

    sheet_id = '198ON5qZ3MYBWPbSAopWkGE6hcUD8P-KMkWkq2qRooOY'

    data_data = [{
        'range': 'resource-changes!A:Z',
        'filter': 'resources'
    }, {
        'range': 'accession-changes!A:Z',
        'filter': 'accessions'
    }]

    for d in data_data:
        print('processing ' + d['filter'])
        the_sheet = dataSheet(sheet_id, d['range'])

        the_date = yest_str
        # the_date = '2019-08-27'
        the_repos = [2, 3, 4, 5]
        the_fields = [
            'id', 'title', 'identifier', 'create_time', 'system_mtime',
            'last_modified_by', 'publish'
        ]

        the_modifieds = []

        for r in the_repos:
            print('searching repo ' + str(r))

            x = asf.getByDate(r,
                              the_date,
                              date_type='mtime',
                              comparator='equal',
                              filter=d['filter'],
                              fields=the_fields)
            for a in x:
                row = [a[v] for v in the_fields]
                print(row)
                the_modifieds.append(row)
                # print(list(a.values()))
                # the_modifieds.append(list(a.values()))
            print('Repo ' + str(r) + ': ' + str(len(x)))

        print('Total ' + d['filter'] + ': ' + str(len(the_modifieds)))

        # the_sheet.clear()
        # the_sheet.appendData([the_fields])
        the_sheet.appendData(the_modifieds)

    quit()

def main():
    """Script to compose OPDS v1.2 XML feeds using data saved from
    ia_get_collections.py. Modify list of collections as needed.
    """
    the_out_sheet = dataSheet(SHEET_ID, 'errors!A:Z')

    the_collections = [
        (OUTPUT_DIR + '/ia_avt_feed.pickle', 'avt'),
        (OUTPUT_DIR + '/ia_ccny_feed.pickle', 'ccny'),
        (OUTPUT_DIR + '/ia_durst_feed.pickle', 'durst'),
        (OUTPUT_DIR + '/ia_med_feed.pickle', 'med'),
        (OUTPUT_DIR + '/ia_mrp_feed.pickle', 'mrp'),
        (OUTPUT_DIR + '/ia_mwm_feed.pickle', 'mwm'),
        (OUTPUT_DIR + '/ia_wwi_feed.pickle', 'wwi'),
        (OUTPUT_DIR + '/ia_clc_feed.pickle', 'clc'),
        (OUTPUT_DIR + '/ia_hebrewmss_feed.pickle', 'hebrewmss'),
        # ('output/ia/ia_tibetan_feed.pickle', 'tibet'),
    ]

    for col in the_collections:
        x = ia.build_feed(col[0], col[1], output_dir=OUTPUT_DIR)
        the_out_sheet.appendData(x)

    build_linglong_feed(pickle_dir=OUTPUT_DIR, output_dir=OUTPUT_DIR)

    # validate the output
    x = opds_validate.validate_files(OUTPUT_DIR)
    print('\n'.join('***ERROR!*** File ' + r['file'] + ' has errors: ' +
                    r['errors'] for r in x if r['errors']))

    quit()

def main():
    the_sheet = dataSheet('183S8_aMD6py8XvVzIAE4WnDyUeCuLYsyacyNImX1frM',
                          'Sheet1!A:Z')

    in_file = os.path.join(
        MY_PATH, "output_test/springer/springer_test_feed_datastore.json")

    with open(in_file, "rb") as f:
        json_data = json.load(f)

    the_data = [[
        'Title', 'DOI', 'ISBN', 'Print ISBN', 'E-ISBN', 'Pub Date', 'PDF',
        'EPUB'
    ]]

    for b in json_data:
        title = b['publicationName']
        doi = b['doi']
        isbn = b['isbn']
        print_isbn = b['printIsbn']
        e_isbn = b['electronicIsbn']
        pub_date = b['publicationDate']
        link_info = get_type(b)
        row = [
            title, doi, isbn, print_isbn, e_isbn, pub_date,
            link_info['has_pdf'], link_info['has_epub']
        ]
        # print(get_type(b['links']))
        the_data.append(row)

    the_sheet.clear()
    the_sheet.appendData(the_data)

    quit()

def main():
    SERVER = "Prod"  # test
    # SERVER = "Prod"
    asf.setServer(SERVER)

    sheet_id = '1Jbdhda0HbmHKJ7COOJ3CBzdMwpSeIbYHyXzr179ETpI'
    read_sheet = dataSheet(sheet_id, 'TEST!A:Z')  # Test
    write_sheet = dataSheet(sheet_id, 'Output!A:Z')

    the_data = read_sheet.getData()
    the_data.pop(0)

    # print(the_refs)

    the_output = []
    for r in the_data:
        repo = r[1]
        ref = r[2]
        extref_old = r[3]
        extref_new = r[5]

        the_ao = json.loads(asf.getArchivalObjectByRef(repo, ref))
        asid = the_ao['uri'].split('/')[4]
        print("asid: " + str(asid))

        the_notes = json.dumps(the_ao['notes'])

        # fix problem of leading space in href
        the_new_notes = the_notes.replace('xlink:href=\\" http',
                                          'xlink:href=\\"http')
        # replace old url with new one
        the_new_notes = the_new_notes.replace(extref_old, extref_new)
        print(the_new_notes)

        the_ao['notes'] = json.loads(the_new_notes)
        pprint(the_ao)

        x = asf.postArchivalObject(repo, asid, json.dumps(the_ao))
        out_row = [SERVER, repo, asid, ref, extref_old, extref_new, str(x)]
        print(out_row)
        the_output.append(out_row)

    # write_sheet.clear()
    write_sheet.appendData(the_output)

    quit()

def main():
    the_sheet = dataSheet('1D2E5Sm3qZdU3MGXk7q2XxfBpQS1iqauQm19f_y9aTbM',
                          'Sheet1!A:Z')
    out_path = os.path.join(MY_PATH,
                            'output_test/springer/springer_subjects.pickle')

    subject_data = get_subjects(the_sheet)
    print(pickle_it(subject_data, out_path))
    # pprint(subject_data)

    quit()

def build_linglong_feed(pickle_dir=OUTPUT_FOLDER, output_dir=OUTPUT_FOLDER):
    """Run after data has been extracted via get_linglong.

    Args:
        pickle_dir (str, optional): Path to folder containing pickles. Defaults to OUTPUT_FOLDER.
        output_dir (str, optional): Path to output folder. Defaults to OUTPUT_FOLDER.
    """
    the_out_sheet = dataSheet(SHEET_ID, 'errors!A:Z')

    for y in range(1931, 1938):
        x = ia.build_feed(pickle_dir + '/ia_ll_' + str(y) + '.pickle',
                          'll',
                          output_dir=output_dir)
        the_out_sheet.appendData(x)

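# Usage sketch: the feed-building script elsewhere in this set calls it with the
# pickle and output locations pointed at the same directory, e.g.:
#
#   build_linglong_feed(pickle_dir=OUTPUT_DIR, output_dir=OUTPUT_DIR)
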
def get_collection(sheet_id, sheet_tab, feed_stem, collection_title, multipart=False):
    the_in_sheet = dataSheet(sheet_id, sheet_tab + '!A:Z')
    the_out_sheet = dataSheet(sheet_id, 'extract-errors!A:Z')

    # get a list of bibids and ia ids to process
    the_inputs = the_in_sheet.getData()
    the_inputs.pop(0)  # remove head row

    the_records = []
    for i in the_inputs:
        the_920s = i[4].split(';')  # get arbitrary number of 920s for this row
        rl = []
        for r in the_920s:
            if 'www.gutenberg.org/ebooks/' in r:
                rp = parse_920(r)
                rl.append({
                    'bibid': i[0],
                    'id': rp['id'],
                    'label': rp['label']
                })
        # If we are allowing multi-volume works, add all;
        # otherwise, only add to list if it is a monograph.
        if len(rl) == 1 or multipart is True:
            the_records += rl
        elif len(rl) > 1:
            print("WARNING: " + str(i[0]) +
                  " has multiple volumes. Skipping!")
        else:
            print("WARNING: could not find Project Gutenberg record in " +
                  str(i[0]) + ". Skipping!")

    # feed_data = extract_data(the_records, feed_stem, collection_title)

    return the_records

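# Usage sketch (hedged): all argument values below are hypothetical. Unlike the
# IA and OAPEN variants, this version returns the record list rather than
# pickling extracted feed data.
#
#   records = get_collection('<sheet-id>', 'Gutenberg', 'gutenberg_feed',
#                            'Project Gutenberg', multipart=False)
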
def main():
    # SERVER = "Test"  # test
    SERVER = "Prod"
    asf.setServer(SERVER)

    sheet_id = '1OABHEJF1jqA1vlbW5yTENry5W7YqKlag5nJDJ9ouCzg'
    # read_sheet = dataSheet(sheet_id, 'Test!A:Z')  # Test
    read_sheet = dataSheet(sheet_id, 'Prod!A:Z')  # Test
    write_sheet = dataSheet(sheet_id, 'output!A:Z')

    the_refs = read_sheet.getDataColumns()[0]
    # print(the_refs)

    the_output = []
    for r in the_refs:
        the_ao = json.loads(asf.getArchivalObjectByRef(2, r))
        asid = the_ao['uri'].split('/')[4]
        old_date = str(the_ao['dates'][0]['begin'])

        new_ao = fix_begin_date(2, the_ao)
        new_date = str(new_ao['dates'][0]['begin'])
        print("asid: " + str(asid))

        x = asf.postArchivalObject(2, asid, json.dumps(new_ao))
        out_row = [SERVER, r, asid, old_date, new_date, str(x)]
        # print(out_row)
        the_output.append(out_row)

    write_sheet.clear()
    write_sheet.appendData(the_output)

    quit()

    x = fix_begin_date(2, 'b2ec9ce511e4212ebb145fb909ca85bd')
    print(x)

    pprint(
        json.loads(
            asf.getArchivalObjectByRef(2, 'b2ec9ce511e4212ebb145fb909ca85bd')))

    quit()

def main():
    sheet_id = '1_1d8aElm9yRG4Avy9j6WxTh2TjhMp8iqaeZkgUNdxeE'
    report_sheet = dataSheet(sheet_id, 'Test!A:Z')
    lookup_sheet = dataSheet(sheet_id, 'Lookup!A:Z')

    lookup_file = os.path.join(MY_PATH,
                               "output_test/proquest/proquest_lookup.json")

    x = get_lookup(lookup_sheet, lookup_file)
    print(x)

    quit()

    in_file = os.path.join(MY_PATH,
                           "output_test/proquest/ProQuest_BooksCatalog.json")

    with open(in_file, "rb") as f:
        json_data = json.load(f)

    the_books = json_data['opdsFeed']['groups'][0]['publications']

    the_data = [['Title', 'EBC ID', 'URL', 'PDF', 'EPUB']]

    for b in the_books:
        id = b['metadata']['identifier'].split('/')[-1]
        url = "https://ebookcentral.proquest.com/lib/columbia/detail.action?docID=" + \
            str(id)
        link_info = get_type(b['links'])
        row = [
            b['metadata']['title'], id, url, link_info['has_pdf'],
            link_info['has_epub']
        ]
        # print(get_type(b['links']))
        the_data.append(row)

    report_sheet.clear()
    report_sheet.appendData(the_data)

    quit()

def main():
    asf.setServer('Prod')

    # the_repos = [2, 3, 4, 5]
    the_repos = [2]
    the_fields = [
        'id', 'title', 'identifier', 'create_time', 'system_mtime',
        'last_modified_by', 'json'
    ]
    the_sheet = dataSheet('198ON5qZ3MYBWPbSAopWkGE6hcUD8P-KMkWkq2qRooOY',
                          'unpublished!A:Z')

    the_unpublished = []

    for r in the_repos:
        print('searching repo ' + str(r))

        x = getUnpublished(r, filter='resources', fields=the_fields)
        # print(x)

        for a in x:
            row = [a[v] for v in the_fields]
            my_json = json.loads(row.pop(6))
            try:
                call_no = my_json['user_defined']['string_1']
            except:
                call_no = ''
            # get the repo from the uri string.
            repo_id = int(str(row[0].split('/')[-3]).rstrip())
            # get the asid from the uri string.
            asid = int(str(row[0].split('/')[-1]).rstrip())
            row.pop(0)
            row.insert(0, asid)
            row.insert(0, repo_id)
            if 'UA' in call_no:
                repo = 'nnc-ua'
            else:
                repo = get_repo(repo_id)
            row.insert(0, repo)
            the_unpublished.append(row)
            print(row)
        print('Repo ' + str(r) + ': ' + str(len(x)))

    print('Total unpublished: ' + str(len(the_unpublished)))

    # the_sheet.clear()
    # the_sheet.appendData([the_fields])
    # the_sheet.appendData(the_unpublished)

    quit()

def main():
    my_name = __file__
    script_name = os.path.basename(my_name)

    # This makes sure the script can be run from any working directory and still find related files.
    my_path = os.path.dirname(__file__)

    the_output_sheet = dataSheet(
        '1sjpjLt_I54h9l-ABwueYdN6xVAm01S6rKZB3BgMQv3k', 'output!A:Z')

    # The table of xlinks with format:
    # bibid|container_id|href|title|text
    aCSV = os.path.join(my_path, 'output/output_all_f.txt')

    with open(aCSV) as the_csv:
        the_data = [row for row in csv.reader(the_csv, delimiter='|')]

    the_heads = the_data.pop(0)
    the_heads += ['STATUS', 'REDIRECT_LOCATION', 'REDIRECT_STATUS']
    the_new_data = [the_heads]

    for a_row in the_data:
        print(a_row)
        if 'clio.columbia.edu' in a_row[2]:
            print('Skipping CLIO record ' + a_row[2])
        else:
            response = get_response(a_row[2])
            if response['status'] != 200:
                new_row = a_row
                while len(new_row) < 5:
                    new_row.append("")
                new_row.append(response['status'])
                if 'location' in response:
                    redirect_response = get_response(response['location'])
                    new_row += [
                        response['location'], redirect_response['status']
                    ]
                print(new_row)
                the_new_data.append(new_row)

    # Write output to sheet.
    the_output_sheet.clear()
    the_output_sheet.appendData(the_new_data)

def main():
    # TEST
    x = feed_parse(
        'https://www.gutenberg.org/ebooks/search.opds/?sort_order=downloads')
    pprint(x)
    quit()
    ###

    the_sheet = dataSheet('1SyErJ6LqNUzEoJ5LP14L9Ofkn63CUaDor4H_8cRFGgo',
                          'test!A:Z')

    a_url = 'https://ebooks-test.library.columbia.edu/feeds/ia/culcarnegiecorp/'
    # a_url = 'https://ebooks-test.library.columbia.edu/feeds/ia/cullinglong/'

    the_dicts = feed_parse(a_url)

    the_heads = ['BIBID', 'ID', 'Title', 'URL', 'Label']

    the_data = [[
        x['id'], x['title'],
        "https://ebooks.lyrasistechnology.org/columbia/book/URI%2F" +
        urllib.parse.quote(x['id'])
    ] for x in the_dicts]

    for row in the_data:
        try:
            row.insert(0, re.search('_(.+?)_', row[0]).group(1))
        except:
            # raise "Could not parse bibid"
            print("Could not parse bibid -- " + row[0])
            row.insert(0, "")

    the_data.insert(0, the_heads)

    the_sheet.clear()
    x = the_sheet.appendData(the_data)
    print(x)

def get_linglong():
    """Get the linglong data from IA and save in one pickle per year (vol)."""
    the_sheet = dataSheet(SHEET_ID, 'LingLong!A:Z')

    the_input = the_sheet.getData()
    heads = the_input.pop(0)

    the_data = []
    for y in range(1931, 1938):
        the_data.append({
            'vol': y,
            'items': [{
                'bibid': r[0],
                'id': r[2],
                'label': r[3]
            } for r in the_input if r[1] == str(y)]
        })

    # pprint(the_data)

    for vol_data in the_data:
        print(' ')
        print(vol_data['vol'])
        feed_stem = 'ia_ll_' + str(vol_data['vol'])
        pickle_path = OUTPUT_FOLDER + '/' + feed_stem + '.pickle'
        # print(vol_data['items'])
        feed_data = ia.extract_data(vol_data['items'], feed_stem,
                                    'Ling Long (' + str(vol_data['vol']) + ')')
        pprint(feed_data['errors'])
        print('Saving ' + str(len(feed_data['data'])) + ' records to ' +
              pickle_path)
        util.pickle_it(feed_data, pickle_path)

def main():
    # Set path to Saxon processor
    saxon_path = 'saxon-9.8.0.12-he.jar'

    # Set path to XSLT
    the_xslt = 'ead_tbm_csv.xsl'

    the_infile = '/path/to/source/xmlfile_ead.xml'
    the_outpath = '/path/to/output_file.txt'

    # Set Google Sheet id and range
    the_sheet = dataSheet('LmguZqjAk23OPHeDmyy2wvZiXGaiLz7', 'Test!A:Z')

    # Parameters to pass to XSLT:
    params = {
        'series_scope': 7,  # series to process; 0 = all series
        'subject': 'AUDIO RECORDINGS'  # static label for content
    }

    # generate a parameter string from params, escaping spaces in values
    param_str = ''
    for key, value in params.items():
        value = str(value).replace(' ', '\\ ')
        param_str += str(key) + '=' + str(value) + ' '

    # Send to Saxon with parameters
    saxon_process(saxon_path, the_infile, the_xslt, the_outpath,
                  theParams=param_str)

    # Send result csv to Google Sheet
    y = the_sheet.importCSV(the_outpath, delim='|')

    print(y)

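# Note on the parameter string (illustrative): with the params dict above,
# param_str comes out as "series_scope=7 subject=AUDIO\ RECORDINGS " (trailing
# space included). The space in the value is escaped on the assumption that
# saxon_process hands the string to Saxon on the command line.
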
def main():
    sheet_id = '1X4e52glzKJjRpiOb7S6b4bOzyhTfjHCVlfjPosdABKM'

    the_info = [
        {'name': 'AveryTrade', 'file': 'output/ia/ia_avt_feed.pickle'},
        {'name': 'Carnegie', 'file': 'output/ia/ia_ccny_feed.pickle'},
        {'name': 'Durst', 'file': 'output/ia/ia_durst_feed.pickle'},
        {'name': 'MedicalHeritage', 'file': 'output/ia/ia_med_feed.pickle'},
        {'name': 'MissionaryResearch', 'file': 'output/ia/ia_mrp_feed.pickle'},
        {'name': 'MuslimWorld', 'file': 'output/ia/ia_mwm_feed.pickle'},
        {'name': 'WWI', 'file': 'output/ia/ia_wwi_feed.pickle'},
    ]

    the_out_sheet = dataSheet(sheet_id, 'Combined!A:Z')
    the_out_sheet.clear()

    the_heads = ['collection', 'bibid', 'href', 'label']
    the_data = [the_heads]

    for i in the_info:
        the_links = get_links(i['file'])
        the_data += [[i['name'], r[0], r[1], r[2]] for r in the_links]

    post = the_out_sheet.appendData(the_data)
    print(post)

    quit()

    # post = the_out_sheet.appendData(get_links('output/ia/ia_ll_1931.pickle'))
    # print(post)
    # post = the_out_sheet.appendData(get_links('output/ia/ia_ll_1932.pickle'))
    # print(post)
    # post = the_out_sheet.appendData(get_links('output/ia/ia_ll_1933.pickle'))
    # print(post)
    # post = the_out_sheet.appendData(get_links('output/ia/ia_ll_1934.pickle'))
    # print(post)
    # post = the_out_sheet.appendData(get_links('output/ia/ia_ll_1935.pickle'))
    # print(post)
    # post = the_out_sheet.appendData(get_links('output/ia/ia_ll_1936.pickle'))
    # print(post)
    # post = the_out_sheet.appendData(get_links('output/ia/ia_ll_1937.pickle'))
    # print(post)

    quit()

from sheetFeeder import dataSheet
import pandas as pd
import pandas_functions as pf

sheet_id = '19zHqOJt9XUGfrfzAXzOcr4uARgCGbyYiOtoaOCAMP7s'
sheet_range = 'Pandas!A:Z'
the_sheet = dataSheet(sheet_id, sheet_range)

# Put some sample data in the sheet
data1 = [['Col A', 'Col B', 'Col C', 'Col D'],
         [1.0, 2.0, 3.0, 4.0],
         [5.0, 6.0, 7.0, 8.0],
         [0.0, 11.5, -5.0, 9.25]]
the_sheet.clear()
the_sheet.appendData(data1)

# 1. Get the data from the sheet (this is redundant, but just for demo purposes!)
ds = the_sheet.getData()

# 2. Convert to Pandas DataFrame
df = pf.datasheet_to_dataframe(sheet_id, sheet_range)
print(df)
print("")

# Add a column calculating the averages of each row
df['Average'] = df.mean(numeric_only=True, axis=1)
print(df)

# Convert back into array
data2 = pf.dataframe_to_datasheet(df)

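# A possible final step (sketch): push the converted array back to the sheet,
# mirroring the round trip hinted at in the other pandas demo above.
# the_sheet.appendData(data2)
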
def main():
    # This makes sure the script can be run from any working directory and still find related files.
    my_path = os.path.dirname(__file__)

    if DEBUG is True:
        sheet_id = "18uvn9wIABHVIdjlSRNXqnHUKB2aTvZgKO62e-UFNuO8"  # test
    else:
        sheet_id = "1dTeMAK_cGWAUvrqvAiY2hGy4gJewrmWjnuIZu8NhWwE"

    now1 = datetime.datetime.now()
    start_time = str(now1)
    end_time = ""  # set later

    # First get the agent records from API (this can take a long time!)

    asf.setServer("Prod")  # AS instance: Prod | Dev | Test

    if DEBUG is True:
        out_folder = "/cul/cul0/ldpd/archivesspace/test/agents"
    else:
        out_folder = "/cul/cul0/ldpd/archivesspace/agents"

    family_agents_file = os.path.join(out_folder, "agents_families.pickle")
    corp_agents_file = os.path.join(out_folder, "agents_corporate.pickle")
    persons_agents_file = os.path.join(out_folder, "agents_persons.pickle")

    the_info = [
        {
            "name": "families",
            "endpoint": "/agents/families",
            "sheet": dataSheet(sheet_id, "families!A:Z"),
            "pickle": family_agents_file
        },
        {
            "name": "corporate",
            "endpoint": "/agents/corporate_entities",
            "sheet": dataSheet(sheet_id, "corporate!A:Z"),
            "pickle": corp_agents_file
        },
        {
            "name": "persons",
            "endpoint": "/agents/people",
            "sheet": dataSheet(sheet_id, "persons!A:Z"),
            "pickle": persons_agents_file
        },
    ]

    # List of fields to extract, expressed as dpaths.
    the_fields = [
        ["uri", "uri"],
        ["title", "title"],
        ["source", "names/0/source"],
        ["authority_id", "names/0/authority_id"],
        ["is_linked_to_published_record", "is_linked_to_published_record"],
        ["publish", "publish"],
        ["last_modified_by", "last_modified_by"],
        ["last_modified", "system_mtime"],
    ]

    the_record_cnts = {}

    if DEBUG is True:
        print("*** (DEBUG MODE) ***")

    for i in the_info:
        print("Getting agents: " + i["name"])
        agent_data = get_agent_data(i["name"], i["endpoint"], i["pickle"])

        print(" ")

        # Report the saved data to Google Sheet
        the_sheet = i["sheet"]

        the_heads = [x[0] for x in the_fields]
        the_output = [the_heads]

        the_record_cnts[i["name"]] = str(len(agent_data))

        for agent in agent_data:
            the_row = []
            # Use dpath to extract values from dict and compose into rows.
            for af in the_fields:
                try:
                    d = str(dpath.util.get(agent, af[1]))
                except:
                    d = ""
                the_row.append(d)
            # print(the_row)
            the_output.append(the_row)

        the_sheet.clear()
        save = the_sheet.appendData(the_output)
        print(save)

    # Generate log

    print(the_record_cnts)
    print(" ".join(the_record_cnts))

    cnt_str = "".join(k + "=" + v + ". " for k, v in the_record_cnts.items())
    # print(cnt_str)

    now2 = datetime.datetime.now()
    end_time = str(now2)
    my_duration = str(now2 - now1)

    the_log = ("Data imported by " + MY_NAME + ". " + cnt_str + " Start: " +
               start_time + ". Finished: " + end_time + " (duration: " +
               my_duration + ").")

    log_range = "log!A:A"
    log_sheet = dataSheet(sheet_id, log_range)

    log_sheet.appendData([[the_log]])

    print(" ")
    print(the_log)

    log_it(SCRIPT_NAME, the_log)
    # digester.post_digest(SCRIPT_NAME, the_log)

    print(" ")

    exit_msg = "Script done. Updated data is available at " + \
        "https://docs.google.com/spreadsheets/d/" + \
        str(sheet_id) + "/edit?usp=sharing"

    print(exit_msg)
    log_it(SCRIPT_NAME, exit_msg)

    quit()

def main():
    now1 = datetime.datetime.now()
    start_time = str(now1)
    end_time = ''  # set later

    # day_offset = now1.weekday() + 1  # Calculate the Sunday of current week
    day_offset = 7  # use past seven days, regardless of current day

    print('Script ' + MY_NAME + ' begun at ' + start_time + '. ')

    if not DEBUG:
        the_sheet_id = '1JA5bRSnYV80sx4m5SOFQ6QJ4u21SXvQeNdNbuRVCdds'
    else:
        the_sheet_id = '1e_TAK8eUsaHltBu9J5bNO1twThqt7_nE5olmz2pdCUw'  # test doc
        day_offset = 14  # use past 2 weeks for testing

    # Set date stamp of start of week (Sunday) to determine recently created accessions.
    begin_of_week = (now1 - datetime.timedelta(day_offset)).date()

    the_sheet_rbml = dataSheet(the_sheet_id, 'rbml!A:Z')
    the_sheet_avery = dataSheet(the_sheet_id, 'avery!A:Z')
    the_sheet_rbmlbooks = dataSheet(the_sheet_id, 'rbmlbooks!A:Z')

    # Location to save output
    if DEBUG is True:
        out_folder = "/cul/cul0/ldpd/archivesspace/test/accessions"
    else:
        out_folder = "/cul/cul0/ldpd/archivesspace/accessions"

    rbml_acc_file = os.path.join(out_folder, 'report_rbml_accessions.json')
    avery_acc_file = os.path.join(out_folder, 'report_avery_accessions.json')
    rbmlbooks_acc_file = os.path.join(out_folder,
                                      'report_rbmlbooks_accessions.json')

    print(' ')

    print('Starting accession report in ' +
          'https://docs.google.com/spreadsheets/d/' + str(the_sheet_id) +
          '/edit?usp=sharing')

    if not DEBUG:
        # Save the accessions as json files. In DEBUG mode, just use the files already saved.

        print('Saving Avery accession data to ' + avery_acc_file + '....')

        # Only fetch file if not in Debug mode
        with open(avery_acc_file, "w+") as f:
            try:
                x = asf.getAccessions(3)
                f.write(x)
            except:
                raise ValueError(
                    "There was an error in getting Avery accession data!")
            y = json.loads(x)
            if 'error' in y[0]:
                print(y[0]['error'])

        print('Saving RBML accession data to ' + rbml_acc_file + '....')

        with open(rbml_acc_file, "w+") as f:
            try:
                x = asf.getAccessions(2)
                f.write(x)
            except:
                raise ValueError(
                    "There was an error in getting RBML accession data!")
            y = json.loads(x)
            if 'error' in y[0]:
                print(y[0]['error'])

        print('Saving RBMLBOOKS accession data to ' + rbmlbooks_acc_file +
              '....')

        with open(rbmlbooks_acc_file, "w+") as f:
            try:
                x = asf.getAccessions(6)
                f.write(x)
            except:
                raise ValueError(
                    "There was an error in getting RBMLBOOKS accession data!")
            y = json.loads(x)
            if 'error' in y[0]:
                print(y[0]['error'])

    print(' ')

    # the_files = [
    #     [avery_acc_file, the_sheet_avery],
    #     [rbml_acc_file, the_sheet_rbml]
    # ]

    the_recents = {}

    the_info = [{
        'repo_name': 'Avery',
        'repo_id': 3,
        'acc_file': avery_acc_file,
        'the_sheet': the_sheet_avery
    }, {
        'repo_name': 'RBML',
        'repo_id': 2,
        'acc_file': rbml_acc_file,
        'the_sheet': the_sheet_rbml
    }, {
        'repo_name': 'RBMLBOOKS',
        'repo_id': 6,
        'acc_file': rbmlbooks_acc_file,
        'the_sheet': the_sheet_rbmlbooks
    }]

    # The top-level elements to save from the JSON (each can be further processed below)
    the_keys = {
        "title": "title",
        "uri": "uri",
        "repository": "repository",
        "accession_date": "accession_date",
        "id_0": "id_0",
        "id_1": "id_1",
        "id_2": "id_2",
        "id_3": "id_3",
        "extents": "extents",
        "related_resources": "related_resources",
        "collection_management": "collection_management",
        "user_defined": "user_defined",
        "create_time": "create_time",
        "system_mtime": "system_mtime",
        "last_modified_by": "last_modified_by"
    }

    ext_dict = {
        "ext-number": "number",
        "ext-portion": "portion",
        "ext-type": "extent_type"
    }

    for f in the_info:
        the_file = f['acc_file']
        the_target = f['the_sheet']
        repo_name = f['repo_name']

        with open(the_file) as f:
            the_data = json.load(f)

        all_rows = []

        for an_accession in the_data:
            # acc_info : prelim dict for each accession. Do things to it.
            acc_info = {}
            for key, value in the_keys.items():
                try:
                    acc_info.update({key: an_accession[value]})
                except (IndexError, KeyError):
                    acc_info.update({key: ""})

            # Refine elements by extracting subelements, etc.

            # Handle collection_management
            cm = acc_info["collection_management"]
            cm_dict = {
                "processing_priority": "processing_priority",
                "processing_status": "processing_status"
            }
            for key, value in cm_dict.items():
                try:
                    acc_info[key] = cm[value]
                except (IndexError, KeyError, TypeError):
                    acc_info[key] = ''
            acc_info.pop("collection_management")

            # Parse resource id and get bibid
            res = acc_info["related_resources"]
            if len(res) > 0:
                res_url = res[0]["ref"]
                repo = res_url.split('/')[2]
                asid = res_url.split('/')[4]
                bibid = asf.lookupBibID(repo, asid, LOOKUP_CSV)
            else:
                bibid = ''
                asid = ''
            acc_info["resource_bibid"] = bibid
            acc_info["resource_asid"] = asid
            acc_info.pop("related_resources")

            # Parse BibID out of user_defined / integer_1
            try:
                usdef = acc_info["user_defined"]
                acc_info['integer_1'] = usdef['integer_1']
            except:
                acc_info['integer_1'] = ''
            acc_info.pop("user_defined")

            # Fix problem with leading "+" in id_3 (add apostrophe for display)
            acc_info["id_3"] = re.sub(r"^\+", "'+", acc_info["id_3"])

            # Handle repository
            repository = acc_info["repository"]
            if len(repository) > 0:
                repo_url = repository["ref"]
                repo = repo_url.split('/')[2]
            else:
                repo = ''
            acc_info["repo"] = repo
            acc_info.pop("repository")

            # Handle date
            acc_date = acc_info["accession_date"]
            yyyy = int(acc_date.split('-')[0])
            mm = int(acc_date.split('-')[1])
            dd = int(acc_date.split('-')[2])
            the_date = datetime.date(yyyy, mm, dd)
            # Due to a legacy import issue, some records with unknown dates have
            # malformed dates like 0002-01-23. Acknowledge their unknownness.
            if the_date.year < 1700:
                acc_info["accession_date"] = "0000-00-00"
                acc_info["year"] = ""
            else:
                acc_info["year"] = the_date.year

            # Fiscal year
            if the_date.year < 1700:
                acc_info["fiscal-year"] = ""
            else:
                if the_date.month > 6:
                    acc_info["fiscal-year"] = the_date.year + 1
                else:
                    acc_info["fiscal-year"] = the_date.year

            # Handle extents
            ext = acc_info["extents"]
            for key, value in ext_dict.items():
                try:
                    acc_info[key] = ext[0][value]
                except (IndexError, KeyError):
                    acc_info[key] = ''
            acc_info.pop("extents")

            # Clean up titles
            acc_info['title'] = str(acc_info['title']).strip()

            # Uncomment to list records in log.
            # print("processing: " + str(acc_info["uri"]).strip() + ' / ' + str(acc_info["title"]).strip())

            all_rows.append(acc_info)

        processed_msg = 'Processed ' + \
            str(len(all_rows)) + ' records in ' + repo_name + '.'
        print(processed_msg)
        log_it(SCRIPT_NAME, processed_msg)

        # the_heads = list(all_rows[0].keys())

        # Explicitly order the columns, as dict order is unpredictable.
        the_heads = [
            'title', 'uri', 'accession_date', 'id_0', 'id_1', 'id_2', 'id_3',
            'integer_1', 'resource_bibid', 'resource_asid', 'repo', 'year',
            'fiscal-year', 'ext-number', 'ext-portion', 'ext-type',
            'processing_priority', 'processing_status', 'create_time',
            'system_mtime', 'last_modified_by'
        ]

        the_output = []

        # Build row in order specified by the_heads
        for a_row in all_rows:
            # r = list(a_row.values())
            r = [a_row[h] for h in the_heads]
            the_output.append(r)
            # print(a_row)

        # Sort by accession_date (index 2 in inner lists)
        the_output = sorted(the_output, key=itemgetter(2), reverse=True)

        # Get list of recents
        the_recents[repo_name] = []

        for i in the_output:
            # i[18] = the create date column
            i_date = dateutil.parser.isoparse(i[18]).date()
            if i_date > begin_of_week:
                the_recents[repo_name].append(i)

        # If there are recents, list them
        if the_recents[repo_name]:
            print(' ')
            recent_msg = str(len(the_recents[repo_name])) + \
                ' accessions recently added in ' + repo_name + ': '
            print(recent_msg)
            log_it(SCRIPT_NAME, recent_msg)

            print('-----------')
            for r in the_recents[repo_name]:
                print(r[0])
                print(r[1])
                print('Created ' + str(dateutil.parser.isoparse(r[18]).date()))
                print('Last edited by ' + r[20])
                print('-----------')
        else:
            print(' ')
            recent_msg = 'No recently created accessions in ' + repo_name
            print(recent_msg)
            log_it(SCRIPT_NAME, recent_msg)

        # print(the_recents[repo_name])

        the_output.insert(0, the_heads)

        print(' ')

        the_target.clear()

        print('Writing ' + repo_name + ' data to sheet ...')
        the_target.appendData(the_output)

        print(' ')

    # Generate log and add to log tab, if exists.
    the_tabs = the_target.initTabs

    now2 = datetime.datetime.now()
    end_time = str(now2)
    my_duration = str(now2 - now1)

    if DEBUG is True:
        the_log = '[TEST] Data imported from ' + target_server + ' by ' + \
            MY_NAME + '. Start: ' + start_time + '. Finished: ' + end_time + \
            ' (duration: ' + my_duration + ').'
    else:
        the_log = 'Data imported from ' + target_server + ' by ' + MY_NAME + \
            '. Start: ' + start_time + '. Finished: ' + end_time + \
            ' (duration: ' + my_duration + ').'

    if 'log' in the_tabs:
        log_range = 'log!A:A'
        # today = datetime.datetime.today().strftime('%c')
        dataSheet(the_sheet_id, log_range).appendData([[the_log]])
    else:
        print('*** Warning: There is no log tab in this sheet. ***')

    print(' ')
    print(the_log)
    log_it(SCRIPT_NAME, the_log)

    print(' ')

    exit_msg = 'Script done. Updated data is available at ' + \
        'https://docs.google.com/spreadsheets/d/' + \
        str(the_sheet_id) + '/edit?usp=sharing'

    print(exit_msg)
    log_it(SCRIPT_NAME, exit_msg)

from sheetFeeder import dataSheet  # test
import datetime
from itertools import groupby
from pprint import pprint
import os

digest_sheet = '190p6gnhpakdYD72Eb1PLicdVlAtAxjQ7D_8oee7Tk1U'
digest_range = 'Sheet1!A:Z'
digest_sheet = dataSheet(digest_sheet, digest_range)


def main():
    icons = {
        "right-triangle": "\U000025B6",
    }

    my_name = __file__
    script_name = os.path.basename(my_name)

    # This makes sure the script can be run from any working directory and still find related files.
    now = str(datetime.datetime.now().strftime('%m/%d/%Y %H:%M:%S'))

    print('This 24-hour digest composed at ' + now + ' by ' + script_name +
          '. Contact [email protected] with questions/problems.')
    print(' ')
    print(' ')

    # Format the digest content.

# Extract matches from a csv matching on checksums.

import csv
from sheetFeeder import dataSheet

# the_sheet = dataSheet(
#     '1ogPrdAFe1tpoGPxMXXtdrQjaGe1g_XG0OMdSaaxNZs8', 'digital-matches!A:Z')

sheet_id = '1ogPrdAFe1tpoGPxMXXtdrQjaGe1g_XG0OMdSaaxNZs8'
# checksum_sheet = dataSheet(sheet_id, 'ebooks_2011')
the_sheet = dataSheet(sheet_id, 'digital-matches2!A:Z')

# the_csv = '/Users/dwh2128/Documents/Cleanup_Project/fstore-subfolders/911-audio-pres.csv'
# the_checksum_list = '/Users/dwh2128/Documents/Cleanup_Project/fstore-subfolders/911-checksums.csv'
the_checksum_list = '/Users/dwh2128/Documents/Cleanup_Project/fstore-subfolders/ebooks_2011_checksums.csv'
the_csv = '/Users/dwh2128/Documents/Cleanup_Project/duplicates-non-ifp-filtered.csv'

the_sheet.clear()

with open(the_checksum_list) as f:
    the_checksums = [r[0] for r in csv.reader(f)]

the_matches = []
with open(the_csv) as f:
    for r in csv.reader(f):
        if r[0] in the_checksums and '/digital/' in r[1]:
            the_matches.append([r[0], r[1] + r[2]])

# print(the_checksums)

def main():
    my_name = __file__

    # This makes sure the script can be run from any working directory and still find related files.
    my_path = os.path.dirname(__file__)

    now1 = datetime.datetime.now()
    start_time = str(now1)
    end_time = ''  # set later
    print('Script ' + my_name + ' begun at ' + start_time + '. ')

    # The Google Sheet to send data to
    the_data_sheet = dataSheet('1tQY9kR5YOh1e7i4dVRsl_GMxpNnUgCkb5X8qJQBAsG0',
                               'validation!A:Z')
    # the_data_sheet = dataSheet('198ON5qZ3MYBWPbSAopWkGE6hcUD8P-KMkWkq2qRooOY', 'validation!A:Z')  # Test

    # Set path to schema validator (Jing)
    jing_path = os.path.join(my_path,
                             "../resources/jing-20091111/bin/jing.jar")

    schema_filename = 'cul_as_ead.rng'
    schematron_filename = 'cul_as_ead.sch'
    schema_path = os.path.join(my_path, schema_filename)
    schematron_path = os.path.join(my_path, schematron_filename)

    # Use in notification email to distinguish errors/warnings
    icons = {
        'facesmiling': '\U0001F600',
        'redx': '\U0000274C',  # use for parse errors
        'exclamation': '\U00002757',
        'warning': '\U000026A0\U0000FE0F',  # use for schema validation errors
        'qmark': '\U00002753'
    }

    data_folder = '/cul/cul0/ldpd/archivesspace/ead_cache'
    # data_folder = '/opt/dcps/archivesspace/test/ead'  # for testing

    # Load files from directory into a list
    the_file_paths = []
    for root, dirs, files in os.walk(os.path.abspath(data_folder)):
        for file in files:
            the_file_paths.append(os.path.join(root, file))

    # The column heads for the report spreadsheet
    the_heads = [
        'bibid', 'file', 'well-formed?', 'valid?', 'schema output',
        'schematron output', 'warning type'
    ]

    the_results = []
    the_results.append(the_heads)

    # counters
    parse_errors = 0
    validation_errors = 0
    sch_warnings = 0

    for a_file in the_file_paths:
        the_file_data = []
        file_name = a_file.split('/')[-1]
        bibid = file_name.split('_')[-1].split('.')[0]
        # print('Processing ' + file_name)

        validation_result = jing_process(jing_path, a_file, schema_path)

        if 'fatal:' in validation_result:
            print(icons['redx'] + ' FATAL ERROR: ' + file_name +
                  ' could not be parsed!')
            wf_status = False
            validation_status = False
            parse_errors += 1
        else:
            wf_status = True
            if 'error:' in validation_result:
                validation_status = False
                print(icons['warning'] + ' ERROR: ' + file_name +
                      ' contains validation errors.')
                validation_errors += 1
            else:
                validation_status = True

        if validation_result:
            validation_result_clean = clean_output(validation_result,
                                                   incl_types=False)[0]
        else:
            validation_result_clean = validation_result

        if wf_status == False:
            schematron_result_clean = '-'
            warning_types = []
        else:
            # print('Result from schematron: ')
            schematron_result = jing_process(jing_path, a_file,
                                             schematron_path)

            if 'error:' in schematron_result:
                print('WARNING: ' + file_name +
                      ' has Schematron rule violations.')
                sch_warnings += 1

            if schematron_result:
                x = clean_output(schematron_result, incl_types=True)
                schematron_result_clean = x[0]
                warning_types = x[1]
            else:
                schematron_result_clean = ''
                warning_types = ''

        the_file_data = [
            bibid, file_name, wf_status, validation_status,
            validation_result_clean, schematron_result_clean,
            ', '.join(warning_types)
        ]

        the_results.append(the_file_data)

    # Write result data to spreadsheet
    the_data_sheet.clear()
    the_data_sheet.appendData(the_results)

    # Generate log and add to log tab, if exists.
    the_tabs = the_data_sheet.initTabs

    now2 = datetime.datetime.now()
    end_time = str(now2)
    my_duration = str(now2 - now1)

    the_log = 'EADs from ' + data_folder + ' evaluated by ' + \
        schema_filename + ' and ' + schematron_filename + \
        '. Parse errors: ' + str(parse_errors) + \
        '. Schema errors: ' + str(validation_errors) + \
        '. Schematron warnings: ' + str(sch_warnings) + \
        '. Start: ' + start_time + '. Finished: ' + end_time + \
        ' (duration: ' + my_duration + ').'

    if 'log' in the_tabs:
        log_range = 'log!A:A'
        # today = datetime.datetime.today().strftime('%c')
        dataSheet(the_data_sheet.id, log_range).appendData([[the_log]])
    else:
        print('*** Warning: There is no log tab in this sheet. ***')

    print(' ')
    # print(the_log)
    print('Parse errors: ' + str(parse_errors))
    print('Schema errors: ' + str(validation_errors))
    print('Schematron warnings: ' + str(sch_warnings))
    print(' ')
    print(' ')
    print('Script done. Check report sheet for more details: ' +
          the_data_sheet.url)

    quit()