def column(heading):
    """Return the position of *heading* within ``my_data.Data.csv_headings``.

    Args:
        heading: The CSV column heading to look up.

    Returns:
        The index reported by ``csv_headings.index(heading)``.

    Raises:
        Exception: Whatever the lookup raised (``ValueError`` if
            ``csv_headings`` is a list and the heading is absent). The
            failure is reported on the console and in the collection log
            first, then re-raised unchanged.
    """
    import my_data
    import my_colorama

    try:
        return my_data.Data.csv_headings.index(heading)
    except Exception as e:
        # str() keeps the handler itself from failing on a non-string heading.
        my_colorama.red("---- Exception in mods.column(): " + str(e))
        my_colorama.yellow("------ heading: " + str(heading))
        my_data.Data.collection_log_file.write(
            " ---- Exception in mods.column(): " + str(e))
        my_data.Data.collection_log_file.write(
            " ------ heading: " + str(heading))
        # Previously execution fell through to `return col` with `col` never
        # assigned, which replaced the real error with an UnboundLocalError.
        raise
def exception(e, tag):
    """Report exception *e*, then hand the offending element to ``skip()``.

    The message goes to both the object log and the collection log; it is
    echoed to the console in red only when ``constant.DEBUG`` is set.

    Args:
        e: The exception that was caught.
        tag: The element being processed when the exception occurred; it is
            passed on to ``skip()``.

    Returns:
        None. The value returned by ``skip()`` is discarded.
    """
    import json
    import my_colorama
    import my_data
    import constant

    report = "Exception!!! " + str(e)

    if constant.DEBUG:
        my_colorama.red('---- ' + report)

    my_data.Data.object_log_file.write(report + '\n')
    my_data.Data.collection_log_file.write(' ' + report + '\n')

    skip(tag)
def append(key, value):
    """Append *value* to an already-populated CSV cell.

    The new text is joined to the existing cell content with ``' ~ '``.
    Appending to an empty cell is treated as an error: it is logged and
    nothing is written.

    Args:
        key: CSV heading identifying the target column (resolved through
            ``column()``).
        value: The text to append, or a mapping whose ``'#text'`` entry holds
            the text (presumably an xmltodict node -- confirm with callers).

    Returns:
        ``constant.DONE + key`` when the value was appended, ``False`` when
        the target cell was empty.
    """
    # `constant` was used below without ever being imported in this function,
    # unlike every sibling helper; import it locally so the lookup cannot
    # fail with a NameError.
    import constant
    import my_data
    import my_colorama

    col = column(key)
    if not isinstance(value, str):
        value = value['#text']

    if len(my_data.Data.csv_row[col]) > 0:
        my_data.Data.csv_row[col] += ' ~ ' + value
        return constant.DONE + key

    message = ("------ append() called but the target cell in column(%s) "
               "is empty!" % key)
    if constant.DEBUG:
        my_colorama.red(message)
    my_data.Data.collection_log_file.write(message)
    return False
def single(key, value):
    """Store *value* in a CSV cell that must still be empty.

    Args:
        key: CSV heading identifying the target column (resolved through
            ``column()``).
        value: The text to store, or an object whose ``'#text'`` item holds
            the text (presumably an xmltodict node -- confirm with callers).

    Returns:
        ``constant.DONE + key`` when the cell was written, ``False`` when the
        cell already held content (the clash is logged and the cell is left
        untouched).
    """
    import my_data
    import my_colorama
    import constant

    index = column(key)
    text = value if type(value) is str else value['#text']

    # Guard clause: refuse to overwrite a cell that is already populated.
    if len(my_data.Data.csv_row[index]) > 0:
        complaint = ("------ single() called but the target cell in "
                     "column(%s) is already filled!" % key)
        if constant.DEBUG:
            my_colorama.red(complaint)
        my_data.Data.collection_log_file.write(complaint)
        return False

    my_data.Data.csv_row[index] = text
    return constant.DONE + key
def physicalDescription_action(desc):
    """Copy the parts of a physicalDescription element into the CSV row.

    ``digitalOrigin``, ``extent`` and ``form`` are each written with
    ``single()``. ``internetMediaType`` is first translated by ``getMIME()``
    (the literal ``'text/plain'`` is always accepted as-is) and then written
    to the ``MIME_Type`` column. Every entry that was stored is replaced in
    *desc* by the marker ``single()`` returned; every entry that could not be
    stored is passed to ``skip()``.

    Args:
        desc: Mapping holding the physicalDescription children; it is
            modified in place.

    Returns:
        ``False`` after normal processing, ``None`` when an exception was
        caught and passed to ``exception()``.
    """
    import my_colorama

    # (key in desc, CSV heading) pairs, processed in this order.
    simple_fields = (
        ('digitalOrigin', 'Digital_Origin'),
        ('extent', 'Extent'),
        ('form', 'Form~AuthorityURI'),
    )

    try:
        for element, heading in simple_fields:
            if element not in desc:
                continue
            marker = single(heading, desc[element])
            if marker:
                desc[element] = marker
            else:
                skip(desc[element])

        if 'internetMediaType' in desc:
            mime = getMIME(desc['internetMediaType'])
            if desc['internetMediaType'] == 'text/plain':
                mime = 'text/plain'

            if not mime:
                my_colorama.red("Could not guess MIME type from '%s'."
                                % desc['internetMediaType'])
                skip(desc['internetMediaType'])
            else:
                marker = single('MIME_Type', mime)
                if marker:
                    desc['internetMediaType'] = marker
                else:
                    skip(desc['internetMediaType'])

        # NOTE(review): the flattened source does not show whether this
        # return closed the whole try body or only the "could not guess MIME
        # type" branch -- confirm against the original layout.
        return False
    except Exception as e:
        exception(e, desc)
def skip(tag):
    """Log that *tag* could not be processed.

    Two messages are written to both the object log and the collection log:
    a generic warning, then the element itself serialised with
    ``json.dumps``. Both are also echoed to the console when
    ``constant.DEBUG`` is set.

    Args:
        tag: The unprocessed element; it must be JSON-serialisable.

    Returns:
        Always ``False``.
    """
    import json
    import my_colorama
    import my_data
    import constant

    warning = "Warning: Unexpected structure detected in the data. The element could not be processed."
    if constant.DEBUG:
        my_colorama.red('------ ' + warning)
    my_data.Data.object_log_file.write(warning + '\n')
    my_data.Data.collection_log_file.write(' ' + warning + '\n')

    # The element is serialised only after the warning has been logged, so
    # the warning still reaches the logs if serialisation fails.
    detail = "Unexpected Element: " + json.dumps(tag)
    if constant.DEBUG:
        my_colorama.yellow('-------- ' + detail)
    my_data.Data.object_log_file.write(' ' + detail + '\n')
    my_data.Data.collection_log_file.write(' ' + detail + '\n')

    return False
def cleanup(tmp):
    """Filter the text in *tmp* and save what is left as a ``.remainder`` file.

    Blank lines, bare brace lines and every line containing one of
    ``constant.NEEDLES`` are dropped. The surviving lines are wrapped in
    ``{`` / ``}``, evaluated as a dict, stripped of keys with empty (falsy)
    values and written as JSON to the object log's path with ``.log``
    replaced by ``.remainder``.

    Args:
        tmp: An open, seekable text file that also works as a context
            manager; it is closed by this function.

    Raises:
        Exception: Anything raised while reading or filtering *tmp* is
            printed and re-raised. Failures while evaluating the filtered
            text or writing the output are printed only.
    """
    import constant
    import my_data
    import my_colorama
    import xmltodict
    import tempfile
    import json

    remainder_path = my_data.Data.object_log_filename.replace('.log', '.remainder')
    brace_lines = ('{\n', '}\n', '}')

    try:
        tmp.seek(0)
        with tmp as source, tempfile.TemporaryFile('w+') as scratch:
            scratch.write('{\n')
            for line in source:
                if not line.strip() or line in brace_lines:
                    continue
                if any(needle in line for needle in constant.NEEDLES):
                    continue
                scratch.write(line)
            scratch.write('}\n')
            source.close()

            # Rewind the scratch file and drop every key with an empty value.
            scratch.seek(0)
            try:
                # NOTE(review): eval() executes whatever text survived the
                # filter; only safe while that text is produced by this
                # program. Consider ast.literal_eval if the content is
                # always plain literals.
                leftovers = eval(scratch.read())
                for key in [k for k, v in leftovers.items() if not v]:
                    del leftovers[key]

                # Write the remaining data next to the log as .remainder.
                with open(remainder_path, 'w+') as out:
                    out.write(json.dumps(leftovers))
            except Exception as e:
                my_colorama.red("-- Processing: %s" % my_data.Data.object_log_filename)
                my_colorama.red(" Exception: %s" % e)
    except Exception as e:
        my_colorama.red("Exception: %s" % e)
        raise