def _split_quoted_value(message):
    """Split a validator message around its first double-quoted value.

    Returns a ``(prefix, value)`` tuple: ``prefix`` is *message* truncated
    right after its second double quote and ``value`` is the text between the
    first two double quotes.  Both are ``''`` when *message* contains fewer
    than two double quotes.
    """
    first = message.find('"')
    second = message.find('"', first + 1)
    if second == -1:
        return '', ''
    return message[:second + 1], message[first + 1:second]


def _summarize_error(error, cust):
    """Turn one report error into ``(notification_entry, readable_message)``.

    The defaults (the error's own code and message, plus the quoted value
    pulled out of the message) are computed first, so they apply even when
    *cust* is empty.  The first entry of *cust* whose ``'columns'`` contains
    the error's column then overrides the code and message, except for
    ``required-constraint`` and ``type-or-format-error`` errors, which always
    keep their defaults.
    """
    row = error['row-number']
    # Not every error is tied to a column; fall back to "" instead of raising.
    col = error.get('column-number', "")
    message = error['message']
    code = error['code']

    prefix, quoted = _split_quoted_value(message)
    # A missing required value has nothing meaningful to quote back.
    value = '' if code == 'required-constraint' else quoted

    if code not in ('required-constraint', 'type-or-format-error'):
        for custom in cust:
            if col in custom['columns']:
                message = "{} in row {} and column {}{}".format(
                    prefix, row, col, custom['message'])
                code = custom['name']
                # Several custom entries may match; the first one wins.
                break

    return {'code': code, 'row': row, 'col': col, 'value': value}, message


def validation(csv, cust):
    """Inspect *csv* and send a notification describing any errors found.

    Args:
        csv: source handed unchanged to ``Inspector.inspect``.  (The name
            shadows the stdlib ``csv`` module inside this function; it is
            kept because it is part of the public signature.)
        cust: iterable of custom-error dicts, each providing ``'columns'``,
            ``'message'`` and ``'name'``.  May be empty.

    Returns:
        None.  Nothing is sent when the report is valid.

    Side effects:
        For every table in an invalid report an empty
        ``<name>_error_dump.txt`` file is created in the working directory,
        and a single ``notification(...)`` call is made after all tables
        have been processed, labelled with the last table's dataset name.
    """
    # Row/error limits are arbitrary upper bounds.
    inspector = Inspector(row_limit=100000, error_limit=100000)
    report = inspector.inspect(csv)
    if report['valid']:
        # An error report is only sent when there are issues to be found.
        return

    email_data = []
    pretty_lines = []
    for table in report['tables']:
        descriptor = ast.literal_eval(table['datapackage'])
        # NOTE(review): the dump file is created/truncated but nothing is ever
        # written to it; kept only to preserve the existing side effect.
        with open(descriptor['name'] + "_error_dump.txt", 'w'):
            pass
        for error in table['errors']:
            entry, message = _summarize_error(error, cust)
            email_data.append(entry)
            pretty_lines.append(message + "\n")

    # NOTE(review): `owner` is not defined in this function; it has to be
    # available at module level -- confirm.  `descriptor` is the one left over
    # from the final loop iteration.
    notification(owner, email_data, "".join(pretty_lines), descriptor['name'])
def _readable_error_message(error, cust):
    """Return the human-readable message for one report error.

    ``required-constraint`` and ``type-or-format-error`` errors keep the
    validator's own message.  For any other error, the first entry of *cust*
    whose ``'columns'`` contains the error's column replaces the tail of the
    message: the original text is cut right after its second double quote and
    the row, the column and the custom ``'message'`` are appended.  Without a
    matching custom entry the original message is returned unchanged.
    """
    message = error['message']
    if error['code'] in ('required-constraint', 'type-or-format-error'):
        return message

    row = error['row-number']
    # Not every error is tied to a column; use "" when the key is absent.
    col = error.get('column-number', "")
    for custom in cust:
        if col in custom['columns']:
            first = message.find('"')
            second = message.find('"', first + 1)
            # Rows are shown one lower than the validator reports them
            # (presumably to discount the header row -- confirm).  Rows that
            # are not integers are passed through untouched.
            shown_row = row - 1 if isinstance(row, int) else row
            # Several custom entries may match; the first one wins.
            return "{} in row {} and column {}{}".format(
                message[:second + 1], shown_row, col, custom['message'])
    return message


def validation(datapackage, cust, path_and_filename, path):
    """Validate an uploaded dataset and write a JSON error report.

    Args:
        datapackage: the uploaded dataset as a datapackage, a JSON-like
            structure holding the original dataset together with the schema
            generated for it earlier, e.g.::

                {
                    "name": filename,
                    "title": filename,
                    "resources": [
                        {
                            "name": filename,
                            "path": path_and_filename,
                            "schema": schema
                        }
                    ]
                }

        cust: iterable of custom-error dicts (``'columns'``, ``'message'``);
            the intended home for custom error messages, not yet fully
            implemented.
        path_and_filename: name of the uploaded file; used as the stem of the
            ``_error_report.json`` file.
        path: directory the output files are written to; ``""`` means the
            current working directory.

    Returns:
        The string ``"All clear"`` when the report is valid, otherwise the
        ``'errors'`` list of the last table in the report.

    Side effects:
        Prints the full report to stdout.  For an invalid report, creates an
        empty ``<name>_error_dump.txt`` per table and writes the returned
        errors to ``<path_and_filename>_error_report.json``.
    """
    # Row/error limits are arbitrary upper bounds.
    inspector = Inspector(row_limit=100000, error_limit=100000)
    report = inspector.inspect(datapackage)
    print(json.dumps(report, indent=4))

    if report['valid']:
        return "All clear"

    directory = path + '/' if path != "" else path

    # NOTE(review): these messages are built but not yet written or returned
    # anywhere (the original never consumed them either); they are kept as
    # the hook for the custom-message feature.
    readable_messages = []
    for table in report['tables']:
        descriptor = ast.literal_eval(table['datapackage'])
        # NOTE(review): the dump file is created/truncated but nothing is ever
        # written to it; kept only to preserve the existing side effect.
        with open(directory + descriptor['name'] + "_error_dump.txt", 'w'):
            pass
        readable_messages.extend(
            _readable_error_message(error, cust) for error in table['errors'])

    # TODO: optionally export the offending rows of the uploaded CSV to
    # '<path_and_filename>_errorlog.csv' (a disabled draft of this used to
    # live here).

    # `table` is the one left over from the final loop iteration, so the
    # report covers the last table only.
    errors = table['errors']
    with open(directory + path_and_filename + '_error_report.json', 'w') as fp:
        fp.write(json.dumps(errors, indent=4))
    return errors