def validate_sheet(engine, row, sheet_id, data_row_filter, stats_spending):
    """Validate the formatted date and amount of each spending row in a sheet.

    Rows are looked up in the ``spending`` table by ``row['resource_id']``
    and ``sheet_id``. Every inspected row is written back with its
    ``valid`` flag and ``signature`` inside a single transaction.

    :param engine: database handle used for lookups and for ``issue``.
    :param row: resource record; ``resource_id`` and ``retrieve_hash`` are read.
    :param sheet_id: identifier of the sheet whose rows are validated.
    :param data_row_filter: when truthy, only rows whose ``row_id`` equals
        this value are inspected.
    :param stats_spending: mapping with ``'date'`` and ``'amount'`` entries
        exposing ``add_spending(label, row)``.
    :returns: ``(has_valid_rows, error_message)``; ``error_message`` is
        ``None`` when no problem was reported.
    """
    spending_table = sl.get_table(engine, 'spending')
    sheet_rows = list(sl.find(engine, spending_table,
                              resource_id=row['resource_id'],
                              sheet_id=sheet_id))
    connection = engine.connect()
    trans = connection.begin()
    # Only the first failure of the resource is reported through issue();
    # ``error_message`` staying None means nothing has been reported yet.
    error_message = None
    try:
        valid_count = 0
        for entry in sheet_rows:
            if data_row_filter and data_row_filter != entry['row_id']:
                continue
            outcome = {'id': entry['id'], 'valid': True}
            outcome['signature'] = generate_signature(entry)

            if entry['DateFormatted'] is not None:
                stats_spending['date'].add_spending('Date ok', entry)
            else:
                stats_spending['date'].add_spending('Date invalid', entry)
                outcome['valid'] = False
                if error_message is None:
                    details = {
                        'row_id': entry.get('row_id'),
                        'row_number': entry.get('row_number'),
                        'Date': entry.get('Date'),
                    }
                    issue(engine, row['resource_id'], row['retrieve_hash'],
                          STAGE,
                          'Date invalid (blank, inconsistent or '
                          'unrecognised format)',
                          details)
                    error_message = 'Date invalid'

            if entry['AmountFormatted'] is not None:
                stats_spending['amount'].add_spending('Amount ok', entry)
            else:
                stats_spending['amount'].add_spending('Amount invalid', entry)
                outcome['valid'] = False
                if error_message is None:
                    details = {
                        'row_id': entry.get('row_id'),
                        'row_number': entry.get('row_number'),
                        'Amount': entry.get('Amount'),
                    }
                    issue(engine, row['resource_id'], row['retrieve_hash'],
                          STAGE, 'Amount invalid', details)
                    error_message = 'Amount invalid'

            if outcome['valid']:
                valid_count += 1
            sl.update(connection, spending_table,
                      {'id': outcome['id']}, outcome)
        trans.commit()
        return valid_count > 0, error_message
    finally:
        connection.close()
def update_network_entities(engine, file_name):
    """Synchronise the ``network_entity`` table with a CSV reference sheet.

    When ``file_name`` exists, each of its rows is upserted into the
    ``network_entity`` table (keyed on ``representativeEtlId`` and
    ``etlFingerPrint``) and every representative named in the file gets
    ``network_extracted`` set to ``True``. Afterwards the file is rewritten
    with the union of the pairs read from it and the pairs found in the
    table.

    :param engine: database handle passed through to the ``sl`` helpers.
    :param file_name: path of the CSV sheet to read (if present) and rewrite.
    """
    log.info("Updating network entities reference sheet: %s", file_name)
    key_columns = ['representativeEtlId', 'etlFingerPrint']
    network_entities = set()
    table = sl.get_table(engine, 'network_entity')

    if os.path.exists(file_name):
        # ``with`` releases the handle even if decoding or the upsert raises.
        with open(file_name, 'rb') as fh:
            for record in csv.DictReader(fh):
                # csv yields byte strings here; decode every cell to unicode.
                entity = dict((k, v.decode('utf-8'))
                              for (k, v) in record.items())
                network_entities.add((entity['representativeEtlId'],
                                      entity['etlFingerPrint']))
                sl.upsert(engine, table, entity, key_columns)
        # At this point the set only holds pairs that came from the file.
        reps = set(ne[0] for ne in network_entities)
        rep_table = sl.get_table(engine, 'representative')
        for rep in reps:
            sl.update(engine, rep_table, {'etlId': rep},
                      {'network_extracted': True})

    for row in sl.all(engine, table):
        network_entities.add((row['representativeEtlId'],
                              row['etlFingerPrint']))

    with open(file_name, 'wb') as fh:
        # The writer (and header row) is created lazily, so an empty set
        # leaves an empty file without a header.
        writer = None
        for ic, fp in network_entities:
            row = {
                'representativeEtlId': ic,
                'etlFingerPrint': fp
            }
            if writer is None:
                writer = csv.DictWriter(fh, row.keys())
                writer.writerow(dict(zip(row.keys(), row.keys())))
            encoded = [(k, unicode(v).encode('utf-8'))
                       for k, v in row.items()]
            writer.writerow(dict(encoded))
def validate_sheet(engine, row, sheet_id, data_row_filter, stats_spending):
    """Check date and amount on each spending row of a sheet and store the result.

    The sheet's rows are fetched from the ``spending`` table, each one is
    given a ``signature`` and a ``valid`` flag, and the flags are written
    back in one transaction. Statistics are recorded per check, and only
    the first failure found is reported through ``issue``.

    :param engine: database handle used for lookups and for ``issue``.
    :param row: resource record; ``resource_id`` and ``retrieve_hash`` are read.
    :param sheet_id: identifier of the sheet whose rows are validated.
    :param data_row_filter: when truthy, rows with a different ``row_id``
        are skipped entirely.
    :param stats_spending: mapping with ``'date'`` and ``'amount'`` entries
        exposing ``add_spending(label, row)``.
    :returns: tuple of (whether at least one row was valid, short error
        message or ``None``).
    """
    # One entry per check, applied in this order:
    # (stats bucket, formatted column, raw column, issue text,
    #  invalid label / short error, ok label)
    checks = (
        ('date', 'DateFormatted', 'Date',
         'Date invalid (blank, inconsistent or unrecognised format)',
         'Date invalid', 'Date ok'),
        ('amount', 'AmountFormatted', 'Amount',
         'Amount invalid',
         'Amount invalid', 'Amount ok'),
    )

    spending_table = sl.get_table(engine, 'spending')
    candidates = list(sl.find(engine, spending_table,
                              resource_id=row['resource_id'],
                              sheet_id=sheet_id))
    connection = engine.connect()
    trans = connection.begin()
    already_reported = False  # record first failure only
    error_message = None
    try:
        records = 0
        for item in candidates:
            if data_row_filter and data_row_filter != item['row_id']:
                continue
            result = {'id': item['id'], 'valid': True}
            result['signature'] = generate_signature(item)

            for bucket, formatted, raw, text, bad_label, ok_label in checks:
                if item[formatted] is not None:
                    stats_spending[bucket].add_spending(ok_label, item)
                    continue
                stats_spending[bucket].add_spending(bad_label, item)
                result['valid'] = False
                if already_reported:
                    continue
                issue(engine, row['resource_id'], row['retrieve_hash'],
                      STAGE, text,
                      {'row_id': item.get('row_id'),
                       'row_number': item.get('row_number'),
                       raw: item.get(raw)})
                error_message = bad_label
                already_reported = True

            if result['valid']:
                records += 1
            sl.update(connection, spending_table,
                      {'id': result['id']}, result)
        trans.commit()
        return records > 0, error_message
    finally:
        connection.close()
def validate_sheet(engine, row, sheet_id):
    """Flag each spending row of a sheet as valid or invalid.

    A row is invalid when ``DateFormatted`` or ``AmountFormatted`` is
    ``None``. Every row is written back with its ``valid`` flag and
    ``signature`` inside one transaction; only the first failure found is
    reported through ``issue``.

    :param engine: database handle used for lookups and for ``issue``.
    :param row: resource record; ``resource_id`` and ``retrieve_hash`` are read.
    :param sheet_id: identifier of the sheet whose rows are validated.
    :returns: ``True`` when at least one row was valid.
    """
    spending_table = sl.get_table(engine, 'spending')
    sheet_rows = list(sl.find(engine, spending_table,
                              resource_id=row['resource_id'],
                              sheet_id=sheet_id))
    connection = engine.connect()
    trans = connection.begin()
    reported = False  # record first failure only
    try:
        valid_count = 0
        for entry in sheet_rows:
            outcome = {'id': entry['id'], 'valid': True}
            outcome['signature'] = generate_signature(entry)

            date_missing = entry['DateFormatted'] is None
            if date_missing:
                outcome['valid'] = False
            if date_missing and not reported:
                details = {
                    'row_id': entry.get('row_id'),
                    'Date': entry.get('Date'),
                }
                issue(engine, row['resource_id'], row['retrieve_hash'],
                      'Date invalid (or possible the date format is '
                      'inconsistent)',
                      details)
                reported = True

            amount_missing = entry['AmountFormatted'] is None
            if amount_missing:
                outcome['valid'] = False
            if amount_missing and not reported:
                details = {
                    'row_id': entry.get('row_id'),
                    'Amount': entry.get('Amount'),
                }
                issue(engine, row['resource_id'], row['retrieve_hash'],
                      'Amount invalid', details)
                reported = True

            if outcome['valid']:
                valid_count += 1
            sl.update(connection, spending_table,
                      {'id': outcome['id']}, outcome)
        trans.commit()
        return valid_count > 0
    finally:
        connection.close()
def validate_sheet(engine, row, sheet_id):
    """Validate date and amount for every spending row of a sheet.

    Rows come from the ``spending`` table, filtered by
    ``row['resource_id']`` and ``sheet_id``. Each row gets a ``signature``
    and a ``valid`` flag, stored back in a single transaction. Only the
    first failure found is reported through ``issue``.

    :param engine: database handle used for lookups and for ``issue``.
    :param row: resource record; ``resource_id`` and ``retrieve_hash`` are read.
    :param sheet_id: identifier of the sheet whose rows are validated.
    :returns: ``True`` when at least one row passed both checks.
    """
    # (formatted column, raw column, issue text), applied in this order.
    checks = (
        ('DateFormatted', 'Date',
         'Date invalid (or possible the date format is inconsistent)'),
        ('AmountFormatted', 'Amount', 'Amount invalid'),
    )

    spending_table = sl.get_table(engine, 'spending')
    candidates = list(sl.find(engine, spending_table,
                              resource_id=row['resource_id'],
                              sheet_id=sheet_id))
    connection = engine.connect()
    trans = connection.begin()
    already_reported = False  # record first failure only
    try:
        records = 0
        for item in candidates:
            result = {'id': item['id'], 'valid': True}
            result['signature'] = generate_signature(item)

            for formatted, raw, text in checks:
                if item[formatted] is not None:
                    continue
                result['valid'] = False
                if already_reported:
                    continue
                issue(engine, row['resource_id'], row['retrieve_hash'],
                      text,
                      {'row_id': item.get('row_id'), raw: item.get(raw)})
                already_reported = True

            if result['valid']:
                records += 1
            sl.update(connection, spending_table,
                      {'id': result['id']}, result)
        trans.commit()
        return records > 0
    finally:
        connection.close()
log = logging.getLogger('dump')


def submit_all():
    """Submit every row produced by ``generate_all()`` to datawire.

    Each row is sent through a ``DataStringer``; when the row has a
    ``DateFormatted`` value it is parsed as ``%Y-%m-%d`` and passed as
    ``action_at``. A failed submission is logged and followed by a
    five-second pause before moving on to the next row (the failed row is
    not retried). Successfully submitted rows are flagged
    ``datawire_submitted`` in the ``spending`` table.

    :raises Exception: the submission error, once more than ten
        submissions in a row have failed.
    """
    engine = db_connect()
    spending = sl.get_table(engine, 'spending')
    stringer = DataStringer(service='ukspending', event='transactions')
    log.info("Submitting frames to datawire...")
    failures = 0  # consecutive failures; reset by any success
    for row in generate_all():
        action_at = row.get('DateFormatted')
        if action_at:
            action_at = datetime.datetime.strptime(action_at, '%Y-%m-%d')
        try:
            stringer.submit(row, action_at=action_at,
                            source_url=row.get('SourceURL'))
        except Exception:
            failures += 1
            if failures > 10:
                # Bare raise keeps the original traceback.
                raise
            log.exception("Submission failed (%d in a row), pausing",
                          failures)
            time.sleep(5)
            continue
        failures = 0
        # sl.update takes (engine, table, criteria, values); only rows that
        # were actually submitted are flagged.
        # NOTE(review): assumes generated rows carry the spending table's
        # 'id' column -- confirm against generate_all().
        sl.update(engine, spending, {'id': row['id']},
                  {'datawire_submitted': True})


if __name__ == '__main__':
    submit_all()
def submit_all():
    """Submit every row produced by ``generate_all()`` to datawire.

    Each row is sent through a ``DataStringer``; when the row has a
    ``DateFormatted`` value it is parsed as ``%Y-%m-%d`` and passed as
    ``action_at``. A failed submission is logged and followed by a
    five-second pause before moving on to the next row (the failed row is
    not retried). Successfully submitted rows are flagged
    ``datawire_submitted`` in the ``spending`` table.

    :raises Exception: the submission error, once more than ten
        submissions in a row have failed.
    """
    engine = db_connect()
    spending = sl.get_table(engine, 'spending')
    stringer = DataStringer(service='ukspending', event='transactions')
    log.info("Submitting frames to datawire...")
    failures = 0  # consecutive failures; reset by any success
    for row in generate_all():
        action_at = row.get('DateFormatted')
        if action_at:
            action_at = datetime.datetime.strptime(action_at, '%Y-%m-%d')
        try:
            stringer.submit(row, action_at=action_at,
                            source_url=row.get('SourceURL'))
        except Exception:
            failures += 1
            if failures > 10:
                # Bare raise keeps the original traceback.
                raise
            log.exception("Submission failed (%d in a row), pausing",
                          failures)
            time.sleep(5)
            continue
        failures = 0
        # sl.update takes (engine, table, criteria, values); only rows that
        # were actually submitted are flagged.
        # NOTE(review): assumes generated rows carry the spending table's
        # 'id' column -- confirm against generate_all().
        sl.update(engine, spending, {'id': row['id']},
                  {'datawire_submitted': True})


if __name__ == '__main__':
    submit_all()
def reset():
    """Set ``network_extracted`` to ``False`` on the representative table.

    The update is issued with empty criteria (presumably matching every
    row -- confirm against ``sl.update``) and a JSON ``{'status': 'OK'}``
    response is returned.
    """
    no_criteria = {}
    cleared_flag = {'network_extracted': False}
    sl.update(engine, representative, no_criteria, cleared_flag)
    response = jsonify({'status': 'OK'})
    return response