class Schema(API):
    """Processor that casts the values of each row to typed values.

    Parameters
    ----------
    schema: str/dict
        Schema as in https://github.com/okfn/jsontableschema-py#model.
        When schema is None the processor falls back to type detection
        on every value.

    """

    # Public

    def __init__(self, schema=None):
        self.__schema = SchemaModel(schema) if schema is not None else None

    def process(self, iterator):
        model = self.__schema
        if model is not None:
            # A schema was given: let the model convert the whole row.
            converted = model.convert_row(*iterator.values)
        else:
            # No schema: detect the type of each value independently.
            converted = [helpers.parse_value(raw) for raw in iterator.values]
        iterator.values = tuple(converted)

    def handle(self, iterator):
        pass  # pragma: no cover
def do_csvimport(args, client=None):
    """Import the rows of a CSV file into the budget as transactions.

    Parameters
    ----------
    args:
        Parsed arguments. The attributes read here are ``schema`` (either a
        path to a schema file or the name of a ``<name>.json`` file located
        in ``schemas_dir``), ``csvfile``, ``accountname`` and
        ``import_duplicates``.
    client:
        Client holding the budget. Built with ``clientfromargs(args)`` when
        None.

    Raises
    ------
    SystemExit
        When the schema file is missing, when no account can be determined,
        when the schema has neither an ``amount`` column nor both ``inflow``
        and ``outflow`` columns, or when an account or category named in the
        CSV is not present in the budget.
    InvalidSchemaError
        Re-raised after logging when the schema cannot be loaded.

    """
    if client is None:
        client = clientfromargs(args)
    logger = get_logger(args)

    logger.debug('selected schema %s' % (args.schema,))
    if os.path.exists(args.schema):
        schemafile = args.schema
    else:
        schemafile = os.path.join(schemas_dir, args.schema + '.json')
        if not os.path.exists(schemafile):
            logger.error("This schema doesn't exist in csv_schemas")
            sys.exit(-1)

    try:
        schema = SchemaModel(schemafile, case_insensitive_headers=True)
        with open(schemafile, 'r') as sf:
            schemacontent = json.load(sf)
    except InvalidSchemaError:
        logger.error('Invalid CSV schema')
        raise
    # Number of leading lines of the CSV file to skip; one when unspecified.
    nheaders = schemacontent.get('nheaders', 1)

    headers = schema.headers
    logger.debug('schema headers %s' % headers)

    has_account = 'account' in headers
    has_inflow_outflow = 'inflow' in headers and 'outflow' in headers
    has_amount = 'amount' in headers
    has_category = 'category' in headers
    has_payee = 'payee' in headers
    has_memo = 'memo' in headers

    if not has_account and args.accountname is None:
        logger.error('This schema does not have an account column and no account name was provided')
        sys.exit(-1)

    accounts = {x.account_name: x for x in client.budget.be_accounts}
    payees = {p.name: p for p in client.budget.be_payees}
    mastercategories_perid = {m.id: m for m in client.budget.be_master_categories}
    # Subcategories are looked up by "<master category name>:<subcategory name>".
    subcategories = {}
    for s in client.budget.be_subcategories:
        m = mastercategories_perid[s.entities_master_category_id]
        subcategories[m.name + ':' + s.name] = s

    def getaccount(accountname):
        # Accounts are never created on the fly: an unknown name is fatal.
        logger.debug('searching for account %s' % accountname)
        try:
            return accounts[accountname]
        except KeyError:
            logger.error("Couldn't find this account: %s" % accountname)
            sys.exit(-1)

    def getpayee(payeename):
        # Unknown payees are created and added to the budget.
        logger.debug('searching for payee %s' % payeename)
        try:
            return payees[payeename]
        except KeyError:
            logger.debug("Couldn't find this payee: %s" % payeename)
            payee = Payee(name=payeename)
            client.budget.be_payees.append(payee)
            # Remember the new payee so that later rows naming it reuse this
            # object instead of appending one more payee per row.
            payees[payeename] = payee
            return payee

    def getsubcategory(categoryname):
        # Categories are never created on the fly: an unknown name is fatal.
        logger.debug('searching for subcategory %s' % categoryname)
        try:
            return subcategories[categoryname]
        except KeyError:
            logger.error("Couldn't find this category: %s" % categoryname)
            sys.exit(-1)

    if not has_account:
        # No account column: every row goes to the account given in args.
        entities_account_id = getaccount(args.accountname).id

    if not has_inflow_outflow and not has_amount:
        logger.error("This schema doesn't provide an amount column or (inflow,outflow) columns")
        sys.exit(-1)

    csvrow = namedtuple('CSVrow', field_names=headers)
    transactions = []
    imported_date = datetime.now().date()
    # Under Python 2 the csv module yields byte strings that must be decoded.
    is_python2 = sys.version_info[0] == 2

    logger.debug('OK starting the import from %s ' % os.path.abspath(args.csvfile))
    with open(args.csvfile, 'r') as inputfile:
        # Skip the header lines declared by the schema.
        for _ in range(nheaders):
            inputfile.readline()

        for row in csv.reader(inputfile):
            if is_python2:
                row = [cell.decode('utf-8') for cell in row]
            logger.debug('read line %s' % row)
            result = csvrow(*schema.convert_row(*row, fail_fast=True))

            if has_account:
                entities_account_id = getaccount(result.account).id

            # One of the two layouts is guaranteed by the check done before
            # the loop.
            if has_inflow_outflow:
                amount = result.inflow - result.outflow
            else:
                amount = result.amount

            if has_category and result.category:
                entities_subcategory_id = getsubcategory(result.category).id
            else:
                entities_subcategory_id = None

            imported_payee = result.payee if has_payee else ''
            entities_payee_id = getpayee(imported_payee).id
            memo = result.memo if has_memo else ''

            transaction = Transaction(
                entities_account_id=entities_account_id,
                amount=amount,
                date=result.date,
                entities_payee_id=entities_payee_id,
                entities_subcategory_id=entities_subcategory_id,
                imported_date=imported_date,
                imported_payee=imported_payee,
                memo=memo,
                source="Imported"
            )
            if args.import_duplicates or (not client.budget.be_transactions.containsduplicate(transaction)):
                logger.debug('Appending transaction %s ' % transaction.getdict())
                transactions.append(transaction)
            else:
                logger.debug('Duplicate transaction found %s ' % transaction.getdict())

    client.add_transactions(transactions)
def do_csvimport(args, client=None):
    """Import the rows of a CSV file into the budget as transactions.

    Parameters
    ----------
    args:
        Parsed arguments. The attributes read here are ``schema`` (either a
        path to a schema file or the name of a ``<name>.json`` file located
        in ``schemas_dir``), ``csvfile``, ``accountname`` and
        ``import_duplicates``.
    client:
        Client holding the budget. Built with ``clientfromargs(args)`` when
        None.

    Raises
    ------
    SystemExit
        When the schema file is missing, when no account can be determined,
        when the schema has neither an ``amount`` column nor both ``inflow``
        and ``outflow`` columns, or when an account or category named in the
        CSV is not present in the budget.
    InvalidSchemaError
        Re-raised after logging when the schema cannot be loaded.

    """
    if client is None:
        client = clientfromargs(args)
    logger = get_logger(args)

    logger.debug('selected schema %s' % (args.schema,))
    if os.path.exists(args.schema):
        schemafile = args.schema
    else:
        schemafile = os.path.join(schemas_dir, args.schema + '.json')
        if not os.path.exists(schemafile):
            logger.error("This schema doesn't exist in csv_schemas")
            sys.exit(-1)

    try:
        schema = SchemaModel(schemafile, case_insensitive_headers=True)
        with open(schemafile, 'r') as sf:
            schemacontent = json.load(sf)
    except InvalidSchemaError:
        logger.error('Invalid CSV schema')
        raise
    # Number of leading lines of the CSV file to skip; one when unspecified.
    nheaders = schemacontent.get('nheaders', 1)

    headers = schema.headers
    logger.debug('schema headers %s' % headers)

    has_account = 'account' in headers
    has_inflow_outflow = 'inflow' in headers and 'outflow' in headers
    has_amount = 'amount' in headers
    has_category = 'category' in headers
    has_payee = 'payee' in headers
    has_memo = 'memo' in headers

    if not has_account and args.accountname is None:
        logger.error('This schema does not have an account column and no account name was provided')
        sys.exit(-1)

    accounts = {x.account_name: x for x in client.budget.be_accounts}
    payees = {p.name: p for p in client.budget.be_payees}
    mastercategories_perid = {m.id: m for m in client.budget.be_master_categories}
    # Subcategories are looked up by "<master category name>:<subcategory name>".
    subcategories = {}
    for s in client.budget.be_subcategories:
        m = mastercategories_perid[s.entities_master_category_id]
        subcategories[m.name + ':' + s.name] = s

    def getaccount(accountname):
        # Accounts are never created on the fly: an unknown name is fatal.
        logger.debug('searching for account %s' % accountname)
        try:
            return accounts[accountname]
        except KeyError:
            logger.error("Couldn't find this account: %s" % accountname)
            sys.exit(-1)

    def getpayee(payeename):
        # Unknown payees are created and added to the budget.
        logger.debug('searching for payee %s' % payeename)
        try:
            return payees[payeename]
        except KeyError:
            logger.debug("Couldn't find this payee: %s" % payeename)
            payee = Payee(name=payeename)
            client.budget.be_payees.append(payee)
            # Remember the new payee so that later rows naming it reuse this
            # object instead of appending one more payee per row.
            payees[payeename] = payee
            return payee

    def getsubcategory(categoryname):
        # Categories are never created on the fly: an unknown name is fatal.
        logger.debug('searching for subcategory %s' % categoryname)
        try:
            return subcategories[categoryname]
        except KeyError:
            logger.error("Couldn't find this category: %s" % categoryname)
            sys.exit(-1)

    entities_account_id = None
    if not has_account:
        # No account column: every row goes to the account given in args.
        entities_account_id = getaccount(args.accountname).id

    if not has_inflow_outflow and not has_amount:
        logger.error("This schema doesn't provide an amount column or (inflow,outflow) columns")
        sys.exit(-1)

    csvrow = namedtuple('CSVrow', field_names=headers)
    transactions = []
    imported_date = datetime.now().date()
    # Under Python 2 the csv module yields byte strings that must be decoded.
    is_python2 = sys.version_info[0] == 2

    logger.debug('OK starting the import from %s ' % os.path.abspath(args.csvfile))
    with open(args.csvfile, 'r') as inputfile:
        # Skip the header lines declared by the schema.
        for _ in range(nheaders):
            inputfile.readline()

        for row in csv.reader(inputfile):
            if is_python2:
                row = [cell.decode('utf-8') for cell in row]
            # Ignore rows whose cells are all empty or whitespace.
            if not any(cell.strip() for cell in row):
                continue
            logger.debug('read line %s' % row)
            result = csvrow(*schema.convert_row(*row, fail_fast=True))

            if has_account:
                entities_account_id = getaccount(result.account).id
            if entities_account_id is None:
                # Without an account column the row has no ``account`` field,
                # so report the account name given in args instead.
                account_label = result.account if has_account else args.accountname
                logger.error('No account id, the account %s was not recognized' % account_label)
                sys.exit(-1)

            # One of the two layouts is guaranteed by the check done before
            # the loop.
            if has_inflow_outflow:
                amount = result.inflow - result.outflow
            else:
                amount = result.amount

            if has_category and result.category:
                entities_subcategory_id = getsubcategory(result.category).id
            else:
                entities_subcategory_id = None

            imported_payee = result.payee if has_payee else ''
            entities_payee_id = getpayee(imported_payee).id
            memo = result.memo if has_memo else ''

            transaction = Transaction(
                entities_account_id=entities_account_id,
                amount=amount,
                date=result.date,
                entities_payee_id=entities_payee_id,
                entities_subcategory_id=entities_subcategory_id,
                imported_date=imported_date,
                imported_payee=imported_payee,
                memo=memo,
                source="Imported"
            )
            if args.import_duplicates or transaction not in client.budget.be_transactions:
                logger.debug('Appending transaction %s ' % transaction.get_dict())
                transactions.append(transaction)
            else:
                logger.debug('Duplicate transaction found %s ' % transaction.get_dict())

    client.add_transactions(transactions)
def convert_data(schema, data):
    """Convert every row of ``data`` with a model built from ``schema``.

    Each item of ``data`` is unpacked as the positional arguments of
    ``SchemaModel.convert_row``; the converted rows are returned as a list
    of tuples, in the order of ``data``.
    """
    model = SchemaModel(schema)
    return [tuple(model.convert_row(*row)) for row in data]