def extract_from_file(importer, filename, existing_entries):
    """Extract, deduplicate and sort the entries of a single document.

    Args:
      importer: The importer instance to handle the document.
      filename: Filesystem path to the document.
      existing_entries: Existing entries.
    Returns:
      The list of imported entries.
    """
    extracted = importer.extract(filename, existing_entries)
    if not extracted:
        return []

    # Remove entries already present in the existing ledger.
    extracted = importer.deduplicate(extracted, existing_entries)

    # Let the importer impose the final ordering.
    importer.sort(extracted)

    # Validate the data types of every produced directive.
    for directive in extracted:
        data.sanity_check_types(directive)
    return extracted
def validate_data_types(entries, options_map):
    """Check that all the data types of the attributes of entries are as expected.

    Users are provided with a means to filter the list of entries. They're able
    to write code that manipulates those tuple objects without any type
    constraints. With discipline, this mostly works, but I know better: check,
    just to make sure.

    This routine checks all the data types and assumptions on entries.

    Args:
      entries: A list of directives.
      options_map: An options map; only the
        'allow_deprecated_none_for_tags_and_links' option is read, to decide
        whether None is tolerated for the 'tags' and 'links' attributes.
    Returns:
      A list of new errors, if any were found.
    """
    errors = []
    for entry in entries:
        try:
            # sanity_check_types() raises AssertionError on type violations;
            # convert each failure into a ValidationError for reporting.
            data.sanity_check_types(
                entry, options_map["allow_deprecated_none_for_tags_and_links"])
        except AssertionError as exc:
            errors.append(
                ValidationError(entry.meta,
                                "Invalid data types: {}".format(exc),
                                entry))
    return errors
def extract_from_file(importer, filename, existing_entries):
    """Import entries from a document.

    Args:
      importer: The importer instance to handle the document.
      filename: Filesystem path to the document.
      existing_entries: Existing entries.
    Returns:
      The list of imported entries.
    """
    memo = cache.get_file(filename)

    # Legacy support: older importers do not accept 'existing_entries', so
    # forward it only when extract()'s signature declares that parameter.
    extract_kwargs = {}
    if 'existing_entries' in inspect.signature(importer.extract).parameters:
        extract_kwargs['existing_entries'] = existing_entries

    extracted = importer.extract(memo, **extract_kwargs)
    if extracted is None:
        extracted = []

    # Sort defensively rather than trusting the importer's output order.
    extracted.sort(key=data.entry_sortkey)

    # Validate the data types of each directive.
    for directive in extracted:
        data.sanity_check_types(directive)
    return extracted
def extract_from_file(filename, importer, existing_entries=None, min_date=None,
                      allow_none_for_tags_and_links=False):
    """Import entries from file 'filename' with the given matches,

    Also cross-check against a list of provided 'existing_entries' entries,
    de-duplicating and possibly auto-categorizing.

    Args:
      filename: The name of the file to import.
      importer: An importer object that matched the file.
      existing_entries: A list of existing entries parsed from a ledger, used to
        detect duplicates and automatically complete or categorize
        transactions.
      min_date: A date before which entries should be ignored. This is useful
        when an account has a valid check/assert; we could just ignore whatever
        comes before, if desired.
      allow_none_for_tags_and_links: A boolean, whether to allow plugins to
        generate Transaction objects with None as value for the 'tags' or
        'links' attributes.
    Returns:
      A list of new imported entries.
    Raises:
      Exception: If there is an error in the importer's extract() method.
        Deliberately propagated: rendering the exception details makes
        developing importers much easier.
    """
    memo = cache.get_file(filename)

    # Legacy support: forward 'existing_entries' only when the importer's
    # extract() signature accepts it.
    extract_kwargs = {}
    if 'existing_entries' in inspect.signature(importer.extract).parameters:
        extract_kwargs['existing_entries'] = existing_entries

    extracted = importer.extract(memo, **extract_kwargs)
    if not extracted:
        return []

    # Make sure the newly imported entries are sorted; don't trust the importer.
    extracted.sort(key=data.entry_sortkey)

    # Ensure that the entries are typed correctly.
    for directive in extracted:
        data.sanity_check_types(directive, allow_none_for_tags_and_links)

    # Drop the (sorted) prefix of entries dated before 'min_date'.
    if min_date:
        extracted = list(itertools.dropwhile(
            lambda directive: directive.date < min_date, extracted))

    return extracted
def validate_entry(entry):
    """Return True if 'entry' passes the type and account-name checks.

    Missing 'filename'/'lineno' metadata is tolerated; any other type
    violation is printed and fails validation. For transactions, every
    posting's account name is additionally verified.
    """
    valid = True

    # Use builtin data type checks.
    tolerated = ('Missing filename in metadata', 'Missing lineno in metadata')
    try:
        sanity_check_types(entry, allow_none_for_tags_and_links=True)
    except AssertionError as exc:
        if str(exc) not in tolerated:
            print(exc)
            valid = False

    # Test account names for correctness.
    if isinstance(entry, Transaction):
        for posting in entry.postings:
            valid = valid and is_valid_account(posting.account)

    return valid
def test_sanity_check_types(self):
    """A well-formed transaction passes; malformed inputs raise AssertionError."""
    entry = self.create_empty_transaction()
    data.sanity_check_types(entry)

    # Objects which are not directives at all are rejected.
    for non_directive in ('a string', META, datetime.date.today()):
        with self.assertRaises(AssertionError):
            data.sanity_check_types(non_directive)

    # Each attribute set to a value of the wrong type triggers an assertion.
    for attribute, bad_value in [('flag', 1),
                                 ('payee', 1),
                                 ('narration', 1),
                                 ('tags', {}),
                                 ('links', {}),
                                 ('postings', None)]:
        with self.assertRaises(AssertionError):
            data.sanity_check_types(entry._replace(**{attribute: bad_value}))
def extract_from_file(filename, importer, existing_entries=None, min_date=None,
                      allow_none_for_tags_and_links=False):
    """Import entries from file 'filename' with the given matches,

    Also cross-check against a list of provided 'existing_entries' entries,
    de-duplicating and possibly auto-categorizing.

    Args:
      filename: The name of the file to import.
      importer: An importer object that matched the file.
      existing_entries: A list of existing entries parsed from a ledger, used to
        detect duplicates and automatically complete or categorize
        transactions.
      min_date: A date before which entries should be ignored. This is useful
        when an account has a valid check/assert; we could just ignore whatever
        comes before, if desired.
      allow_none_for_tags_and_links: A boolean, whether to allow plugins to
        generate Transaction objects with None as value for the 'tags' or
        'links' attributes.
    Returns:
      A pair of (new imported entries, subset of those identified as possible
      duplicates).
    Raises:
      Exception: If there is an error in the importer's extract() method.
        Deliberately propagated: rendering the exception details makes
        developing importers much easier.
    """
    memo = cache.get_file(filename)

    extracted = importer.extract(memo)
    if not extracted:
        return [], []

    # Never trust the importer's ordering; sort the new entries ourselves.
    extracted.sort(key=data.entry_sortkey)

    # Ensure that the entries are typed correctly.
    for directive in extracted:
        data.sanity_check_types(directive, allow_none_for_tags_and_links)

    # Drop the (sorted) prefix of entries dated before 'min_date'.
    if min_date:
        extracted = list(itertools.dropwhile(
            lambda directive: directive.date < min_date, extracted))

    # Cross-check against the existing ledger and mark potential duplicates
    # with a metadata flag.
    duplicates = []
    if existing_entries is not None:
        pairs = similar.find_similar_entries(extracted, existing_entries)
        flagged_ids = set(id(directive) for directive, _ in pairs)
        rewritten = []
        for directive in extracted:
            if id(directive) in flagged_ids:
                marked_meta = directive.meta.copy()
                marked_meta[DUPLICATE_META] = True
                directive = directive._replace(meta=marked_meta)
                duplicates.append(directive)
            rewritten.append(directive)
        extracted = rewritten

    return extracted, duplicates
def check_before_add(self, transac):
    """Run the data-type sanity checks on 'transac' before it is added.

    Failures are logged through the instance logger (with the assertion
    traceback) rather than raised, preserving the original best-effort
    behavior.

    Args:
      transac: A directive to check.
    Returns:
      True if the transaction passed the sanity checks, False otherwise.
      (Previously the result was discarded; returning it lets callers gate
      the add on the outcome, and is backward-compatible since the old
      implicit return value was None.)
    """
    try:
        data.sanity_check_types(transac)
    except AssertionError:
        self.logger.exception("Transaction %s not valid", transac)
        return False
    return True