Пример #1
0
def extract_from_file(importer, filename, existing_entries):
    """Import entries from a document.

    Args:
      importer: The importer instance to handle the document.
      filename: Filesystem path to the document.
      existing_entries: Existing entries.

    Returns:
      The list of imported entries.
    """
    extracted = importer.extract(filename, existing_entries)
    if not extracted:
        return []

    # Remove entries that duplicate ones already present in the ledger.
    deduped = importer.deduplicate(extracted, existing_entries)

    # Let the importer impose its preferred ordering on the new entries.
    importer.sort(deduped)

    # Verify the attribute types of every imported directive.
    for directive in deduped:
        data.sanity_check_types(directive)

    return deduped
Пример #2
0
def validate_data_types(entries, options_map):
    """Check that all the data types of the attributes of entries are as expected.

    Users are provided with a means to filter the list of entries. They're able to
    write code that manipulates those tuple objects without any type constraints.
    With discipline, this mostly works, but I know better: check, just to make sure.
    This routine checks all the data types and assumptions on entries.

    Args:
      entries: A list of directives.
      options_map: An options map; its "allow_deprecated_none_for_tags_and_links"
        option controls whether None is tolerated for the 'tags' and 'links'
        attributes of transactions.
    Returns:
      A list of new errors, if any were found.
    """
    errors = []
    for entry in entries:
        try:
            # sanity_check_types signals a type violation by raising
            # AssertionError; convert that into a collectable error object.
            data.sanity_check_types(
                entry, options_map["allow_deprecated_none_for_tags_and_links"])
        except AssertionError as exc:
            errors.append(
                ValidationError(entry.meta,
                                "Invalid data types: {}".format(exc),
                                entry))
    return errors
Пример #3
0
def extract_from_file(importer, filename, existing_entries):
    """Import entries from a document.

    Args:
      importer: The importer instance to handle the document.
      filename: Filesystem path to the document.
      existing_entries: Existing entries.

    Returns:
      The list of imported entries.
    """
    memo = cache.get_file(filename)

    # Legacy importers may not declare an 'existing_entries' parameter; only
    # forward it when extract()'s signature accepts it.
    extract_params = inspect.signature(importer.extract).parameters
    if 'existing_entries' in extract_params:
        entries = importer.extract(memo, existing_entries=existing_entries)
    else:
        entries = importer.extract(memo)
    entries = [] if entries is None else entries

    # Impose a deterministic order; the importer is not trusted to sort.
    entries.sort(key=data.entry_sortkey)

    # Verify the attribute types of every imported directive.
    for entry in entries:
        data.sanity_check_types(entry)

    return entries
Пример #4
0
def extract_from_file(filename,
                      importer,
                      existing_entries=None,
                      min_date=None,
                      allow_none_for_tags_and_links=False):
    """Import entries from file 'filename' with the given matches,

    Also cross-check against a list of provided 'existing_entries' entries,
    de-duplicating and possibly auto-categorizing.

    Args:
      filename: The name of the file to import.
      importer: An importer object that matched the file.
      existing_entries: A list of existing entries parsed from a ledger, used to
        detect duplicates and automatically complete or categorize transactions.
      min_date: A date before which entries should be ignored. This is useful
        when an account has a valid check/assert; we could just ignore whatever
        comes before, if desired.
      allow_none_for_tags_and_links: A boolean, whether to allow plugins to
        generate Transaction objects with None as value for the 'tags' or 'links'
        attributes.
    Returns:
      A list of new imported entries.
    Raises:
      Exception: If there is an error in the importer's extract() method.
    """
    # Wrap the input file for the importer.
    file = cache.get_file(filename)

    # Note: exceptions from extract() propagate deliberately; the full
    # traceback makes developing importers much easier.
    #
    # Note: legacy importers may not accept 'existing_entries', so only
    # forward it when the signature declares that parameter.
    if 'existing_entries' in inspect.signature(importer.extract).parameters:
        new_entries = importer.extract(file, existing_entries=existing_entries)
    else:
        new_entries = importer.extract(file)
    if not new_entries:
        return []

    # Impose a deterministic order; do not trust the importer to have sorted.
    new_entries.sort(key=data.entry_sortkey)

    # Verify the attribute types of each extracted directive.
    for entry in new_entries:
        data.sanity_check_types(entry, allow_none_for_tags_and_links)

    # Drop the leading run of entries dated before 'min_date'; the list has
    # just been sorted, so dropwhile() suffices.
    if min_date:
        new_entries = list(
            itertools.dropwhile(lambda entry: entry.date < min_date, new_entries))

    return new_entries
Пример #5
0
def validate_entry(entry):
    """Return True if 'entry' passes the builtin type checks and, for
    transactions, if every posting has a well-formed account name."""
    ok = True

    # Run the builtin data-type checks; the two metadata omissions below are
    # expected here and deliberately tolerated.
    try:
        sanity_check_types(entry, allow_none_for_tags_and_links=True)
    except AssertionError as exc:
        message = str(exc)
        if message not in ('Missing filename in metadata',
                           'Missing lineno in metadata'):
            print(exc)
            ok = False

    if isinstance(entry, Transaction):
        # Every posting must reference a valid account name.  The short-circuit
        # mirrors the original: once invalid, later accounts are not checked.
        for posting in entry.postings:
            ok = ok and is_valid_account(posting.account)

    return ok
Пример #6
0
 def test_sanity_check_types(self):
     """A valid transaction passes; every malformed variant must raise."""
     entry = self.create_empty_transaction()
     # A well-formed transaction passes the checks without raising.
     data.sanity_check_types(entry)

     # Objects that are not directives at all are rejected.
     for non_directive in ('a string', META, datetime.date.today()):
         with self.assertRaises(AssertionError):
             data.sanity_check_types(non_directive)

     # Each attribute must carry its expected type; same cases, same order
     # as the individual checks they replace.
     for bad_attrs in ({'flag': 1},
                       {'payee': 1},
                       {'narration': 1},
                       {'tags': {}},
                       {'links': {}},
                       {'postings': None}):
         with self.assertRaises(AssertionError):
             data.sanity_check_types(entry._replace(**bad_attrs))
Пример #7
0
def extract_from_file(filename, importer,
                      existing_entries=None,
                      min_date=None,
                      allow_none_for_tags_and_links=False):
    """Import entries from file 'filename' with the given matches,

    Also cross-check against a list of provided 'existing_entries' entries,
    de-duplicating and possibly auto-categorizing.

    Args:
      filename: The name of the file to import.
      importer: An importer object that matched the file.
      existing_entries: A list of existing entries parsed from a ledger, used to
        detect duplicates and automatically complete or categorize transactions.
      min_date: A date before which entries should be ignored. This is useful
        when an account has a valid check/assert; we could just ignore whatever
        comes before, if desired.
      allow_none_for_tags_and_links: A boolean, whether to allow plugins to
        generate Transaction objects with None as value for the 'tags' or 'links'
        attributes.
    Returns:
      A list of new imported entries and a subset of these which have been
      identified as possible duplicates.
    Raises:
      Exception: If there is an error in the importer's extract() method.
    """
    # Wrap the input file for the importer.
    file = cache.get_file(filename)

    # Exceptions raised by extract() propagate on purpose: the full traceback
    # is invaluable while developing an importer.
    new_entries = importer.extract(file)
    if not new_entries:
        return [], []

    # Impose a deterministic order; do not trust the importer to have sorted.
    new_entries.sort(key=data.entry_sortkey)

    # Verify the attribute types of each extracted directive.
    for entry in new_entries:
        data.sanity_check_types(entry, allow_none_for_tags_and_links)

    # Drop the leading run of entries dated before 'min_date'; the list has
    # just been sorted, so dropwhile() suffices.
    if min_date:
        new_entries = list(itertools.dropwhile(lambda entry: entry.date < min_date,
                                               new_entries))

    # Detect which of the new entries look like entries already in the ledger.
    duplicate_entries = []
    if existing_entries is not None:
        pairs = similar.find_similar_entries(new_entries, existing_entries)
        duplicate_ids = set(id(entry) for entry, _ in pairs)

        # Rebuild the list, tagging each duplicate with a metadata marker and
        # carrying the non-duplicates through untouched.
        marked_entries = []
        for entry in new_entries:
            if id(entry) in duplicate_ids:
                meta = entry.meta.copy()
                meta[DUPLICATE_META] = True
                entry = entry._replace(meta=meta)
                duplicate_entries.append(entry)
            marked_entries.append(entry)
        new_entries = marked_entries

    return new_entries, duplicate_entries
Пример #8
0
 def check_before_add(self, transac):
     """Validate the data types of 'transac' before it is added.

     Args:
       transac: The directive to validate.

     A failed check is logged but not re-raised, so the caller proceeds
     regardless of the outcome.
     """
     try:
         data.sanity_check_types(transac)
     except AssertionError:
         # logger.exception records the traceback of the failed assertion;
         # validation failures are deliberately non-fatal here.
         self.logger.exception("Transaction %s not valid", transac)