def test_zero_balance_produces_assertion(self, filename):
    # pylint: disable=line-too-long
    """\
Details,Posting Date,"Description",Amount,Type,Balance,Check or Slip #,
DEBIT,3/18/2016,"Payment to Chafe card ending in 1234 03/18",-2680.89,ACCT_XFER,0,,
"""
    # Map CSV header names onto the importer's column roles.
    column_map = {
        Col.DATE: 'Posting Date',
        Col.NARRATION1: 'Description',
        Col.NARRATION2: 'Check or Slip #',
        Col.AMOUNT: 'Amount',
        Col.BALANCE: 'Balance',
        Col.DRCR: 'Details',
    }
    header = ('Details,Posting Date,"Description",Amount,'
              'Type,Balance,Check or Slip #,')
    importer = csv.Importer(column_map, 'Assets:Bank', 'USD', header,
                            institution='chafe')
    extracted = importer.extract(cache.get_file(filename))
    # A reported balance of zero must still produce a balance assertion.
    self.assertEqualEntries(r"""

      2016-03-18 * "Payment to Chafe card ending in 1234 03/18"
        Assets:Bank  -2680.89 USD

      2016-03-19 balance Assets:Bank  0 USD

    """, extracted)
def test_mixin(self, entries, errors, _):
    """
    2014-01-01 open Assets:US:BofA:Checking  USD

    2014-05-19 * "Verizon Wireless" ""
      Assets:US:BofA:Checking  -44.34 USD

    2014-05-23 * "Wine-Tarner Cable" ""
      Assets:US:BofA:Checking  -80.17 USD

    2014-06-04 * "BANK FEES" "Monthly bank fee"
      Assets:US:BofA:Checking  -4.00 USD

    2014-06-04 * "RiverBank Properties" "Paying the rent"
      Assets:US:BofA:Checking  -2400.00 USD

    2014-06-08 * "EDISON POWER" ""
      Assets:US:BofA:Checking  -65.00 USD
    """
    def filter_last_two(entries):
        return entries[-2:]

    memo = cache.get_file(path.join(tempfile.gettempdir(), 'test'))

    # With no filters, extraction returns the entries untouched.
    no_filter_importer = Importer(entries, 'Assets:US:BofA:Checking', filters=[])
    self.assertEqualEntries(no_filter_importer.extract(memo), entries)

    # With a filter, only the entries it passes through survive.
    filtering_importer = Importer(entries, 'Assets:US:BofA:Checking',
                                  filters=[filter_last_two])
    self.assertEqualEntries(filtering_importer.extract(memo), entries[-2:])
def test_importer(self, entries, errors, _):
    """
    2014-01-01 open Assets:US:BofA:Checking  USD

    2014-05-19 * "Verizon Wireless" ""
      Assets:US:BofA:Checking  -44.34 USD

    2014-05-23 * "Wine-Tarner Cable" ""
      Assets:US:BofA:Checking  -80.17 USD

    2014-06-04 * "BANK FEES" "Monthly bank fee"
      Assets:US:BofA:Checking  -4.00 USD

    2014-06-04 * "RiverBank Properties" "Paying the rent"
      Assets:US:BofA:Checking  -2400.00 USD

    2014-06-08 * "EDISON POWER" ""
      Assets:US:BofA:Checking  -65.00 USD
    """
    account = 'Assets:US:BofA:Checking'
    file = cache.get_file(path.join(tempfile.gettempdir(), 'test'))
    importer = testing.ConstImporter(entries, account)
    assert importer.file_account(file) == account
    # Fixed: identity comparison with None ('is', not '==', per PEP 8).
    assert importer.file_name(file) is None
    assert importer.identify(file)
    # file_date() reports the date of the last entry in the docstring ledger.
    assert importer.file_date(file) == datetime.date(2014, 6, 8)
    extracted_entries = importer.extract(file)
    self.assertEqualEntries(extracted_entries, entries)
def test_expect_file_date(self, filename, msg):
    """Compute the imported file date and compare to an expected output.

    If an expected file (as <filename>.file_date) is not present, we issue a
    warning. Missing expected files can be written out by removing them
    before running the tests.

    Args:
      filename: A string, the name of the file to import using self.importer.
    Raises:
      AssertionError: If the contents differ from the expected file.
    """
    # Import the date.
    file = cache.get_file(filename)
    date = self.importer.file_date(file)
    if date is None:
        self.fail("No date produced from {}".format(file.name))
    expect_filename = '{}.file_date'.format(file.name)
    if path.exists(expect_filename) and path.getsize(expect_filename) > 0:
        # Fixed: close the expected-output file deterministically instead of
        # leaking the handle from a bare open(...).read().
        with open(expect_filename, encoding='utf-8') as infile:
            expect_date_str = infile.read().strip()
        expect_date = datetime.datetime.strptime(expect_date_str, '%Y-%m-%d').date()
        self.assertEqual(expect_date, date)
    else:
        # Write out the expected file for review.
        with open(expect_filename, 'w', encoding='utf-8') as outfile:
            print(date.strftime('%Y-%m-%d'), file=outfile)
        self.skipTest("Expected file not present; generating '{}'".format(
            expect_filename))
def test_expect_file_name(self, filename, msg):
    """Compute the imported file name and compare to an expected output.

    If an expected file (as <filename>.file_name) is not present, we issue a
    warning. Missing expected files can be written out by removing them
    before running the tests.

    Args:
      filename: A string, the name of the file to import using self.importer.
    Raises:
      AssertionError: If the contents differ from the expected file.
    """
    # Import the file name.
    file = cache.get_file(filename)
    generated_basename = self.importer.file_name(file)
    if generated_basename is None:
        self.fail("No filename produced from {}".format(filename))

    # Check that we're getting a non-null relative simple filename.
    self.assertFalse(path.isabs(generated_basename), generated_basename)
    self.assertNotRegex(generated_basename, os.sep)

    expect_filename = '{}.file_name'.format(file.name)
    if path.exists(expect_filename) and path.getsize(expect_filename) > 0:
        # Fixed: close the expected-output file deterministically (was a bare
        # open(...).read() leak) and stop reusing 'expect_filename' for the
        # file's *contents*, which shadowed the path variable.
        with open(expect_filename, encoding='utf-8') as infile:
            expected_name = infile.read().strip()
        self.assertEqual(expected_name, generated_basename)
    else:
        # Write out the expected file for review.  Renamed the handle from
        # 'file' to 'outfile' to avoid shadowing the _FileMemo above.
        with open(expect_filename, 'w', encoding='utf-8') as outfile:
            print(generated_basename, file=outfile)
        self.skipTest("Expected file not present; generating '{}'".format(
            expect_filename))
def test_categorizer_two_arguments(self, filename):
    """\
Date,Amount,Payee,Description
6/2/2020,30.00,"Payee here","Description"
7/2/2020,-25.00,"Supermarket","Groceries"
"""
    memo = cache.get_file(filename)

    def categorizer(txn, row):
        # Two-argument form: the categorizer also receives the raw CSV row.
        txn = txn._replace(payee=row[2])
        txn.meta['source'] = pformat(row)
        return txn

    column_map = {
        Col.DATE: 'Date',
        Col.NARRATION: 'Description',
        Col.AMOUNT: 'Amount',
    }
    importer = csv.Importer(column_map, 'Assets:Bank', 'EUR',
                            ('Date,Amount,Payee,Description'),
                            categorizer=categorizer,
                            institution='foobar')
    extracted = importer.extract(memo)
    self.assertEqualEntries(r"""

      2020-06-02 * "Payee here" "Description"
        source: "['6/2/2020', '30.00', 'Supermarket', 'Groceries']"
        Assets:Bank  30.00 EUR

      2020-07-02 * "Supermarket" "Groceries"
        source: "['7/2/2020', '-25.00', 'Supermarket', 'Groceries']"
        Assets:Bank  -25.00 EUR

    """, extracted)
def test_date_formats(self, filename):
    """\
Posting,Description,Amount
11/7/2016,A,2
12/7/2016,B,3
13/7/2016,C,4
"""
    memo = cache.get_file(filename)
    # 'dayfirst' makes dateutil read 11/7 as July 11th, not November 7th.
    importer = csv.Importer(
        {Col.DATE: 'Posting',
         Col.NARRATION: 'Description',
         Col.AMOUNT: 'Amount'},
        'Assets:Bank', 'EUR', [],
        dateutil_kwds={'dayfirst': True})
    extracted = importer.extract(memo)
    self.assertEqualEntries(r"""

      2016-07-11 * "A"
        Assets:Bank  2 EUR

      2016-07-12 * "B"
        Assets:Bank  3 EUR

      2016-07-13 * "C"
        Assets:Bank  4 EUR

    """, extracted)
def test_identify(filename, expected):
    """Check that the filing importer's regexp matches exactly the expected files."""
    importer = filing.Importer('Assets:Testing',
                               filename_regexp=r'test-\d{2}\.(pdf|csv)')
    memo = cache.get_file(path.join(DATADIR, filename))
    matched = importer.identify(memo)
    if expected:
        assert matched
    else:
        assert not matched
def get_info(raw_entry: Directive) -> dict:
    """Return source-file info (mimetype, filename, line) for a directive."""
    filename = raw_entry.meta["filename"]
    # Beancount sources are always plain text; otherwise ask the file cache.
    if filename.endswith(".beancount"):
        ftype = "text/plain"
    else:
        ftype = get_file(filename).mimetype()
    return {
        "type": ftype,
        "filename": filename,
        "line": raw_entry.meta["lineno"],
    }
def test_basics():
    """Exercise the filing Importer's trivial protocol methods.

    With no basename, file_name() yields nothing; with one, the file is
    renamed to '<basename>.<ext>'.  extract() always returns no entries.
    """
    account = 'Assets:Checking'
    importer = filing.Importer(
        account, basename=None, filename_regexp='test.pdf')
    file = cache.get_file(path.join(DATADIR, 'test.pdf'))
    assert importer.name() == 'beansoup.importers.filing.Importer: "{}"'.format(account)
    assert importer.file_account(file) == account
    # Fixed: identity comparison with None ('is', not '==', per PEP 8).
    assert importer.file_name(file) is None
    assert importer.extract(file) == []

    account = 'Liabilities:Visa'
    importer = filing.Importer(
        account, basename='filed', filename_regexp='test.pdf')
    file = cache.get_file(path.join(DATADIR, 'test.pdf'))
    assert importer.name() == 'beansoup.importers.filing.Importer: "{}"'.format(account)
    assert importer.file_account(file) == account
    assert importer.file_name(file) == 'filed.pdf'
    assert importer.extract(file) == []
def test_expect_identify(self, filename, msg):
    """Attempt to identify a file and expect results to be true.

    Args:
      filename: A string, the name of the file to import using self.importer.
    Raises:
      AssertionError: If the file is not identified by the importer.
    """
    memo = cache.get_file(filename)
    self.assertTrue(self.importer.identify(memo))
def extract_from_file(filename, importer, existing_entries=None, min_date=None,
                      allow_none_for_tags_and_links=False):
    """Import entries from file 'filename' with the given matches,

    Also cross-check against a list of provided 'existing_entries' entries,
    de-duplicating and possibly auto-categorizing.

    Args:
      filename: The name of the file to import.
      importer: An importer object that matched the file.
      existing_entries: A list of existing entries parsed from a ledger, used to
        detect duplicates and automatically complete or categorize transactions.
      min_date: A date before which entries should be ignored. This is useful
        when an account has a valid check/assert; we could just ignore whatever
        comes before, if desired.
      allow_none_for_tags_and_links: A boolean, whether to allow plugins to
        generate Transaction objects with None as value for the 'tags' or 'links'
        attributes.
    Returns:
      A list of new imported entries.
    Raises:
      Exception: If there is an error in the importer's extract() method.
    """
    # Note: Let the exception through on purpose. This makes developing
    # importers much easier by rendering the details of the exceptions.
    file = cache.get_file(filename)

    # Legacy support: only pass 'existing_entries' to importers whose
    # extract() signature accepts it.
    extract_params = inspect.signature(importer.extract).parameters
    if 'existing_entries' in extract_params:
        new_entries = importer.extract(file, existing_entries=existing_entries)
    else:
        new_entries = importer.extract(file)
    if not new_entries:
        return []

    # Make sure the newly imported entries are sorted; don't trust the importer.
    new_entries.sort(key=data.entry_sortkey)

    # Ensure that the entries are typed correctly.
    for entry in new_entries:
        data.sanity_check_types(entry, allow_none_for_tags_and_links)

    # Filter out entries with dates before 'min_date'.
    if min_date:
        new_entries = list(
            itertools.dropwhile(lambda x: x.date < min_date, new_entries))

    return new_entries
def test_column_types(self, filename):
    # pylint: disable=line-too-long
    """\
Details,Posting Date,"Description",Amount,Type,Balance,Check or Slip #,
DEBIT,3/18/2016,"Payment to Chafe card ending in 1234 03/18",-2680.89,ACCT_XFER,3409.86,,
CREDIT,3/15/2016,"EMPLOYER INC DIRECT DEP PPD ID: 1111111111",2590.73,ACH_CREDIT,6090.75,,
DEBIT,3/14/2016,"INVESTMENT SEC TRANSFER A5144608 WEB ID: 1234456789",-150.00,ACH_DEBIT,3500.02,,
DEBIT,3/6/2016,"ATM WITHDRAWAL 001234 03/8888 DELANC",-60.00,ATM,3650.02,,
CREDIT,3/5/2016,"CA STATE NYSTTAXRFD PPD ID: 1111111111",110.00,ACH_CREDIT,3710.02,,
DEBIT,3/4/2016,"BOOGLE WALLET US000NEI9T WEB ID: C234567890",-1300.00,ACH_DEBIT,3600.02,,
"""
    # Map CSV header names onto the importer's column roles.
    column_map = {
        Col.DATE: 'Posting Date',
        Col.NARRATION1: 'Description',
        Col.NARRATION2: 'Check or Slip #',
        Col.AMOUNT: 'Amount',
        Col.BALANCE: 'Balance',
        Col.DRCR: 'Details',
    }
    header = ('Details,Posting Date,"Description",Amount,'
              'Type,Balance,Check or Slip #,')
    importer = csv.Importer(column_map, 'Assets:Bank', 'USD', header,
                            institution='chafe')
    extracted = importer.extract(cache.get_file(filename))
    # The balance assertion is generated from the most recent row's balance,
    # dated the day after the last transaction.
    self.assertEqualEntries(r"""

      2016-03-18 * "Payment to Chafe card ending in 1234 03/18"
        Assets:Bank  -2680.89 USD

      2016-03-15 * "EMPLOYER INC DIRECT DEP PPD ID: 1111111111"
        Assets:Bank  2590.73 USD

      2016-03-14 * "INVESTMENT SEC TRANSFER A5144608 WEB ID: 1234456789"
        Assets:Bank  -150.00 USD

      2016-03-06 * "ATM WITHDRAWAL 001234 03/8888 DELANC"
        Assets:Bank  -60.00 USD

      2016-03-05 * "CA STATE NYSTTAXRFD PPD ID: 1111111111"
        Assets:Bank  110.00 USD

      2016-03-04 * "BOOGLE WALLET US000NEI9T WEB ID: C234567890"
        Assets:Bank  -1300.00 USD

      2016-03-19 balance Assets:Bank  3409.86 USD

    """, extracted)
def test_basics():
    """Exercise the filing Importer's trivial protocol methods.

    With no basename, file_name() yields nothing; with one, the file is
    renamed to '<basename>.<ext>'.  extract() always returns no entries.
    """
    account = 'Assets:Checking'
    importer = filing.Importer(account, basename=None, filename_regexp='test.pdf')
    file = cache.get_file(path.join(DATADIR, 'test.pdf'))
    assert importer.name(
    ) == 'beansoup.importers.filing.Importer: "{}"'.format(account)
    assert importer.file_account(file) == account
    # Fixed: identity comparison with None ('is', not '==', per PEP 8).
    assert importer.file_name(file) is None
    assert importer.extract(file) == []

    account = 'Liabilities:Visa'
    importer = filing.Importer(account, basename='filed', filename_regexp='test.pdf')
    file = cache.get_file(path.join(DATADIR, 'test.pdf'))
    assert importer.name(
    ) == 'beansoup.importers.filing.Importer: "{}"'.format(account)
    assert importer.file_account(file) == account
    assert importer.file_name(file) == 'filed.pdf'
    assert importer.extract(file) == []
def file_date(filename, importer):
    """Date for the given file.

    Args:
      filename: The full path to a file.
      importer: An importer that matched the file.
    Returns:
      The date or the exception message if one occurs.
    """
    try:
        memo = cache.get_file(filename)
        return importer.file_date(memo)
    except Exception as exception:  # pylint: disable=broad-except
        # Deliberately broad: any importer failure is reported as its message.
        return str(exception)
def __init__(self, directory: str, account: str, importer: ImporterProtocol,
             **kwargs) -> None:
    """Collect the files under 'directory' that 'importer' identifies.

    Args:
      directory: Root directory to scan recursively ('~' is expanded).
      account: The account associated with the collected files.
      importer: The importer used to identify candidate files.
      **kwargs: Forwarded to the superclass initializer.
    """
    super().__init__(**kwargs)
    self.directory = os.path.expanduser(directory)
    self.importer = importer
    self.account = account
    # get _FileMemo object for each file.
    # Fixed: glob over the *expanded* self.directory — globbing the raw
    # 'directory' argument silently found nothing for '~'-style paths.
    files = [
        get_file(os.path.abspath(f)) for f in filter(
            os.path.isfile,
            glob(os.path.join(self.directory, '**', '*'), recursive=True))
    ]
    # filter the valid files for this importer
    self.files = [f for f in files if self.importer.identify(f)]
def find_imports(importer_config, files_or_directories, logfile=None):
    """Given an importer configuration, search for files that can be imported in the
    list of files or directories, run the signature checks on them and return a list
    of (filename, importers), where 'importers' is a list of importers that matched
    the file.

    Args:
      importer_config: a list of importer instances that define the config.
      files_or_directories: a list of files of directories to walk recursively and
                            hunt for files to import.
      logfile: A file object to write log entries to, or None, in which case no log
        is written out.
    Yields:
      Triples of filename found, textified contents of the file, and list of
      importers matching this file.
    """
    # Iterate over all files found; accumulate the entries by identification.
    for filename in file_utils.find_files(files_or_directories):
        if logfile is not None:
            logfile.write(SECTION.format(filename))
            logfile.write('\n')

        # Skip files that are simply too large.
        size = path.getsize(filename)
        if size > FILE_TOO_LARGE_THRESHOLD:
            logging.warning(
                "File too large: '{}' ({} bytes); skipping.".format(
                    filename, size))
            continue

        # For each of the sources the user has declared, identify which
        # match the text.
        file = cache.get_file(filename)
        matching_importers = []
        for importer in importer_config:
            try:
                if importer.identify(file):
                    matching_importers.append(importer)
            except Exception as exc:
                # An importer must not abort the scan; log and keep going.
                logging.error(
                    "Importer %s.identify() raised an unexpected error: %s",
                    importer.name(), exc)

        yield (filename, matching_importers)
def file_import_info(filename: str, importer) -> FileImportInfo:
    """Generate info about a file with an importer."""
    # pylint: disable=broad-except
    # Each importer hook is tried independently; any failure falls back to
    # a sensible default rather than aborting the whole report.
    file = cache.get_file(filename)

    try:
        account = importer.file_account(file)
    except Exception:
        account = ""

    try:
        date = importer.file_date(file)
    except Exception:
        date = datetime.date.today()

    try:
        name = importer.file_name(file)
    except Exception:
        name = path.basename(filename)

    return FileImportInfo(importer.name(), account, date, name)
def identify(importers_list, files_or_directories):
    """Run the identification loop.

    Args:
      importers_list: A list of importer instances.
      files_or_directories: A list of strings, files or directories.
    """
    logfile = sys.stdout
    found = find_imports(importers_list, files_or_directories, logfile=logfile)
    for filename, importers in found:
        file = cache.get_file(filename)
        for importer in importers:
            name = importer.name() if importer else '-'
            logfile.write('Importer:    {}\n'.format(name))
            logfile.write('Account:     {}\n'.format(importer.file_account(file)))
            logfile.write('\n')
def test_pdf_filing_importer(first_day, filename, expected_date):
    """Check identification, naming, and dating of the Amex PDF filing importer."""
    account = 'Liabilities:Amex'
    importer = amex.PdfFilingImporter(account, basename='amex',
                                      first_day=first_day)
    memo = cache.get_file(path.join(DATADIR, filename))
    assert importer.name(
    ) == 'beansoup.importers.amex.PdfFilingImporter: "{}"'.format(account)
    assert importer.file_account(memo) == account
    assert importer.file_name(memo) == 'amex.pdf'
    assert importer.extract(memo) == []
    # Matching files are both identified and dated; others are neither.
    if expected_date:
        assert importer.identify(memo)
        assert importer.file_date(memo) == expected_date
    else:
        assert not importer.identify(memo)
        assert not importer.file_date(memo)
def test_importer_against_liability(self, filename):
    """
    12/06/2015,SKYPE 123456789,14.00,,97.62
    12/07/2015,STM-LAURIER MONTREAL,22.50,,120.12
    12/13/2015,PAYMENT - THANK YOU,,97.62,22.50
    12/14/2015,RESTAURANT PHAYA THAI MONTREAL,40.00,,62.50
    12/16/2015,STM-CHARLEVOIX MONTREAL,45.00,,107.50
    """
    memo = cache.get_file(filename)
    account = 'Liabilities:TD:Visa'
    importer = td.Importer(account, 'CAD', 'td-visa',
                           first_day=4,
                           filename_regexp=path.basename(filename))
    assert importer.file_account(memo) == account
    assert importer.file_name(memo) == 'td-visa.csv'
    assert importer.identify(memo)
    assert importer.file_date(memo) == datetime.date(2016, 1, 3)
    # For a liability, debits are negated and the final balance asserted
    # negative on the first day of the next statement period.
    extracted = importer.extract(memo)
    self.assertEqualEntries("""

      2015-12-06 * "SKYPE 123456789"
        Liabilities:TD:Visa  -14.00 CAD

      2015-12-07 * "STM-LAURIER MONTREAL"
        Liabilities:TD:Visa  -22.50 CAD

      2015-12-13 * "PAYMENT - THANK YOU"
        Liabilities:TD:Visa  97.62 CAD

      2015-12-14 * "RESTAURANT PHAYA THAI MONTREAL"
        Liabilities:TD:Visa  -40.00 CAD

      2015-12-16 * "STM-CHARLEVOIX MONTREAL"
        Liabilities:TD:Visa  -45.00 CAD

      2016-01-04 balance Liabilities:TD:Visa  -107.50 CAD

    """, extracted)
def test_importer_against_liability(self, filename):
    """
    12/06/2015,SKYPE 123456789,14.00,,97.62
    12/07/2015,STM-LAURIER MONTREAL,22.50,,120.12
    12/13/2015,PAYMENT - THANK YOU,,97.62,22.50
    12/14/2015,RESTAURANT PHAYA THAI MONTREAL,40.00,,62.50
    12/16/2015,STM-CHARLEVOIX MONTREAL,45.00,,107.50
    """
    memo = cache.get_file(filename)
    account = 'Liabilities:TD:Visa'
    importer = td.Importer(account, 'CAD', 'td-visa',
                           first_day=4,
                           filename_regexp=path.basename(filename))
    assert importer.file_account(memo) == account
    assert importer.file_name(memo) == 'td-visa.csv'
    assert importer.identify(memo)
    assert importer.file_date(memo) == datetime.date(2016, 1, 3)
    # For a liability, debits are negated and the final balance asserted
    # negative on the first day of the next statement period.
    extracted = importer.extract(memo)
    self.assertEqualEntries("""

      2015-12-06 * "SKYPE 123456789"
        Liabilities:TD:Visa  -14.00 CAD

      2015-12-07 * "STM-LAURIER MONTREAL"
        Liabilities:TD:Visa  -22.50 CAD

      2015-12-13 * "PAYMENT - THANK YOU"
        Liabilities:TD:Visa  97.62 CAD

      2015-12-14 * "RESTAURANT PHAYA THAI MONTREAL"
        Liabilities:TD:Visa  -40.00 CAD

      2015-12-16 * "STM-CHARLEVOIX MONTREAL"
        Liabilities:TD:Visa  -45.00 CAD

      2016-01-04 balance Liabilities:TD:Visa  -107.50 CAD

    """, extracted)
def test_explict_encoding_utf8(self, filename):
    """\
Posting,Description,Amount
2020/08/08,🍏,2
"""
    memo = cache.get_file(filename)
    # An explicit utf-8 encoding must round-trip non-ASCII narrations.
    importer = csv.Importer(
        {Col.DATE: 'Posting',
         Col.NARRATION: 'Description',
         Col.AMOUNT: 'Amount'},
        'Assets:Bank', 'EUR', [],
        encoding='utf-8')
    extracted = importer.extract(memo)
    self.assertEqualEntries(r"""

      2020-08-08 * "🍏"
        Assets:Bank  2 EUR

    """, extracted)
def test_categorizer_one_argument(self, filename):
    """\
Date,Amount,Payee,Description
6/2/2020,30.00,"Payee here","Description"
7/2/2020,-25.00,"Supermarket","Groceries"
"""
    memo = cache.get_file(filename)

    def categorizer(txn):
        # One-argument form: the categorizer sees only the transaction, and
        # balances grocery lines against an expense account.
        if txn.narration == "Groceries":
            txn.postings.append(
                data.Posting("Expenses:Groceries",
                             -txn.postings[0].units, None, None, None, None))
        return txn

    importer = csv.Importer(
        {Col.DATE: 'Date',
         Col.NARRATION: 'Description',
         Col.AMOUNT: 'Amount'},
        'Assets:Bank', 'EUR',
        ('Date,Amount,Payee,Description'),
        categorizer=categorizer,
        institution='foobar')
    extracted = importer.extract(memo)
    self.assertEqualEntries(r"""

      2020-06-02 * "Description"
        Assets:Bank  30.00 EUR

      2020-07-02 * "Groceries"
        Assets:Bank  -25.00 EUR
        Expenses:Groceries  25.00 EUR

    """, extracted)
def test_links(self, filename):
    """\
Date,Description,Amount,Link
2020-07-03,A,2,
2020-07-03,B,3,123
"""
    memo = cache.get_file(filename)
    # A non-empty REFERENCE_ID column becomes a ^link on the transaction.
    importer = csv.Importer(
        {Col.DATE: 'Date',
         Col.NARRATION: 'Description',
         Col.AMOUNT: 'Amount',
         Col.REFERENCE_ID: 'Link'},
        'Assets:Bank', 'EUR', [])
    extracted = importer.extract(memo)
    self.assertEqualEntries(r"""

      2020-07-03 * "A"
        Assets:Bank  2 EUR

      2020-07-03 * "B" ^123
        Assets:Bank  3 EUR

    """, extracted)
def test_tags(self, filename):
    """\
Date,Description,Amount,Tag
2020-07-03,A,2,
2020-07-03,B,3,foo
"""
    memo = cache.get_file(filename)
    # A non-empty TAG column becomes a #tag on the transaction.
    importer = csv.Importer(
        {Col.DATE: 'Date',
         Col.NARRATION: 'Description',
         Col.AMOUNT: 'Amount',
         Col.TAG: 'Tag'},
        'Assets:Bank', 'EUR', [])
    extracted = importer.extract(memo)
    self.assertEqualEntries(r"""

      2020-07-03 * "A"
        Assets:Bank  2 EUR

      2020-07-03 * "B" #foo
        Assets:Bank  3 EUR

    """, extracted)
def __init__(self, directory: str, account: str, importer: ImporterProtocol,
             **kwargs) -> None:
    """Collect the files under 'directory' that 'importer' identifies.

    Args:
      directory: Root directory to scan recursively ('~' is expanded).
      account: The account associated with the collected files.
      importer: The importer used to identify candidate files.
      **kwargs: Forwarded to the superclass initializer.
    """
    super().__init__(**kwargs)
    self.directory = os.path.expanduser(directory)
    self.importer = importer
    self.account = account
    # get _FileMemo object for each file.
    # Fixed: glob over the *expanded* self.directory — globbing the raw
    # 'directory' argument silently found nothing for '~'-style paths.
    files = [
        get_file(f) for f in filter(
            os.path.isfile,
            glob(os.path.join(self.directory, '**', '*'), recursive=True))
    ]

    # filter the valid files for this importer
    # handle the fact that importer.identify could raise an exception
    # instead of returning False, but that ultimately means False for this purpose
    def try_identify(importer, file):
        try:
            return importer.identify(file)
        # Fixed: narrow the bare 'except:' so SystemExit/KeyboardInterrupt
        # are not swallowed; any ordinary failure still means "not ours".
        except Exception:
            return False

    self.files = [f for f in files if try_identify(self.importer, f)]
def get_info(raw_entry: Directive) -> dict:
    """Return source-file info (mimetype, filename, line) for a directive."""
    filename = raw_entry.meta['filename']
    return {
        'type': get_file(filename).mimetype(),
        'filename': filename,
        'line': raw_entry.meta['lineno'],
    }
def test_file_date(first_day, filename_regexp, filename, expected):
    """Check the statement date the filing importer derives from a filename."""
    importer = filing.Importer('Assets:Testing',
                               first_day=first_day,
                               filename_regexp=filename_regexp)
    memo = cache.get_file(path.join(DATADIR, filename))
    assert importer.file_date(memo) == expected
def test_importer_against_asset(self, filename):
    """
    04/01/2016,12-345 Smith RLS,404.38,,5194.21
    04/05/2016,COSTCO #9876543,60.24,,5133.97
    04/05/2016,METRO ETS 2020,34.90,,5099.07
    04/05/2016,POISSONERIE DU,31.78,,5067.29
    04/05/2016,LES DOUCEURS DU,12.39,,5054.90
    04/05/2016,FROMAGERIE ATWA,42.17,,5012.73
    04/07/2016,CHQ#00123-456789,16.00,,4996.73
    04/12/2016,FROMAGERIE ATWA,39.46,,4957.27
    04/12/2016,DAVID'S TEA,27.50,,4929.77
    04/12/2016,PATISSERIE SAIN,32.00,,4897.77
    04/12/2016,GAZ METRO BPY,247.26,,4650.51
    04/14/2016,VIDEOTRON LTEE BPY,237.74,,4412.77
    04/14/2016,TD VISA A1B2C3,74.37,,4338.40
    04/16/2016,FRUITERIE ATWAT,24.65,,4313.75
    04/16/2016,POISSONERIE NOU,64.79,,4248.96
    04/16/2016,CHQ#00125-9876543,160.00,,4088.96
    04/19/2016,CHQ#00124-9876543,900.00,,3188.96
    04/22/2016,AMEX B2C3D4,734.59,,2454.37
    04/23/2016,POISSONERIE DU,57.18,,2397.19
    04/28/2016,BELL CANADA BPY,25.30,,2371.89
    04/29/2016,CINEPLEX #9172,23.00,,2348.89
    04/29/2016,CANADA RIT,,345.24,2694.13
    04/29/2016,12345678900WIRE,,210.32,2904.45
    04/30/2016,BARON SPORTS,21.28,,2883.17
    """
    memo = cache.get_file(filename)
    account = 'Assets:TD:Checking'
    importer = td.Importer(account, 'CAD', 'td-checking',
                           first_day=1,
                           filename_regexp=path.basename(filename))
    assert importer.file_account(memo) == account
    assert importer.file_name(memo) == 'td-checking.csv'
    assert importer.identify(memo)
    assert importer.file_date(memo) == datetime.date(2016, 4, 30)
    # Debit rows become negative postings; the closing balance is asserted
    # on the first day of the next statement period.
    extracted = importer.extract(memo)
    self.assertEqualEntries("""

      2016-04-01 * "12-345 Smith RLS"
        Assets:TD:Checking  -404.38 CAD

      2016-04-05 * "COSTCO #9876543"
        Assets:TD:Checking  -60.24 CAD

      2016-04-05 * "METRO ETS 2020"
        Assets:TD:Checking  -34.90 CAD

      2016-04-05 * "POISSONERIE DU"
        Assets:TD:Checking  -31.78 CAD

      2016-04-05 * "LES DOUCEURS DU"
        Assets:TD:Checking  -12.39 CAD

      2016-04-05 * "FROMAGERIE ATWA"
        Assets:TD:Checking  -42.17 CAD

      2016-04-07 * "CHQ#00123-456789"
        Assets:TD:Checking  -16.00 CAD

      2016-04-12 * "FROMAGERIE ATWA"
        Assets:TD:Checking  -39.46 CAD

      2016-04-12 * "DAVID'S TEA"
        Assets:TD:Checking  -27.50 CAD

      2016-04-12 * "PATISSERIE SAIN"
        Assets:TD:Checking  -32.00 CAD

      2016-04-12 * "GAZ METRO BPY"
        Assets:TD:Checking  -247.26 CAD

      2016-04-14 * "VIDEOTRON LTEE BPY"
        Assets:TD:Checking  -237.74 CAD

      2016-04-14 * "TD VISA A1B2C3"
        Assets:TD:Checking  -74.37 CAD

      2016-04-16 * "FRUITERIE ATWAT"
        Assets:TD:Checking  -24.65 CAD

      2016-04-16 * "POISSONERIE NOU"
        Assets:TD:Checking  -64.79 CAD

      2016-04-16 * "CHQ#00125-9876543"
        Assets:TD:Checking  -160.00 CAD

      2016-04-19 * "CHQ#00124-9876543"
        Assets:TD:Checking  -900.00 CAD

      2016-04-22 * "AMEX B2C3D4"
        Assets:TD:Checking  -734.59 CAD

      2016-04-23 * "POISSONERIE DU"
        Assets:TD:Checking  -57.18 CAD

      2016-04-28 * "BELL CANADA BPY"
        Assets:TD:Checking  -25.30 CAD

      2016-04-29 * "CINEPLEX #9172"
        Assets:TD:Checking  -23.00 CAD

      2016-04-29 * "CANADA RIT"
        Assets:TD:Checking  345.24 CAD

      2016-04-29 * "12345678900WIRE"
        Assets:TD:Checking  210.32 CAD

      2016-04-30 * "BARON SPORTS"
        Assets:TD:Checking  -21.28 CAD

      2016-05-01 balance Assets:TD:Checking  2883.17 CAD

    """, extracted)
def extract_from_file(filename, importer, existing_entries=None, min_date=None,
                      allow_none_for_tags_and_links=False):
    """Import entries from file 'filename' with the given matches,

    Also cross-check against a list of provided 'existing_entries' entries,
    de-duplicating and possibly auto-categorizing.

    Args:
      filename: The name of the file to import.
      importer: An importer object that matched the file.
      existing_entries: A list of existing entries parsed from a ledger, used to
        detect duplicates and automatically complete or categorize transactions.
      min_date: A date before which entries should be ignored. This is useful
        when an account has a valid check/assert; we could just ignore whatever
        comes before, if desired.
      allow_none_for_tags_and_links: A boolean, whether to allow plugins to
        generate Transaction objects with None as value for the 'tags' or 'links'
        attributes.
    Returns:
      A list of new imported entries and a subset of these which have been
      identified as possible duplicates.
    Raises:
      Exception: If there is an error in the importer's extract() method.
    """
    # Extract the entries.
    file = cache.get_file(filename)

    # Note: Let the exception through on purpose. This makes developing
    # importers much easier by rendering the details of the exceptions.
    new_entries = importer.extract(file)
    if not new_entries:
        # Nothing extracted: both the entries and the duplicates are empty.
        return [], []

    # Make sure the newly imported entries are sorted; don't trust the importer.
    new_entries.sort(key=data.entry_sortkey)

    # Ensure that the entries are typed correctly.
    for entry in new_entries:
        data.sanity_check_types(entry, allow_none_for_tags_and_links)

    # Filter out entries with dates before 'min_date'.
    if min_date:
        new_entries = list(itertools.dropwhile(lambda x: x.date < min_date,
                                               new_entries))

    # Find potential matching entries.
    duplicate_entries = []
    if existing_entries is not None:
        duplicate_pairs = similar.find_similar_entries(new_entries,
                                                       existing_entries)
        # Compare by object identity: the same entry object may legitimately
        # compare equal to another while still being a distinct extraction.
        duplicate_set = set(id(entry) for entry, _ in duplicate_pairs)

        # Add a metadata marker to the extracted entries for duplicates.
        # Entries are immutable namedtuples, so marking one means rebuilding
        # it with a copied meta dict and substituting it in the output list.
        mod_entries = []
        for entry in new_entries:
            if id(entry) in duplicate_set:
                marked_meta = entry.meta.copy()
                marked_meta[DUPLICATE_META] = True
                entry = entry._replace(meta=marked_meta)
                duplicate_entries.append(entry)
            mod_entries.append(entry)
        new_entries = mod_entries

    return new_entries, duplicate_entries
def file_one_file(filename, importers, destination, idify=False, logfile=None):
    """Move a single filename using its matched importers.

    Args:
      filename: A string, the name of the downloaded file to be processed.
      importers: A list of importer instances that handle this file.
      destination: A string, the root destination directory where the files are
        to be filed. The files are organized there under a hierarchy mirroring
        that of the chart of accounts.
      idify: A flag, if true, remove whitespace and funky characters in the destination
        filename.
      logfile: A file object to write log entries to, or None, in which case no log
        is written out.
    Returns:
      The full new destination filename on success, and None if there was an error.
    """
    # Create an object to cache all the conversions between the importers
    # and phases and what-not.
    file = cache.get_file(filename)

    # Get the account corresponding to the file.
    file_accounts = []
    for index, importer in enumerate(importers):
        try:
            account_ = importer.file_account(file)
        except Exception as exc:
            account_ = None
            logging.exception("Importer %s.file_account() raised an unexpected error: %s",
                              importer.name(), exc)
        if account_ is not None:
            file_accounts.append(account_)

    file_accounts_set = set(file_accounts)
    if not file_accounts_set:
        logging.error("No account provided by importers: {}".format(
            ", ".join(imp.name() for imp in importers)))
        return None

    if len(file_accounts_set) > 1:
        logging.warning("Ambiguous accounts from many importers: {}".format(
            ', '.join(file_accounts_set)))
        # Note: Don't exit; select the first matching importer's account.

    file_account = file_accounts.pop(0)

    # Given multiple importers, select the first one that was yielded to
    # obtain the date and process the filename.
    importer = importers[0]

    # Compute the date from the last modified time.
    mtime = path.getmtime(filename)
    mtime_date = datetime.datetime.fromtimestamp(mtime).date()

    # Try to get the file's date by calling a module support function. The
    # module may be able to extract the date from the filename, from the
    # contents of the file itself (e.g. scraping some text from the PDF
    # contents, or grabbing the last line of a CSV file).
    try:
        date = importer.file_date(file)
    except Exception as exc:
        logging.exception("Importer %s.file_date() raised an unexpected error: %s",
                          importer.name(), exc)
        date = None
    if date is None:
        # Fallback on the last modified time of the file.
        date = mtime_date
        date_source = 'mtime'
    else:
        date_source = 'contents'

    # Apply filename renaming, if implemented.
    # Otherwise clean up the filename.
    try:
        clean_filename = importer.file_name(file)

        # Warn the importer implementor if a name is returned and it's an
        # absolute filename.
        if clean_filename and (path.isabs(clean_filename) or os.sep in clean_filename):
            logging.error(("The importer '%s' file_name() method should return a relative "
                           "filename; the filename '%s' is absolute or contains path "
                           "separators"),
                          importer.name(), clean_filename)
    except Exception as exc:
        logging.exception("Importer %s.file_name() raised an unexpected error: %s",
                          importer.name(), exc)
        clean_filename = None
    if clean_filename is None:
        # If no filename has been provided, use the basename.
        clean_filename = path.basename(file.name)
    elif re.match(r'\d\d\d\d-\d\d-\d\d', clean_filename):
        # Fixed: the '%s' placeholder previously had no argument, so the log
        # message rendered with a literal '%s' instead of the importer name.
        logging.error("The importer '%s' file_name() method should not date the "
                      "returned filename. Implement file_date() instead.",
                      importer.name())

    # We need a simple filename; remove the directory part if there is one.
    clean_basename = path.basename(clean_filename)

    # Remove whitespace if requested.
    if idify:
        clean_basename = misc_utils.idify(clean_basename)

    # Prepend the date prefix.
    new_filename = '{0:%Y-%m-%d}.{1}'.format(date, clean_basename)

    # Prepend destination directory.
    new_fullname = path.normpath(path.join(destination,
                                           file_account.replace(account.sep, os.sep),
                                           new_filename))

    # Print the filename and which modules matched.
    if logfile is not None:
        logfile.write('Importer:    {}\n'.format(importer.name() if importer else '-'))
        logfile.write('Account:     {}\n'.format(file_account))
        logfile.write('Date:        {} (from {})\n'.format(date, date_source))
        logfile.write('Destination: {}\n'.format(new_fullname))
        logfile.write('\n')

    return new_fullname
def with_testdir(directory):
    """Parametrizing fixture that provides files from a directory."""
    memos = [cache.get_file(name) for name in find_input_files(directory)]
    return pytest.mark.parametrize("file", memos)