def readcsv2dict_with_noheader(csvfile, dictkeys, header, dupkeyfail=False, noshowwarning=False, encoding='windows-1252', debug=False):
    """Read a header-less CSV file into a dict of row dicts.

    Every row is zipped with the caller-supplied ``header`` to form a row
    dict, and the record key is produced by
    ``kvmatch.build_multifield_key(rowdict, dictkeys)``.  When a key repeats,
    the later row replaces the earlier one in the result.

    Args:
        csvfile: path of the CSV file to read.
        dictkeys: field name(s) used to build each record key; must be truthy.
        header: non-empty list of column names applied to every row.
        dupkeyfail: when True, raise after reading if any key was repeated.
        noshowwarning: when True, do not print the duplicate-key message
            (it is still logged).
        encoding: text encoding used to open the file.
        debug: currently unused by this function.

    Returns:
        Tuple ``(results, header, dupcount)`` where ``results`` maps record
        key to row dict, ``header`` is the list passed in, and ``dupcount``
        is the number of rows whose key had already been seen.

    Raises:
        Exception: ``dictkeys`` or ``header`` is empty, or ``header`` is not
            a list.
        ValueError: ``dupkeyfail`` is True and at least one key repeated.
    """
    # validate inputs - log first so the failure is recorded even if the caller swallows it
    if not dictkeys:
        logger.error('must pass in dictkeys')
        raise Exception('must pass in dictkeys')
    if not header:
        logger.error('must pass in header')
        raise Exception('must pass in header')
    if not isinstance(header, list):
        logger.error('header must be a list:%s', header)
        # format the message here - Exception() does not apply %-args itself
        raise Exception('header must be a list:%s' % (header,))

    results = {}
    dupkeys = []
    dupcount = 0

    # newline='' leaves line-ending handling to the csv module so quoted
    # fields that contain embedded newlines are parsed as one field
    with open(csvfile, mode='r', encoding=encoding, newline='') as csv_file:
        for row in csv.reader(csv_file):
            # zip() pairs columns by position and stops at the shorter side
            rowdict = dict(zip(header, row))
            reckey = kvmatch.build_multifield_key(rowdict, dictkeys)

            # count every repeat; only remember the key when repeats are fatal
            if reckey in results:
                dupcount += 1
                if dupkeyfail:
                    dupkeys.append(reckey)

            # create/update the dictionary - last row for a key wins
            results[reckey] = rowdict

    # fail if we captured duplicate keys (only populated when dupkeyfail is set)
    if dupkeys:
        logger.warning(
            'readcsv2dict:v%s:file:%s:duplicate key failure:keys:%s',
            AppVersion, csvfile, ','.join(dupkeys))
        # display message if the user wants this displayed
        if not noshowwarning:
            print('readcsv2dict:duplicate key failure:', ','.join(dupkeys))
        raise ValueError('Duplicate key failure')

    return results, header, dupcount
def readcsv2dict_with_header(csvfile, dictkeys, dupkeyfail=False, noshowwarning=False, headerlc=False, encoding='windows-1252', debug=False):
    """Read a CSV file whose first row is the header into a dict of row dicts.

    The first row supplies the column names.  Every following row is zipped
    with that header to form a row dict, and the record key is produced by
    ``kvmatch.build_multifield_key(rowdict, dictkeys)``.  When a key repeats,
    the later row replaces the earlier one in the result.

    Args:
        csvfile: path of the CSV file to read.
        dictkeys: field name(s) used to build each record key; a single
            string or a list of strings.
        dupkeyfail: when True, raise after reading if any key was repeated.
        noshowwarning: when True, do not print the duplicate-key message
            (it is still logged).
        headerlc: when True, lower-case both the header and ``dictkeys``
            before building keys.
        encoding: text encoding used to open the file.
        debug: when True, print the header before/after lower-casing.

    Returns:
        Tuple ``(results, header, dupcount)`` where ``results`` maps record
        key to row dict, ``header`` is the (possibly lower-cased) header row,
        and ``dupcount`` is the number of rows whose key had already been
        seen.

    Raises:
        ValueError: the file has no rows at all (no header to read), or
            ``dupkeyfail`` is True and at least one key repeated.
    """
    results = {}
    dupkeys = []
    dupcount = 0

    # newline='' leaves line-ending handling to the csv module so quoted
    # fields that contain embedded newlines are parsed as one field
    with open(csvfile, mode='r', encoding=encoding, newline='') as csv_file:
        reader = csv.reader(csv_file)

        # a completely empty file has no header row; report that clearly
        # instead of letting StopIteration escape from the iterator
        header = next(reader, None)
        if header is None:
            raise ValueError('csv file has no header row:%s' % (csvfile,))

        if debug:
            print('header-before:', header)
        logger.debug('header-before:%s', header)

        if headerlc:
            # a bare string must be lowered as a whole - iterating it would
            # split it into single characters
            if isinstance(dictkeys, str):
                dictkeys = dictkeys.lower()
            else:
                dictkeys = [x.lower() for x in dictkeys]
            header = [x.lower() for x in header]
            if debug:
                print('header-after:', header)
            logger.debug('header-after:%s', header)

        for row in reader:
            # zip() pairs columns by position and stops at the shorter side
            rowdict = dict(zip(header, row))
            reckey = kvmatch.build_multifield_key(rowdict, dictkeys)

            # count every repeat; only remember the key when repeats are fatal
            if reckey in results:
                dupcount += 1
                if dupkeyfail:
                    dupkeys.append(reckey)

            # create/update the dictionary - last row for a key wins
            results[reckey] = rowdict

    # fail if we captured duplicate keys (only populated when dupkeyfail is set)
    if dupkeys:
        logger.warning(
            'readcsv2dict:v%s:file:%s:duplicate key failure:keys:%s',
            AppVersion, csvfile, ','.join(dupkeys))
        # display message if the user wants this displayed
        if not noshowwarning:
            print('readcsv2dict:duplicate key failure:', ','.join(dupkeys))
        raise ValueError('Duplicate key failure')

    return results, header, dupcount
def test_build_multifield_key_f03_empty_string_dictkeys(self):
    # Failure case: an empty-string dictkeys is expected to raise.
    empty_dictkeys = ''
    with self.assertRaises(Exception):
        kvmatch.build_multifield_key(rowdict, empty_dictkeys)
def test_build_multifield_key_f01_missing_key(self):
    # Failure case: asking for the 'Missing' field is expected to raise.
    requested_fields = ['Company', 'Missing']
    with self.assertRaises(Exception):
        kvmatch.build_multifield_key(rowdict, requested_fields)
def test_build_multifield_key_p02_multiplestrings_joinchar(self):
    # Pass case: 'Company' + 'Wine' joined with an explicit ':' joinchar.
    key_fields = ['Company', 'Wine']
    built_key = kvmatch.build_multifield_key(rowdict, key_fields, joinchar=':')
    self.assertEqual(built_key, 'Test:Yummy')
def test_build_multifield_key_p05_single_string_string(self):
    # Pass case: dictkeys given as a bare string rather than a list.
    built_key = kvmatch.build_multifield_key(rowdict, 'Company')
    self.assertEqual(built_key, 'Test')
def test_build_multifield_key_p04_string_date(self):
    # Pass case: 'Company' + 'ProcessDate'; the date part is expected to
    # appear in the key as '2020-01-01 00:00:00'.
    key_fields = ['Company', 'ProcessDate']
    built_key = kvmatch.build_multifield_key(rowdict, key_fields)
    self.assertEqual(built_key, 'Test|2020-01-01 00:00:00')
def test_build_multifield_key_p03_string_number(self):
    # Pass case: 'Company' + 'Price'; the price part is expected to appear
    # in the key as '10.0'.
    key_fields = ['Company', 'Price']
    built_key = kvmatch.build_multifield_key(rowdict, key_fields)
    self.assertEqual(built_key, 'Test|10.0')
def test_build_multifield_key_p02_multiplestrings(self):
    # Pass case: 'Company' + 'Wine' with no joinchar given; the expected
    # key uses '|' between the parts.
    key_fields = ['Company', 'Wine']
    built_key = kvmatch.build_multifield_key(rowdict, key_fields)
    self.assertEqual(built_key, 'Test|Yummy')
def readxls2dict_findheader(xlsfile, dictkeys, req_cols=None, xlatdict=None, optiondict=None, col_aref=None, debug=False, dupkeyfail=False):
    """Read a spreadsheet via ``readxls2list_findheader`` and key the rows by ``dictkeys``.

    The rows are first read as a list by ``readxls2list_findheader``; each
    row dict is then stored under the key produced by
    ``kvmatch.build_multifield_key(rowdict, dictkeys)``.  When a key repeats,
    the later row replaces the earlier one in the result.

    Args:
        xlsfile: spreadsheet file, passed through to ``readxls2list_findheader``.
        dictkeys: field name(s) used to build each record key; a single
            string is converted to a one-element list.
        req_cols: passed through to ``readxls2list_findheader``
            (defaults to an empty list).
        xlatdict: passed through to ``readxls2list_findheader``
            (defaults to an empty dict).
        optiondict: passed through to ``readxls2list_findheader``
            (defaults to an empty dict).
        col_aref: passed through to ``readxls2list_findheader``.
        debug: when True, print progress messages and pause for the user
            to press enter after showing ``dictkeys``.
        dupkeyfail: when True, raise after conversion if any key repeated.

    Returns:
        Dict mapping record key to row dict.

    Raises:
        RuntimeError: ``dictkeys`` is empty, or ``dupkeyfail`` is True and
            at least one key repeated.  RuntimeError is what the previous
            bare ``raise`` produced when no exception was being handled.
    """
    # fresh containers per call - avoids sharing one default object between calls
    if req_cols is None:
        req_cols = []
    if xlatdict is None:
        xlatdict = {}
    if optiondict is None:
        optiondict = {}

    # validate we have proper input
    if not dictkeys:
        logger.error(
            'kvxls:readxls2dict_findheader:dictkeys not populated - program error'
        )
        print(
            'kvxls:readxls2dict_findheader:dictkeys not populated - program error'
        )
        # raise an explicit exception: a bare raise here has nothing to
        # re-raise (or would re-raise whatever the caller is handling)
        raise RuntimeError(
            'kvxls:readxls2dict_findheader:dictkeys not populated - program error'
        )

    # keys seen more than once (only tracked when dupkeyfail is set)
    dupkeys = []

    # results defined as a dict
    results = {}

    # debugging
    logger.debug('dictkeys:%s', dictkeys)
    if debug:
        print('readxls2dict_findheader:dictkeys:', dictkeys)
        input('press enter')

    # test how dictkeys was passed in
    if isinstance(dictkeys, str):
        dictkeys = [dictkeys]
        if debug:
            print(
                'readxls2dict_findheader:converted dictkeys from string to list'
            )
        logger.debug('converted dictkeys from string to list')

    # debugging
    if debug:
        print('readxls2dict_findheader:reading in xls as a list first')
    logger.debug('reading in xls as a list first')

    # read in the data from the file
    resultslist = readxls2list_findheader(xlsfile,
                                          req_cols,
                                          xlatdict=xlatdict,
                                          optiondict=optiondict,
                                          col_aref=col_aref,
                                          debug=debug)

    # debugging
    if debug:
        print(
            'readxls2dict_findheader:xls data is in an array - now convert to a dictionary'
        )
        print('readxls2dict_findheader:dictkeys:', dictkeys)
    logger.debug('xls data is in an array - now convert to a dictionary')
    logger.debug('dictkeys:%s', dictkeys)

    # convert to a dictionary based on keys provided
    for rowdict in resultslist:
        if debug:
            print('rowdict:', rowdict)
            print('dictkeys:', dictkeys)
        logger.debug('rowdict:%s', rowdict)
        logger.debug('dictkeys:%s', dictkeys)

        reckey = kvmatch.build_multifield_key(rowdict, dictkeys)

        # do we fail if we see the same key multiple times?
        if dupkeyfail and reckey in results:
            # capture this key
            dupkeys.append(reckey)

        # create/update the dictionary - last row for a key wins
        results[reckey] = rowdict

    # fail if we found dupkeys
    if dupkeys:
        logger.error('duplicate key failure:%s', ','.join(dupkeys))
        print('readxls2dict:duplicate key failure:', ','.join(dupkeys))
        raise RuntimeError('duplicate key failure:%s' % ','.join(dupkeys))

    # return the results
    return results
def readcsv2dict_findheader(csvfile, req_cols, dictkeys, xlatdict=None, optiondict=None, col_aref=None, debug=False, dupkeyfail=False):
    """Read a CSV via ``readcsv2list_findheader`` and key the rows by ``dictkeys``.

    The rows are first read as a list by ``readcsv2list_findheader``; each
    row dict is then stored under the key produced by
    ``kvmatch.build_multifield_key(rowdict, dictkeys)``.  When a key repeats,
    the later row replaces the earlier one in the result.

    Args:
        csvfile: CSV file, passed through to ``readcsv2list_findheader``.
        req_cols: passed through to ``readcsv2list_findheader``.
        dictkeys: non-empty list of field names used to build each key.
        xlatdict: passed through to ``readcsv2list_findheader``
            (defaults to an empty dict).
        optiondict: passed through to ``readcsv2list_findheader``
            (defaults to an empty dict).  A truthy ``no_header`` without
            ``col_aref``, or a truthy ``aref_result``, is rejected.
        col_aref: passed through to ``readcsv2list_findheader``.
        debug: when True, print intermediate values.
        dupkeyfail: when True, raise after conversion if any key repeated.

    Returns:
        Dict mapping record key to row dict.

    Raises:
        Exception: ``dictkeys`` is empty or not a list, ``optiondict`` holds
            an invalid combination, or ``dupkeyfail`` is True and at least
            one key repeated.
    """
    # check inputs
    if not dictkeys:
        raise Exception('dictkeys must be populated')
    if not isinstance(dictkeys, list):
        # format the message here - Exception() does not apply %-args itself
        raise Exception('dictkeys must be a list:%s' % (dictkeys,))

    # fresh containers per call - avoids sharing one default object between calls
    if xlatdict is None:
        xlatdict = {}
    if optiondict is None:
        optiondict = {}

    # debugging
    if debug:
        print('dictkeys:', dictkeys)

    # local variables
    dupkeys = []
    dictresults = {}

    # read in the data from the file
    results = readcsv2list_findheader(csvfile,
                                      req_cols,
                                      xlatdict=xlatdict,
                                      optiondict=optiondict,
                                      col_aref=col_aref,
                                      debug=debug)

    # check processing
    if 'no_header' in optiondict and optiondict['no_header'] and not col_aref:
        raise Exception(
            'invalid setting optiondict[no_header] and no col_aref')
    if 'aref_result' in optiondict and optiondict['aref_result']:
        raise Exception('invalid setting optiondict[aref_result]')

    if debug:
        print('results:', results)

    # convert to a dictionary based on keys provided
    for rowdict in results:
        if debug:
            print('dictkeys:', dictkeys)
            print('rowdict:', rowdict)

        reckey = kvmatch.build_multifield_key(rowdict, dictkeys)

        # do we fail if we see the same key multiple times?
        if dupkeyfail and reckey in dictresults:
            # capture this key
            dupkeys.append(reckey)

        # create/update the dictionary - last row for a key wins
        dictresults[reckey] = rowdict

    # fail if we found dupkeys
    if dupkeys:
        print('readcsv2dict:duplicate key failure:', ','.join(dupkeys))
        raise Exception('duplicate key failure:%s' % ','.join(dupkeys))

    if debug:
        print('dictresults:', dictresults)

    # return the results
    return dictresults