def reach(self, db):
    """How is each VDC accessible?

    For every data row of `db`, concatenates District + VDC and looks
    the result up in the access sheet's 'DistrictVDC Concatenation'
    column, returning the matching 'NeKSAP ACCESS' value.

    Returns a list with one entry per data row: the access string, or
    '' when the key is empty or has no match.
    """
    dist_loc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
    vdc_loc = column_index_from_string(
        etl.find_in_header(db, 'VDC / Municipalities')) - 1
    acc_look = column_index_from_string(
        etl.find_in_header(self.s_dict['acc'], 'DistrictVDC Concatenation')) - 1
    acc_acc = column_index_from_string(
        etl.find_in_header(self.s_dict['acc'], 'NeKSAP ACCESS')) - 1

    dist_vals = etl.get_values(db.columns[dist_loc][1:])
    vdc_vals = etl.get_values(db.columns[vdc_loc][1:])
    acc_look_vals = etl.get_values(self.s_dict['acc'].columns[acc_look][1:])
    acc_acc_vals = etl.get_values(self.s_dict['acc'].columns[acc_acc][1:])

    # 'DistrictVDC' concatenation -> access value, for O(1) lookups.
    acc_dict = dict(zip(acc_look_vals, acc_acc_vals))

    ret = []
    for dist, vdc in zip(dist_vals, vdc_vals):
        key = dist + vdc
        # An empty key never matches, even if '' happens to be in acc_dict.
        if key != '' and key in acc_dict:
            ret.append(acc_dict[key])
        else:
            ret.append('')
    return ret
def priority(self, db):
    """Is each row's district a priority district?

    Matches the 'District' column of `db` against the reference sheet's
    'Priority Districts' column.

    Returns a list of 'TRUE'/'FALSE' strings, one per data row.
    """
    dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
    rloc = column_index_from_string(
        etl.find_in_header(self.s_dict['ref'], 'Priority Districts')) - 1
    dvals = etl.get_values(db.columns[dloc][1:])
    # Build the membership set once: O(1) tests instead of an O(n)
    # list scan for every row.
    priority_set = set(etl.get_values(self.s_dict['ref'].columns[rloc][1:]))
    return ['TRUE' if v != '' and v in priority_set else 'FALSE'
            for v in dvals]
def update(self, db):
    """Return the 'Last Update' value for every data row of `db`."""
    dloc = column_index_from_string(etl.find_in_header(db, 'Last Update')) - 1
    # list() replaces the original element-by-element copy loop; the
    # caller still gets a fresh list it can mutate freely.
    return list(etl.get_values(db.columns[dloc][1:]))
def dist_hlcit(self, db):
    """Look up the HLCIT P-code for each row's district.

    Matches the 'District' column of `db` against 'Admin1_District' in
    the reference sheet and returns the corresponding 'Admin1_P-Code'
    value, or '' when the district is empty or unmatched.
    """
    dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
    d_rloc = column_index_from_string(
        etl.find_in_header(self.s_dict['ref'], 'Admin1_District')) - 1
    code_rloc = column_index_from_string(
        etl.find_in_header(self.s_dict['ref'], 'Admin1_P-Code')) - 1
    dvals = etl.get_values(db.columns[dloc][1:])
    d_rloc_vals = etl.get_values(self.s_dict['ref'].columns[d_rloc][1:])
    code_rloc_vals = etl.get_values(
        self.s_dict['ref'].columns[code_rloc][1:])

    # district -> p-code.  setdefault keeps the FIRST occurrence, which
    # matches the list.index() semantics of the original O(n^2) scan.
    code_map = {}
    for name, code in zip(d_rloc_vals, code_rloc_vals):
        code_map.setdefault(name, code)

    return [code_map[v] if v != '' and v in code_map else '' for v in dvals]
def act_cat(self, db):
    """Map each row's SC action description to its UNOCHA category.

    Matches the 'Action description' column of `db` against the
    reference sheet's 'SC Categories' column and returns the paired
    'UN Categories' value, or '' when empty or unmatched.
    """
    dloc = column_index_from_string(
        etl.find_in_header(db, 'Action description')) - 1
    sc_rloc = column_index_from_string(
        etl.find_in_header(self.s_dict['ref'], 'SC Categories')) - 1
    un_rloc = column_index_from_string(
        etl.find_in_header(self.s_dict['ref'], 'UN Categories')) - 1
    dvals = etl.get_values(db.columns[dloc][1:])
    sc_vals = etl.get_values(self.s_dict['ref'].columns[sc_rloc][1:])
    un_vals = etl.get_values(self.s_dict['ref'].columns[un_rloc][1:])

    # SC category -> UN category.  setdefault keeps the FIRST
    # occurrence, matching the original list.index() linear scan.
    cat_map = {}
    for sc, un in zip(sc_vals, un_vals):
        cat_map.setdefault(sc, un)

    return [cat_map[v] if v != '' and v in cat_map else '' for v in dvals]
def hub(self, db):
    """What Shelter Cluster hub is each row's district in?

    Matches the 'District' column of `db` against the reference sheet's
    'Priority Districts' column and returns the paired 'Shelter Cluster
    Hubs' value, or '' when empty or unmatched.
    """
    dloc = column_index_from_string(etl.find_in_header(db, 'District')) - 1
    pri_rloc = column_index_from_string(
        etl.find_in_header(self.s_dict['ref'], 'Priority Districts')) - 1
    hub_rloc = column_index_from_string(
        etl.find_in_header(self.s_dict['ref'], 'Shelter Cluster Hubs')) - 1
    dvals = etl.get_values(db.columns[dloc][1:])
    pri_vals = etl.get_values(self.s_dict['ref'].columns[pri_rloc][1:])
    hub_vals = etl.get_values(self.s_dict['ref'].columns[hub_rloc][1:])

    # priority district -> hub.  setdefault keeps the FIRST occurrence,
    # matching the original list.index() linear scan.
    hub_map = {}
    for dist, hub_name in zip(pri_vals, hub_vals):
        hub_map.setdefault(dist, hub_name)

    return [hub_map[v] if v != '' and v in hub_map else '' for v in dvals]
def test_consolidate(self):
    """Consolidate two workbooks into a historical db and check the
    resulting UID column (column J) contains exactly the expected keys,
    with duplicates collapsed.

    NOTE(review): every data row carries 10 values against a 9-column
    header; the 10th value ('Last') lands in column J, which
    consolidate() is told to use -- confirm this is intentional.
    """
    # Create the historical db: header plus five rows.  'dup1'/'dup2'
    # rows also appear in the incoming sheets and must not duplicate.
    db = Workbook().active
    db.append(("Implementing agency", "Local partner agency", "District",
               "VDC / Municipalities", "Municipal Ward", "Action type",
               "Action description", "# Items / # Man-hours / NPR",
               "Total Number Households"))
    db.append(('val', 'key', 'None', 'None', 'None', 'None', 'None',
               'None', 'None', 'Last'))
    db.append(('dup1', 'dupkey1', 'None', 'None', 'None', 'None', 'None',
               'None', 'None', 'Last'))
    db.append(('row2val', 'key1', 'None', 'None', 'None', 'None', 'None',
               'None', 'None', 'Last'))
    db.append(('row3val', 'key2', 'None', 'None', 'None', 'None', 'None',
               'None', 'None', 'Last'))
    db.append(('dup2', 'dupkey2', 'None', 'None', 'None', 'None', 'None',
               'None', 'None', 'Last'))

    # Create two workbooks to consolidate in.  Each repeats the 'val'
    # row (shared duplicate), one 'dup' row already in db, and adds one
    # genuinely new row.
    wb1 = Workbook()
    wb1.create_sheet(2, 'Distributions')
    ws1 = wb1.get_sheet_by_name('Distributions')
    ws1.append(("Implementing agency", "Local partner agency", "District",
                "VDC / Municipalities", "Municipal Ward", "Action type",
                "Action description", "# Items / # Man-hours / NPR",
                "Total Number Households"))
    ws1.append(('val', 'key', 'None', 'None', 'None', 'None', 'None',
                'None', 'None', 'Last'))
    ws1.append(('dup1', 'dupkey1', 'None', 'None', 'None', 'None', 'None',
                'None', 'None', 'Last'))
    ws1.append(('notdupedws1', 'notdupedvalws1', 'None', 'None', 'None',
                'None', 'None', 'None', 'None', 'Last'))

    wb2 = Workbook()
    wb2.create_sheet(1, 'Distributions')
    ws2 = wb2.get_sheet_by_name('Distributions')
    ws2.title = 'Distributions'
    ws2.append(("Implementing agency", "Local partner agency", "District",
                "VDC / Municipalities", "Municipal Ward", "Action type",
                "Action description", "# Items / # Man-hours / NPR",
                "Total Number Households"))
    ws2.append(('val', 'key', 'None', 'None', 'None', 'None', 'None',
                'None', 'None', 'Last'))
    ws2.append(('dup2', 'dupkey2', 'None', 'None', 'None', 'None', 'None',
                'None', 'None', 'Last'))
    ws2.append(('notdupedws2', 'notdupedvalws2', 'None', 'None', 'None',
                'None', 'None', 'None', 'None', 'Last'))

    # Weird bug: wb2 ends up with the sheet named 'Distributions1';
    # rename it back so consolidate() can find it.
    wb2.get_sheet_by_name('Distributions1').title = 'Distributions'
    print 'in test: ' + str(wb1.get_sheet_names())
    print 'in test2: ' + str(wb2.get_sheet_names())

    cons = etl.consolidate(db, (wb1, wb2), 'J')
    cons_sheet = cons.get_sheet_by_name('Consolidated')
    # Column J (index 9) should hold one UID per unique row plus the
    # header cell value 'None'; order does not matter, hence sets.
    self.assertEqual(
        set(etl.get_values(cons_sheet.columns[9])),
        (set(['None',
              'valkeyNoneNoneNoneNoneNoneNoneNone',
              'row2valkey1NoneNoneNoneNoneNoneNoneNone',
              'row3valkey2NoneNoneNoneNoneNoneNoneNone',
              'dup1dupkey1NoneNoneNoneNoneNoneNoneNone',
              'notdupedws1notdupedvalws1NoneNoneNoneNoneNoneNoneNone',
              'dup2dupkey2NoneNoneNoneNoneNoneNoneNone',
              'notdupedws2notdupedvalws2NoneNoneNoneNoneNoneNoneNone'])))
def uid(self, db):
    """Build a unique row identifier (UID) for each data row of `db`.

    Concatenates a fixed set of key columns -- one set for the
    'Distributions' sheet, another for 'Trainings'.  Returns a list of
    UID strings (one per data row), or an empty list when the sheet
    title is not recognised.
    """
    key_cols = None
    if db.title == 'Distributions':
        key_cols = [
            "Implementing agency", "Local partner agency", "District",
            "VDC / Municipalities", "Municipal Ward", "Action type",
            "Action description", "# Items / # Man-hours / NPR",
            "Total Number Households",
        ]
    elif db.title == 'Trainings':
        key_cols = [
            "Implementing agency", "Local partner agency", "District",
            "VDC / Municipalities", "Municipal Ward", "Training Subject",
            "Audience", "IEC Materials Distributed", "Males",
        ]

    to_ret = []
    print('Starting UID')
    if key_cols is None:
        print('ERROR: no suitable WB found for UID')
        return to_ret

    # Pull each key column's values; columns may differ in length.
    key_vals = []
    max_len = 0
    for header in key_cols:
        col = db.columns[
            column_index_from_string(etl.find_in_header(db, header)) - 1]
        col_vals = etl.get_values(col)
        key_vals.append(col_vals)
        if len(col_vals) > max_len:
            max_len = len(col_vals)

    # Start at 1 to skip the header row.
    for i in range(1, max_len):
        cur_key = ''
        for col_vals in key_vals:
            try:
                cur_key += col_vals[i]
            except (IndexError, TypeError):
                # Short column or non-string cell: skip this fragment,
                # as the original best-effort concatenation did.
                pass
        to_ret.append(cur_key)
    return to_ret
def match_vdcs():
    """Fuzzy-match CBS VDC names against the newer ward-level data.

    Original author's note: ~1651 VDCs have no exact match.
    Prints the count of unmatched VDCs, the count of distinct CBS VDCs,
    and a fuzzy-match result per unmatched VDC.
    """
    w = etl.pull_wb("/Users/ewanog/Downloads/ward_level_data.xlsx", "local")
    # Column layout -- cbs: A (dist), B (vdc), C (ward);
    #                  new: H (dist), N (vdc), O (ward)
    cbs = w.get_sheet_by_name("cbs")
    new = w.get_sheet_by_name("new")
    cbs_vdc = etl.get_values(cbs.columns[1][1:])
    new_dist = etl.get_values(new.columns[7][1:])
    new_vdc = etl.get_values(new.columns[13][1:])
    new_zip = list(zip(new_dist, new_vdc))

    # VDC names present in cbs column B but absent from new column N.
    unmatched = set(etl.colvals_notincol(cbs, 'B', new, 'N'))
    print(len(unmatched))
    print(len(set(cbs_vdc)))
    for v in unmatched:
        # NOTE(review): the original candidate filter was incomplete
        # ("if r[1] == " with no right-hand side -- a syntax error).
        # Fuzzy-match against all (dist, vdc) pairs instead; confirm
        # whether the intent was to restrict to a matching district.
        r = process.extract(v, new_zip)
        print(r[0][1])


if __name__ == '__main__':
    read = False
    if read:
        # NOTE(review): this branch references `w` and `get_vals`, which
        # are not defined at module scope -- it only ever worked when
        # pasted into an interactive session with those names bound.
        for ws in w.worksheets:
            if ws['A44'].value:
                r = get_vals(ws)
                print(r[-1][0])
    else:
        match_vdcs()
def test_get_values(self):
    """get_values should return the plain cell values of a row."""
    sheet = Workbook().active
    sheet.append(('val', 'key'))
    first_row = sheet.rows[0]
    self.assertEqual(etl.get_values(first_row), ['val', 'key'])
# Normalise a cell's date to %d/%m/%Y, trying the input formats seen in
# the data in order of likelihood.
# NOTE(review): this fragment relies on `v` (a cell) and `s` (a
# worksheet) bound earlier in the script -- confirm against the caller.
for fmt in ("%d/%m/%Y", "%d/%m/%y", "%m/%d/%y", "%m/%d/%Y"):
    try:
        v.value = etl.convert_date(v.value, fmt, "%d/%m/%Y")
        break  # parsed successfully; stop trying formats
    except Exception:
        continue
else:
    # The original built this message but never printed it (a bare
    # string expression is a no-op) -- actually report the failure.
    print("cant parse: " + str(v.value))

# Dump the sheet to CSV: header row first, then the non-empty cell
# values of each data row.
with open("/Users/ewanog/Documents/tmp/dates.csv", "w") as f:
    csv_out = csv.writer(f, delimiter=",")
    csv_out.writerow(etl.get_values(s.rows[0]))
    for row in s.rows[1:]:
        out = []
        for cell in row:
            try:
                if cell.value is not None:
                    out.append(cell.value)
            except Exception as e:
                print(e)
        csv_out.writerow(out)
        print(str(out))
    # No explicit close: the with-statement closes the file (the
    # original's f.close() inside the with block was redundant).