def algo11(db, ref):
    """Report 'Targeting' values of db that fail the reference 'Target' check.

    Locates the 'Targeting' column in ``db`` and the 'Target' column in
    ``ref`` with ``etl.find_in_header`` and passes both to
    ``etl.colvals_notincol`` (presumably yielding the db values that have no
    match in the reference column -- verify against the etl module).

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is a header line followed
        by the reported values joined with commas.
    """
    #Column L: must be in Reference>Target
    db_col_loc = etl.find_in_header(db, 'Targeting')
    ref_col_loc = etl.find_in_header(ref, 'Target')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    # The header used to read 'Referece'; the spelling is corrected here
    # because this text is shown to the user.
    return db, ref, 'Targets not in Reference\n' + ','.join(missing_names)
def algo6(db, ref):
    """Report 'District' values of db that fail the reference district check.

    Locates the 'District' column in ``db`` and the 'Admin1_District' column
    in ``ref`` with ``etl.find_in_header`` and passes both to
    ``etl.colvals_notincol`` (presumably yielding the db values that have no
    match in the reference column -- verify against the etl module).

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is a header line followed
        by the reported values joined with commas.
    """
    #Column G: must be in Reference>District
    db_col_loc = etl.find_in_header(db, 'District')
    ref_col_loc = etl.find_in_header(ref, 'Admin1_District')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)
    # The join call was left unterminated; it is closed over missing_names,
    # the same shape the other string-building checks use.
    return db, ref, 'Districts not in the reference:\n' + ','.join(
        missing_names)
def algo17(db, ref):
    """Check db's 'Activity Status' column against the reference 'Status' column.

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is whatever
        ``return_message`` builds from the fixed header and the values
        reported by ``etl.colvals_notincol``.
    """
    # Column R: must be in reference>Status
    status_col = etl.find_in_header(db, 'Activity Status')
    ref_status_col = etl.find_in_header(ref, 'Status')
    unmatched = etl.colvals_notincol(db, status_col, ref, ref_status_col)

    report = return_message('Activity Status not in Reference', unmatched)
    return db, ref, report
def algo9(db, ref):
    """Check db's 'Action type' column against the reference 'Action_Type' column.

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is a header line followed
        by the values reported by ``etl.colvals_notincol``, comma-joined.
    """
    # Column J: must be in reference>Type of Activity
    action_col = etl.find_in_header(db, 'Action type')
    ref_action_col = etl.find_in_header(ref, 'Action_Type')
    unmatched = etl.colvals_notincol(db, action_col, ref, ref_action_col)

    listing = ','.join(unmatched)
    report = 'Incorrect Action types:\n' + listing
    return db, ref, report
def algo11(db,ref):
    """Report 'Targeting' values of db that fail the reference 'Target' check.

    Locates the 'Targeting' column in ``db`` and the 'Target' column in
    ``ref`` with ``etl.find_in_header`` and passes both to
    ``etl.colvals_notincol`` (presumably yielding the db values that have no
    match in the reference column -- verify against the etl module).

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is built by
        ``return_message`` from the header text and the reported values.
    """
    #Column L: must be in Reference>Target
    db_col_loc = etl.find_in_header(db, 'Targeting')
    ref_col_loc = etl.find_in_header(ref,'Target')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    # The header used to read 'Referece'; the spelling is corrected here
    # because this text is shown to the user.
    return db, ref, return_message('Targets not in Reference' ,missing_names)
def algo1(db, ref):
    """Check db's 'Implementing agency' column against the reference agency names.

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is whatever
        ``return_message`` builds from the fixed header and the values
        reported by ``etl.colvals_notincol``.
    """
    # ***Column A must be in Reference>ImplementingAgency; if not: make a note
    agency_col = etl.find_in_header(db, 'Implementing agency')
    ref_agency_col = etl.find_in_header(ref, 'Implementing_Agency_Name')
    unknown_agencies = etl.colvals_notincol(
        db, agency_col, ref, ref_agency_col)

    note = return_message('Agencies not in the reference:', unknown_agencies)
    return db, ref, note
def algo9(db, ref):
    """Check db's 'Action type' column against the reference 'Action_Type' column.

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is whatever
        ``return_message`` builds from the fixed header and the values
        reported by ``etl.colvals_notincol``.
    """
    # Column J: must be in reference>Type of Activity
    action_col = etl.find_in_header(db, 'Action type')
    ref_action_col = etl.find_in_header(ref, 'Action_Type')
    bad_actions = etl.colvals_notincol(db, action_col, ref, ref_action_col)

    outcome = (db, ref, return_message('Incorrect Action types:', bad_actions))
    return outcome
def algo3(db, ref):
    # Looks up the 'Local partner agency' column of db and the
    # 'Local_Partner_Agency' column of ref, then runs etl.colvals_notincol
    # over that pair of columns.
    #
    # TODO (original author): what should this return? change?
    # NOTE(review): no return statement is visible, so as written this
    # function returns None and missing_names is discarded, unlike the other
    # algoN checks in this file, which return (db, ref, message). Confirm
    # whether the rest of the body lies outside this view.
    #
    # Column C: must be in Reference>LocalPartnerAgency
    # If not: check whether it is a misspelling (like '%agencyname%')

    db_col_loc = etl.find_in_header(db, 'Local partner agency')
    ref_col_loc = etl.find_in_header(ref, 'Local_Partner_Agency')
    # presumably the db values with no match in the reference column --
    # verify against etl.colvals_notincol
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)
def algo17(db, ref):
    """Report 'Activity Status' values of db that fail the reference 'Status' check.

    Locates the 'Activity Status' column in ``db`` and the 'Status' column in
    ``ref`` with ``etl.find_in_header`` and passes both to
    ``etl.colvals_notincol`` (presumably yielding the db values that have no
    match in the reference column -- verify against the etl module).

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is a header line followed
        by the reported values joined with commas.
    """
    #column R: must be in reference>Status
    db_col_loc = etl.find_in_header(db, 'Activity Status')
    ref_col_loc = etl.find_in_header(ref, 'Status')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    # Two fixes: the join call was left unterminated (now closed over
    # missing_names), and the header's 'Referece' is corrected to 'Reference'.
    return db, ref, 'Activity Status not in Reference\n' + ','.join(
        missing_names)
def algo1(db, ref):
    """Report 'Implementing agency' values of db that fail the reference check.

    Locates the 'Implementing agency' column in ``db`` and the
    'Implementing_Agency_Name' column in ``ref`` with ``etl.find_in_header``
    and passes both to ``etl.colvals_notincol`` (presumably yielding the db
    values that have no match in the reference column -- verify against the
    etl module).

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is a header line followed
        by the reported values joined with commas.
    """
    #***Column A must be in Reference>ImplementingAgency if not: make a note
    db_col_loc = etl.find_in_header(db, 'Implementing agency')
    ref_col_loc = etl.find_in_header(ref, 'Implementing_Agency_Name')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    # The join call was left unterminated; it is closed over missing_names,
    # the same shape the other string-building checks use.
    return db, ref, 'Agencies not in the reference:\n' + ','.join(
        missing_names)
def algo3(db,ref):
    # Looks up the 'Local partner agency' column of db and the
    # 'Local_Partner_Agency' column of ref, then runs etl.colvals_notincol
    # over that pair of columns.
    #
    # TODO (original author): what should this return? change?
    # NOTE(review): no return statement is visible, so as written this
    # function returns None and missing_names is discarded, unlike the other
    # algoN checks in this file, which return (db, ref, message). Confirm
    # whether the rest of the body lies outside this view.
    #
    # Column C: must be in Reference>LocalPartnerAgency
    # If not: check whether it is a misspelling (like '%agencyname%')

    db_col_loc = etl.find_in_header(db, 'Local partner agency')
    ref_col_loc = etl.find_in_header(ref,'Local_Partner_Agency')
    # presumably the db values with no match in the reference column --
    # verify against etl.colvals_notincol
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)
def match_vdcs():
    # Ad-hoc comparison of VDC names between the "cbs" and "new" sheets of a
    # workbook: prints two counts, then loops over the cbs column-B values
    # that etl.colvals_notincol reports against column N of "new".
    # NOTE(review): the workbook path is hard-coded to one user's Downloads
    # folder, so this only runs on that machine.
    #1651 VDCs without exact match

    w = etl.pull_wb("/Users/ewanog/Downloads/ward_level_data.xlsx", "local")
    #w = etl.pull_wb("/Users/ewanog/Downloads/test.xlsx", "local")
    #from cbs: col 1 (dist), col 2 (vdc), col 3 (ward)
    #from new: H (dist), N (VDC), O (ward)
    cbs = w.get_sheet_by_name("cbs")
    new = w.get_sheet_by_name("new")
    # [1:] skips the first cell of each column (presumably the header row)
    cbs_dist = etl.get_values(cbs.columns[0][1:])
    cbs_vdc = etl.get_values(cbs.columns[1][1:])
    # NOTE(review): cbs_ward is not used again in the visible code
    cbs_ward = etl.get_values(cbs.columns[2][1:])

    # 0-based indexes 7, 13, 14 correspond to spreadsheet columns H, N, O
    new_dist = etl.get_values(new.columns[7][1:])
    new_vdc = etl.get_values(new.columns[13][1:])
    # NOTE(review): new_ward and ns are not used again in the visible code
    new_ward = etl.get_values(new.columns[14][1:])
    ns = set(new_vdc)
    #see vdc matches

    # Pair each district with its VDC, row by row
    # NOTE(review): cb_zip is not used again in the visible code
    cb_zip = zip(cbs_dist, cbs_vdc)
    new_zip = zip(new_dist, new_vdc)

    # Number of distinct values reported by colvals_notincol for cbs 'B' vs new 'N'
    print len(set(etl.colvals_notincol(cbs,'B',new,'N')))
    # Number of distinct VDC values on the cbs sheet
    print len(set(cbs_vdc))
    for v in set(etl.colvals_notincol(cbs,'B',new,'N')):
        # NOTE(review): the next line is syntactically incomplete -- the
        # right-hand side of `r[1] ==` is missing, so this function cannot
        # be parsed as written. The intended filter value is not recoverable
        # from this view.
        # `process` is not defined in the visible code; presumably a
        # fuzzy-matching helper (e.g. fuzzywuzzy.process) -- TODO confirm.
        r = process.extract(v, [r for r in new_zip if r[1] == ])
        # Prints element [1] of the first extract result
        print r[0][1]

if __name__ == '__main__':
    # Script entry point.
    # NOTE(review): `read` is hard-coded to False, so the loop below never
    # executes as written; flip it manually to run the worksheet dump.
    read = False
    if read:
        # NOTE(review): neither `w` nor `get_vals` is defined at module scope
        # in the visible code -- confirm they exist elsewhere in the file
        # before enabling this branch.
        for ws in w.worksheets:
            # Only process sheets whose cell A44 holds a truthy value
            if ws['A44'].value:
                r = get_vals(ws)
                # Prints the first element of the last item returned by get_vals
                print r[-1][0]
def algo8(db, ref):
    """Report malformed 'Municipal Ward' values of db.

    Column I: must be in reference>Ward or a number or numbers separated by
    commas. The values reported by ``etl.colvals_notincol`` for the db
    'Municipal Ward' / ref 'Wards' column pair are flagged when they contain
    any character other than a digit, a space or a comma.

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is the 'Malformed wards'
        header line followed by the flagged values, each wrapped in
        parentheses and joined with commas.
    """
    #take all into missing names and trim from there
    db_col_loc = etl.find_in_header(db, 'Municipal Ward')
    ref_col_loc = etl.find_in_header(ref, 'Wards')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    # Built once rather than once per inspected character.
    allowed = set(string.digits + ' ' + ',')

    #trim missing_names to see if they contain any forbidden chars
    # Each value is flagged at most once: the earlier nested loop appended the
    # value again for every illegal character it contained, so 'abc' showed up
    # three times in the report.
    invalid = ['(' + v + ')' for v in missing_names
               if any(letter not in allowed for letter in v)]

    return db, ref, 'Malformed wards: \n' + ','.join(invalid)
def algo8(db,ref):
    """Report malformed 'Municipal Ward' values of db.

    Column I: must be in reference>Ward or a number or numbers separated by
    commas. The values reported by ``etl.colvals_notincol`` for the db
    'Municipal Ward' / ref 'Wards' column pair are flagged when they contain
    any character other than a digit, a space or a comma.

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is built by
        ``return_message`` from the 'Malformed wards: ' header and the
        flagged values, each wrapped in parentheses.
    """
    #take all into missing names and trim from there
    db_col_loc = etl.find_in_header(db, 'Municipal Ward')
    ref_col_loc = etl.find_in_header(ref,'Wards')
    missing_names = etl.colvals_notincol(db, db_col_loc, ref, ref_col_loc)

    # Built once rather than once per inspected character.
    allowed = set(string.digits + ' ' + ',')

    #trim missing_names to see if they contain any forbidden chars
    # Each value is flagged at most once: the earlier nested loop appended the
    # value again for every illegal character it contained, so 'abc' showed up
    # three times in the report.
    invalid = ['(' + v + ')' for v in missing_names
               if any(letter not in allowed for letter in v)]

    return db, ref, return_message('Malformed wards: ' ,invalid)
 def test_colvals_notincol(self):
     # Compares etl.colvals_notincol(db, 'A', ref, 'A'), converted to a tuple,
     # against an expected value.
     # NOTE(review): the assertEqual call is cut off in this view -- the
     # expected value and closing parenthesis are not visible, and `db` / `ref`
     # are not defined in the visible code.
     self.assertEqual(tuple(etl.colvals_notincol(db, 'A', ref, 'A')), 
def algo6(db, ref):
    """Check db's 'District' column against the reference 'Admin1_District' column.

    Returns:
        A ``(db, ref, message)`` tuple; ``message`` is whatever
        ``return_message`` builds from the fixed header and the values
        reported by ``etl.colvals_notincol``.
    """
    # Column G: must be in Reference>District
    district_col = etl.find_in_header(db, 'District')
    ref_district_col = etl.find_in_header(ref, 'Admin1_District')
    unknown_districts = etl.colvals_notincol(
        db, district_col, ref, ref_district_col)

    summary = return_message('Districts not in the reference:',
                             unknown_districts)
    return db, ref, summary