def main(args):
    """Download results for every configured site and save the merged raw CSV.

    Args:
        args: Mapping of command-line options. Reads ``"<filename>"`` (path of
            the package list, or ``None``), ``"--pkgset"`` (used to look up the
            latest package list when no filename is given) and ``"--keepdl"``
            (passed through to ``merge_raw_reports``).

    The package list is read as tab-separated ``org<TAB>package`` lines. The
    merged result is written next to the package list, using its name without
    extension plus ``" RAW.csv"``.
    """
    pkglist = args["<filename>"]
    if pkglist is None:
        # Figure out which package list matches the type of download we want
        # to do (6m, 12m, or DFLN)
        pkglist = find_latest_file(args["--pkgset"])
    logging.info("Working with %s", pkglist)

    # Create list of which packages we want results for.  The file is closed
    # as soon as it has been read, and blank lines are skipped so a trailing
    # newline does not break the (org, pkg) unpacking below.
    with open(pkglist) as pkg_file:
        packages_to_download = [
            line.strip().split("\t") for line in pkg_file if line.strip()
        ]

    # Download the results for each site.
    # We could do the downloads simultaneously (threads), but that causes
    # Firefox troubles from multiple browsers using the same profile.

    # Download results to files
    raw_report_fns = []
    for site in SITES:
        site_pkgs = [
            pkg for org, pkg in packages_to_download if org == site["org"]
        ]
        filename = download_from_site(site, site_pkgs)
        if filename:
            # A falsy return value means there is no report file to merge.
            raw_report_fns.append(filename)

    # Merge them into a Dataframe
    df = merge_raw_reports(raw_report_fns, args["--keepdl"])

    logging.info("Saving raw results...")
    df.to_csv(
        os.path.splitext(pkglist)[0] + " RAW.csv",
        index=None,
        quoting=csv.QUOTE_NONNUMERIC,
        date_format="%Y-%m-%d",
    )
def join_cpps(df):
    """Attach list costs, production costs and list descriptions to results.

    Steps, in order:

    1. Rename the raw report columns and derive ``EffType``, ``Eff``,
       ``Lasersplit`` and ``Listcode`` from each mail code.
    2. Keep a fixed subset of columns.
    3. Left-merge list CPPs on ``Mailcode`` and production CPPs on
       ``Eff`` + ``Lasersplit``, then compute ``List Cost`` and ``Prod Cost``
       as quantity mailed times the respective CPP.
    4. Left-merge list descriptions (built from several workbook tabs) on
       ``EffType`` + ``Listcode``.

    Args:
        df: DataFrame with at least the columns ``CLNT``, ``Mail Code``,
            ``Descript``, ``Package``, ``Qty Mail``, ``Mail Date``,
            ``FF Date``, ``Total Donors``, ``ND Count`` and ``Total Revenue``.

    Returns:
        A DataFrame with the renamed/derived columns plus the merged cost and
        list-description columns.
    """
    ## Rename
    df = df.rename(columns={
        'CLNT': 'Client',
        'Mail Code': 'Mailcode',
        'Descript': 'IDMI Descript',
        'Package': 'IDMI Package',
        'Qty Mail': 'Qty Mailed',
    })

    ## And add some columns (all derived from the mail code):
    df['EffType'] = df['Mailcode'].apply(lambda s: EFFTYPES[s[0]])
    df['Eff'] = df['Mailcode'].apply(lambda x: x[:4])
    df["Lasersplit"] = [get_lasersplit(mc) for mc in df['Mailcode']]
    df["Listcode"] = [get_listcode(mc) for mc in df['Mailcode']]

    ## Drop columns!
    df = df[['Client', 'EffType', 'Eff', 'Listcode', 'Lasersplit', 'Mailcode',
             'IDMI Package', 'IDMI Descript', 'Mail Date', 'FF Date',
             'Qty Mailed', 'Total Donors', 'ND Count', 'Total Revenue']]

    ## Load List Costs
    # NOTE(review): directory constants are concatenated directly with the
    # file name, so they presumably end with a path separator -- verify.
    lcpp_xlsx = find_latest_file('List CPPs', CPP_DIR)
    logging.info('List costs: %s', lcpp_xlsx)
    lcpps = pd.read_excel(CPP_DIR + lcpp_xlsx, "List & Media CPPs")
    # The 'Media CPP_ ' key includes a trailing space.
    lcpps = lcpps[['Mail Code', 'Media CPP_ ', 'Est?']]
    lcpps = lcpps.rename(columns={
        'Mail Code': 'Mailcode',
        'Media CPP_ ': 'List CPP',
        'Est?': 'LCPP Est?',
    })
    logging.info('Merging in list costs...')
    df = pd.merge(df, lcpps, how='left', on='Mailcode')

    ## Load Production Costs
    pcpp_xlsx = find_latest_file('Prod CPPs', CPP_DIR)
    logging.info('Prod costs: %s', pcpp_xlsx)
    pcpps = pd.read_excel(CPP_DIR + pcpp_xlsx, "Prod CPPs")
    pcpps = pcpps[['Eff', '8&10', 'Total Prd CPP', 'Estimated?']]
    pcpps = pcpps.rename(columns={
        '8&10': 'Lasersplit',
        'Total Prd CPP': 'Prod CPP',
        'Estimated?': 'PCPP Est?',
    })

    ## Merge in the Production Costs
    logging.info('Merging in production costs...')
    df = pd.merge(df, pcpps, how='left', on=['Eff', 'Lasersplit'])

    ## Some nice calculated columns
    df['List Cost'] = df['Qty Mailed'] * df['List CPP']
    df['Prod Cost'] = df['Qty Mailed'] * df['Prod CPP']

    ## Now we take a bunch of slightly different tabs from a single
    ## spreadsheet and merge them together (using 'EffType' to distinguish
    ## the originating tabs)
    list_xlsx = find_latest_file('USO Prospect Code Log', LIST_DIR)
    logging.info('List descs: %s', list_xlsx)
    list_path = LIST_DIR + list_xlsx

    # NOTE(review): `parse_cols` and `dtypes` are passed to read_excel exactly
    # as before.  Current pandas documents these options as `usecols` and
    # `dtype`; they are left unchanged here because switching depends on the
    # installed pandas version -- confirm and update together.

    # The two direct-mail tabs are stacked with pd.concat (as the final
    # combination below already does) rather than DataFrame.append.
    lists_dm = pd.concat([
        pd.read_excel(list_path, "Direct Mail Codes",
                      parse_cols="A,B,D,E,F",
                      dtypes={'Code': 'object',
                              'Category': 'object',
                              'List': 'object',
                              'Segment': 'object'}),
        pd.read_excel(list_path, "USO NY Direct Mail",
                      parse_cols="A,B,D,E,F",
                      dtypes={'Code': 'object'}),
    ])
    lists_dm = lists_dm.rename(columns={
        'Code': 'List Code',
        'Category': 'List Category',
        'List': 'List Desc',
        'Segment': 'List Segment/Vehicle',
    })
    lists_dm['EffType'] = 'Prsp'

    lists_pi = pd.read_excel(list_path, "Alternative Media",
                             parse_cols="A:D",
                             dtypes={'Code': 'object'})
    lists_pi = lists_pi.rename(columns={
        'Code': 'List Code',
        'Category': 'List Category',
        'Program': 'List Desc',
        'Details': 'List Segment/Vehicle',
    })
    lists_pi['EffType'] = 'PIns'

    lists_ni = pd.read_excel(list_path, "Newspaper",
                             parse_cols="A:C",
                             dtypes={'Code': 'object'})
    lists_ni = lists_ni.rename(columns={
        'Code': 'List Code',
        'Category': 'List Category',
        'Program': 'List Desc',
    })
    lists_ni['EffType'] = 'NIns'

    lists = pd.concat([lists_dm, lists_pi, lists_ni])

    ## Some list codes are inevitably numbers instead of strings, so:
    lists['List Code'] = lists['List Code'].apply(str)
    lists = lists.rename(columns={'List Code': 'Listcode'})

    logging.info('Merging in list descriptions...')
    df = pd.merge(df, lists, how='left', on=['EffType', 'Listcode'])

    return df