def run(schedule_name, jobs, do_fixes=False):
    """Run every job in `jobs` concurrently and record per-job timing heuristics in the cache."""
    heuristics = get_job_heuristics()
    if len(jobs) == 0:
        return
    print("^ {} running {} jobs".format(time.strftime("%m/%d/%Y %H:%M:%S"), len(jobs)))
    return_string = ""

    # def run_module_job(name, module):
    #     start_time = time.time()
    #     return_string = ''
    #     return_string += "\n\n################\n"
    #     return_string += name
    #     if hasattr(module, 'problem_data'):
    #         job = getattr(module, 'problem_data')
    #     else:
    #         job = getattr(module, 'main')
    #     try:
    #         list(sessions.values())[0].console.new_line('Running {}'.format(name))
    #         if job.__name__ == 'main':
    #             result = str(job(sessions))
    #         elif job.__name__ == 'problem_data':
    #             result = run_job(name, module.problem_data, module.fix, module.sessions, do_fix=False,
    #                              download_result=False, sf_sessions=sessions, sql_sessions=None)
    #         if len(result) > 0:
    #             return_string += "\n"
    #             return_string += result
    #         else:
    #             return_string = ''
    #     except Exception as e:
    #         return_string += "\n>>>>> Job failed: {}\n{}\n".format(name, str(e))
    #     duration = math.floor(time.time() - start_time)
    #     return name, return_string, duration

    # Fan out: start one worker per job module. `threading.new` / `.result()` look like a
    # project-specific futures helper rather than the stdlib `threading` module.
    threads = []
    # sorted_jobs =
    for name, module in jobs.items():
        threads.append(threading.new(run_module_job, sessions, name, module, do_fix=do_fixes))

    # Fan in: collect each job's output and fold its duration into a running average.
    for thread in threads:
        job_name, result_string, duration = thread.result()
        return_string += '\n' + result_string
        heur = heuristics.get(job_name, {
            'name': job_name,
            'num_runs': 0,
            'avg_duration': 0,
            'last_error_message': None
        })
        heur['avg_duration'] = (heur['num_runs'] * heur['avg_duration'] + duration) / (heur['num_runs'] + 1)
        heur['num_runs'] = heur['num_runs'] + 1
        heur['last_error_message'] = result_string
        heuristics[job_name] = heur

    Cache.set('scheduled_jobs', heuristics)
    return print_report(schedule_name, return_string, len(jobs))
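# Illustrative sketch only, not called anywhere: assuming `threading.new(...)` in run() is a
# project-specific helper that returns a future-like handle, the same fan-out/fan-in pattern
# can be written with the standard library. All names here (`_example_fan_out_fan_in`,
# `fake_job`) are hypothetical and exist only for illustration.
def _example_fan_out_fan_in():
    import time
    from concurrent.futures import ThreadPoolExecutor

    def fake_job(name):
        start = time.time()
        time.sleep(0.1)  # stand-in for real work
        return name, 'ok', time.time() - start

    with ThreadPoolExecutor() as pool:
        futures = [pool.submit(fake_job, n) for n in ['job_a', 'job_b']]
        # Collect (name, result, duration) tuples in submission order.
        return [f.result() for f in futures]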
def main():
    """Summarise the most common deduction setups per venue and export them to Excel."""
    # test = sf.select('''SELECT Id, Event__r.Venue__c, Event__r.Venue__r.VenueName__c,
    #     Event__r.Venue__r.BillingCity, Event__r.Venue__r.BillingCountry, Type__c, RateType__c,
    #     AmountRate__c, BeforeAfter__c
    #     FROM Deduction__c LIMIT 50000''', mode='bulk', return_type='dataframe')
    data = sf.fselect("""
        --sql
        SELECT Event__c
             , Event__r.Venue__c AS VenueId
             , Event__r.Venue__r.VenueName__c AS VenueName
             , Event__r.Venue__r.BillingCity + ', ' + Event__r.Venue__r.BillingCountryCode AS City
             , Event__r.Venue__r.BillingCountry AS Country
             , Type__c
             , RateType__c
             , AmountRate__c
             , BeforeAfter__c
        FROM Deduction__c
        WHERE ApplyTicketScale__c NOT IN ('Selected Tickets','Other Amount')
          AND StageType__c = Event__r.StatusStage__c
          AND Type__c != 'FMF/Venue Charge'
          AND Event__r.Status__c IN ('Confirmed','Flash Started','Flash Complete')
        -- AND RateType__c != 'Flat'
        -- AND Event__r.Venue__r.VenueName__c = 'The Wiltern'
        -- LIMIT 50000
        --endsql
    """, mode='bulk', content_type='JSON')
    Cache.set('deductionsanalysis', data)
    # data = Cache.get('deductionsanalysis')

    # Group deductions by event and remember each venue's display fields.
    by_event_id = {}
    venue_ids = set()
    venue_fields = {}
    venue_counts = {}
    for item in data:
        if isinstance(item.VenueId, SalesforceFormulaParser.NullValue):
            continue
        curr_list = by_event_id.get(item.Event__c, [])
        curr_list.append(Deduction(item.VenueId, item.VenueName or '', item.BeforeAfter__c or '',
                                   item.Type__c or '', item.RateType__c or '', item.AmountRate__c or 0))
        by_event_id[item.Event__c] = curr_list
        venue_ids.add(item.VenueId)
        venue_fields[item.VenueId] = Venue(item.VenueName, item.City, item.Country)

    # Collapse each event's deductions into a sorted tuple so identical setups compare equal,
    # then group those tuples by venue.
    sets = []
    sets_by_venue = {}
    for lst in by_event_id.values():
        new_set = tuple(sorted(lst, key=lambda item: (item.VenueId, item.VenueName, item.BeforeAfter__c,
                                                      item.Type__c, item.RateType__c, item.AmountRate__c)))
        venue_id = new_set[0].VenueId
        if venue_id not in sets_by_venue:
            sets_by_venue[venue_id] = set()
            venue_counts[venue_id] = 0
        sets.append(new_set)
        sets_by_venue[venue_id].add(new_set)
        venue_counts[venue_id] += 1

    # Count how many events share each exact deduction setup.
    counter = {}
    for tup in sets:
        val = counter.get(tup, 0) + 1
        counter[tup] = val

    # For each venue, keep its N most common setups as (count, setup) plus a text rendering.
    top_n = 3
    top_n_sets_by_venue = {
        k: sorted([(counter[s], s) for s in v], key=lambda x: x[0], reverse=True)[:top_n]
        for k, v in sets_by_venue.items()
    }
    top_n_sets_by_venue_text = {
        k: [set_to_text(x[1], x[0]) for x in v]
        for k, v in top_n_sets_by_venue.items()
    }

    def get_n(venue_id, n):
        lst = top_n_sets_by_venue[venue_id]
        if len(lst) > n:
            return lst[n][0]
        return None

    def get_n_str(venue_id, n):
        lst = top_n_sets_by_venue_text[venue_id]
        if len(lst) > n:
            return lst[n]
        return ''

    def set_output(row):
        x = row.VenueId
        row['Total Events'] = venue_counts[x]
        # Counts from get_n() are plain ints, so use the round() builtin rather than a .round() method.
        percent = lambda count: None if not count else round(count / row['Total Events'] * 100)
        row['Venue Name'] = venue_fields[x].Name
        row['City'] = venue_fields[x].City
        row['Country'] = venue_fields[x].Country
        row['1st'] = get_n(x, 0)
        row['1st %'] = percent(row['1st'])
        row['1st Deduction Setup'] = get_n_str(x, 0)
        row['2nd'] = get_n(x, 1)
        row['2nd %'] = percent(row['2nd'])
        row['2nd Deduction Setup'] = get_n_str(x, 1)
        row['3rd'] = get_n(x, 2)
        row['3rd %'] = percent(row['3rd'])
        row['3rd Deduction Setup'] = get_n_str(x, 2)
        return row

    df = (
        pd.DataFrame({'VenueId': list(venue_ids)})
        .apply(set_output, axis=1, result_type='expand')
        # .assign(**{
        #     'Total Events': lambda df: df.VenueId.apply(lambda x: venue_counts[x]),
        #     'Venue Name': lambda df: df.VenueId.apply(lambda x: venue_fields[x].Name),
        #     'Venue City': lambda df: df.VenueId.apply(lambda x: venue_fields[x].City),
        #     '1st': lambda df: df.VenueId.apply(lambda x: get_n(x, 0)),
        #     '1st Deduction Setup': lambda df: df.VenueId.apply(lambda x: get_n_str(x, 0)),
        #     '2nd': lambda df: df.VenueId.apply(lambda x: get_n(x, 1)),
        #     '2nd Deduction Setup': lambda df: df.VenueId.apply(lambda x: get_n_str(x, 1)),
        #     '3rd': lambda df: df.VenueId.apply(lambda x: get_n(x, 2)),
        #     '3rd Deduction Setup': lambda df: df.VenueId.apply(lambda x: get_n_str(x, 2)),
        # })
        .sort_values(['1st %', '1st'], ascending=False)
    )
    pdh.to_excel(df, 'Common Deductions Analysis.xlsx', word_wrap=True)
    return
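# Illustrative sketch only, not called anywhere: the per-venue "top N setups" step in main() is
# essentially "count identical tuples, then take the most common". A minimal self-contained
# version of that idea, with hypothetical data and a hypothetical function name, looks like this.
def _example_top_n_setups(top_n=3):
    from collections import Counter

    # Each event's deductions collapsed to a hashable, sorted tuple (as main() does).
    event_setups = [
        (('Facility Fee', 'Flat', 500),),
        (('Facility Fee', 'Flat', 500),),
        (('Facility Fee', 'Percent', 5), ('Parking', 'Flat', 200)),
    ]
    counts = Counter(event_setups)
    # [(setup, number_of_events_using_it), ...] for the most common setups.
    return counts.most_common(top_n)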
def save_cookie(driver):
    cookies = driver.get_cookies()
    Cache.set(('browser_cookie', driver.current_url), cookies)
def eos_venues():
    """Find likely duplicate EOS venues in the UK, cross-reference the Rome deletion log, and export the analysis."""
    def get_eos_dupes_analysis():
        records = sql.query("""
            SELECT v.Id, v.Name, v.RomeId, v.IsActive
            FROM Venue v
            LEFT JOIN TownCity t ON v.TownCityId = t.Id
            LEFT JOIN Region r ON t.RegionId = r.Id
            LEFT JOIN Country c ON r.CountryId = c.Id
            WHERE c.Name IN ('United Kingdom')
        """)
        return pdh.find_duplicates(records, 'Name', None, ['Id', 'RomeId', 'IsActive'])

    def get_deletion_log():
        return pd.DataFrame(
            sql.query("""
                SELECT l.DeletedRecordId__c
                     , l.MergedAccount__c
                     , l.RecordTypeName__c
                     , v1.Id AS EOSIdToDelete
                     , v1.Name AS EOSNameToDelete
                     , v2.Id AS EOSIdToMergeInto
                     , v2.Name AS EOSNameToMergeInto
                     , CASE WHEN v1.Id < v2.Id THEN 'Double check this - older ID being deleted'
                            ELSE NULL END AS SpotCheck
                FROM RomeDeletionLog l
                LEFT JOIN Venue v1 ON v1.RomeId = l.DeletedRecordId__c AND l.RecordTypeName__c = 'Venue'
                LEFT JOIN Venue v2 ON v2.RomeId = l.MergedAccount__c AND l.RecordTypeName__c = 'Venue'
                WHERE v1.Id IS NOT NULL
            """))

    eos_dupes = Cache.get('eosdupes', None)
    if eos_dupes is None or prompt('Re-run EOS Venue Dupe Analysis?', boolean=True):
        eos_dupes = get_eos_dupes_analysis()
        Cache.set('eosdupes', eos_dupes)

    deletion_log = get_deletion_log()
    deleted_ids = set(deletion_log.DeletedRecordId__c)

    # Columns prefixed with `m_` appear to belong to the matched (potential duplicate) row
    # returned by pdh.find_duplicates; LEFT/RIGHT below refer to the original and matched rows.
    def note(row):
        r1, r2 = row['RomeId'], row['m_RomeId']
        if pdh.isnull(r1) and pdh.isnull(r2):
            return 'EOS Merge - No Rome links exist'
        if r1 in deleted_ids and r2 in deleted_ids:
            return 'Both RomeIds are deleted in Rome'
        elif r2 in deleted_ids:
            return 'EOS Merge RIGHT into LEFT - RIGHT Id is deleted in Rome'
        elif r1 in deleted_ids:
            return 'EOS Merge LEFT into RIGHT - LEFT Id is deleted in Rome'
        if pdh.isnull(r1) and pdh.notnull(r2):
            return 'EOS Merge LEFT into RIGHT'
        if pdh.isnull(r2) and pdh.notnull(r1):
            return 'EOS Merge RIGHT into LEFT'
        if r1 == r2:
            return 'EOS Merge - Both EOS Venues have the same RomeId... merge one into the other'
        if r1 not in deleted_ids and r2 not in deleted_ids:
            return 'ROME Merge first, then EOS can be merged'

    # Keep only pairs whose name-match score exceeds 70%, then classify each pair and build a
    # direct Salesforce merge-wizard link for the two RomeIds.
    eos_dupes.query("`m_%` > 70", inplace=True)
    eos_dupes['Note'] = eos_dupes.apply(note, axis=1)
    eos_dupes['RomeMergeLink'] = ('https://lne.my.salesforce.com/merge/accmergewizard.jsp?goNext=+Next+&cid='
                                  + eos_dupes['RomeId'] + '&cid=' + eos_dupes['m_RomeId'])

    output = {
        'EOS Dupe Analysis': eos_dupes,
        'RomeDeletionLog': deletion_log,
    }
    file_name = (loc.uk_folder_path + '/Master Data - Output/Venue Dupe Analysis/EOS Venue Duplicate Analysis '
                 + datetime.now().strftime('%Y-%m-%d %H.%M') + '.xlsx')
    pdh.to_excel(output, file_name)
    return
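# Illustrative sketch only, not called anywhere: pdh.find_duplicates is a project helper, but the
# `m_%` column filtered on in eos_venues() appears to be a pairwise name-similarity percentage.
# A minimal self-contained approximation of that idea with difflib, using hypothetical venue
# names and a hypothetical function name:
def _example_name_similarity(threshold=70):
    from difflib import SequenceMatcher

    names = ['O2 Academy Brixton', 'O2 Academy, Brixton', 'Royal Albert Hall']
    pairs = []
    for i, a in enumerate(names):
        for b in names[i + 1:]:
            score = SequenceMatcher(None, a.lower(), b.lower()).ratio() * 100
            if score > threshold:  # same 70% cutoff used in eos_venues()
                pairs.append((a, b, round(score)))
    return pairs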