def _merge_keeping_file_rows(eos_frame, file_frame, key):
    """Outer-merge EOS rows with source-file rows, dropping EOS-only rows.

    Columns present on both sides keep the file value under the plain name and
    the EOS value under an ``_EOS``-suffixed name.
    """
    return (
        eos_frame.merge(file_frame, on=[key], how='outer', suffixes=['_EOS', None], indicator=True)
        .query("_merge != 'left_only'")
        .drop(columns='_merge')
    )


def migrate_uk_data(filepaths, multi=False):
    """Combine UK source-file data with EOS data and upsert the result into Rome.

    Steps, in order:
      1. Load the source files via ``get_file_data`` and fetch the matching EOS
         rows via ``get_eos_data`` (keyed on the file tours' ``EOSId__c``).
      2. Record tours whose EOS id occurs in more than one source file as
         issues, then de-duplicate the file tours.
      3. Merge each file table (``*__c1``) into its EOS counterpart.
      4. Write the prepared data and all collected issues to an Excel workbook.
      5. Upsert every object through ``session`` and write SUCCESS / ERROR
         result workbooks.

    Args:
        filepaths: Forwarded unchanged to ``get_file_data``.
        multi: Forwarded unchanged to ``get_file_data``.

    Returns:
        None. Output is the Excel workbooks and the upserts themselves.
    """
    issues = []
    filedata = get_file_data(filepaths, multi)
    offer_ids_list = filedata.Tour__c1.EOSId__c.tolist()
    data = ObjDict(get_eos_data(offer_ids_list))

    # keep=False marks *every* occurrence of a repeated EOS id, so each
    # offending source file shows up in the issue list.
    # .copy() so the 'Issue' column is added to an independent frame rather
    # than to a chained slice (avoids SettingWithCopyWarning).
    duplicate_tourids = filedata.Tour__c1.loc[
        filedata.Tour__c1.duplicated('EOSId__c', False),
        ['EOSId__c', 'Source_File'],
    ].copy()
    duplicate_tourids['Issue'] = 'Tour EOS Id in >1 Source File'
    issues.append(duplicate_tourids)
    filedata.Tour__c1.drop_duplicates(subset='EOSId__c', inplace=True)

    data.update(filedata)

    # Tours keep the '_merge' indicator column (outer merge, nothing filtered).
    data.Tour__c = data.Tour__c.merge(
        data.Tour__c1, on=['EOSId__c'], how='outer', suffixes=['_EOS', None], indicator=True
    )
    # Build the mapper once instead of once per row.
    record_type_for = session.record_type_id_mapper()
    data.Tour__c['RecordTypeId'] = data.Tour__c['RecordType.Name'].apply(
        lambda name: record_type_for('Tour__c', name).Id
    )

    data.TourDeal__c = _merge_keeping_file_rows(data.TourDeal__c, data.TourDeal__c1, 'SourceSystemId__c')
    data.TourLeg__c = _merge_keeping_file_rows(data.TourLeg__c, data.TourLeg__c1, 'SourceSystemId__c')

    # Default (inner) merges: only rows present in both EOS and the files remain.
    data.Event__c = data.Event__c.merge(
        data.Event__c1, on='EOSId__c', suffixes=['_EOS', None], indicator=True
    )
    data.Deal__c = data.Deal__c.merge(
        data.Deal__c1, on='Event__r.EOSId__c', suffixes=['_EOS', None], indicator=True
    )

    # The raw file tables have been folded into their EOS counterparts.
    for file_table in ('Tour__c1', 'TourDeal__c1', 'TourLeg__c1', 'Event__c1', 'Deal__c1'):
        del data[file_table]

    set_computed_fields(data)
    data.Issues = pd.concat(data.Issues + issues + migration_data_issues(data))
    pdh.to_excel(data, f'{folder}OUTPUT - UK Data to Migrate.xlsx')

    # Questions:
    # Add Deductions?

    # NOTE(review): `True or ...` short-circuits, so the confirmation prompt is
    # never shown and the migration always runs. Left as-is; confirm intent.
    if True or prompt('Ready to migrate data to Rome?', boolean=True):
        results = {}

        def upsert(object_name, frame, key_field):
            # Keep each object's upsert result under its object name.
            results[object_name] = session.upsert(object_name, frame, key_field)

        session.add_bypass_settings()
        try:
            upsert('Tour__c', data.Tour__c, 'EOSId__c')
            upsert('TourDeal__c', data.TourDeal__c, 'SourceSystemId__c')
            upsert('TourLeg__c', data.TourLeg__c, 'SourceSystemId__c')
            upsert('TourOnSale__c', data.TourOnSale__c, 'SourceSystemId__c')

            event_results = session.create_events(data, upsert_field_names={
                'Event__c': 'EOSId__c',
                'EventDateTime__c': 'SourceSystemId__c',
                'Deal__c': 'SourceSystemId__c',
                'TicketScale__c': 'EOSId__c',
                # 'Deduction__c': 'SourceSystemId__c',
                'LedgerEntry__c': 'SourceSystemId__c',
                'LedgerEntryBreakout__c': 'SourceSystemId__c',
            })

            # Event on-sales are upserted only after the events exist.
            upsert('EventOnSale__c', data.EventOnSale__c, 'SourceSystemId__c')
        finally:
            # Always undo the bypass settings, even when an upsert raises.
            session.remove_bypass_settings()

        success_results = {obj: result.success for obj, result in results.items()}
        success_results.update(event_results.success)
        error_results = {obj: result.errors for obj, result in results.items()}
        error_results.update(event_results.errors)

        # One sheet per object, with the row count in the sheet name.
        pdh.to_excel(
            {f'{key} ({len(val)})': val for key, val in success_results.items()},
            f'{folder}OUTPUT - UK Data Migration Results - SUCCESS.xlsx',
        )
        pdh.to_excel(
            {f'{key} ({len(val)})': val for key, val in error_results.items()},
            f'{folder}OUTPUT - UK Data Migration Results - ERROR.xlsx',
        )

    print('')
def _read_sql_file(path, where_clause=None):
    """Return the text of a SQL file, optionally splicing in a WHERE clause.

    The file is read as ``utf-8-sig`` so a leading BOM is dropped. When
    *where_clause* is given it replaces the ``QUERY_WHERE_CLAUSE_HERE``
    placeholder.
    """
    with open(path, 'r', encoding='utf-8-sig') as handle:
        text = handle.read()
    if where_clause is not None:
        text = text.replace("QUERY_WHERE_CLAUSE_HERE", where_clause)
    return text


def _unescape_text(value):
    """HTML-unescape *value* when it is a string; return anything else as-is."""
    return html.unescape(value) if isinstance(value, str) else value


def _object_frame(full_frame, marker, key_column, prefix_only=False):
    """Slice one object's columns out of the flat EOS frame, one row per key.

    Columns are chosen by *marker*: as a substring by default, or as a prefix
    when *prefix_only* is true (needed for ``Deal__c.``, which is also a
    substring of ``TourDeal__c.``).
    """
    if prefix_only:
        columns = [c for c in full_frame if c.startswith(marker)]
    else:
        columns = [c for c in full_frame if marker in c]
    return full_frame[columns].drop_duplicates(subset=key_column).copy()


def get_eos_data(offer_ids_list):
    """Query EOS for the given offer ids and split the result per Rome object.

    Runs the staging query, the main query and the tour / event on-sale
    queries against the ``EOS-prod`` connection, while ``EOSId__c`` lookups
    for User, Account and Contact are fetched from Rome through ``session``.
    Lookup ids in the EOS data that Rome does not have are reported as issues
    and blanked out.

    Args:
        offer_ids_list: Offer ids placed into ``WHERE o.Id IN (...)``. Each is
            converted with ``str()`` before joining.

    Returns:
        ObjDict with ``Issues`` (a list of DataFrames), one DataFrame each for
        ``Tour__c``, ``TourDeal__c``, ``TourLeg__c``, ``Event__c``,
        ``EventDateTime__c``, ``Deal__c`` and ``TicketScale__c``, plus
        ``TourOnSale__c`` and ``EventOnSale__c``.
    """
    data = ObjDict()
    sql = SQL_Server_API('EOS-prod')

    # str() so numeric ids (e.g. straight from a DataFrame column) can be joined.
    # NOTE(review): the ids are interpolated into SQL text unvalidated, and an
    # empty list yields `IN ()`, which is presumably invalid SQL - confirm the
    # caller always supplies trusted, non-empty ids.
    whereClause = """ WHERE o.Id IN ({}) """.format(
        ",".join(str(offer_id) for offer_id in offer_ids_list)
    )
    # -- AND o.RomeId IS NULL

    # Start the Rome lookups first; their results are only collected further
    # down, after the EOS queries have run.
    rome_data = ObjDict({
        'User': threading.new(
            session.select,
            "SELECT Id, EOSId__c FROM User WHERE EOSId__c <> NULL",
            return_type='dataframe', mute=True),
        'Account': threading.new(
            session.select,
            "SELECT Id, EOSId__c FROM Account WHERE EOSId__c <> NULL AND RecordType.Name IN ('Venue','Artist','Agency','Office')",
            return_type='dataframe', mute=True),
        'Contact': threading.new(
            session.select,
            "SELECT Id, EOSId__c FROM Contact WHERE EOSId__c <> NULL",
            return_type='dataframe', mute=True),
    })

    stageSQL = _read_sql_file(stageSQLFile, whereClause)
    querySQL = _read_sql_file(querySQLFile)
    tourOnSalesQuerySQL = _read_sql_file(tourOnSalesQuerySQLFile, whereClause)
    eventOnSalesQuerySQL = _read_sql_file(eventOnSalesQuerySQLFile, whereClause)

    print("EOS queries in progress")
    sql.query(stageSQL, cast_to_string=True)  # result unused; runs before the main query
    rows = sql.query(querySQL, cast_to_string=True)
    tourOnSales = sql.query(tourOnSalesQuerySQL, cast_to_string=True)
    eventOnSales = sql.query(eventOnSalesQuerySQL, cast_to_string=True)
    print("EOS queries complete")

    df1 = pd.DataFrame(rows)
    df2 = pd.DataFrame(tourOnSales)
    df3 = pd.DataFrame(eventOnSales)

    # Snapshot the raw tour name before unescaping.
    df1["Tour__c.OldTourName"] = df1["Tour__c.TourName__c"]
    # html.unescape must be applied per value: handed a whole Series it tests
    # '&' against the index and returns the Series untouched. The snapshot
    # column is skipped so it keeps the original text.
    for col in df1.columns:
        if col != "Tour__c.OldTourName":
            df1[col] = df1[col].map(_unescape_text)

    def set_blank_ts_type(row):
        # A blank ticket-scale type falls back to 'PL' + price level rank.
        if row['TicketScale__c.Type__c'] == '':
            return f'PL{row["TicketScale__c.PriceLevelRank"]}'
        return row['TicketScale__c.Type__c']

    df1['TicketScale__c.Type__c'] = df1.apply(set_blank_ts_type, axis=1)

    # Force id columns to strings and remove '.0'.
    # NOTE(review): candidate columns come from df1 and df2 only, so an id
    # column that exists solely in df3 is left untouched; and the pattern is
    # unanchored, so '.0' is removed anywhere in the value. Both kept as-is.
    id_columns = list(dict.fromkeys(
        c for c in list(df1.columns) + list(df2.columns)
        if 'EOSId' in c or 'SourceSystemId' in c
    ))
    for frame in (df1, df2, df3):
        for col in id_columns:
            if col in frame:
                frame[col] = frame[col].astype(str).replace(r'\.0', '', regex=True)

    data.Issues = []

    # Lookup columns checked against Rome: every '__r.EOSId__c' reference
    # except those pointing at tours, events and ticket bands.
    excluded_lookups = ('Tour__r.EOSId__c', 'Event__r.EOSId__c', 'TicketBand__r.EOSId__c')
    eosidcolumns = [
        c for c in df1.columns.values
        if '__r.EOSId__c' in c and not any(skip in c for skip in excluded_lookups)
    ]
    eosids = set(df1[eosidcolumns].to_numpy().flatten())

    romeeosids = set()
    for pending in rome_data.values():
        found = pending.result()
        if len(found) > 0:
            romeeosids.update(found['EOSId__c'].tolist())

    missingeosids = eosids - romeeosids - {''}

    # Explicit columns keep sort_values from raising KeyError when nothing is
    # missing and the frame would otherwise have no columns at all.
    data.Issues.append(
        pd.DataFrame(
            [{'Issue': 'Rome is missing Master Data', 'EOSId__c': item} for item in missingeosids],
            columns=['Issue', 'EOSId__c'],
        ).sort_values(by='EOSId__c')
    )

    # Blank every lookup that Rome does not have, in one pass.
    if missingeosids:
        df1[eosidcolumns] = df1[eosidcolumns].replace(list(missingeosids), '')

    # pdh.to_excel({
    #     'FULL': df1
    #     , 'Tour Onsales': df2
    #     , 'Event Onsales': df3
    # }, 'UK EOS Data Raw.xlsx')

    # (object name, de-duplication key column, match marker as prefix only?)
    object_specs = (
        ('Tour__c', 'Tour__c.EOSId__c', False),
        ('TourDeal__c', 'TourDeal__c.Tour__r.EOSId__c', False),
        ('TourLeg__c', 'TourLeg__c.Tour__r.EOSId__c', False),
        ('Event__c', 'Event__c.EOSId__c', False),
        ('EventDateTime__c', 'EventDateTime__c.SourceSystemId__c', False),
        ('Deal__c', 'Deal__c.SourceSystemId__c', True),
        ('TicketScale__c', 'TicketScale__c.EOSId__c', False),
    )
    for object_name, key_column, prefix_only in object_specs:
        setattr(data, object_name, _object_frame(df1, f'{object_name}.', key_column, prefix_only))

    data.TourOnSale__c = df2
    data.EventOnSale__c = df3

    for object_name, _key_column, _prefix_only in object_specs:
        rename(getattr(data, object_name), object_name)

    return data