def plotStoreDailyTrends(trainSet,storeData,storeID,savepath = None): thisStore = trainSet[trainSet['Store'] == storeID] thisStore = thisStore[thisStore['Open'] == 1] plt.figure() plt.violinplot( [thisStore[thisStore['DayOfWeek'] == dow]['Sales'] for dow in set(thisStore['DayOfWeek'])], showmeans=True) plt.boxplot( [thisStore[thisStore['DayOfWeek'] == dow]['Sales'] for dow in set(thisStore['DayOfWeek'])], notch=1) plt.xlabel('day of week') plt.ylabel('sales') storeCompetitionFlag = ~np.isnan(storeData[storeData['Store']==storeID]['CompetitionOpenSinceYear'].values) if storeCompetitionFlag: thisStore = thisStore
assert_less(spm_nmi_data[spm_perf_mask_data].mean(), my_nmi_data[my_perf_mask_data].mean()) import matplotlib.pylab as plt data = [ my_spearman_data[my_perf_mask_data].ravel(), spm_spearman_data[spm_perf_mask_data].ravel() ] data = [ 1. / my_nmi_data[my_perf_mask_data].ravel(), 1. / spm_nmi_data[spm_perf_mask_data].ravel() ] parts = plt.violinplot(data, showmeans=False, showmedians=False, showextrema=False) for pc in parts['bodies']: pc.set_facecolor('#D43F3A') pc.set_edgecolor('black') pc.set_alpha(1) sammba_quartile1, sammba_median, sammba_quartile3 = np.percentile( data[0], [25, 50, 75]) spm_quartile1, spm_median, spm_quartile3 = np.percentile( data[1], [25, 50, 75]) quartile1 = [sammba_quartile1, spm_quartile1] medians = [sammba_median, spm_median] quartile3 = [sammba_quartile3, spm_quartile3] whiskers = np.array([
s_pf ~ gamma(10, 10) } fit1 = pystan.stan(model_code=model, data={'N': len( dic_target_player), 'G': len(LW), 'LW': LW}, iter=1000, chains=4) la1 = fit1.extract() print(fit1) plt.figure(figsize=(15, 7)) colors = ['red', 'yellow', 'green', 'blue'] for i, player in enumerate(arr_target_player): for j in range(4): g = plt.violinplot(la1['mu'][j * 500:(j + 1) * 500, i], positions=[i], showmeans=False, showextrema=False, showmedians=False) for pc in g['bodies']: pc.set_facecolor(colors[j]) plt.legend(['chain 1', 'chain 2', 'chain 3', 'chain 4']) plt.xticks(list(range(len(arr_target_player))), arr_target_player) plt.xticks(rotation=45) plt.xlabel('player') plt.ylabel('mu') plt.show() plt.figure(figsize=(15, 7))
def _pack_object_array(sequences):
    """Return a 1-D object array holding each item of *sequences* as one element.

    Filling the array element by element keeps ragged (unequal-length) inner
    sequences intact instead of letting NumPy try to build a rectangular array.
    """
    packed = np.empty(len(sequences), dtype=object)
    for pos, seq in enumerate(sequences):
        packed[pos] = seq
    return packed


def plot(trained_sets, switched_sets, attr, labels, new_order_labels,
         trained_folder=None, switched_folder=None, auto_load=False,
         yscale='linear', ylim=None, save_addition='', xlim=None):
    """Plot the distribution of ``attr`` for trained and switched simulations.

    The values are gathered with ``load_plot_data`` for every
    (trained, switched) pair and cached in
    ``save/<switched_folder>figs/<attr>_boxplot.npz``. Three figures are then
    saved next to the cache and shown: a seaborn violin plot
    (``..._violin_df``), a jittered scatter plot (``..._scatter``) and a
    matplotlib violin plot (``..._violin_all``).

    Parameters
    ----------
    trained_sets, switched_sets
        Iterated pairwise; each pair is handed to ``load_plot_data``.
    attr
        Attribute name passed to ``load_plot_data``; also used in the output
        file names and as the y-axis label.
    labels
        Passed to ``create_DF`` together with the per-simulation values.
    new_order_labels
        Passed to ``reorder_df`` and used as x tick labels, one label per
        group of four consecutive series.
    trained_folder, switched_folder
        When ``trained_folder`` is not ``None`` both set lists are run through
        ``add_folder_name``. ``switched_folder`` is also part of the output
        path.
    auto_load
        When true and the cache file exists, values are read from the cache
        instead of being recomputed.
    yscale
        Y-axis scale applied to every figure.
    ylim, xlim
        Axis limits applied to the last (matplotlib violin) figure only.
    save_addition
        Suffix inserted into the figure file names.
    """
    # Every entry of new_order_labels covers this many consecutive series.
    series_per_label = 4

    # NOTE(review): indentation was lost in the source this was recovered
    # from; both calls are assumed to belong to this branch -- confirm.
    if trained_folder is not None:
        trained_sets = add_folder_name(trained_sets, trained_folder)
        switched_sets = add_folder_name(switched_sets, switched_folder)

    # `savefolder` is a file-name *prefix* (".../<attr>_"), not a directory.
    savefolder = 'save/{}figs/{}_'.format(switched_folder, attr)
    npz_name = 'save/{}figs/{}_boxplot.npz'.format(switched_folder, attr)

    if auto_load and path.isfile(npz_name):
        print('Loading: ' + npz_name)
        # The cache stores object arrays, which NumPy only unpickles when
        # allow_pickle=True; presumably this is why auto_load used to fail.
        # Only load caches this function wrote itself: unpickling untrusted
        # files can execute arbitrary code.
        with np.load(npz_name, allow_pickle=True) as cached:
            all_data = cached['all_data'].tolist()
    else:
        data = []
        all_data = []
        for trained_set, switched_set in zip(trained_sets, switched_sets):
            trained_vals, switched_vals = load_plot_data(
                trained_set, switched_set, attr)
            # One entry per single simulation ...
            all_data.extend(trained_vals)
            all_data.extend(switched_vals)
            # ... and one flattened entry per set.
            data.append([v for sim in trained_vals for v in sim])
            data.append([v for sim in switched_vals for v in sim])

        # Create the directory that holds the outputs, not a directory named
        # after the file prefix.
        out_dir = path.dirname(savefolder)
        if not path.isdir(out_dir):
            makedirs(out_dir)
        np.savez(npz_name,
                 all_data=_pack_object_array(all_data),
                 data=_pack_object_array(data))

    df, _ = create_DF(all_data, labels)
    df = reorder_df(df, new_order_labels)
    all_data_reordered = df_to_nested_list(df)

    colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
        '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
    ]
    violin_colors = create_violin_colors(colors)

    legend_elements = [
        Line2D([0], [0], marker='_', color='black', label='mean',
               markerfacecolor='g', markersize=10)
    ]
    label_ticks = np.arange(
        1, len(new_order_labels) * series_per_label + 1, series_per_label)

    # --- Figure 1: seaborn violin plot with the column means on top ---
    plt.figure(figsize=(25, 10))
    chart = sns.violinplot(data=df, width=0.8, inner='quartile',
                           scale='width', linewidth=0.05,
                           palette=violin_colors)
    df.mean().plot(style='_', c='black', ms=30)
    chart.set_xticklabels(chart.get_xticklabels(), rotation=70)
    plt.yscale(yscale)
    # NOTE(review): the upper limit is fixed at 20 here and `ylim` is ignored
    # for this figure -- kept as in the original, confirm it is intended.
    plt.gca().set_ylim(top=20)
    plt.legend(handles=legend_elements)
    plt.savefig('{}violin_df{}.png'.format(savefolder, save_addition),
                dpi=300, bbox_inches='tight')
    plt.show()

    # --- Figure 2: jittered scatter, one colour per group of series ---
    _, ax = plt.subplots()
    for i, d in enumerate(all_data_reordered):
        # Wrap around the palette so more than 40 series cannot overflow it.
        color = colors[(i // series_per_label) % len(colors)]
        jittered_x = i + np.random.random(size=len(d)) * 0.5
        ax.scatter(jittered_x, d, alpha=0.6, s=0.01, c=color)
    df.mean().plot(style='_', c='black', ms=7)
    ax.set_xticks(np.arange(len(all_data_reordered)))
    ax.set_yscale(yscale)
    plt.ylabel(attr)
    plt.xticks(label_ticks, new_order_labels, rotation=70)
    plt.legend(handles=legend_elements)
    plt.savefig('{}scatter{}.png'.format(savefolder, save_addition),
                dpi=300, bbox_inches='tight')
    plt.show()

    # --- Figure 3: matplotlib violin plot of every single simulation ---
    plt.figure(figsize=(20, 5))
    plt.violinplot(all_data_reordered, showmeans=True, showextrema=False,
                   widths=0.8)
    plt.xticks(label_ticks, new_order_labels, rotation=70)
    plt.yscale(yscale)
    plt.ylabel(attr)
    plt.ylim(ylim)
    plt.xlim(xlim)
    plt.savefig('{}violin_all{}.png'.format(savefolder, save_addition),
                dpi=300, bbox_inches='tight')
    plt.show()
def _salesByDayOfWeek(frame):
    """Return one ``Sales`` selection per distinct ``DayOfWeek`` in *frame*, in ascending day order."""
    return [frame[frame['DayOfWeek'] == dow]['Sales']
            for dow in sorted(set(frame['DayOfWeek']))]


def testCompetition():
    """Save a sales time-series figure for every store numbered above 688.

    Reads the module-level ``trainSet`` and ``storeData`` frames. For each
    selected store the type, assortment and competition distance are taken
    from ``storeData`` and encoded in the output path that is handed to
    ``plotStoresTimeSeries``.
    """
    # to look at the changes in daily stats due to competition
    # maybe establish some thresholds
    # plot time series for all the stores
    plt.figure(figsize=[20, 9])
    for storeNum in sorted(set(trainSet['Store'])):
        if storeNum <= 688:
            continue
        storeRow = storeData[storeData['Store'] == storeNum]
        storeType = storeRow['StoreType'].values[0]  # a,b,c,d
        storeAssortment = storeRow['Assortment'].values[0]  # a,b,c
        # Single-argument call form prints the same text on Python 2 and 3.
        print(str(storeNum) + ' type ' + storeType +
              ' assortment ' + storeAssortment)
        savePath = ('../figures/storeTimeseries/' +
                    'type_' + storeType + '_assortment_' + storeAssortment +
                    '/competition' +
                    str(storeRow['CompetitionDistance'].values[0].astype(int)) +
                    '_store' + str(storeNum) + '.jpg')
        plotStoresTimeSeries(trainSet, storeData, storeNum, savePath)


####
# tests on a single store 2015-10-21
####
# NOTE(review): indentation was lost in the source this was recovered from.
# The exploratory statements below are laid out as module-level code (they
# rely on the module-level trainSet / storeData) -- confirm against the
# original file.
storeNum = 1108
thisStore = trainSet[trainSet['Store'] == storeNum]
thisStore = thisStore[thisStore['Open'] == 1]
storeRow = storeData[storeData['Store'] == storeNum]
storeType = storeRow['StoreType'].values[0]
storeAssortment = storeRow['Assortment'].values[0]

# Sales distribution per day of week for the open days of this store.
salesPerDay = _salesByDayOfWeek(thisStore)
plt.figure()
plt.violinplot(salesPerDay, showmeans=True)
plt.boxplot(salesPerDay, notch=1)
plt.xlabel('day of week')
plt.ylabel('sales')

# Same view, split into promotion and non-promotion days (drawn on one axes).
promotedStore = thisStore[thisStore['Promo'] == 1]
unpromotedStore = thisStore[thisStore['Promo'] == 0]
promotedPerDay = _salesByDayOfWeek(promotedStore)
unpromotedPerDay = _salesByDayOfWeek(unpromotedStore)
plt.figure()
plt.violinplot(promotedPerDay, showmeans=True)
plt.violinplot(unpromotedPerDay, showmeans=True)
plt.boxplot(promotedPerDay, notch=1)
plt.boxplot(unpromotedPerDay, notch=1)
plt.xlabel('day of week')
plt.ylabel('sales')

# Matrix of DayOfWeek == 1 sales: one row per store, one column per calendar
# day; days without a matching row become NaN through reindex().
idx = pd.date_range(dt.datetime(2013, 1, 1), dt.datetime(2015, 7, 31),
                    freq='D')
allStoreIDs = sorted(set(trainSet['Store']))
salesMatDay1 = np.zeros([len(allStoreIDs), len(idx)])
for irow, storeID in enumerate(allStoreIDs):
    thisStore = trainSet[trainSet['Store'] == storeID]
    theseSales = thisStore[thisStore['DayOfWeek'] == 1].Sales
    salesMatDay1[irow, :] = theseSales.reindex(idx).values

# Per-day-of-week statistics of the last store handled by the loop above;
# the count is immediately replaced by the standard deviation.
dowpd = thisStore.groupby(thisStore['DayOfWeek']).count()
dowpd = thisStore.groupby(thisStore['DayOfWeek']).std()
# Scratch expression (result unused): day-3 minus day-6 sales, first 134 rows.
thisStore[thisStore['DayOfWeek'] == 3]['Sales'].values[:134] - \
    thisStore[thisStore['DayOfWeek'] == 6]['Sales'].values[:134]

storeID = 2


def plotStoresScatterByIndicator(trainSet):
    """Scatter customers against sales for store 1, marking flagged days.

    Only rows with ``Open == 1`` are drawn. All of them are plotted as black
    dots; rows with ``Promo``, ``StateHoliday`` or ``SchoolHoliday`` equal to
    1 are over-plotted with their own marker. The figure is saved to
    ``../figures/storeScatters/byIndicator<storeID>``.
    """
    storeID = 1
    thisStore = trainSet[trainSet['Store'] == storeID]
    openDays = thisStore[thisStore['Open'] == 1]

    plt.figure(figsize=[15, 15])
    plt.plot(openDays['Customers'], openDays['Sales'], 'k.')
    for column, marker in (('Promo', 'rs'),
                           ('StateHoliday', 'go'),
                           ('SchoolHoliday', 'bd')):
        flagged = openDays[openDays[column] == 1]
        plt.plot(flagged['Customers'], flagged['Sales'], marker)
    plt.xlabel('customers')
    # The original called xlabel twice, leaving the y axis unlabelled.
    plt.ylabel('sales')
    plt.legend(['no indicator', 'promotion', 'state holliday',
                'school holiday'])
    plt.title('customers and sales by indicator')
    plt.savefig('../figures/storeScatters/byIndicator' + str(storeID))


def plotStoresScatterByDay():
    """Scatter customers against sales for store 1, one series per weekday.

    Reads the module-level ``trainSet``; only rows with ``Open == 1`` are
    drawn. The figure is saved to ``../figures/storeScatters/byDay<storeID>``.
    """
    storeID = 1
    thisStore = trainSet[trainSet['Store'] == storeID]
    openDays = thisStore[thisStore['Open'] == 1]
    days = np.arange(1, 8)

    plt.figure(figsize=[30, 10])
    for day in days:
        onDay = openDays[openDays['DayOfWeek'] == day]
        plt.plot(onDay['Customers'], onDay['Sales'], '.')
    plt.xlabel('customers')
    # The original called xlabel twice, leaving the y axis unlabelled.
    plt.ylabel('sales')
    plt.legend(['Day' + str(day) for day in days])
    # The original title was copy-pasted from the by-indicator plot.
    plt.title('customers and sales by day of week')
    plt.savefig('../figures/storeScatters/byDay' + str(storeID))


# Per-store summary statistics and histograms of the per-store sales.
# NOTE(review): assumed to be module-level exploratory code -- confirm.
grouptedRateMedian = trainSet.groupby(trainSet.Store).median()
plt.hist(grouptedRateMedian['Sales'].values, 100)
grouptedRateMean = trainSet.groupby(trainSet.Store).mean()
plt.hist(grouptedRateMean['Sales'].values, 100)
grouptedRateStd = trainSet.groupby(trainSet.Store).std()
# The original plotted the mean a second time here instead of the std.
plt.hist(grouptedRateStd['Sales'].values, 100)
groupRatedCount = trainSet.groupby(trainSet.Store).count()
# Standard error of the mean sales per store.
grouptedRateMean['ste'] = (grouptedRateStd['Sales'] /
                           np.sqrt(groupRatedCount['Sales']))


class Store(object):
    """Rows and metadata belonging to a single store.

    Attributes
    ----------
    storeIndx
        The store identifier the instance was built for.
    data
        Rows of ``fullDataFrame`` whose ``Store`` equals ``storeIndx``.
    daysFrom2014
        For each index entry of ``data``, its ordinal day minus the ordinal
        of 2014-01-01 (``toordinal()`` is called on every index entry).
    storeType, assortment, promo2Flag, promoStartWeek, promoStartYear,
    promoInterval, competitionDistance, competitionStartMonth,
    competitionStartYear
        The matching ``storeData`` column restricted to this store. These are
        pandas selections, not scalars.
    """

    def __init__(self, fullDataFrame, storeData, storeIndx):
        self.storeIndx = storeIndx
        self.data = fullDataFrame[fullDataFrame['Store'] == storeIndx]
        # Original author's note on the data columns: sales, customers,
        # openFlag, promo, stateHoliday, schoolHoliday, dayOfWeek, timeStamps
        origin = dt.datetime(2014, 1, 1).toordinal()
        self.daysFrom2014 = [tstamp.toordinal() - origin
                             for tstamp in self.data.index]

        # Select this store's metadata once instead of once per attribute.
        storeRow = storeData[storeData['Store'] == storeIndx]
        self.storeType = storeRow['StoreType']
        self.assortment = storeRow['Assortment']
        self.promo2Flag = storeRow['Promo2']
        self.promoStartWeek = storeRow['Promo2SinceWeek']
        self.promoStartYear = storeRow['Promo2SinceYear']
        self.promoInterval = storeRow['PromoInterval']
        self.competitionDistance = storeRow['CompetitionDistance']
        self.competitionStartMonth = storeRow['CompetitionOpenSinceMonth']
        self.competitionStartYear = storeRow['CompetitionOpenSinceYear']
        # TODO: missingTime -- see scipy.signal.lombscargle. This was a bare
        # name in the original and would have raised NameError when executed.
for bin in range(binSize, windowSize, binSize) ] for bin in range(binSize, windowSize, binSize): l = [] for i in G4perATcontent[bin:bin + binSize]: l += i G4perATbin.append(l if l else [0]) if includeRandomizedWindows: rG4perATbin = [] for bin in range(binSize, windowSize, binSize): l = [] for i in rG4perATcontent[bin:bin + binSize]: l += i rG4perATbin.append(l if l else [0]) #plt.violinplot(G4perATbin,positions=ticks) plt.violinplot(G4perATbin) plt.title('G4s per AT content in {} bp windows'.format(windowSize)) plt.xlabel('AT content (%)') plt.ylabel('# G4 (min GG.G tracts)') plt.xticks(range(1, len(ticks) + 1), ticks) for i in range(len(G4perATbin)): g4, counts = np.unique(G4perATbin[i], return_counts=True) plt.scatter([i + 1] * g4.shape[0], g4, s=(1 + 30 * np.log10(counts)), c=counts, alpha=0.7) plt.show(block=False)