Пример #1
0
def plotStoreDailyTrends(trainSet, storeData, storeID, savepath=None):
    """Plot the per-weekday sales distribution of one store on its open days.

    A violin plot and a notched box plot of ``Sales`` grouped by
    ``DayOfWeek`` are drawn on top of each other in a new figure.

    Parameters
    ----------
    trainSet : DataFrame
        Must provide the columns ``Store``, ``Open``, ``DayOfWeek`` and
        ``Sales``.
    storeData : DataFrame
        Must provide the columns ``Store`` and ``CompetitionOpenSinceYear``.
    storeID
        Value matched against the ``Store`` column of both frames.
    savepath : optional
        Accepted for interface compatibility; not used by this code.
    """
    storeRows = trainSet[trainSet['Store'] == storeID]
    thisStore = storeRows[storeRows['Open'] == 1]

    # Build the weekday groups once, in sorted order, and reuse them for both
    # plots so the two layers are guaranteed to line up.
    days = sorted(set(thisStore['DayOfWeek']))
    salesByDay = [thisStore[thisStore['DayOfWeek'] == dow]['Sales']
                  for dow in days]

    plt.figure()
    if days:
        # Place every group at its actual weekday number; otherwise a store
        # with a missing weekday would have its groups shifted to 1..n and
        # the 'day of week' axis would be mislabeled.
        plt.violinplot(salesByDay, positions=days, showmeans=True)
        plt.boxplot(salesByDay, positions=days, notch=1)
    plt.xlabel('day of week')
    plt.ylabel('sales')

    # Reduce the lookup to a single bool: the raw result is an array whose
    # truth value is ambiguous when the store matches zero or several rows.
    competitionYear = storeData[storeData['Store'] == storeID][
        'CompetitionOpenSinceYear'].values
    storeCompetitionFlag = bool((~np.isnan(competitionYear)).any())

    if storeCompetitionFlag:
        # NOTE(review): the original branch was a no-op (`thisStore = thisStore`);
        # the competition-specific handling appears to be unfinished.
        pass
Пример #2
0
    assert_less(spm_nmi_data[spm_perf_mask_data].mean(),
                my_nmi_data[my_perf_mask_data].mean())

    import matplotlib.pylab as plt

    data = [
        my_spearman_data[my_perf_mask_data].ravel(),
        spm_spearman_data[spm_perf_mask_data].ravel()
    ]

    data = [
        1. / my_nmi_data[my_perf_mask_data].ravel(),
        1. / spm_nmi_data[spm_perf_mask_data].ravel()
    ]
    parts = plt.violinplot(data,
                           showmeans=False,
                           showmedians=False,
                           showextrema=False)

    for pc in parts['bodies']:
        pc.set_facecolor('#D43F3A')
        pc.set_edgecolor('black')
        pc.set_alpha(1)

    sammba_quartile1, sammba_median, sammba_quartile3 = np.percentile(
        data[0], [25, 50, 75])
    spm_quartile1, spm_median, spm_quartile3 = np.percentile(
        data[1], [25, 50, 75])
    quartile1 = [sammba_quartile1, spm_quartile1]
    medians = [sammba_median, spm_median]
    quartile3 = [sammba_quartile3, spm_quartile3]
    whiskers = np.array([
Пример #3
0
    s_pf ~ gamma(10, 10)
}

# Fit the Stan model held in `model` (1000 iterations, 4 chains), extract the
# samples and print the fit summary.
fit1 = pystan.stan(model_code=model, data={'N': len(
    dic_target_player), 'G': len(LW), 'LW': LW}, iter=1000, chains=4)
# NOTE(review): PyStan 2's `extract()` defaults to `permuted=True`, which
# merges and shuffles the chains; if that applies here, the 500-row slices
# taken below are not per-chain samples -- confirm against the PyStan version.
la1 = fit1.extract()
print(fit1)

plt.figure(figsize=(15, 7))

# One face colour per slice index j (labelled 'chain 1'..'chain 4' below).
colors = ['red', 'yellow', 'green', 'blue']

# For each player (column i of la1['mu']) draw four bare violins at x = i,
# one per consecutive block of 500 rows.
for i, player in enumerate(arr_target_player):
    for j in range(4):

        g = plt.violinplot(la1['mu'][j * 500:(j + 1) * 500, i], positions=[i], showmeans=False, showextrema=False,
                           showmedians=False)

        # Colour the violin body according to the slice it came from.
        for pc in g['bodies']:
            pc.set_facecolor(colors[j])

plt.legend(['chain 1', 'chain 2', 'chain 3', 'chain 4'])

# Label the x positions with the player names, rotated for readability.
plt.xticks(list(range(len(arr_target_player))), arr_target_player)
plt.xticks(rotation=45)

plt.xlabel('player')
plt.ylabel('mu')
plt.show()

# Open a fresh figure for whatever is plotted next.
plt.figure(figsize=(15, 7))
def plot(trained_sets,
         switched_sets,
         attr,
         labels,
         new_order_labels,
         trained_folder=None,
         switched_folder=None,
         auto_load=False,
         yscale='linear',
         ylim=None,
         save_addition='',
         xlim=None):
    """Plot violin and scatter summaries of ``attr`` for trained/switched runs.

    Three figures are produced, saved as PNG files and shown:

    1. a seaborn violin plot of the reordered data frame,
    2. a jittered scatter plot with one column per data series,
    3. a matplotlib violin plot of the reordered series.

    All output files share the prefix ``save/<switched_folder>figs/<attr>_``.

    Parameters
    ----------
    trained_sets, switched_sets
        Paired collections; each pair is handed to ``load_plot_data``.
    attr
        Attribute name; passed to ``load_plot_data``, used in the output file
        names and as the y-axis label.
    labels
        Passed to ``create_DF`` together with the collected data.
    new_order_labels
        Passed to ``reorder_df``; also used as tick labels, one label for
        every fourth x position.
    trained_folder, switched_folder
        When ``trained_folder`` is not ``None`` both set collections are run
        through ``add_folder_name``. ``switched_folder`` is also part of the
        output path.
    auto_load
        If true and the cached ``.npz`` archive exists, the per-simulation
        data is read from it instead of being recomputed.
    yscale
        Y-axis scale applied to all three figures.
    ylim, xlim
        Axis limits applied to the final violin figure only.
    save_addition
        Suffix inserted before ``.png`` in every figure file name.
    """
    if trained_folder is not None:
        trained_sets = add_folder_name(trained_sets, trained_folder)
        switched_sets = add_folder_name(switched_sets, switched_folder)

    # `savefolder` is a file-name *prefix* (it ends in "<attr>_"), not a
    # directory; the directory that must exist is its parent.
    savefolder = 'save/{}figs/{}_'.format(switched_folder, attr)
    npz_name = 'save/{}figs/{}_boxplot.npz'.format(switched_folder, attr)

    output_dir = path.dirname(savefolder)
    if not path.exists(output_dir):
        makedirs(output_dir)

    if auto_load and path.isfile(npz_name):
        txt = 'Loading: ' + npz_name
        print(txt)
        # The cached series can have different lengths, in which case NumPy
        # stores them as object arrays; those can only be read back with
        # allow_pickle=True. Pickle is only acceptable here because the
        # archive is written by this very function -- do not point this at
        # files from untrusted sources.
        with np.load(npz_name, allow_pickle=True) as cached:
            all_data = cached['all_data']
    else:
        data = []
        all_data = []
        for trained_set, switched_set in zip(trained_sets, switched_sets):
            trained_vals, switched_vals = load_plot_data(
                trained_set, switched_set, attr)

            # Keep every single simulation as its own series ...
            all_data.extend(trained_vals)
            all_data.extend(switched_vals)

            # ... and additionally the concatenation of all simulations.
            data.append([value for sim in trained_vals for value in sim])
            data.append([value for sim in switched_vals for value in sim])

        # Only freshly computed data is cached; re-saving what was just
        # loaded would be a pointless rewrite of the same archive.
        # NOTE(review): recent NumPy versions refuse to build arrays from
        # ragged nested lists implicitly -- verify this call if the series
        # lengths differ.
        np.savez(npz_name, all_data=all_data, data=data)

    df, _ = create_DF(all_data, labels)
    df = reorder_df(df, new_order_labels)
    all_data_reordered = df_to_nested_list(df)

    colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]
    violin_colors = create_violin_colors(colors)

    # Legend entry explaining the black '_' markers (the per-series mean).
    legend_elements = [
        Line2D([0], [0],
               marker='_',
               color='black',
               label='mean',
               markerfacecolor='g',
               markersize=10)
    ]

    # One tick label per group of four consecutive series.
    group_ticks = np.arange(1, len(new_order_labels) * 4 + 1, 4)

    # --- Figure 1: seaborn violin plot of the reordered data frame ---------
    plt.figure(figsize=(25, 10))
    chart = sns.violinplot(data=df,
                           width=0.8,
                           inner='quartile',
                           scale='width',
                           linewidth=0.05,
                           palette=violin_colors)
    df.mean().plot(style='_', c='black', ms=30)
    chart.set_xticklabels(chart.get_xticklabels(), rotation=70)
    plt.yscale(yscale)
    # NOTE(review): the upper limit is hard-coded here; `ylim` is only
    # honoured by the last figure.
    plt.gca().set_ylim(top=20)
    plt.legend(handles=legend_elements)
    plt.savefig('{}violin_df{}.png'.format(savefolder, save_addition),
                dpi=300,
                bbox_inches='tight')
    plt.show()

    # --- Figure 2: jittered scatter, one column per series ------------------
    _, ax = plt.subplots()
    for i, d in enumerate(all_data_reordered):
        # Four consecutive series share a colour; wrap around instead of
        # running off the end of the palette when there are many series.
        color = colors[(i // 4) % len(colors)]
        noisy_x = i + np.random.random(size=len(d)) * 0.5
        ax.scatter(noisy_x, d, alpha=0.6, s=0.01, c=color)

    mean_series = df.mean()
    mean_series.plot(style='_', c='black', ms=7)

    ax.set_yscale(yscale)
    plt.ylabel(attr)
    plt.xticks(group_ticks, new_order_labels, rotation=70)
    plt.legend(handles=legend_elements)
    plt.savefig('{}scatter{}.png'.format(savefolder, save_addition),
                dpi=300,
                bbox_inches='tight')
    plt.show()

    # --- Figure 3: matplotlib violin plot of the reordered series -----------
    plt.figure(figsize=(20, 5))
    plt.violinplot(all_data_reordered,
                   showmeans=True,
                   showextrema=False,
                   widths=0.8)
    plt.xticks(group_ticks, new_order_labels, rotation=70)
    plt.yscale(yscale)
    plt.ylabel(attr)
    plt.ylim(ylim)
    plt.xlim(xlim)
    plt.savefig('{}violin_all{}.png'.format(savefolder, save_addition),
                dpi=300,
                bbox_inches='tight')
    plt.show()
Пример #5
0
def testCompetition():
    """Placeholder for analysing the effect of competition on a store.

    Meant to look at the changes in daily statistics due to competition and
    maybe establish some thresholds. Not implemented yet: the function does
    nothing and returns ``None``.
    """





# Plot and save the time series of every store whose number is above 688.
plt.figure(figsize=[20,9])
for storeNum in set(trainSet['Store']):
    if storeNum > 688:

        # Look the store's metadata row up once and reuse it below.
        storeInfo = storeData[storeData['Store'] == storeNum]
        storeType = storeInfo['StoreType'].values[0]
        # presumably one of a, b, c, d
        storeAssortment = storeInfo['Assortment'].values[0]
        # presumably one of a, b, c

        # Written as a call with a single argument so that the line is valid
        # on Python 3 and prints the same text on Python 2.
        print(str(storeNum) + ' type ' + storeType + ' assortment ' + storeAssortment)

        # Figures are grouped into one folder per type/assortment pair; the
        # file name carries the competition distance and the store number.
        savePath = '../figures/storeTimeseries/' + \
                   'type_' + storeType + '_assortment_' + storeAssortment + \
                   '/competition' + \
                   str(storeInfo['CompetitionDistance'].values[0].astype(int)) + \
                   '_store' + str(storeNum) + '.jpg'
        plotStoresTimeSeries(trainSet,storeData,storeNum,savePath)




####
# Exploratory plots for a single store (2015-10-21)
####
def _salesPerWeekday(frame):
    """Return one ``Sales`` Series per distinct ``DayOfWeek`` value of ``frame``."""
    return [frame[frame['DayOfWeek'] == weekday]['Sales']
            for weekday in set(frame['DayOfWeek'])]


storeNum = 1108

# Records of the store, restricted to the days it was open.
thisStore = trainSet[trainSet['Store'] == storeNum]
thisStore = thisStore[thisStore['Open'] == 1]

_storeRow = storeData[storeData['Store'] == storeNum]
storeType = _storeRow['StoreType'].values[0]
storeAssortment = _storeRow['Assortment'].values[0]

# Figure 1: sales per weekday, violin plot overlaid with a notched box plot.
_allDays = _salesPerWeekday(thisStore)
plt.figure()
plt.violinplot(_allDays, showmeans=True)
plt.boxplot(_allDays, notch=1)
plt.xlabel('day of week')
plt.ylabel('sales')

# Figure 2: the same, split into promotion and non-promotion days; violins
# are drawn first, then the box plots, promoted before unpromoted each time.
promotedStore = thisStore[thisStore['Promo'] == 1]
unpromotedStore = thisStore[thisStore['Promo'] == 0]
_promotedDays = _salesPerWeekday(promotedStore)
_unpromotedDays = _salesPerWeekday(unpromotedStore)

plt.figure()
for _groups in (_promotedDays, _unpromotedDays):
    plt.violinplot(_groups, showmeans=True)
for _groups in (_promotedDays, _unpromotedDays):
    plt.boxplot(_groups, notch=1)
plt.xlabel('day of week')
plt.ylabel('sales')










# Build a (stores x days) matrix holding each store's sales on days with
# DayOfWeek == 1, laid out on a daily calendar from 2013-01-01 to 2015-07-31.
idx = pd.date_range(dt.datetime(2013,1,1,00,00,00),dt.datetime(2015,7,31,00,00,00),freq = 'D')
salesMatDay1 = np.zeros([len(set(trainSet['Store'])), len(idx)])
for irow, storeID in enumerate(set(trainSet['Store'])):
    thisStore = trainSet[trainSet['Store'] == storeID]
    theseSales = thisStore[thisStore['DayOfWeek'] == 1].Sales
    # Align to the daily calendar; dates without a matching row become NaN.
    # assumes trainSet is indexed by date -- TODO confirm
    theseSales = theseSales.reindex(idx)
    salesMatDay1[irow,:] = theseSales.values

# Per-weekday statistics of the last store visited by the loop above.
# NOTE(review): the count() result is overwritten immediately by std().
dowpd = thisStore.groupby(thisStore['DayOfWeek']).count()
dowpd = thisStore.groupby(thisStore['DayOfWeek']).std()

# NOTE(review): the difference between the first 134 sales values of weekday 3
# and weekday 6 is computed but not stored -- looks like interactive scratch work.
thisStore[thisStore['DayOfWeek'] == 3]['Sales'].values[:134]-thisStore[thisStore['DayOfWeek'] == 6]['Sales'].values[:134]

storeID = 2

def plotStoresScatterByIndicator(trainSet):
    """Scatter customers against sales for store 1, marking indicator days.

    All open days are drawn as black dots; days with ``Promo``,
    ``StateHoliday`` or ``SchoolHoliday`` equal to 1 are drawn again on top
    with their own marker. The figure is saved under
    ``../figures/storeScatters/``.

    Parameters
    ----------
    trainSet : DataFrame
        Must provide the columns ``Store``, ``Open``, ``Customers``,
        ``Sales``, ``Promo``, ``StateHoliday`` and ``SchoolHoliday``.
    """
    storeID = 1
    thisStore = trainSet[trainSet['Store'] == storeID]
    # Every series below is restricted to open days, so filter once.
    openDays = thisStore[thisStore['Open'] == 1]

    plt.figure(figsize=[15,15])
    plt.plot(openDays['Customers'], openDays['Sales'], 'k.')
    # Order matters: it has to match the legend entries below.
    for column, marker in (('Promo', 'rs'),
                           ('StateHoliday', 'go'),
                           ('SchoolHoliday', 'bd')):
        flagged = openDays[openDays[column] == 1]
        plt.plot(flagged['Customers'], flagged['Sales'], marker)

    plt.xlabel('customers')
    # Sales are on the y axis; the original set the x label twice, which
    # overwrote 'customers' and left the y axis unlabeled.
    plt.ylabel('sales')
    plt.legend(['no indicator','promotion','state holliday','school holiday'])
    plt.title('customers and sales by indicator')
    plt.savefig('../figures/storeScatters/byIndicator' + str(storeID))

def plotStoresScatterByDay():
    """Scatter customers against sales for store 1, one series per weekday.

    Reads the module-level ``trainSet`` frame, keeps the open days of store 1
    and plots one dot series for each ``DayOfWeek`` value from 1 to 7. The
    figure is saved under ``../figures/storeScatters/``.
    """
    storeID = 1
    thisStore = trainSet[trainSet['Store'] == storeID]
    # Every series below is restricted to open days, so filter once.
    openDays = thisStore[thisStore['Open'] == 1]

    plt.figure(figsize=[30,10])

    weekdays = range(1, 8)
    for day in weekdays:
        dayRows = openDays[openDays['DayOfWeek'] == day]
        plt.plot(dayRows['Customers'], dayRows['Sales'], '.')

    plt.xlabel('customers')
    # Sales are on the y axis; the original set the x label twice, which
    # overwrote 'customers' and left the y axis unlabeled.
    plt.ylabel('sales')

    plt.legend(['Day' + str(day) for day in weekdays])
    # The title used to read 'by indicator', copied from the sibling plot.
    plt.title('customers and sales by day of week')
    plt.savefig('../figures/storeScatters/byDay' + str(storeID))


# Per-store summary statistics of the training set, with a 100-bin histogram
# of the Sales column for each of median, mean and standard deviation.
salesByStore = trainSet.groupby(trainSet.Store)

grouptedRateMedian = salesByStore.median()
plt.hist(grouptedRateMedian['Sales'].values,100)
grouptedRateMean = salesByStore.mean()
plt.hist(grouptedRateMean['Sales'].values,100)
grouptedRateStd = salesByStore.std()
# Plot the standard deviation that was just computed; the original plotted
# the mean a second time here.
plt.hist(grouptedRateStd['Sales'].values,100)
groupRatedCount = salesByStore.count()

# Standard error of the mean sales per store: std / sqrt(n).
grouptedRateMean['ste'] = grouptedRateStd['Sales']/np.sqrt(groupRatedCount['Sales'])


class Store(object):
    """Bundle the daily records of one store with its metadata.

    Every metadata attribute is the one-column selection of the store's
    row(s) in ``storeData``, i.e. a pandas Series rather than a scalar.
    """

    def __init__(self,fullDataFrame,storeData,storeIndx):
        """Select the rows of store ``storeIndx`` from both frames.

        Parameters
        ----------
        fullDataFrame : DataFrame
            Daily records with a ``Store`` column; its index entries must
            provide ``toordinal()`` (e.g. a DatetimeIndex).
        storeData : DataFrame
            Store metadata with the columns ``Store``, ``StoreType``,
            ``Assortment``, ``Promo2``, ``Promo2SinceWeek``,
            ``Promo2SinceYear``, ``PromoInterval``, ``CompetitionDistance``,
            ``CompetitionOpenSinceMonth`` and ``CompetitionOpenSinceYear``.
        storeIndx
            Value matched against the ``Store`` column of both frames.
        """
        self.storeIndx = storeIndx

        # Daily records of this store. Presumably: sales, customers, openFlag,
        # promo, stateHoliday, schoolHoliday, dayOfWeek, with the timestamps
        # as index -- verify against the loader.
        self.data = fullDataFrame[fullDataFrame['Store'] == storeIndx]

        # Day offset of every record relative to 2014-01-01.
        dayZero = dt.datetime(2014,1,1).toordinal()
        self.daysFrom2014 = [tstamp.toordinal() - dayZero
                             for tstamp in self.data.index]

        # Select the store's metadata once instead of once per attribute.
        storeRow = storeData[storeData['Store'] == storeIndx]

        self.storeType = storeRow['StoreType']
        self.assortment = storeRow['Assortment']

        self.promo2Flag = storeRow['Promo2']
        self.promoStartWeek = storeRow['Promo2SinceWeek']
        self.promoStartYear = storeRow['Promo2SinceYear']
        self.promoInterval = storeRow['PromoInterval']

        self.competitionDistance = storeRow['CompetitionDistance']
        self.competitionStartMonth = storeRow['CompetitionOpenSinceMonth']
        self.competitionStartYear = storeRow['CompetitionOpenSinceYear']

        # TODO: handle missing time stamps. The original had a bare
        # `missingTime` name here, which raised NameError on every
        # construction. Candidate tool noted by the author:
        # scipy.signal.lombscargle
Пример #6
0
        for bin in range(binSize, windowSize, binSize)
    ]
    for bin in range(binSize, windowSize, binSize):
        l = []
        for i in G4perATcontent[bin:bin + binSize]:
            l += i
        G4perATbin.append(l if l else [0])
    if includeRandomizedWindows:
        rG4perATbin = []
        for bin in range(binSize, windowSize, binSize):
            l = []
            for i in rG4perATcontent[bin:bin + binSize]:
                l += i
            rG4perATbin.append(l if l else [0])

    #plt.violinplot(G4perATbin,positions=ticks)
    plt.violinplot(G4perATbin)
    plt.title('G4s per AT content in {} bp windows'.format(windowSize))
    plt.xlabel('AT content (%)')
    plt.ylabel('# G4 (min GG.G tracts)')
    plt.xticks(range(1, len(ticks) + 1), ticks)
    for i in range(len(G4perATbin)):
        g4, counts = np.unique(G4perATbin[i], return_counts=True)
        plt.scatter([i + 1] * g4.shape[0],
                    g4,
                    s=(1 + 30 * np.log10(counts)),
                    c=counts,
                    alpha=0.7)

    plt.show(block=False)