Exemplo n.º 1
0
def q03_stacked_point_plot(path,
                           x_column_name='month',
                           y_column_name='Sales',
                           hue='year',
                           order_of_the_axis=[
                               'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
                               'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
                           ]):
    """Show a point plot of the training split with one line per hue level.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column whose distinct values become separate lines.
        order_of_the_axis: Order of the x-axis categories (only read, never
            mutated here).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    X_train, _ = q02_data_splitter(path)

    # Derive the columns under the names the default arguments refer to.
    # 'Month' must be datetime-like because the ``.dt`` accessor is used.
    X_train['year'] = X_train['Month'].dt.year
    X_train['month'] = X_train['Month'].dt.strftime('%b')

    plt.figure(figsize=(16, 4))
    # Keyword arguments only: positional x/y are rejected by newer seaborn.
    sns.pointplot(x=x_column_name,
                  y=y_column_name,
                  hue=hue,
                  data=X_train,
                  order=order_of_the_axis)
    plt.ylabel(y_column_name)
    plt.xlabel(x_column_name)
    plt.legend(loc='upper left')
    plt.show()
Exemplo n.º 2
0
def q03_stacked_point_plot(path,
                           x_column_name='month',
                           y_column_name='Sales',
                           hue='year',
                           order_of_the_axis=[
                               'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
                               'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
                           ]):
    """Draw a point plot of the training split with one line per hue level.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column whose distinct values become separate lines.
        order_of_the_axis: Order of the x-axis categories (only read, never
            mutated here).

    Returns:
        None. The plot is drawn on a new 16x7 figure; ``plt.show()`` is not
        called.
    """
    x_train, _ = q02_data_splitter(path)
    # 'Month' must be datetime-like because the ``.dt`` accessor is used.
    x_train['year'] = x_train['Month'].dt.year
    x_train['month'] = x_train['Month'].dt.strftime('%b')

    plt.figure(figsize=(16, 7))
    # ``order`` is the pointplot keyword for category order; ``x_order`` is
    # not accepted.
    sns.pointplot(x=x_column_name,
                  y=y_column_name,
                  hue=hue,
                  data=x_train,
                  order=order_of_the_axis)
    plt.xlabel(x_column_name)
    plt.ylabel('$ millions')
    plt.title('Sales')
    plt.legend(loc='upper right')
Exemplo n.º 3
0
def q04_boxplot(path,
                x='month',
                y='Sales',
                kind='box',
                order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug','Sep', 'Oct', 'Nov', 'Dec'],
                size=8):
    """Draw a categorical plot (box plot by default) of the training split.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column used for the categories on the x axis.
        y: Column whose distribution is plotted.
        kind: Plot kind passed to seaborn.
        order_of_the_axis: Order of the x-axis categories (only read).
        size: Height of the facet, in inches.

    Returns:
        None. ``plt.show()`` is not called.
    """
    X_train, _ = q02_data_splitter(path)
    # 'Month' must be datetime-like because the ``.dt`` accessor is used.
    X_train['year'] = X_train['Month'].dt.year
    X_train['month'] = X_train['Month'].dt.strftime('%b')

    # No plt.figure() here: the figure-level seaborn call creates its own
    # figure, so a manually opened one would just stay empty.
    plot_kwargs = dict(x=x, y=y, data=X_train, kind=kind,
                       order=order_of_the_axis,
                       aspect=16.0 / 7)  # true division on Python 2 as well
    if hasattr(sns, 'catplot'):
        # Newer seaborn: factorplot became catplot and ``size`` became
        # ``height``.
        sns.catplot(height=size, **plot_kwargs)
    else:
        sns.factorplot(size=size, **plot_kwargs)
    plt.xlabel('month')
    plt.ylabel('Sales')
Exemplo n.º 4
0
def q03_stacked_point_plot(path,x_column_name='month',y_column_name='Sales',hue='year',order_of_the_axis=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']):
    """Show a point plot of the training split with one line per hue level.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column whose distinct values become separate lines.
        order_of_the_axis: Order of the x-axis categories (only read).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    X_train, _ = q02_data_splitter(path)
    # 'Month' must be datetime-like because the ``.dt`` accessor is used.
    X_train['month'] = X_train['Month'].dt.strftime('%b')
    X_train['year'] = X_train['Month'].dt.year
    # ``order`` is the pointplot keyword for category order; ``x_order`` is
    # not accepted.
    sns.pointplot(
        x=x_column_name,
        y=y_column_name,
        hue=hue,
        data=X_train,
        order=order_of_the_axis,
    )
    plt.show()
Exemplo n.º 5
0
def q05_sarima_model(path):
    """Return the train/validation splits reduced to a 'Sales' column.

    Each split from ``q02_data_splitter(path)`` is re-indexed by its 'Month'
    column, narrowed to the single 'Sales' column, and has its index name
    cleared.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.

    Returns:
        Tuple ``(train, validation)`` of single-column frames.
    """
    prepared = []
    for split in q02_data_splitter(path):
        split.index = split['Month']
        sales_only = split[['Sales']]
        sales_only.index.name = None
        prepared.append(sales_only)
    train, validation = prepared
    return train, validation
Exemplo n.º 6
0
def q03_stacked_point_plot(path,
                           x_column_name="month",
                           y_column_name="Sales",
                           hue="year",
                           order_of_the_axis=[
                               'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
                               'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
                           ]):
    """Unfinished stub: only loads the train/validation split.

    No plot is produced and nothing is returned; the plotting parameters are
    currently unused.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Unused.
        y_column_name: Unused.
        hue: Unused.
        order_of_the_axis: Unused.
    """
    splits = q02_data_splitter(path)
    # Unpacking keeps the requirement that exactly two splits come back.
    train, validation = splits
Exemplo n.º 7
0
def q03_stacked_point_plot(path,x_column_name='month',y_column_name='Sales',hue='year',order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug','Sep', 'Oct', 'Nov', 'Dec']):
    """Draw a point plot of the training split with one line per hue level.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column whose distinct values become separate lines.
        order_of_the_axis: Order of the x-axis categories (only read).

    Returns:
        None. The plot is drawn on a new 16x7 figure; ``plt.show()`` is not
        called.
    """
    X_train, _ = q02_data_splitter(path)
    # 'Month' must be datetime-like because the ``.dt`` accessor is used.
    X_train['year'] = X_train['Month'].dt.year
    X_train['month'] = X_train['Month'].dt.strftime('%b')

    plt.figure(figsize=(16, 7))
    # ``order`` is the pointplot keyword for category order; ``x_order`` is
    # not accepted.
    sns.pointplot(x=x_column_name,
                  y=y_column_name,
                  hue=hue,
                  data=X_train,
                  order=order_of_the_axis)
    plt.xlabel('month')
    plt.ylabel('Sales')
    plt.title('Stacked point chart')
    plt.legend(loc='upper right')
Exemplo n.º 8
0
def q04_boxplot(path,
                x="month",
                y="Sales",
                kind="box",
                order=[
                    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
                    'Sep', 'Oct', 'Nov', 'Dec'
                ],
                size=8):
    """Unfinished stub: only loads the train/validation split.

    No plot is produced and nothing is returned; the plotting parameters are
    currently unused.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Unused.
        y: Unused.
        kind: Unused.
        order: Unused.
        size: Unused.
    """
    splits = q02_data_splitter(path)
    # Unpacking keeps the requirement that exactly two splits come back.
    train, validation = splits
    # Placeholder text kept as a no-op expression statement.
    "write your solution here"
Exemplo n.º 9
0
def q05_sarima_model(path):
    """Return the train/validation splits as 'Sales' frames indexed by month.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``. It is no longer
            overwritten with a hard-coded location, so the caller's value is
            honoured.

    Returns:
        Tuple ``(tss, tss_valid)``: single-column 'Sales' frames whose index
        holds the raw values of the corresponding 'Month' column.
    """
    train, validation = q02_data_splitter(path)

    tss = pd.DataFrame(train[['Sales']])
    # ``columns`` (plural) is the real attribute; ``column`` only attached a
    # stray attribute to the frame.
    tss.columns = ['Sales']
    tss.index = train['Month'].values

    tss_valid = pd.DataFrame(validation['Sales'])
    tss_valid.columns = ['Sales']
    tss_valid.index = validation['Month'].values
    return tss, tss_valid
Exemplo n.º 10
0
def q04_boxplot(path,
                x='month',
                y='Sales',
                kind='box',
                order=[
                    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
                    'Sep', 'Oct', 'Nov', 'Dec'
                ],
                size=8):
    """Show a categorical plot (box plot by default) of the training split.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column used for the categories on the x axis.
        y: Column whose distribution is plotted.
        kind: Plot kind passed to seaborn.
        order: Order of the x-axis categories (only read).
        size: Height of the facet, in inches.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    train, _ = q02_data_splitter(path)
    # The default ``x`` names a 'month' column that the sibling solutions
    # derive from 'Month'; presumably the splitter does not provide it --
    # TODO confirm. Derive it only when it is missing.
    if 'month' not in train.columns:
        train['month'] = train['Month'].dt.strftime('%b')

    plot_kwargs = dict(data=train, x=x, y=y, kind=kind, order=order)
    if hasattr(sns, 'catplot'):
        # Newer seaborn: factorplot became catplot and ``size`` became
        # ``height``.
        sns.catplot(height=size, **plot_kwargs)
    else:
        sns.factorplot(size=size, **plot_kwargs)
    plt.xlabel('month')
    plt.ylabel('Sales')  # was ``plt.ylable``, which raises AttributeError
    plt.show()
Exemplo n.º 11
0
def q04_boxplot(path,
                x='month',
                y='Sales',
                kind='box',
                order_of_the_axis=[
                    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
                    'Sep', 'Oct', 'Nov', 'Dec'
                ],
                temp=8):
    """Show a categorical plot (box plot by default) of the training split.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column used for the categories on the x axis.
        y: Column whose distribution is plotted.
        kind: Plot kind passed to seaborn.
        order_of_the_axis: Order of the x-axis categories (only read).
        temp: Not used by this function.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    X_train, _ = q02_data_splitter(path)
    # 'Month' must be datetime-like because the ``.dt`` accessor is used.
    X_train['month'] = X_train['Month'].dt.strftime('%b')
    X_train['year'] = X_train['Month'].dt.year

    # Pass the requested category order; without it the x axis follows the
    # order in which the values appear in the data.
    plot_kwargs = dict(x=x, y=y, data=X_train, kind=kind,
                       order=order_of_the_axis)
    if hasattr(sns, 'catplot'):
        # Newer seaborn: factorplot became catplot.
        sns.catplot(**plot_kwargs)
    else:
        sns.factorplot(**plot_kwargs)
    plt.show()
Exemplo n.º 12
0
def q05_sarima_model(data):
    """Return the train/validation splits as 'Sales' frames indexed by month.

    Args:
        data: Forwarded unchanged to ``q02_data_splitter``.

    Returns:
        Tuple ``(tss, tss_valid)``: single-column 'Sales' frames whose index
        holds the raw values of the corresponding 'Month' column.
    """
    train, validation = q02_data_splitter(data)
    train = pd.DataFrame(train)
    validation = pd.DataFrame(validation)

    # Training frame.
    tss = pd.DataFrame(train['Sales'])
    # ``columns`` (plural) is the real attribute; ``column`` only attached a
    # stray attribute to the frame.
    tss.columns = ['Sales']
    tss.index = train['Month'].values

    # Validation frame, built the same way.
    tss_valid = pd.DataFrame(validation['Sales'])
    tss_valid.columns = ['Sales']
    tss_valid.index = validation['Month'].values
    return tss, tss_valid
Exemplo n.º 13
0
def q04_boxplot(path,
                x='month',
                y='Sales',
                kind='box',
                order=[
                    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
                    'Sep', 'Oct', 'Nov', 'Dec'
                ],
                size=8):
    """Draw a categorical plot (box plot by default) of the training split.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column used for the categories on the x axis.
        y: Column whose distribution is plotted.
        kind: Plot kind passed to seaborn.
        order: Order of the x-axis categories (only read).
        size: Height of the facet, in inches.

    Returns:
        None. ``plt.show()`` is not called.
    """
    train, _ = q02_data_splitter(path)
    # 'Month' must be datetime-like because the ``.dt`` accessor is used.
    train['year'] = train['Month'].dt.year
    train['month'] = train['Month'].dt.strftime('%b')

    # No plt.figure() here: the figure-level seaborn call creates its own
    # figure, so a manually opened one would just stay empty.
    plot_kwargs = dict(x=x, y=y, data=train, kind=kind, order=order)
    if hasattr(sns, 'catplot'):
        # Newer seaborn: factorplot became catplot and ``size`` became
        # ``height``.
        sns.catplot(height=size, **plot_kwargs)
    else:
        sns.factorplot(size=size, **plot_kwargs)
    plt.xlabel('Month')
    plt.ylabel('Sale volumn in millions')
    plt.title('Monthly sales of champagne aggregated monthly')
Exemplo n.º 14
0
def q04_boxplot(path,
                x='month',
                y='Sales',
                kind='box',
                order=[
                    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
                    'Sep', 'Oct', 'Nov', 'Dec'
                ],
                size=8):
    """Draw a categorical plot (box plot by default) of the training split.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column used for the categories on the x axis.
        y: Column whose distribution is plotted.
        kind: Plot kind passed to seaborn.
        order: Order of the x-axis categories (only read).
        size: Height of the facet, in inches.

    Returns:
        None. ``plt.show()`` is not called.
    """
    x_train, _ = q02_data_splitter(path)
    # 'Month' must be datetime-like because the ``.dt`` accessor is used.
    x_train['month'] = x_train['Month'].dt.strftime('%b')
    x_train['year'] = x_train['Month'].dt.year

    # No plt.figure() here: the figure-level seaborn call creates its own
    # figure, so a manually opened one would just stay empty.
    plot_kwargs = dict(x=x, y=y, data=x_train, kind=kind, order=order)
    if hasattr(sns, 'catplot'):
        # Newer seaborn: factorplot became catplot and ``size`` became
        # ``height``.
        sns.catplot(height=size, **plot_kwargs)
    else:
        sns.factorplot(size=size, **plot_kwargs)
    plt.xlabel('Month')
    plt.ylabel('Sales')
    plt.title('Sales in Millions')
Exemplo n.º 15
0
def q03_stacked_point_plot(path,
                           x_column_name='month',
                           y_column_name='Sales',
                           hue='year',
                           order_of_the_axis=[
                               'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
                               'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
                           ]):
    """Draw a point plot of the training split and return seaborn's result.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column whose distinct values become separate lines.
        order_of_the_axis: Order of the x-axis categories (only read).

    Returns:
        Whatever ``sns.pointplot`` returns for the drawn plot.
    """
    train, _ = q02_data_splitter(path)
    ts = pd.DataFrame(train)
    # 'Month' must be datetime-like because the ``.dt`` accessor is used.
    ts['year'] = ts['Month'].dt.year
    ts['month'] = ts['Month'].dt.strftime('%b')
    # ``order`` is the pointplot keyword for category order; ``x_order`` is
    # not accepted.
    return sns.pointplot(x=x_column_name,
                         y=y_column_name,
                         hue=hue,
                         data=ts,
                         order=order_of_the_axis)
Exemplo n.º 16
0
def q03_stacked_point_plot(path,
                           x_column_name='month',
                           y_column_name='Sales',
                           hue='year',
                           order_of_the_axis=[
                               'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
                               'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
                           ]):
    """Show a point plot of the training split with one line per hue level.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column whose distinct values become separate lines.
        order_of_the_axis: Order of the x-axis categories (only read).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    train, _ = q02_data_splitter(path)
    # 'Month' must be datetime-like because the ``.dt`` accessor is used.
    train['year'] = train['Month'].dt.year
    train['month'] = train['Month'].dt.strftime('%b')
    # Honour the ``hue`` argument instead of always grouping by 'year'.
    sns.pointplot(data=train,
                  x=x_column_name,
                  y=y_column_name,
                  hue=hue,
                  order=order_of_the_axis)
    plt.xlabel('month')
    plt.ylabel('Sales')
    # ``loc`` was an undefined name (NameError); 'best' is matplotlib's
    # default placement.
    plt.legend(loc='best')

    plt.show()
Exemplo n.º 17
0
def q05_sarima_model(path):
    """Unfinished stub: only loads the train/validation split.

    Nothing is computed from the splits and nothing is returned.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
    """
    splits = q02_data_splitter(path)
    # Unpacking keeps the requirement that exactly two splits come back.
    train, validation = splits
    # Placeholder text kept as a no-op expression statement.
    "write your solution here"
Exemplo n.º 18
0
# %load q05_sarima_model/build.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
# import sys
# sys.path.append('./')
from greyatomlib.time_series_101_project.q02_data_splitter.build import q02_data_splitter

# Default dataset location, given as a relative path.
path = 'data/perrin-freres-monthly-champagne.csv'
# Switch matplotlib to the 'agg' backend before any plotting happens.
plt.switch_backend('agg')
# Module-level split, computed when this code is executed; q05_sarima_model
# below recomputes its own split from its ``path`` argument.
train, validation = q02_data_splitter(path)

# Bare string expression: evaluated and discarded (placeholder text).
'write your solution here'


def q05_sarima_model(path):
    """Return the train/validation splits reduced to a 'Sales' column.

    Each split from ``q02_data_splitter(path)`` is re-indexed by its 'Month'
    column, narrowed to the single 'Sales' column, and has its index name
    cleared.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.

    Returns:
        Tuple ``(train, validation)`` of single-column frames.
    """
    def _sales_by_month(frame):
        # Re-index first so the 'Sales'-only selection carries the month index.
        frame.index = frame['Month']
        sales = frame[['Sales']]
        sales.index.name = None
        return sales

    train, validation = q02_data_splitter(path)
    return _sales_by_month(train), _sales_by_month(validation)