示例#1
0
 def modify_doc(doc):
     source = ColumnDataSource(dict(x=[1, 2], y=[1, 1], val=["a", "b"]))
     plot = Plot(plot_height=400, plot_width=400, x_range=Range1d(0, 1), y_range=Range1d(0, 1), min_border=0)
     plot.add_tools(CustomAction(callback=CustomJS(args=dict(s=source), code=RECORD("data", "s.data"))))
     plot.add_glyph(source, Circle(x='x', y='y', size=20))
     dp = DatePicker(title='Select date', value=datetime(2019, 9, 20), min_date=datetime(2019, 9, 1), max_date=datetime.utcnow(), css_classes=["foo"])
     def cb(attr, old, new):
         source.data['val'] = [old, new]
     dp.on_change('value', cb)
     doc.add_root(column(dp, plot))
from bokeh.models import DatePicker, HBox
from bokeh.io import curdoc

from datetime import datetime

beginning = DatePicker(title="Begin Date",
                       min_date=datetime(2014, 11, 1),
                       max_date=datetime.now(),
                       value=datetime(datetime.now().year, 1, 1))


def cb(attr, old, new):
    print(new)


beginning.on_change('value', cb)

curdoc().add_root(HBox(children=[beginning]))
示例#3
0
def accounts_tsa_tab(panel_title):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')

            self.forecast_days = 30
            self.interest_var = 'address'
            self.trigger = -1
            self.status = 'all'
            self.update_type = 'all'
            self.status = 'all'
            self.account_type = 'all'
            self.interest_var = 'amount'

            self.pl = {}  # for rf pipeline
            self.div_style = """ style='width:300px; margin-left:25px;
            border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            self.header_style = """ style='color:blue;text-align:center;' """

            # list of tier specific addresses for prediction
            self.address_list = []
            self.address_select = Select(title='Filter by address',
                                         value='all',
                                         options=[])
            self.address = 'all'
            self.load_data_flag = False
            self.day_diff = 1
            self.groupby_dict = {}
            self.addresses = []

            self.div_style = """ style='width:300px; margin-left:25px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """
            self.max_loaded_date = None
            self.min_loaded_date = None

            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                            position:relative;background:black;margin-bottom:200px">
                            <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'forecast':
                self.section_header_div(text='Forecasts:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

            # ####################################################
            #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            div = Div(text=text, width=width, height=height)
            return div

        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def reset_checkboxes(self):
            try:
                self.address_selected = ""
                self.address_select.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self, start_date, end_date):
            try:
                logger.warning("data load begun")
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date, self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)

                if self.df is not None:
                    self.max_loaded_date = self.df.block_timestamp.max(
                    ).compute()
                    self.min_loaded_date = self.df.block_timestamp.min(
                    ).compute()
                    if start_date >= self.min_loaded_date and end_date <= self.max_loaded_date:
                        logger.warning("data already loaded - %s",
                                       self.df.tail(10))
                        pass
                    else:
                        self.df_load(start_date, end_date, cols=self.cols)
                        self.df = self.df.fillna(0)
                        df = self.df[['address']]
                        df = df.compute()
                        self.addresses = ['all'] + list(set(list(df)))
                        #self.make_delta()
                        #self.df = self.df.set_index('block_timestamp')
                        logger.warning("data loaded - %s", self.df.tail(10))
                else:
                    self.df_load(start_date, end_date, cols=self.cols)
                    self.df = self.df.fillna(0)
                    df = self.df[['address']]
                    df = df.compute()
                    self.addresses = ['all'] + list(set(list(df)))
                    # self.make_delta()
                    # self.df = self.df.set_index('block_timestamp')
                    logger.warning("data loaded - %s", self.df.tail(10))
                    self.df = self.filter(self.df)

            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        df = self.df.compute()
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        self.df = self.df.fillna(self.df.mean())
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))

            except Exception:
                logger.error('make delta', exc_info=True)

        ##################################################
        #               EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            try:
                # logger.warning("difficulty:%s", self.df.tail(30))
                # get max value of variable and multiply it by 1.1
                minv = 0
                maxv = 0
                df = self.df
                if df is not None:
                    if len(df) > 0:
                        minv, maxv = dd.compute(df[variable].min(),
                                                df[variable].max())
                else:
                    df = SD('filter', [variable, 'status'], []).get_df()

                return df.hvplot.box(variable,
                                     by='status',
                                     ylim=(.9 * minv, 1.1 * maxv))
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS

        def filter(self, df):
            try:
                df = df.assign(freq=df.address)
                if self.status != 'all':
                    df = df[df.status == self.status]
                if self.account_type != 'all':
                    df = df[df.acccount_type == self.account_type]
                if self.update_type != 'all':
                    df = df[df.update_type == self.update_type]
                if self.address != 'all':
                    df = df[df.address == self.address]

                return df
            except Exception:
                logger.error("filter:", exc_info=True)

        def tsa_amount(self, launch):
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({'amount': 'mean'})
                df = df.reset_index()
                df = df.compute()
                label = 'amount_diff'
                df[label] = df[self.interest_var].diff()
                df = df.fillna(0)

                rename = {'block_timestamp': 'ds', 'amount': 'y'}
                df = df.rename(columns=rename)
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)

                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower',
                                'yhat_upper']].tail())
                print(list(forecast.columns))
                for idx, col in enumerate(['yhat', 'yhat_lower',
                                           'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds',
                                                 y=col,
                                                 width=600,
                                                 height=250,
                                                 value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(x='ds',
                                                     y=col,
                                                     width=600,
                                                     height=250,
                                                     value_label='$',
                                                     legend=False).relabel(col)

                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds',
                                                 y=col,
                                                 width=550,
                                                 height=250,
                                                 value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds',
                                                  y=col,
                                                  width=550,
                                                  height=250,
                                                  value_label='$',
                                                  legend=False).relabel(col)

                return p + q
            except Exception:
                logger.error("box plot:", exc_info=True)

        def tsa_freq(self, launch):
            try:
                logger.warning('df columns:%s', list(self.df.columns))
                df = self.df.set_index('block_timestamp')
                df = df.resample('D').agg({'address': 'nunique'})
                df = df.reset_index()
                df = df.compute()
                label = 'freq_diff'
                df[label] = df['address'].diff()
                df = df.fillna(0)

                rename = {'block_timestamp': 'ds', 'address': 'y'}
                df = df.rename(columns=rename)
                logger.warning('df:%s', df.head())
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)

                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)

                print(forecast[['ds', 'yhat', 'yhat_lower',
                                'yhat_upper']].tail())
                print(list(forecast.columns))
                for idx, col in enumerate(['yhat', 'yhat_lower',
                                           'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds',
                                                 y=col,
                                                 width=600,
                                                 height=250,
                                                 value_label='#').relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(
                            x='ds',
                            y=col,
                            width=600,
                            height=250,
                            value_label='#').relabel(col)

                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds',
                                                 y=col,
                                                 width=550,
                                                 height=250,
                                                 value_label='#').relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds',
                                                  y=col,
                                                  width=550,
                                                  height=250,
                                                  value_label='#').relabel(col)

                return p + q
            except Exception:
                logger.error("box plot:", exc_info=True)

        ####################################################
        #               GRAPHS
    def update(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.update_type = update_type_select.value
        thistab.status = status_select.value
        thistab.account_type = account_type_select.value
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.address = thistab.address_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_load(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())

        cols = [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type', 'amount'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        first_date = last_date - timedelta(days=60)
        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])
        status_select = Select(title='Select account status',
                               value=thistab.status,
                               options=menus['status'])
        account_type_select = Select(title='Select account type',
                                     value=thistab.account_type,
                                     options=menus['account_type'])
        update_type_select = Select(title='Select transfer type',
                                    value=thistab.update_type,
                                    options=menus['update_type'])
        # search by address checkboxes
        thistab.checkboxes = CheckboxButtonGroup(labels=thistab.addresses,
                                                 active=[0])

        # ----------------------------------- LOAD DATA
        # load model-making data
        thistab.load_df(datepicker_start.value, datepicker_end.value)
        # load data for period to be predicted

        # tables
        hv_tsa_amount = hv.DynamicMap(thistab.tsa_amount,
                                      streams=[stream_launch])
        tsa_amount = renderer.get_plot(hv_tsa_amount)

        hv_tsa_freq = hv.DynamicMap(thistab.tsa_freq, streams=[stream_launch])
        tsa_freq = renderer.get_plot(hv_tsa_freq)

        # add callbacks
        datepicker_start.on_change('value', update_load)
        datepicker_end.on_change('value', update_load)
        thistab.address_select.on_change('value', update)
        select_forecast_days.on_change('value', update)
        update_type_select.on_change('value', update)
        account_type_select.on_change('value', update)
        status_select.on_change('value', update)

        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end,
                             thistab.address_select, select_forecast_days,
                             update_type_select, account_type_select,
                             status_select, thistab.checkboxes)

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [Spacer(width=20, height=30)],
                         [tsa_amount.state, controls], [tsa_freq.state],
                         [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        return tab_error_flag(panel_title)
def cryptocurrency_eda_tab(cryptos, panel_title):
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[],
                                               variable_2=[],
                                               relationship=[],
                                               lag=[],
                                               r=[],
                                               p_value=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')
            self.items = cryptos
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']

            self.trigger = 0

            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.variable = 'fork'
            self.crypto = 'all'
            self.lag_variable = 'push'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                            style='width:350px; margin-left:-600px;
                            border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """

            self.header_style = """ style='color:blue;text-align:center;' """
            # track variable for AI for significant effects
            self.adoption_variables = {
                'user': [],
                'developer': ['watch', 'fork']
            }

            self.significant_effect_dict = {}
            self.reset_adoption_dict(self.variable)
            self.relationships_to_check = ['weak', 'moderate', 'strong']
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                           <div style="text-align:center;width:{}px;height:{}px;
                                  position:relative;background:black;margin-bottom:200px">
                                  <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                           </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            #self.lag_section_head_txt = 'Lag relationships between {} and...'.format(self.variable)
            self.lag_section_head_txt = 'Lag relationships:'
            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=self.lag_section_head_txt,
                                        width=600,
                                        html_header='h3',
                                        margin_top=5,
                                        margin_bottom=-155),
                'distribution':
                self.section_header_div(
                    text='Pre transform distribution:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'relationships':
                self.section_header_div(
                    text='Relationships between variables:'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'correlations':
                self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600,
                    html_header='h3',
                    margin_top=5,
                    margin_bottom=-155),
                'non_linear':
                self.section_header_div(
                    text='non linear relationships between variables:',
                    width=600,
                    html_header='h3',
                    margin_top=5,
                    margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            txt = """<div style="text-align:center;background:black;width:{}px;">
                           <h4 style="color:#fff;">
                           {}</h4></div>""".format(self.page_width, text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def reset_adoption_dict(self, variable):
            self.significant_effect_dict[variable] = []

        def section_header_updater(self,
                                   text,
                                   section,
                                   html_header='h3',
                                   margin_top=150,
                                   margin_bottom=-150):
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            self.section_headers[section].text = text

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////
        def prep_data(self, df1):
            try:
                self.cols = list(df1.columns)

                df1['timestamp'] = df1['timestamp'].astype('M8[us]')
                df = df1.set_index('timestamp')
                #logger.warning('LINE 195 df:%s',df.head())
                # handle lag for all variables
                if self.crypto != 'all':
                    df = df[df.crypto == self.crypto]
                df = df.compute()
                #logger.warning('LINE 199: length before:%s',len(df))
                df = df.groupby('crypto').resample(self.resample_period).agg(
                    self.groupby_dict)
                #logger.warning('LINE 201: length after:%s',len(df))

                df = df.reset_index()
                vars = self.feature_list.copy()
                if int(self.lag) > 0:
                    for var in vars:
                        if self.variable != var:
                            df[var] = df[var].shift(int(self.lag))
                df = df.dropna()
                self.df1 = df
                #logger.warning('line 184- prep data: df:%s',self.df.head(10))

            except Exception:
                logger.error('prep data', exc_info=True)

        def set_groupby_dict(self):
            try:
                pass

            except Exception:
                logger.error('set groupby dict', exc_info=True)

        #   ///////////////// PLOTS /////////////////////

        def lags_plot(self, launch):
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                df = df.compute()
                cols = [self.lag_variable]
                lags = self.lag_days.split(',')
                for day in lags:
                    try:
                        label = self.lag_variable + '_' + day
                        df[label] = df[self.lag_variable].shift(int(day))
                        cols.append(label)
                    except:
                        logger.warning('%s is not an integer', day)
                df = df.dropna()
                self.lags_corr(df)
                # plot the comparison
                #logger.warning('in lags plot: df:%s',df.head(10))
                return df.hvplot(x=self.variable,
                                 y=cols,
                                 kind='scatter',
                                 alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            try:
                corr_dict_data = {
                    'variable_1': [],
                    'variable_2': [],
                    'relationship': [],
                    'lag': [],
                    'r': [],
                    'p_value': []
                }
                a = df[self.variable].tolist()
                for col in df.columns:
                    if col not in ['timestamp', self.variable]:
                        # find lag
                        var = col.split('_')
                        try:
                            tmp = int(var[-1])

                            lag = tmp
                        except Exception:
                            lag = 'None'

                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        corr_dict_data['variable_1'].append(self.variable)
                        corr_dict_data['variable_2'].append(col)
                        corr_dict_data['relationship'].append(txt)
                        corr_dict_data['lag'].append(lag)
                        corr_dict_data['r'].append(round(rvalue, 4))
                        corr_dict_data['p_value'].append(round(pvalue, 4))

                lags_corr_src.stream(corr_dict_data,
                                     rollover=(len(corr_dict_data['lag'])))
                columns = [
                    TableColumn(field="variable_1", title="variable 1"),
                    TableColumn(field="variable_2", title="variable 2"),
                    TableColumn(field="relationship", title="relationship"),
                    TableColumn(field="lag", title="lag(days)"),
                    TableColumn(field="r", title="r"),
                    TableColumn(field="p_value", title="p_value"),
                ]
                data_table = DataTable(source=lags_corr_src,
                                       columns=columns,
                                       width=900,
                                       height=400)
                return data_table
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            try:

                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'r': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop('timestamp', axis=1)
                #df = df.compute()

                a = df[self.variable].tolist()

                for col in self.feature_list:
                    if col != self.variable:
                        #logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        # add to dict
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['r'].append(round(rvalue, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))

                        # update significant effect variables
                        if self.variable in self.adoption_variables[
                                'developer']:
                            if any(relationship in txt for relationship in
                                   self.relationships_to_check):
                                if self.variable not in self.significant_effect_dict.keys(
                                ):
                                    self.significant_effect_dict[
                                        self.variable] = []
                                self.significant_effect_dict[
                                    self.variable].append(col)

                if self.variable in self.adoption_variables['developer']:
                    tmp = self.significant_effect_dict[self.variable].copy()
                    tmp = list(set(tmp))
                    tmp_dct = {
                        'features': tmp,
                        'timestamp': datetime.now().strftime(self.DATEFORMAT)
                    }
                    # write to redis
                    save_params = 'adoption_features:developer' + '-' + self.variable
                    self.redis.save(tmp_dct,
                                    save_params,
                                    "",
                                    "",
                                    type='checkpoint')

                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'r': corr_dict['r'],
                    'p-value': corr_dict['p-value']
                })
                #logger.warning('df:%s',df.head(23))
                return df.hvplot.table(columns=[
                    'Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value'
                ],
                                       width=550,
                                       height=400,
                                       title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            try:

                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'stat': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop('timestamp', axis=1)
                #df = df.compute()

                #logger.warning('line df:%s',df.head(10))
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    if col != self.variable:
                        #logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        stat, pvalue, txt = self.mann_whitneyu_label(a, b)
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['stat'].append(round(stat, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))

                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'stat': corr_dict['stat'],
                    'p-value': corr_dict['p-value']
                })
                #logger.warning('df:%s',df.head(23))
                return df.hvplot.table(
                    columns=[
                        'Variable 1', 'Variable 2', 'Relationship', 'stat',
                        'p-value'
                    ],
                    width=550,
                    height=400,
                    title='Non parametricrelationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            try:

                return self.df.hvplot.hist(y=self.feature_list,
                                           subplots=True,
                                           shared_axes=False,
                                           bins=25,
                                           alpha=0.3,
                                           width=300).cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            try:
                logger.warning('line 306 self.feature list:%s',
                               self.feature_list)
                df = self.df1
                #df = df[self.feature_list]

                # get difference for money columns

                #thistab.prep_data(thistab.df)
                if 'timestamp' in df.columns:
                    df = df.drop('timestamp', axis=1)
                #df = df.repartition(npartitions=1)
                #df = df.compute()

                df = df.fillna(0)
                #logger.warning('line 302. df: %s',df.head(10))

                cols_temp = self.feature_list.copy()
                if self.variable in cols_temp:
                    cols_temp.remove(self.variable)
                #variable_select.options = cols_lst

                p = df.hvplot.scatter(x=self.variable,
                                      y=cols_temp,
                                      width=330,
                                      subplots=True,
                                      shared_axes=False,
                                      xaxis=False).cols(4)

                return p

            except Exception:
                logger.error('matrix plot', exc_info=True)

        '''
        def regression(self,df):
            try:

            except Exception:
                logger.error('matrix plot', exc_info=True)
        '''

    def update_variable(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data(thistab.df)
        thistab.variable = new
        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.lag_section_head_txt = 'Lag relationships between {} and...'.format(
            thistab.variable)
        #thistab.section_header_updater('lag',thistab.lag_section_head_txt)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_crypto(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.crypto = crypto_select.value
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):  # update lag & cryptocurrency
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value,
                        datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'external_daily'
        cols = list(groupby_dict.keys()) + ['timestamp', 'crypto']
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=200)
        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        # date comes out stream in milliseconds
        #stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        variable_select = Select(title='Select variable',
                                 value='fork',
                                 options=thistab.feature_list)

        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        crypto_select = Select(title='Select cryptocurrency',
                               value='all',
                               options=['all'] + thistab.items)

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src,
                                    columns=columns,
                                    width=500,
                                    height=280)

        width = 800

        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        #hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])

        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        crypto_select.on_change('value', update_crypto)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end, variable_select,
                             lag_select, crypto_select, resample_select)

        controls_lag = WidgetBox(lag_variable_select, lags_input,
                                 lags_input_button)

        # create the dashboards
        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [matrix_plot.state, controls],
                         [thistab.section_headers['relationships']],
                         [Spacer(width=20, height=30)],
                         [thistab.section_headers['correlations']],
                         [Spacer(width=20, height=30)],
                         [corr_table.state,
                          thistab.corr_information_div()],
                         [thistab.section_headers['non_linear']],
                         [Spacer(width=20, height=30)], [nonpara_table.state],
                         [thistab.section_headers['lag']],
                         [Spacer(width=20, height=30)],
                         [lags_plot.state, controls_lag], [lags_corr_table],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)
示例#5
0
def account_predictive_tab(page_width=1200):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = {}  # to contain churned and retained splits
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.rf = {}  # random forest
            self.cl = PythonClickhouse('aion')
            self.feature_list = hyp_variables

            self.targets = {
                'classification': {
                    'churned': {
                        'cols': ['churned', 'active'],
                        'target_col': 'status'
                    }
                },
                'regression': {
                    'aion_fork': {
                        'cols': [1, 0],
                        'target_col': 'aion_fork'
                    }
                }
            }
            self.interest_var = 'address'
            self.trigger = -1
            self.status = 'all'

            self.clf = None
            self.pl = {}  # for rf pipeline
            self.div_style = """ style='width:300px; margin-left:25px;
            border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            self.header_style = """ style='color:blue;text-align:center;' """

            # list of tier specific addresses for prediction
            self.address_list = []
            self.prediction_address_selected = ""
            self.load_data_flag = False
            self.day_diff = 1
            self.groupby_dict = {}
            for col in self.feature_list:
                self.groupby_dict[col] = 'mean'

            self.div_style = """ style='width:300px; margin-left:25px;
                        border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                        """
            self.metrics_div = Div(text='', width=400, height=300)
            self.accuracy_df = None
            self.inspected_variable = 'amount'

            # ------- DIVS setup begin
            self.page_width = page_width
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                                                                       position:relative;background:black;margin-bottom:200px">
                                                                       <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                                                                 </div>""".format(
                self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'churn':
                self.section_header_div(
                    text=
                    'Churned accounts: prediction model accuracy, variable ranking:{}'
                    .format('----'),
                    width=int(self.page_width * .5),
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'variable behavior':
                self.section_header_div(text='Variable behavior:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'predictions':
                self.section_header_div(
                    text='Select date range to make predictions:{}'.format(
                        self.section_divider),
                    width=int(self.page_width * .5),
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
            }

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

            # ####################################################
            #              UTILITY DIVS

        def results_div(self, text, width=600, height=300):
            div = Div(text=text, width=width, height=height)
            return div

        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def reset_checkboxes(self):
            try:
                self.prediction_address_selected = ""
                self.prediction_address_select.value = "all"
            except Exception:
                logger.error('reset checkboxes', exc_info=True)

        ###################################################
        #               I/O
        def load_df(self,
                    start_date="2018-04-25 00:00:00",
                    end_date="2018-12-10 00:00:00"):
            try:
                if isinstance(start_date, str):
                    start_date = datetime.strptime(start_date, self.DATEFORMAT)
                if isinstance(end_date, str):
                    end_date = datetime.strptime(end_date, self.DATEFORMAT)
                self.df_load(start_date, end_date)
                self.df = self.df.fillna(0)
                #self.make_delta()
                #self.df = self.df.set_index('block_timestamp')
                #logger.warning("data loaded - %s",self.df.tail(10))

            except Exception:
                logger.error('load_df', exc_info=True)

        ###################################################
        #               MUNGE DATA
        def make_delta(self):
            try:
                if self.df is not None:
                    if len(self.df) > 0:
                        df = self.df.compute()
                        for col in self.targets:
                            col_new = col + '_diff'
                            df[col_new] = df[col].pct_change()
                            df[col_new] = df[col_new].fillna(0)
                            logger.warning('diff col added : %s', col_new)
                        self.df = self.df.fillna(self.df.mean())
                        self.df = dd.dataframe.from_pandas(df, npartitions=15)
                        # logger.warning('POST DELTA:%s',self.df1.tail(20))

            except Exception:
                logger.error('make delta', exc_info=True)

        def split_df(self, df, target):
            cols = self.target['classification'][target]
            target_col = self.target['classification'][target]
            for val in cols:
                self.df1[val] = df[target_col] == val
            logger.warning(
                "Finished split into churned and retained dataframes")

        ##################################################
        #               EXPLICATORY GRAPHS
        # PLOTS
        def box_plot(self, variable):
            try:
                # logger.warning("difficulty:%s", self.df.tail(30))
                # get max value of variable and multiply it by 1.1
                minv = 0
                maxv = 0
                df = self.df
                if df is not None:
                    if len(df) > 0:
                        minv, maxv = dd.compute(df[variable].min(),
                                                df[variable].max())
                else:
                    df = SD('filter', [variable, 'status'], []).get_df()

                return df.hvplot.box(variable,
                                     by='status',
                                     ylim=(.9 * minv, 1.1 * maxv))
            except Exception:
                logger.error("box plot:", exc_info=True)

        ###################################################
        #               MODELS
        def rf_clf(self):
            try:
                logger.warning("RANDOM FOREST LAUNCHED")

                error_lst = []
                df_temp = self.df
                df_temp = self.normalize(df_temp,
                                         timestamp_col='block_timestamp')
                # if all addresses used filter for only positive transactions

                for target in self.targets['classification']:
                    # filter out joined
                    df = df_temp.copy()
                    if target == 'churned':
                        df = df[df['status'] != 'joined']

                    #logger.warning("line 205: df columns in %s:",df.columns.tolist())
                    df = df.groupby(['address',
                                     'status']).agg(self.groupby_dict)
                    df = df.reset_index()
                    #logger.warning("line 222: df columns in %s:",df.tail(10))

                    df = df.compute()
                    '''
                    # only retain wanted values
                    col_values = list(self.df[self.targets['classification'][target]['target_col']].unique())
                    for val in col_values:
                        if val in self.targets['classification'][target]['cols']:
                            pass
                        else:
                            df[self.targets['classification'][target]['target_col']] = \
                            df[df[self.targets['classification'][target]['cols']] != val]
                    '''
                    X = df[self.feature_list]
                    y = df[self.targets['classification'][target]
                           ['target_col']]
                    #logger.warning('y=:%s',y.head(100))

                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=0.3)
                    self.feature_list = X_train.columns.tolist()

                    self.pl[target] = Pipeline([
                        ('imp',
                         SimpleImputer(missing_values=0, strategy='median')),
                        ('rf',
                         RandomForestClassifier(n_estimators=100,
                                                random_state=42,
                                                max_depth=4,
                                                class_weight='balanced'))
                    ])
                    self.pl[target].fit(X_train, y_train)

                    y_pred = self.pl[target].predict(X_test)
                    error_lst.append(
                        round(100 * metrics.accuracy_score(y_test, y_pred), 2))

                self.accuracy_df = pd.DataFrame({
                    'Outcome':
                    list(self.targets['classification'].keys()),
                    'Accuracy':
                    error_lst,
                })
                #logger.warning('accuracy_df:%s',self.accuracy_df.head())
                #self.make_tree(target=target)

                print('confusion matrix:\n')
                print(confusion_matrix(y_test, y_pred))
                print('classification report:\n')
                print(classification_report(y_test, y_pred))
                #logger.warning("clf model built:%s",self.pl)

            except Exception:
                logger.error("RF:", exc_info=True)

        def accuracy_table(self):
            try:
                columns = self.accuracy_df.columns.tolist()
                return self.accuracy_df.hvplot.table(
                    columns=['Outcome', 'Accuracy'],
                    width=250,
                    title='Prediction accuracy')

            except Exception:
                logger.error("RF:", exc_info=True)

        def prediction_information_div(self, width=350, height=450):
            txt = """
            <div {}>
            <h4 {}>Info </h4>
            <ul style='margin-top:-10px;'>
            <li>
            The table shows the predicted change.</br>
            </li>
            <li>
            For desirable outcomes:
            </br> ... a positive number is good!
            </br> ... the bigger the number the better.
            </br> ... a negative number is bad!
            </br> ... the bigger the negative number the worse it is.
            </li>
            <>
            For non-desirable outcomes:
            </br>... the inverse is true
            </li>
            <li>
            Use the datepicker(s) to select dates for the period desired
            </li>
            </ul>
            </div>

            """.format(self.div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        def metrics_div_update(self, data):
            div_style = """ 
                   style='width:350px;margin-right:-600px;
                   border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
               """
            txt = """<div {}>
            <h4 {}>Prediction Info </h4>
            <ul style='margin-top:-10px;'>
            <li>
            {}% likely to churn
            </li>
            </ul>
            </div>""".format(div_style, self.header_style, data)
            self.metrics_div.text = txt

        def stats_information_div(self, width=400, height=300):
            div_style = """ 
                           style='width:350px;margin-left:-600px;
                           border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                       """
            txt = """
            <div {}>
                   <h4 {}>Metadata Info </h4>
                   <ul>
                   <li >
                   <h4 style='margin-bottom:-2px;'>Table left:</h4>
                   - shows the outcome,</br>
                     and the accuracy in %</br>
                     <strong><i>100% is perfection!</i></strong>
                   </li>
                   <li>
                   <h4 style='margin-bottom:-2px;'>Table right:</h4>
                     - shows the desired outcome, the variables(things Aion controls)
                   </br> and their importance to the particular outcome
                   </br> ...which variable(s) have a greater impact on an outcome.
                   </br>- lower = better
                   </br>- generally only the best ranked 3 matter
                   </br>- business advice: manipulate the top ranked variables to attain desirable outcomes
                   </li>
                   </ul>
            </div>""".format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        def load_prediction_df(self, start_date, end_date):
            if isinstance(start_date, date):
                start_date = datetime.combine(start_date, datetime.min.time())
            if isinstance(end_date, date):
                end_date = datetime.combine(end_date, datetime.min.time())
            cols = self.feature_list + ['address', 'block_timestamp']
            self.df_predict = self.cl.load_data(table=self.table,
                                                cols=cols,
                                                start_date=start_date,
                                                end_date=end_date)
            logger.warning('319:in load prediction: %s',
                           self.df_predict.head(5))

        def update_prediction_addresses_select(self):
            self.prediction_address_select.options = ['all']
            if len(self.df_predict) > 0:
                lst = ['all'] + list(
                    self.df_predict['address'].unique().compute())
                self.prediction_address_select.options = lst

        # the period for which the user wants a prediction
        def make_account_predictions(self, launch=-1):
            try:
                logger.warning("MAKE PREDICTIONS LAUNCHED")
                target = list(self.targets['classification'].keys())[0]
                # make
                df = self.df_predict
                #logger.warning("line 363%s",df.head(10))
                # make list of address for prediction select
                # filter if prediction for certain addresses
                #logger.warning('address selected:%s',self.prediction_address_select.value)
                if self.prediction_address_select.value is not None:
                    if len(self.prediction_address_select.value) > 0:
                        if self.prediction_address_select.value not in [
                                'all', ''
                        ]:
                            df = df[df.address ==
                                    self.prediction_address_select.value]

                #logger.warning('line 409 predict-df post filter:%s', df.head(20))
                # make table for display
                self.predict_df = pd.DataFrame({
                    'address': [],
                    'likely action': []
                })
                for target in list(self.targets['classification'].keys()):
                    if len(df) > 0:

                        df = self.normalize(df,
                                            timestamp_col='block_timestamp')
                        df = self.group_data(df,
                                             self.groupby_dict,
                                             timestamp_col='block_timestamp')
                        interest_labels = list(df['address'].unique())

                        # run model
                        df = df.fillna(0)
                        X = df[self.feature_list]
                        #logger.warning("df before prediction:%s",X.tail(10))
                        y_pred = self.pl[target].predict(X)
                        logger.warning('y_pred:%s', y_pred)
                        if target == 'churned':
                            y_pred_verbose = [
                                'remain' if x in ["active", 1] else "churn"
                                for x in y_pred
                            ]

                        #---- make table for display
                        self.predict_df = pd.DataFrame({
                            'address':
                            interest_labels,
                            'likely action':
                            y_pred_verbose
                        })

                        #------ label pools
                        self.predict_df['address'] = self.predict_df[
                            'address'].map(self.poolname_verbose_trun)
                        #logger.warning('self.predict_df:%s',self.predict_df)

                        churn_df = self.predict_df[
                            self.predict_df['likely action'] == 'churn']
                        perc_to_churn = round(
                            100 * len(churn_df) / len(self.predict_df), 1)
                        txt = target[:-2]
                        text = """<div {}>
                        <h3>Percentage likely to {}:</h3>
                        <strong 'style=color:black;'>{}%</strong></div>""".format(
                            self.header_style, txt, perc_to_churn)
                        self.metrics_div_update(data=perc_to_churn)
                    else:

                        text = """<div {}>
                            <br/> <h3>Sorry, address not found</h3>
                            </div>""".format(self.header_style)
                        self.metrics_div.text = text
                    logger.warning("end of %s predictions", target)
                return self.predict_df.hvplot.table(
                    columns=['address', 'likely action'],
                    width=500,
                    title='Account predictions')
            except Exception:
                logger.error("prediction:", exc_info=True)

        def make_tree(self, target='churned'):
            try:
                if not self.pl:
                    self.rf_clf()
                # Limit depth of tree to 3 levels
                # Extract the small tree
                tree_small = self.pl[target].named_steps['rf'].estimators_[5]
                # Save the tree as a png image
                export_graphviz(tree_small,
                                out_file='small_tree.dot',
                                feature_names=self.feature_list,
                                rounded=True,
                                precision=1)

                (graph, ) = pydot.graph_from_dot_file('small_tree.dot')
                # filepath = self.make_filepath('../../../static/images/small_tree.gif')
                # .write_png(filepath)
                filepath = self.make_filepath(
                    '/home/andre/Downloads/small_tree.png')
                graph.write_png(filepath)
                logger.warning("TREE SAVED")
            except Exception:
                logger.error("make tree:", exc_info=True)

        def make_feature_importances(self):
            try:
                if not self.pl:
                    self.rf_clf()

                results_dct = {
                    'outcome': [],
                    'feature': [],
                    'importance': [],
                    'rank_within_outcome': []
                }
                for target in self.targets['classification'].keys():
                    logger.warning('make feature importances for :%s', target)
                    # Get numerical feature importances
                    importances = list(
                        self.pl[target].named_steps['rf'].feature_importances_)

                    # List of tuples with variable and importance
                    feature_importances = [(feature, round(importance, 4))
                                           for feature, importance in zip(
                                               self.feature_list, importances)]

                    sorted_importances = sorted(feature_importances,
                                                key=itemgetter(1))

                    # logger.warning('importances :%s',importances)
                    # logger.warning("feature_importances:%s",feature_importances)
                    target_lst = [target] * len(importances)

                    count = 1
                    rank_lst = []
                    for i in importances:
                        rank_lst.append(count)
                        count += 1

                    results_dct['outcome'] += target_lst
                    results_dct['feature'] += [
                        i[0] for i in sorted_importances
                    ]
                    results_dct['importance'] += [
                        i[1] for i in sorted_importances
                    ]
                    results_dct['rank_within_outcome'] += sorted(rank_lst,
                                                                 reverse=True)

                df = pd.DataFrame.from_dict(results_dct)
                logger.warning('MAKE FEATURE IMPORTANCES FINISHED')
                return df.hvplot.table(
                    columns=[
                        'outcome', 'feature', 'importance',
                        'rank_within_outcome'
                    ],
                    width=600,
                    title="Variables ranked by importance (for each output)")

            except Exception:
                logger.error("Feature importances:", exc_info=True)

        ####################################################
        #               GRAPHS
    def update(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.load_prediction_df(datepicker_start.value,
                                   datepicker_end.value)
        thistab.update_prediction_addresses_select()
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        stream_select_variable.event(variable=thistab.inspected_variable)
        thistab.notification_updater("ready")

    def update_address_predictions(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    def update_select_variable(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.inspected_variable = select_variable.value
        stream_select_variable.event(variable=thistab.inspected_variable)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'account_ext_warehouse'
        #cols = list(table_dict[table].keys())

        cols = hyp_variables + [
            'address', 'block_timestamp', 'account_type', 'status',
            'update_type'
        ]
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date']
        last_date = last_date - timedelta(days=50)
        first_date = last_date - timedelta(days=5)
        # STREAMS Setup
        # date comes out stream in milliseconds
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_select_variable = streams.Stream.define('Select_variable',
                                                       variable='amount')()

        # setup widgets
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)
        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)
        select_variable = Select(title='Filter by variable',
                                 value=thistab.inspected_variable,
                                 options=thistab.feature_list)

        # search by address checkboxes
        thistab.prediction_address_select = Select(title='Filter by address',
                                                   value='all',
                                                   options=[])
        reset_prediction_address_button = Button(label="reset address(es)",
                                                 button_type="success")

        # ----------------------------------- LOAD DATA
        # load model-making data
        end = datepicker_start.value
        start = end - timedelta(days=60)
        thistab.load_df(start, end)
        thistab.rf_clf()
        # load data for period to be predicted
        thistab.load_prediction_df(datepicker_start.value,
                                   datepicker_end.value)
        thistab.update_prediction_addresses_select()

        # tables
        hv_account_prediction_table = hv.DynamicMap(
            thistab.make_account_predictions, streams=[stream_launch])
        account_prediction_table = renderer.get_plot(
            hv_account_prediction_table)

        hv_features_table = hv.DynamicMap(thistab.make_feature_importances)
        features_table = renderer.get_plot(hv_features_table)

        hv_accuracy_table = hv.DynamicMap(thistab.accuracy_table)
        accuracy_table = renderer.get_plot(hv_accuracy_table)


        hv_variable_plot = hv.DynamicMap(thistab.box_plot,
                                 streams=[stream_select_variable])\
            .opts(plot=dict(width=800, height=500))

        variable_plot = renderer.get_plot(hv_variable_plot)

        # add callbacks
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        thistab.prediction_address_select.on_change(
            'value', update_address_predictions)
        reset_prediction_address_button.on_click(thistab.reset_checkboxes)
        select_variable.on_change('value', update_select_variable)

        # put the controls in a single element
        controls = WidgetBox(select_variable, datepicker_start, datepicker_end,
                             thistab.prediction_address_select,
                             reset_prediction_address_button)

        controls_prediction = WidgetBox(datepicker_start, datepicker_end,
                                        thistab.prediction_address_select,
                                        reset_prediction_address_button)

        grid = gridplot(
            [[thistab.notification_div['top']], [Spacer(width=20, height=70)],
             [thistab.section_headers['churn']], [Spacer(width=20, height=70)],
             [accuracy_table.state,
              thistab.stats_information_div()], [features_table.state],
             [thistab.section_headers['variable behavior']],
             [Spacer(width=20, height=30)], [variable_plot.state, controls],
             [thistab.section_headers['predictions']],
             [Spacer(width=20, height=30)],
             [
                 account_prediction_table.state, thistab.metrics_div,
                 controls_prediction
             ], [thistab.notification_div['bottom']]])

        tab = Panel(child=grid, title='predictions: accounts by value')
        return tab

    except Exception:
        logger.error('rendering err:', exc_info=True)
        text = 'predictions: accounts by value'
        return tab_error_flag(text)
示例#6
0
    live_checkbox = CheckboxGroup(labels=['Live'], active=[0], width=150)
    live_checkbox.on_change('active',live_toggle)
    
    live_hours_input = TextInput(value=str(max_hours), title="Live past hours (max. 24h):", width=150)
    live_hours_input.on_change('value',live_hours)
    
#    live_slider = Slider(start=0.01, end=24, value=max_hours, step=0.01, title="Hours before")
#    live_slider.on_change('value',live_hours)
    
    # pick a date
    date_picker_i = DatePicker(value=date.today(),max_date=date.today(), title='Choose inital date:', width=150, disabled=False)
    mydate_i=date_picker_i.value
    date_picker_f = DatePicker(value=date.today(),max_date=date.today(), title='Choose final date:' , width=150, disabled=False)
    mydate_f=date_picker_f.value
    date_picker_i.on_change('value',initial_date)
    date_picker_f.on_change('value',final_date)
    
    hist_button = Button(label="Show History", button_type="default", width=310)
    hist_button.on_click(get_history)
    
    pre_head = PreText(text="N.B.: Readout every 10 seconds. Be patient!",width=500, height=50)
    pre_head2 = PreText(text="",width=400, height=25)
    pre_temp_top = PreText(text="",width=400, height=20)
    pre_temp_mid = PreText(text="",width=400, height=20)
    pre_temp_bot = PreText(text="",width=400, height=20)
    
    h_space = PreText(text="",width=50, height=1)
    v_space = PreText(text="",width=1, height=50)
    
#    curdoc().add_root(column(pre_head,row(div,column(pre_head2,pre_temp_top,pre_temp_mid,pre_temp_bot),column(plot['temperature'],plot['humidity'],plot['pressure']),)))
示例#7
0
    new_source_2_data = dict(
        races=races,
        cases_percent=[
            new_result['asian'][0], new_result['black'][0],
            new_result['cdph-other'][0], new_result['white'][0],
            new_result['latino'][0], new_result['other'][0]
        ],
        deaths_percent=[
            new_result['asian'][1], new_result['black'][1],
            new_result['cdph-other'][1], new_result['white'][1],
            new_result['latino'][1], new_result['other'][1]
        ],
        population_percent=[
            new_result['asian'][2], new_result['black'][2],
            new_result['cdph-other'][2], new_result['white'][2],
            new_result['latino'][2], new_result['other'][2]
        ],
        date=[
            selected_date, selected_date, selected_date, selected_date,
            selected_date, selected_date
        ])
    source_2.data = new_source_2_data
    print(new_result, flag_2)


date_picker.on_change("value", call_back)
# layout
curdoc().add_root(column(intro, date_picker, description, row(p1, p2)))

#### labels and legends
示例#8
0
def forecasting_bcc_rentals_visitor_tab(panel_title):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')

            self.trigger = 0
            self.groupby_dict = {
                'category': 'nunique',
                'item': 'nunique',
                'area': 'nunique',
                'visit_duration': 'mean',
                'age': 'mean',
                'gender_coded': 'mean',
                'status_coded': 'mean',
                'rental_employee_gender_coded': 'mean',
                'rental_employee_age': 'mean',
                'rental_tab': 'sum'
            }

            self.feature_list = ['age', 'rental_employee_age', 'rental_tab']
            self.tsa_variable = 'rental_tab'
            self.forecast_days = 40
            self.lag_variable = 'visit_duration'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                style='width:350px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = 'rental_tab'

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            self.pym = PythonMongo('aion')
            self.menus = {
                'item': ['all'],
                'category': ['all'],
                'status': ['all', 'guest', 'member'],
                'gender': ['all', 'male', 'female'],
                'variables':
                list(self.groupby_dict.keys()),
                'history_periods':
                ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
                'area': ['all', 'bar', 'rentals'],
                'tsa': ['rental_tab', 'visit_duration']
            }
            self.select = {}
            self.select['area'] = Select(title='Select BCC area',
                                         value='all',
                                         options=self.menus['area'])

            self.select['item'] = Select(title='Select item',
                                         value='all',
                                         options=self.menus['item'])

            self.select['status'] = Select(title='Select visitor status',
                                           value='all',
                                           options=self.menus['status'])

            self.select['gender'] = Select(title="Select visitor gender",
                                           value='all',
                                           options=self.menus['gender'])

            self.select['category'] = Select(title="Select category",
                                             value='all',
                                             options=self.menus['category'])

            self.select['rental_employee_gender'] = Select(
                title="Select category",
                value='all',
                options=self.menus['category'])

            self.select_values = {}
            for item in self.select.keys():
                self.select_values[item] = 'all'

            self.multiline_vars = {'x': 'gender', 'y': 'rental_tab'}
            self.timestamp_col = 'visit_start'
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                    <div style="text-align:center;width:{}px;height:{}px;
                           position:relative;background:black;margin-bottom:200px">
                           <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=lag_section_head_txt,
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'distribution':
                self.section_header_div(text='Pre-transform distribution:',
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'relationships':
                self.section_header_div(
                    text='Relationships between variables:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'correlations':
                self.section_header_div(text='Correlations:',
                                        width=600,
                                        html_header='h3',
                                        margin_top=5,
                                        margin_bottom=-155),
                'forecast':
                self.section_header_div(text='Forecasts:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            txt = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            div_style = """ 
                style='width:350px; margin-left:-600px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////

        def load_df(self, req_startdate, req_enddate, table, cols,
                    timestamp_col):
            try:
                # get min and max of loaded df
                if self.df is not None:
                    loaded_min = self.df[timestamp_col].min()
                    loaded_max = self.df[timestamp_col].max()

                    if loaded_min <= req_startdate and loaded_max >= req_enddate:
                        df = self.df[(self.df[timestamp_col] >= req_startdate)
                                     & (self.df[timestamp_col] <= req_enddate)]
                        return df
                return self.pym.load_df(req_startdate,
                                        req_enddate,
                                        table=table,
                                        cols=cols,
                                        timestamp_col=timestamp_col)

            except Exception:
                logger.error('load_df', exc_info=True)

        def filter_df(self, df1):
            try:
                df1 = df1[self.cols]

                for key, value in self.groupby_dict.items():
                    if value == 'count':
                        if self.select_values[key] != 'all':
                            df1 = df1[df1[key] == self.select_values[key]]
                return df1

            except Exception:
                logger.error('filter', exc_info=True)

        def prep_data(self, df):
            try:
                df = self.filter_df(df)
                # set up code columns
                codes = {
                    'gender': {
                        'male': 1,
                        'female': 2,
                        'other': 3
                    },
                    'status': {
                        'guest': 1,
                        'member': 2
                    }
                }
                for col in df.columns:
                    coded_col = col + '_coded'
                    if 'gender' in col:
                        df[coded_col] = df[col].map(codes['gender'])
                    if 'status' == col:
                        df[coded_col] = df[col].map(codes['status'])

                self.df = df.set_index(self.timestamp_col)
                # groupby and resample
                self.df1 = self.df.groupby('name').resample(
                    self.resample_period).agg(self.groupby_dict)
                self.df1 = self.df1.reset_index()
                self.df1 = self.df1.fillna(0)

                logger.warning('LINE 288 df:%s', self.df1.head(10))

            except Exception:
                logger.error('prep data', exc_info=True)

        def tsa(self, launch):
            try:
                df = self.df.resample('D').agg({self.tsa_variable: 'mean'})
                df = df.reset_index()
                label = self.tsa_variable + '_diff'
                df[label] = df[self.tsa_variable].diff()
                df = df.fillna(0)

                rename = {self.timestamp_col: 'ds', self.tsa_variable: 'y'}
                df = df.rename(columns=rename)
                df = df[['ds', 'y']]
                logger.warning('df:%s', df.tail())
                m = Prophet()
                m.fit(df)

                future = m.make_future_dataframe(periods=self.forecast_days)
                forecast = m.predict(future)
                print(forecast[['ds', 'yhat', 'yhat_lower',
                                'yhat_upper']].tail())
                print(list(forecast.columns))
                for idx, col in enumerate(['yhat', 'yhat_lower',
                                           'yhat_upper']):
                    if idx == 0:
                        p = forecast.hvplot.line(x='ds',
                                                 y=col,
                                                 width=600,
                                                 height=250,
                                                 value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        p *= forecast.hvplot.scatter(x='ds',
                                                     y=col,
                                                     width=600,
                                                     height=250,
                                                     value_label='$',
                                                     legend=False).relabel(col)

                for idx, col in enumerate(['trend', 'weekly']):
                    if idx == 0:
                        q = forecast.hvplot.line(x='ds',
                                                 y=col,
                                                 width=550,
                                                 height=250,
                                                 value_label='$',
                                                 legend=False).relabel(col)
                    else:
                        q *= forecast.hvplot.line(x='ds',
                                                  y=col,
                                                  width=550,
                                                  height=250,
                                                  value_label='$',
                                                  legend=False).relabel(col)

                return p + q
            except Exception:
                logger.error("TSA:", exc_info=True)

    def update_variable(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.variable = new
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        for item in thistab.select_values.keys():
            thistab.select_values[item] = thistab.select[item].value
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)

        thistab.df['gender_code'] = thistab.df['gender'].apply(
            lambda x: 1 if x == 'male' else 2)
        thistab.df1 = thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.df1 = thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.multiline_vars['x'] = multiline_x_select.value
        thistab.multiline_vars['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_forecast(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.forecast_days = int(select_forecast_days.value)
        thistab.tsa_variable = forecast_variable_select.value
        thistab.trigger += 1
        stream_launch_tsa.event(launch=thistab.trigger)
        thistab.notification_updater("ready")

    try:
        # SETUP
        table = 'bcc_composite'
        cols = cols_to_load['guest'] + cols_to_load['rental']
        thistab = Thistab(table, cols, [])

        # setup dates
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=1)
        first_date = last_date - timedelta(days=1000)
        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)

        thistab.prep_data(thistab.df)

        # MANAGE STREAM
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()
        stream_launch_tsa = streams.Stream.define('Launch_tsa', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)

        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        select_forecast_days = Select(
            title='Select # of days which you want forecasted',
            value=str(thistab.forecast_days),
            options=['10', '20', '30', '40', '50', '60', '70', '80', '90'])

        forecast_variable_select = Select(title='Select forecast variable',
                                          value=thistab.menus['tsa'][0],
                                          options=thistab.menus['tsa'])

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        multiline_y_select = Select(
            title='Select comparative DV(y)',
            value=thistab.multiline_vars['y'],
            options=['price', 'amount', 'visit_duration'])

        multiline_x_select = Select(title='Select comparative IV(x)',
                                    value=thistab.multiline_vars['x'],
                                    options=[
                                        'category', 'gender',
                                        'rental_employee_gender', 'status',
                                        'item'
                                    ])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------

        # tables
        hv_tsa = hv.DynamicMap(thistab.tsa, streams=[stream_launch_tsa])
        tsa = renderer.get_plot(hv_tsa)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        resample_select.on_change('value', update_resample)
        thistab.select['area'].on_change('value', update_IVs)
        thistab.select['gender'].on_change('value', update_IVs)
        thistab.select['rental_employee_gender'].on_change('value', update_IVs)
        thistab.select['item'].on_change('value', update_IVs)
        thistab.select['category'].on_change('value', update_IVs)
        thistab.select['status'].on_change('value', update_IVs)
        select_forecast_days.on_change('value', update_forecast)
        forecast_variable_select.on_change('value', update_forecast)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)

        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_tsa = WidgetBox(datepicker_start, datepicker_end,
                                 variable_select, thistab.select['status'],
                                 resample_select, thistab.select['gender'],
                                 thistab.select['category'],
                                 thistab.select['area'],
                                 forecast_variable_select,
                                 select_forecast_days)

        # create the dashboards

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.section_headers['forecast']],
                         [tsa.state, controls_tsa],
                         [Spacer(width=20, height=30)],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
示例#9
0
def eda_projects_tab(panel_title):
    lags_corr_src = ColumnDataSource(data=dict(variable_1=[],
                                               variable_2=[],
                                               relationship=[],
                                               lag=[],
                                               r=[],
                                               p_value=[]))

    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')

            self.trigger = 0
            self.groupby_dict = {
                'project_duration': 'sum',
                'project_start_delay': 'mean',
                'project_end_delay': 'mean',
                'project_owner_age': 'mean',
                'project_owner_gender': 'mean',
                'milestone_duration': 'sum',
                'milestone_start_delay': 'mean',
                'milestone_end_delay': 'mean',
                'milestone_owner_age': 'mean',
                'milestone_owner_gender': 'mean',
                'task_duration': 'sum',
                'task_start_delay': 'sum',
                'task_end_delay': 'mean',
                'task_owner_age': 'mean',
                'task_owner_gender': 'mean'
            }
            self.feature_list = list(self.groupby_dict.keys())
            self.lag_variable = 'task_duration'
            self.lag_days = "1,2,3"
            self.lag = 0
            self.lag_menu = [str(x) for x in range(0, 100)]

            self.strong_thresh = .65
            self.mod_thresh = 0.4
            self.weak_thresh = 0.25
            self.corr_df = None
            self.div_style = """ 
                style='width:350px; margin-left:25px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.variables = sorted(list(self.groupby_dict.keys()))
            self.variable = self.variables[0]

            self.relationships_to_check = ['weak', 'moderate', 'strong']

            self.status = 'all'
            self.pm_gender = 'all'
            self.m_gender = 'all'
            self.t_gender = 'all'
            self.type = 'all'

            self.pym = PythonMongo('aion')
            self.menus = {
                'status': ['all', 'open', 'closed'],
                'type': [
                    'all', 'research', 'reconciliation', 'audit', 'innovation',
                    'construction', 'manufacturing', 'conference'
                ],
                'gender': ['all', 'male', 'female'],
                'variables':
                list(self.groupby_dict.keys()),
                'history_periods':
                ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
            }
            self.multiline_vars = {'x': 'manager_gender', 'y': 'remuneration'}
            self.timestamp_col = 'project_startdate_actual'
            # ------- DIVS setup begin
            self.page_width = 1250
            txt = """<hr/>
                    <div style="text-align:center;width:{}px;height:{}px;
                           position:relative;background:black;margin-bottom:200px">
                           <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                    </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }
            lag_section_head_txt = 'Lag relationships between {} and...'.format(
                self.variable)

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'lag':
                self.section_header_div(text=lag_section_head_txt,
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'distribution':
                self.section_header_div(text='Pre-transform distribution:',
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
                'relationships':
                self.section_header_div(
                    text='Relationships between variables:{}'.format(
                        self.section_divider),
                    width=600,
                    html_header='h2',
                    margin_top=5,
                    margin_bottom=-155),
                'correlations':
                self.section_header_div(text='Correlations:',
                                        width=600,
                                        html_header='h3',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

            # ----- UPDATED DIVS END

            # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        def notification_updater(self, text):
            txt = """<div style="text-align:center;background:black;width:100%;">
                    <h4 style="color:#fff;">
                    {}</h4></div>""".format(text)
            for key in self.notification_div.keys():
                self.notification_div[key].text = txt

        def reset_adoption_dict(self, variable):
            self.significant_effect_dict[variable] = []

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def corr_information_div(self, width=400, height=300):
            div_style = """ 
                style='width:350px; margin-left:-600px;
                border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
            """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                Positive: as variable 1 increases, so does variable 2.
                </li>
                <li>
                Negative: as variable 1 increases, variable 2 decreases.
                </li>
                <li>
                Strength: decisions can be made on the basis of strong and moderate relationships.
                </li>
                <li>
                No relationship/not significant: no statistical support for decision making.
                </li>
                 <li>
               The scatter graphs (below) are useful for visual confirmation.
                </li>
                 <li>
               The histogram (right) shows the distribution of the variable.
                </li>
            </ul>
            </div>

            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # /////////////////////////////////////////////////////////////
        def filter_df(self, df1):
            if self.status != 'all':
                df1 = df1[df1.status == self.status]
            if self.pm_gender != 'all':
                df1 = df1[df1.project_owner_gender == self.pm_gender]
            if self.m_gender != 'all':
                df1 = df1[df1.milestone_owner_gender == self.m_gender]
            if self.t_gender != 'all':
                df1 = df1[df1.task_owner_gender == self.t_gender]

            if self.type != 'all':
                df1 = df1[df1.type == self.type]
            return df1

        def prep_data(self, df1):
            try:
                '''
                df1[self.timestamp_col] = df1[self.timestamp_col].apply(lambda x: datetime(x.year,
                                                                                   x.month,
                                                                                   x.day,
                                                                                   x.hour,0,0))
                '''
                df1 = df1.set_index(self.timestamp_col)
                logger.warning('LINE 195 df:%s', df1.head())
                # handle lag for all variables

                df = df1.copy()
                df = self.filter_df(df)

                logger.warning('LINE 199: length before:%s', len(df))
                slice = df[['project']]
                df = df[list(self.groupby_dict.keys())]
                logger.warning('LINE 218: columns:%s', df.head())
                df = df.astype(float)
                df = pd.concat([df, slice], axis=1)
                df = df.groupby('project').resample(self.resample_period).agg(
                    self.groupby_dict)
                logger.warning('LINE 201: length after:%s', len(df))

                df = df.reset_index()
                vars = self.feature_list.copy()
                if int(self.lag) > 0:
                    for var in vars:
                        if self.variable != var:
                            df[var] = df[var].shift(int(self.lag))
                df = df.dropna()
                self.df1 = df
                logger.warning('line 184- prep data: df:%s', self.df.head(10))

            except Exception:
                logger.error('prep data', exc_info=True)

        def lags_plot(self, launch):
            try:
                df = self.df.copy()
                df = df[[self.lag_variable, self.variable]]
                cols = [self.lag_variable]
                lags = self.lag_days.split(',')
                for day in lags:
                    try:
                        label = self.lag_variable + '_' + day
                        df[label] = df[self.lag_variable].shift(int(day))
                        cols.append(label)
                    except:
                        logger.warning('%s is not an integer', day)
                df = df.dropna()
                self.lags_corr(df)
                # plot the comparison
                logger.warning('in lags plot: df:%s', df.head(10))
                return df.hvplot(x=self.variable,
                                 y=cols,
                                 kind='scatter',
                                 alpha=0.4)
            except Exception:
                logger.error('lags plot', exc_info=True)

        # calculate the correlation produced by the lags vector
        def lags_corr(self, df):
            try:
                corr_dict_data = {
                    'variable_1': [],
                    'variable_2': [],
                    'relationship': [],
                    'lag': [],
                    'r': [],
                    'p_value': []
                }
                a = df[self.variable].tolist()
                for col in df.columns:
                    if col not in [self.timestamp_col, self.variable]:
                        # find lag
                        var = col.split('_')
                        try:
                            tmp = int(var[-1])

                            lag = tmp
                        except Exception:
                            lag = 'None'

                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        corr_dict_data['variable_1'].append(self.variable)
                        corr_dict_data['variable_2'].append(col)
                        corr_dict_data['relationship'].append(txt)
                        corr_dict_data['lag'].append(lag)
                        corr_dict_data['r'].append(round(rvalue, 4))
                        corr_dict_data['p_value'].append(round(pvalue, 4))

                lags_corr_src.stream(corr_dict_data,
                                     rollover=(len(corr_dict_data['lag'])))
                columns = [
                    TableColumn(field="variable_1", title="variable 1"),
                    TableColumn(field="variable_2", title="variable 2"),
                    TableColumn(field="relationship", title="relationship"),
                    TableColumn(field="lag", title="lag(days)"),
                    TableColumn(field="r", title="r"),
                    TableColumn(field="p_value", title="p_value"),
                ]
                data_table = DataTable(source=lags_corr_src,
                                       columns=columns,
                                       width=500,
                                       height=280)
                return data_table
            except Exception:
                logger.error('lags corr', exc_info=True)

        def correlation_table(self, launch):
            try:

                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'r': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop(self.timestamp_col, axis=1)
                # df = df.compute()

                a = df[self.variable].tolist()

                for col in self.feature_list:
                    logger.warning('col :%s', col)
                    if col != self.variable:
                        logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        slope, intercept, rvalue, pvalue, txt = self.corr_label(
                            a, b)
                        # add to dict
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['r'].append(round(rvalue, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))

                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'r': corr_dict['r'],
                    'p-value': corr_dict['p-value']
                })
                # logger.warning('df:%s',df.head(23))
                return df.hvplot.table(columns=[
                    'Variable 1', 'Variable 2', 'Relationship', 'r', 'p-value'
                ],
                                       width=550,
                                       height=200,
                                       title='Correlation between variables')
            except Exception:
                logger.error('correlation table', exc_info=True)

        def non_parametric_relationship_table(self, launch):
            try:

                corr_dict = {
                    'Variable 1': [],
                    'Variable 2': [],
                    'Relationship': [],
                    'stat': [],
                    'p-value': []
                }
                # prep df
                df = self.df1
                # get difference for money columns
                df = df.drop(self.timestamp_col, axis=1)
                # df = df.compute()

                # logger.warning('line df:%s',df.head(10))
                a = df[self.variable].tolist()
                for col in self.feature_list:
                    logger.warning('col :%s', col)
                    if col != self.variable:
                        logger.warning('%s:%s', col, self.variable)
                        b = df[col].tolist()
                        stat, pvalue, txt = self.mann_whitneyu_label(a, b)
                        corr_dict['Variable 1'].append(self.variable)
                        corr_dict['Variable 2'].append(col)
                        corr_dict['Relationship'].append(txt)
                        corr_dict['stat'].append(round(stat, 4))
                        corr_dict['p-value'].append(round(pvalue, 4))

                df = pd.DataFrame({
                    'Variable 1': corr_dict['Variable 1'],
                    'Variable 2': corr_dict['Variable 2'],
                    'Relationship': corr_dict['Relationship'],
                    'stat': corr_dict['stat'],
                    'p-value': corr_dict['p-value']
                })
                # logger.warning('df:%s',df.head(23))
                return df.hvplot.table(
                    columns=[
                        'Variable 1', 'Variable 2', 'Relationship', 'stat',
                        'p-value'
                    ],
                    width=550,
                    height=200,
                    title='Non parametric relationship between variables')
            except Exception:
                logger.error('non parametric table', exc_info=True)

        def hist(self, launch):
            try:

                return self.df.hvplot.hist(y=self.feature_list,
                                           subplots=True,
                                           shared_axes=False,
                                           bins=25,
                                           alpha=0.3,
                                           width=300).cols(4)
            except Exception:
                logger.warning('histogram', exc_info=True)

        def matrix_plot(self, launch=-1):
            try:
                logger.warning('line 306 self.feature list:%s',
                               self.feature_list)

                df = self.df1

                if df is not None:
                    # thistab.prep_data(thistab.df)
                    if self.timestamp_col in df.columns:
                        df = df.drop(self.timestamp_col, axis=1)

                    df = df.fillna(0)
                    # logger.warning('line 302. df: %s',df.head(10))

                    cols_temp = self.feature_list.copy()
                    if self.variable in cols_temp:
                        cols_temp.remove(self.variable)
                    # variable_select.options = cols_lst

                    p = df.hvplot.scatter(x=self.variable,
                                          y=cols_temp,
                                          width=330,
                                          subplots=True,
                                          shared_axes=False,
                                          xaxis=False).cols(4)
                else:
                    p = df.hvplot.scatter(x=[0, 0, 0], y=[0, 0, 0], width=330)

                return p

            except Exception:
                logger.error('matrix plot', exc_info=True)

        def multiline(self, launch=1):
            try:
                yvar = self.multiline_vars['y']
                xvar = self.multiline_vars['x']
                df = self.df.copy()
                df = df[[xvar, yvar, self.timestamp_col]]
                df = df.set_index(self.timestamp_col)
                df = df.groupby(xvar).resample(self.resample_period).agg(
                    {yvar: 'mean'})
                df = df.reset_index()
                lines = df[xvar].unique()
                # split data frames
                dfs = {}
                for idx, line in enumerate(lines):
                    dfs[line] = df[df[xvar] == line]
                    dfs[line] = dfs[line].fillna(0)
                    logger.warning('LINE 428:%s - %s:', line, dfs[line].head())
                    if idx == 0:
                        p = dfs[line].hvplot.line(x=self.timestamp_col,
                                                  y=yvar,
                                                  width=1200,
                                                  height=500).relabel(line)
                    else:
                        p *= dfs[line].hvplot.line(x=self.timestamp_col,
                                                   y=yvar,
                                                   width=2,
                                                   height=500).relabel(line)
                return p
            except Exception:
                logger.error('multiline plot', exc_info=True)

    def update_variable(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.prep_data(thistab.df)
        if 'milestone owner gender' == new:
            thistab.variable = 'm_gender_code'
        if 'project owner gender' == new:
            thistab.variable = 'pm_gender_code'
        if 'task owner gender' == new:
            thistab.variable = 't_gender_code'

        if thistab.variable in thistab.adoption_variables['developer']:
            thistab.reset_adoption_dict(thistab.variable)
        thistab.section_head_updater('lag', thistab.variable)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag_plot_variable(attr, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_variable = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_IVs(attrname, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.pm_gender = pm_gender_select.value
        thistab.m_gender = m_gender_select.value
        thistab.t_gender = t_gender_select.value
        thistab.status = status_select.value
        thistab.type = type_select.value
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lag(attr, old, new):  # update lag & cryptocurrency
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag = int(lag_select.value)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df = thistab.pym.load_df(start_date=datepicker_start.value,
                                         end_date=datepicker_end.value,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        thistab.df['project_owner_gender'] = thistab.df[
            'project_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.df['milestone_owner_gender'] = thistab.df[
            'milestone_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.df['task_owner_gender'] = thistab.df[
            'task_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_resample(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.resample_period = new
        thistab.prep_data(thistab.df)
        thistab.trigger += 1
        stream_launch_matrix.event(launch=thistab.trigger)
        stream_launch_corr.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_lags_selected():
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.lag_days = lags_input.value
        logger.warning('line 381, new checkboxes: %s', thistab.lag_days)
        thistab.trigger += 1
        stream_launch_lags_var.event(launch=thistab.trigger)
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    def update_multiline(attrname, old, new):
        thistab.notification_updater("Calculations in progress! Please wait.")
        thistab.multiline_vars['x'] = multiline_x_select.value
        thistab.multiline_vars['y'] = multiline_y_select.value
        thistab.trigger += 1
        stream_launch.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'project_composite1'
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2013-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=30)
        # initial function call
        thistab.df = thistab.pym.load_df(start_date=first_date,
                                         end_date=last_date,
                                         cols=[],
                                         table=thistab.table,
                                         timestamp_col=thistab.timestamp_col)
        if len(thistab.df) > 0:
            thistab.df['manager_gender'] = thistab.df['project_owner_gender']
            thistab.df['project_owner_gender'] = thistab.df[
                'project_owner_gender'].apply(lambda x: 1
                                              if x == 'male' else 2)
            thistab.df['milestone_owner_gender'] = thistab.df[
                'milestone_owner_gender'].apply(lambda x: 1
                                                if x == 'male' else 2)
            thistab.df['task_owner_gender'] = thistab.df[
                'task_owner_gender'].apply(lambda x: 1 if x == 'male' else 2)
            logger.warning('LINE 527:columns %s', list(thistab.df.columns))

            thistab.prep_data(thistab.df)

        # MANAGE STREAM
        stream_launch_hist = streams.Stream.define('Launch', launch=-1)()
        stream_launch_matrix = streams.Stream.define('Launch_matrix',
                                                     launch=-1)()
        stream_launch_corr = streams.Stream.define('Launch_corr', launch=-1)()
        stream_launch_lags_var = streams.Stream.define('Launch_lag_var',
                                                       launch=-1)()
        stream_launch = streams.Stream.define('Launch', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        variable_select = Select(title='Select variable',
                                 value=thistab.variable,
                                 options=thistab.variables)

        lag_variable_select = Select(title='Select lag variable',
                                     value=thistab.lag_variable,
                                     options=thistab.feature_list)

        lag_select = Select(title='Select lag',
                            value=str(thistab.lag),
                            options=thistab.lag_menu)

        type_select = Select(title='Select project type',
                             value=thistab.type,
                             options=thistab.menus['type'])

        status_select = Select(title='Select project status',
                               value=thistab.status,
                               options=thistab.menus['status'])

        pm_gender_select = Select(title="Select project owner's gender",
                                  value=thistab.pm_gender,
                                  options=thistab.menus['gender'])

        m_gender_select = Select(title="Select milestone owner's gender",
                                 value=thistab.m_gender,
                                 options=thistab.menus['gender'])

        t_gender_select = Select(title="Select task owner's gender",
                                 value=thistab.t_gender,
                                 options=thistab.menus['gender'])

        resample_select = Select(title='Select resample period',
                                 value='D',
                                 options=['D', 'W', 'M', 'Q'])

        multiline_y_select = Select(title='Select comparative DV(y)',
                                    value=thistab.multiline_vars['y'],
                                    options=[
                                        'remuneration', 'delay_start',
                                        'delay_end', 'project_duration'
                                    ])

        multiline_x_select = Select(
            title='Select comparative IV(x)',
            value=thistab.multiline_vars['x'],
            options=['manager_gender', 'type', 'status'])

        lags_input = TextInput(
            value=thistab.lag_days,
            title="Enter lags (integer(s), separated by comma)",
            height=55,
            width=300)
        lags_input_button = Button(label="Select lags, then click me!",
                                   width=10,
                                   button_type="success")

        # --------------------- PLOTS----------------------------------
        columns = [
            TableColumn(field="variable_1", title="variable 1"),
            TableColumn(field="variable_2", title="variable 2"),
            TableColumn(field="relationship", title="relationship"),
            TableColumn(field="lag", title="lag(days)"),
            TableColumn(field="r", title="r"),
            TableColumn(field="p_value", title="p_value"),
        ]
        lags_corr_table = DataTable(source=lags_corr_src,
                                    columns=columns,
                                    width=500,
                                    height=200)

        hv_matrix_plot = hv.DynamicMap(thistab.matrix_plot,
                                       streams=[stream_launch_matrix])
        hv_corr_table = hv.DynamicMap(thistab.correlation_table,
                                      streams=[stream_launch_corr])
        hv_nonpara_table = hv.DynamicMap(
            thistab.non_parametric_relationship_table,
            streams=[stream_launch_corr])
        # hv_hist_plot = hv.DynamicMap(thistab.hist, streams=[stream_launch_hist])
        hv_lags_plot = hv.DynamicMap(thistab.lags_plot,
                                     streams=[stream_launch_lags_var])
        hv_multiline = hv.DynamicMap(thistab.multiline,
                                     streams=[stream_launch])

        matrix_plot = renderer.get_plot(hv_matrix_plot)
        corr_table = renderer.get_plot(hv_corr_table)
        nonpara_table = renderer.get_plot(hv_nonpara_table)
        lags_plot = renderer.get_plot(hv_lags_plot)
        multiline = renderer.get_plot(hv_multiline)

        # setup divs

        # handle callbacks
        variable_select.on_change('value', update_variable)
        lag_variable_select.on_change('value', update_lag_plot_variable)
        lag_select.on_change('value', update_lag)  # individual lag
        resample_select.on_change('value', update_resample)
        pm_gender_select.on_change('value', update_IVs)
        m_gender_select.on_change('value', update_IVs)
        t_gender_select.on_change('value', update_IVs)
        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)
        lags_input_button.on_click(update_lags_selected)  # lags array

        status_select.on_change('value', update_IVs)
        type_select.on_change('value', update_IVs)

        multiline_x_select.on_change('value', update_multiline)
        multiline_y_select.on_change('value', update_multiline)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls_lag = WidgetBox(lags_input, lags_input_button,
                                 lag_variable_select)

        controls_multiline = WidgetBox(multiline_x_select, multiline_y_select)

        controls_page = WidgetBox(datepicker_start, datepicker_end,
                                  variable_select, type_select, status_select,
                                  resample_select, pm_gender_select,
                                  m_gender_select, t_gender_select)
        controls_gender = WidgetBox(pm_gender_select, m_gender_select,
                                    t_gender_select)

        # create the dashboards

        grid = gridplot(
            [[thistab.notification_div['top']], [Spacer(width=20, height=70)],
             [thistab.section_headers['relationships']],
             [Spacer(width=20, height=30)], [matrix_plot.state, controls_page],
             [thistab.section_headers['correlations']],
             [Spacer(width=20, height=30)],
             [corr_table.state,
              thistab.corr_information_div()],
             [thistab.title_div('Compare levels in a variable', 400)],
             [Spacer(width=20, height=30)],
             [multiline.state, controls_multiline],
             [thistab.section_headers['lag']], [Spacer(width=20, height=30)],
             [lags_plot.state, controls_lag], [lags_corr_table],
             [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('EDA projects:', exc_info=True)
        return tab_error_flag(panel_title)
示例#10
0
def tab2():
    data = pd.read_csv('cdph-race-ethnicity.csv')
    data['date_time'] = pd.to_datetime(data['date'])
    data = data[(data['age'] == 'all')]
    percentages = ['confirmed cases', 'general population']
    regions = ['asian', 'black', "cdph-other", 'latino', 'other', 'white']
    x = [(race, percent) for race in regions for percent in percentages]

    def create_dataset(df):
        counts = sum(
            zip(df['confirmed_cases_percent'], df['population_percent']), ())
        source = ColumnDataSource(data=dict(x=x, counts=counts))
        return source

    def create_plot(source):
        p = figure(x_range=FactorRange(*x),
                   y_axis_label='Percentage',
                   plot_width=1030)
        p.title.text = "Confirmed_case% VS Population% by races"
        p.title.align = "center"
        p.title.text_font_size = "20px"
        p.vbar(x='x',
               top='counts',
               width=0.9,
               source=source,
               line_color="white",
               fill_color=factor_cmap('x',
                                      factors=percentages,
                                      palette=["#c9d9d3", "#718dbf"],
                                      start=1,
                                      end=2))
        p.y_range.start = 0
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        p.x_range.range_padding = 0.1
        p.xgrid.grid_line_color = None
        p.legend.location = "top_left"
        p.legend.orientation = "horizontal"
        p.xgrid.grid_line_color = None
        p.add_tools(
            HoverTool(tooltips=[('Race, category', "@x"),
                                ('Percentage', "@counts")], ))
        p.add_layout(
            Title(
                text="Data "
                "published by latimes.com/coronavirustracker; download data "
                "from "
                "https://github.com/datadesk/california-coronavirus-data/cdph-race"
                "-ethnicity.csv in GitHub",
                text_font_style="italic"), 'below')
        p.add_layout(
            Title(
                text="Data Source: California Department of Public Health "
                "https://www.cdph.ca.gov/Programs/CID/DCDC/Pages/COVID-19/Race-Ethnicity.aspx",
                text_font_style="italic"), 'below')
        p.add_layout(
            Title(text="Date of last update: 2020-10-14",
                  text_font_style="italic"), 'below')
        return p

    def callback(attr, old, new):
        new_src = create_dataset(
            data[(data['date_time'] == date_picker.value)])
        src.data.update(new_src.data)

    src = create_dataset(data[(data['date_time'] == '2020-10-01')])
    p = create_plot(src)
    date_picker = DatePicker(title='Choose a date',
                             min_date="2020-05-14",
                             max_date='2020-10-14')
    date_picker.on_change('value', callback)
    controls = WidgetBox(date_picker)
    layout = row(p, controls)
    tab = Panel(child=layout, title='Percentage of confirmed cases by race')
    return tab
示例#11
0
    else:
        button.label = 'Play'
        curdoc().remove_periodic_callback(animate_update)

button = Button(label='Play', width=60)
button.on_click(animate)

def dateChange(attrname, old, new):
    global df2
    if button.label == 'Pause':
        button.label = 'Play'
        curdoc().remove_periodic_callback(animate_update)
    df2 = updateRange()
    label.text = datetime.fromtimestamp(totimestamp(df2.index[0])).strftime(labelFormat)
    slider.end = totimestamp(df2.index[-1])-totimestamp(df2.index[0])
    slider.value = 0

datePick.on_change('value',dateChange)

plot.select_one(HoverTool).tooltips = [
    ('Name', '@Name'),
    ('Level', '@Level'+'%'),
]

layout = layout([
    [plot,datePick],
    [slider, button],
])

curdoc().add_root(layout)
示例#12
0
class EphemerisApp:

    to_seconds = dict(Day=86400, Hour=3600, Minute=60, Second=1)

    def __init__(self):

        # app variables
        self.active = True
        self.playAnimation = None
        self.start_epoch = None
        self.stop_epoch = None
        self.current_epoch = None

        # get initial configuration
        self.available_models = inspect.getmembers(StandardEphemerisModels,
                                                   inspect.isfunction)
        self.ephemeris_model = self.available_models[0][1]()
        self.spice_provider = SpiceProvider()
        self.spice_provider.SPICE_IDS = self.ephemeris_model.objects
        self.spice_provider.SPICE_NAMES = {
            v: k
            for k, v in self.ephemeris_model.objects.items()
        }

        # init data sources
        self.plot_source = self.spice_provider.state_source
        self.table_source = self.spice_provider.ephemeris_source
        self.cum_source = self.spice_provider.cum_source

        # gather options from ephemeris model and spice provider
        self.allowed_models = {
            model[1]().name: model[1]
            for model in self.available_models
        }
        allowed_objects = [
            self.spice_provider.fromId(name)
            for name in self.ephemeris_model.objects
        ]
        allowed_frames = self.ephemeris_model.FRAMES
        allowed_corrections = [name for name in SpiceProvider.CORRECTIONS]
        allowed_durations = [
            str(v) for v in self.ephemeris_model.DURATION_DAYS
        ]
        allowed_intervals = [name for name in SpiceProvider.INTERVALS]

        # set up widgets
        self.model = Select(title="Ephemeris Model",
                            options=list(self.allowed_models.keys()))

        self.center = Select(title="Center",
                             value=self.ephemeris_model.center,
                             options=allowed_objects)

        self.target = Select(title="Target",
                             value=self.ephemeris_model.target,
                             options=allowed_objects)

        self.frames = Select(title="Frame",
                             value=self.ephemeris_model.frame,
                             options=allowed_frames)

        self.planes = RadioButtonGroup(labels=['XY', 'YZ', 'XZ'], active=0)

        self.vector = Select(title='Vector Type',
                             value=self.ephemeris_model.vector_type,
                             options=allowed_corrections)

        self.epoch = DatePicker(title="Select Epoch",
                                value=datetime.strftime(
                                    self.ephemeris_model.epoch, "%Y-%m-%d"))

        self.offset = Slider(title="Days Since Epoch",
                             value=self.ephemeris_model.offset,
                             start=0,
                             end=self.ephemeris_model.duration,
                             step=1)

        self.duration = Select(title="Duration (Days)",
                               value=str(self.ephemeris_model.duration),
                               options=allowed_durations)

        self.interval = Select(title="Time Step",
                               value=str(self.ephemeris_model.step_size),
                               options=allowed_intervals)

        # create buttons
        self.play_button = Button(label="Play")
        self.exportRange = Div(text="Start and Stop Epoch: ")
        self.update_button = Button(label="Play")
        self.export_button = Button(label="Export")

        self.infoDiv = Div(
            text=
            "<hr>All ephemeris data shown on this website was obtained from publicly available "
            "SPICE files located at <a href='https://naif.jpl.nasa.gov/naif/data.html'>"
            "https://naif.jpl.nasa.gov/naif/data.html</a>, which is hosted by the  "
            "Navigation and Ancillary Information Facility (NAIF) at the NASA Jet Propulsion "
            "Laboratory. The exception is the SPICE kernel for the Parker Solar Probe, which is "
            "available at <a href='https://sppgway.jhuapl.edu/ancil_products'>"
            "https://sppgway.jhuapl.edu/ancil_products</a>, hosted by the Johns Hopkins University "
            "Applied Physics Laboratory. SpiceyPy is being used to process the SPICE files.",
            sizing_mode='stretch_width')

        # create plot tab objects
        self.plot = figure(match_aspect=True,
                           sizing_mode="stretch_both",
                           title="Astropynamics",
                           tools="hover, pan, reset, save",
                           tooltips=[("name", "@index")])

        self.plot.add_tools(BoxZoomTool(match_aspect=True))
        self.plot.circle('px',
                         'py',
                         size='radii',
                         source=self.plot_source,
                         line_width=3,
                         line_alpha=0.5,
                         name='XY')
        self.plot.circle('px',
                         'pz',
                         size='radii',
                         source=self.plot_source,
                         line_width=3,
                         line_alpha=0.5,
                         name='XZ').visible = False
        self.plot.circle('py',
                         'pz',
                         size='radii',
                         source=self.plot_source,
                         line_width=3,
                         line_alpha=0.5,
                         name='YZ').visible = False
        self.plot.line('px',
                       'py',
                       source=self.cum_source,
                       line_width=2,
                       line_alpha=0.5,
                       color='red',
                       name='XYOrbit')
        self.plot.line('px',
                       'pz',
                       source=self.cum_source,
                       line_width=2,
                       line_alpha=0.5,
                       color='red',
                       name='XZOrbit').visible = False
        self.plot.line('py',
                       'pz',
                       source=self.cum_source,
                       line_width=2,
                       line_alpha=0.5,
                       color='red',
                       name='YZOrbit').visible = False

        self.plotLayout = column(self.plot,
                                 self.offset,
                                 sizing_mode="stretch_width")
        self.plotTab = Panel(child=self.plotLayout, title="Display")

        # create data table tab objects
        fmt = NumberFormatter(format='0.000', text_align=TextAlign.right)
        columns = [
            TableColumn(field="index",
                        title="Epoch",
                        formatter=DateFormatter(format="%m/%d/%Y %H:%M:%S")),
            TableColumn(field="px", title="PX", formatter=fmt, width=10),
            TableColumn(field="py", title="PY", formatter=fmt),
            TableColumn(field="pz", title="PZ", formatter=fmt),
            TableColumn(field="vx", title="VX", formatter=fmt),
            TableColumn(field="vy", title="VY", formatter=fmt),
            TableColumn(field="vz", title="VZ", formatter=fmt)
        ]

        self.ephemerisTable = DataTable(source=self.table_source,
                                        columns=columns,
                                        sizing_mode="stretch_both")
        self.ephemerisLayout = column(self.exportRange,
                                      self.ephemerisTable,
                                      sizing_mode="stretch_width")
        self.dataTab = Panel(child=self.ephemerisLayout, title="Table")

        self.kernels = Div()
        self.kernelTab = Panel(child=self.kernels, title="Kernels")

        self.tabs = Tabs(tabs=[self.plotTab, self.dataTab, self.kernelTab])

        # init data
        self.model.value = "The Solar System"
        self.update_model(None, 0, self.model.value)
        self.update_epochs(None, 0, 0)
        self.update_states(None, 0, 0)

        self.model.on_change('value', self.update_model)
        self.frames.on_change('value', self.update_epochs)
        self.planes.on_change('active', self.update_plot_view)
        self.center.on_change('value', self.update_epochs)
        self.target.on_change('value', self.update_epochs)
        self.offset.on_change('value', self.update_offset)
        self.epoch.on_change('value', self.update_epochs)
        self.duration.on_change('value', self.update_epochs)
        self.interval.on_change('value', self.update_epochs)
        self.update_button.on_click(self.update_onclick)
        self.tabs.on_change('active', self.update_button_type)

        self.inputs = column(self.model, self.frames, self.planes, self.center,
                             self.target, self.epoch, self.duration,
                             self.interval, self.update_button)

    def get_layout(self):
        return column(row([self.inputs, self.tabs]),
                      self.infoDiv,
                      sizing_mode='stretch_width')

    def update_kenerls_tab(self):
        kernels = self.spice_provider.fetch_kernels()
        kernel_text = "<h3>Loaded Spice Kernels:</h3>\n"
        kernel_text += '<table>\n'
        for k in kernels:
            kernel_text += f"<tr><td><b>{k[1]}&emsp;&emsp;</b></td><td>{k[0].split('/')[-1]}</td></tr>\n"
        self.kernels.text = kernel_text + "</table>"

    def update_model(self, attr, old, new):

        # disable callbacks
        self.active = False

        # update model and load new kernel
        self.ephemeris_model = self.allowed_models[new]()
        self.spice_provider.set_meta_kernel(self.ephemeris_model.kernel)
        self.spice_provider.setSpiceIds(self.ephemeris_model.objects)

        self.update_kenerls_tab()

        # set widget values
        allowed_objects = [
            self.spice_provider.fromId(name)
            for name in self.ephemeris_model.objects
        ]
        allowed_frames = self.ephemeris_model.FRAMES
        allowed_durations = [
            str(v) for v in self.ephemeris_model.DURATION_DAYS
        ]
        self.target.options = allowed_objects
        self.center.options = allowed_objects
        self.frames.options = allowed_frames
        self.duration.options = allowed_durations

        self.target.value = self.ephemeris_model.target
        self.center.value = self.ephemeris_model.center
        self.frames.value = self.ephemeris_model.frame
        self.planes.active = self.ephemeris_model.plane
        self.epoch.value = datetime.strftime(self.ephemeris_model.epoch,
                                             "%Y-%m-%d")
        self.duration.value = str(self.ephemeris_model.duration)
        self.offset.value = self.ephemeris_model.offset
        self.offset.end = self.ephemeris_model.duration
        self.interval.value = self.ephemeris_model.step_size

        # reinstate callbacks
        self.active = True

        # update start and stop epochs and plot
        self.update_epochs(None, 0, 0)

    def update_epochs(self, attr, old, new):

        self.start_epoch = datetime.strptime(self.epoch.value, "%Y-%m-%d")
        self.stop_epoch = self.start_epoch + \
                          pd.Timedelta(seconds=(int(float(self.duration.value) * EphemerisApp.to_seconds['Day'])))

        self.offset.value = 0
        self.offset.end = int(
            float(self.duration.value) * EphemerisApp.to_seconds['Day'] /
            EphemerisApp.to_seconds[self.interval.value])
        self.offset.title = f"{self.interval.value}s Since Epoch"
        self.exportRange.text = f"Showing epoch range:\t<b>{self.start_epoch} to {self.stop_epoch}</b>"

        self.update_offset(None, 0, 0)

    def update_offset(self, attr, old, new):

        scale_factor = EphemerisApp.to_seconds[self.interval.value]
        self.current_epoch = self.start_epoch + pd.Timedelta(
            seconds=(self.offset.value * scale_factor))

        if self.playAnimation is None:
            SpiceProvider.reset_source(self.spice_provider.cum_source)

        self.update_states(None, 0, 0)

    def update_ephemeris(self, attr, old, new):

        self.update_epochs(attr, old, new)

        self.spice_provider.set_center(self.center.value)
        self.spice_provider.frame = self.frames.value
        self.spice_provider.correction = SpiceProvider.CORRECTIONS[
            self.vector.value]

        if self.active:
            self.spice_provider.fetch_ephemeris_states(self.target.value,
                                                       self.start_epoch,
                                                       self.stop_epoch,
                                                       self.interval.value)

    def update_states(self, attr, old, new):

        self.spice_provider.set_center(self.center.value)
        self.spice_provider.frame = self.frames.value
        self.spice_provider.correction = SpiceProvider.CORRECTIONS[
            self.vector.value]

        if self.active:
            self.spice_provider.fetch_target_states(
                self.ephemeris_model.objects, self.current_epoch,
                self.target.value)

    def update_plot_view(self, attr, old, new):

        self.plot.select_one({"name": self.planes.labels[old]}).visible = False
        self.plot.select_one({"name": self.planes.labels[new]}).visible = True

        self.plot.select_one({
            "name": self.planes.labels[old] + "Orbit"
        }).visible = False
        self.plot.select_one({
            "name": self.planes.labels[new] + "Orbit"
        }).visible = True

    def animate_update(self):

        self.offset.value = 0 if self.offset.value > self.offset.end else self.offset.value + 1
        if self.offset.value == 0:
            SpiceProvider.reset_source(self.spice_provider.cum_source)

    def animate(self, start=True):
        if self.update_button.label == 'Play' and start:
            self.update_button.label = 'Pause'
            self.playAnimation = curdoc().add_periodic_callback(
                self.animate_update, 50)
        elif self.playAnimation is not None:
            self.update_button.label = 'Play'
            curdoc().remove_periodic_callback(self.playAnimation)
            self.playAnimation = None

    def update_onclick(self):
        if self.tabs.active == 0:
            self.animate()
        elif self.tabs.active == 1:
            self.update_ephemeris(None, 0, 0)
        elif self.tabs.active == 2:
            self.update_kenerls_tab()

    def update_button_type(self, attr, old, new):
        self.animate(False)
        if self.tabs.active == 0:
            self.update_button.label = "Play"
        else:
            self.update_button.label = "Update"
            self.update_ephemeris(None, 0, 0)
示例#13
0
def cryptocurrency_clustering_tab(panel_title):
    class Thistab(Mytab):
        def __init__(self, table, cols, dedup_cols=[]):
            Mytab.__init__(self, table, cols, dedup_cols)
            self.table = table
            self.cols = cols
            self.DATEFORMAT = "%Y-%m-%d %H:%M:%S"
            self.df = None
            self.df1 = None
            self.df_predict = None
            self.day_diff = 1  # for normalizing for classification periods of different lengths
            self.df_grouped = ''

            self.cl = PythonClickhouse('aion')
            # add all the coins to the dict
            self.github_cols = ['watch', 'fork', 'issue', 'release', 'push']
            self.index_cols = ['close', 'high', 'low', 'market_cap', 'volume']

            self.trigger = 0

            self.groupby_dict = groupby_dict
            self.feature_list = list(self.groupby_dict.keys())
            self.kmean_model = {}

            self.div_style = """ style='width:350px; margin-left:25px;
                            border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
                            """

            self.header_style = """ style='color:blue;text-align:center;' """

            self.k = '1'
            self.max_clusters_menu = [str(k) for k in range(1, 12)]

            self.launch_cluster_table = False  # launch cluster
            self.cryptos = None
            # ------- DIVS setup begin
            self.page_width = 1200
            txt = """<hr/><div style="text-align:center;width:{}px;height:{}px;
                        position:relative;background:black;margin-bottom:200px">
                        <h1 style="color:#fff;margin-bottom:300px">{}</h1>
                  </div>""".format(self.page_width, 50, 'Welcome')
            self.notification_div = {
                'top': Div(text=txt, width=self.page_width, height=20),
                'bottom': Div(text=txt, width=self.page_width, height=10),
            }

            self.section_divider = '-----------------------------------'
            self.section_headers = {
                'Crypto families':
                self.section_header_div(text='Crypto families:{}'.format(
                    self.section_divider),
                                        width=600,
                                        html_header='h2',
                                        margin_top=5,
                                        margin_bottom=-155),
            }

        # ----------------------  DIVS ----------------------------

        def section_header_div(self,
                               text,
                               html_header='h2',
                               width=600,
                               margin_top=150,
                               margin_bottom=-150):
            text = """<div style="margin-top:{}px;margin-bottom:-{}px;"><{} style="color:#4221cc;">{}</{}></div>""" \
                .format(margin_top, margin_bottom, html_header, text, html_header)
            return Div(text=text, width=width, height=15)

        # //////////////  DIVS   /////////////////////////////////

        def title_div(self, text, width=700):
            text = '<h2 style="color:#4221cc;">{}</h2>'.format(text)
            return Div(text=text, width=width, height=15)

        def information_div(self, width=400, height=150):
            div_style = """ 
               style='width:350px;
               border:1px solid #ddd;border-radius:3px;background:#efefef50;' 
           """
            txt = """
            <div {}>
            <h4 {}>How to interpret relationships </h4>
            <ul style='margin-top:-10px;'>
                <li>
                A cluster is statistical grouping of items based on a composite similarity of the variables under review. 
                
                </li>
                <li>
                I have highlighted the peers in our cluster (aion_cluster), and simply labeled the other clusters with numbers.
                </li>
            </ul>
            </div>

            """.format(div_style, self.header_style)
            div = Div(text=txt, width=width, height=height)
            return div

        # ////////////////// HELPER FUNCTIONS ////////////////////
        def set_groupby_dict(self):
            try:
                lst = ['mention', 'hashtags', 'tweets', 'replies', 'favorites']
                for col in self.cols:
                    if col not in self.groupby_dict.keys():
                        if not string_contains_list(lst, col):
                            self.groupby_dict[col] = 'mean'
                        else:
                            self.groupby_dict[col] = 'sum'

            except Exception:
                logger.error('set groupby dict', exc_info=True)

        # /////////////////////////////////////////////////////////////
        def optimalK(self, data, nrefs=3, maxClusters=10):
            try:
                """
                Calculates KMeans optimal K using Gap Statistic from Tibshirani, Walther, Hastie
                Params:
                    data: ndarry of shape (n_samples, n_features)
                    nrefs: number of sample reference datasets to create
                    maxClusters: Maximum number of clusters to test for
                Returns: (gaps, optimalK)
                """
                gaps = np.zeros((len(range(1, maxClusters)), ))
                resultsdf = pd.DataFrame({'clusterCount': [], 'gap': []})
                for gap_index, k in enumerate(
                        range(1, len(self.max_clusters_menu))):
                    logger.warning('starting for k=%s', k)
                    # Holder for reference dispersion results
                    refDisps = np.zeros(nrefs)

                    # For n references, generate random sa,kmple and perform kmeans getting resulting dispersion of each loop
                    for i in range(nrefs):
                        logger.warning('nref=%s', i)

                        # Create new random reference set
                        randomReference = np.random.random_sample(
                            size=data.shape)

                        # Fit to it
                        km = KMeans(k)
                        km.fit(randomReference)

                        refDisp = km.inertia_
                        refDisps[i] = refDisp

                    # Fit cluster to original data and create dispersion

                    self.kmean_model[k] = KMeans(k, random_state=42)
                    self.kmean_model[k].fit(data)

                    origDisp = km.inertia_

                    # Calculate gap statistic
                    gap = np.log(np.mean(refDisps)) - np.log(origDisp)

                    # Assign this loop's gap statistic to gaps
                    gaps[gap_index] = gap

                    resultsdf = resultsdf.append(
                        {
                            'clusterCount': k,
                            'gap': gap
                        }, ignore_index=True)

                return (
                    gaps.argmax() + 1, resultsdf
                )  # Plus 1 because index of 0 means 1 cluster is optimal, index 2 = 3 clusters are optimal

            except Exception:
                logger.error('optimal', exc_info=True)

        def cluster_table(self, launch):
            try:
                # prep
                df = self.df.groupby(['crypto']).agg(groupby_dict)
                df = df.compute()
                logger.warning('df after groupby:%s', df)

                self.cryptos = df.index.tolist()
                logger.warning('self.cryptos:%s', self.cryptos)
                print(self.cryptos)

                X = df[self.feature_list]
                scaler = StandardScaler()
                X = scaler.fit_transform(X)
                self.k, gapdf = self.optimalK(X,
                                              nrefs=3,
                                              maxClusters=len(
                                                  self.max_clusters_menu))
                logger.warning('Optimal k is:%s ', self.k)
                # Labels of each point
                labels = self.kmean_model[self.k].labels_

                # Nice Pythonic way to get the indices of the points for each corresponding cluster
                mydict = {
                    'cluster_' + str(i): np.where(labels == i)[0].tolist()
                    for i in range(self.kmean_model[self.k].n_clusters)
                }
                mydict_verbose = mydict.copy(
                )  # make a dictionary with the clusters and name of the cryptos

                # Transform this dictionary into dct with matching crypto labels
                dct = {
                    'crypto': self.cryptos,
                    'cluster': [''] * len(self.cryptos)
                }
                # get index aion to identify the aion cluster
                aion_idx = self.cryptos.index('aion')

                for key, values in mydict.items():
                    if aion_idx in values:
                        key = 'aion_cluster'
                    mydict_verbose[key] = []
                    for crypto_index in values:
                        try:
                            dct['cluster'][int(crypto_index)] = key
                            mydict_verbose[key].append(
                                self.cryptos[int(crypto_index)])

                        except:
                            logger.warning('cannot change to int:%s',
                                           crypto_index)  # save to redis
                self.write_clusters(mydict_verbose)
                logger.warning('line 229: cluster labels:%s', mydict_verbose)

                df = pd.DataFrame.from_dict(dct)
                self.launch_cluster_table = False
                cols = ['crypto', 'cluster']
                return df.hvplot.table(columns=cols,
                                       width=500,
                                       height=1200,
                                       title='Cluster table')
            except Exception:
                logger.error('cluster table', exc_info=True)

        def write_clusters(self, my_dict):
            try:
                # write to redis
                cluster_dct = my_dict.copy()
                cluster_dct['timestamp'] = datetime.now().strftime(
                    self.DATEFORMAT)
                cluster_dct['features'] = self.feature_list
                save_params = 'clusters:cryptocurrencies'
                self.redis.save(cluster_dct,
                                save_params,
                                "",
                                "",
                                type='checkpoint')
                logger.warning('%s saved to redis', save_params)
            except:
                logger.error('', exc_info=True)

    def update(attrname, old, new):
        thistab.notification_updater(
            "Calculations underway. Please be patient")
        thistab.df_load(datepicker_start.value,
                        datepicker_end.value,
                        timestamp_col='timestamp')
        thistab.trigger += 1
        stream_launch_elbow_plot.event(launch=thistab.trigger)
        thistab.notification_updater("Ready!")

    try:
        # SETUP
        table = 'external_daily'
        #cols = list(groupby_dict.keys()) + ['crypto']
        thistab = Thistab(table, [], [])

        # setup dates
        first_date_range = datetime.strptime("2018-04-25 00:00:00",
                                             "%Y-%m-%d %H:%M:%S")
        last_date_range = datetime.now().date()
        last_date = dashboard_config['dates']['last_date'] - timedelta(days=2)
        first_date = last_date - timedelta(days=340)
        # initial function call
        thistab.df_load(first_date, last_date, timestamp_col='timestamp')
        thistab.cols = sorted(list(thistab.df.columns))

        # MANAGE STREAMS
        stream_launch_elbow_plot = streams.Stream.define('Launch_elbow_plot',
                                                         launch=-1)()
        stream_launch_cluster_table = streams.Stream.define(
            'Launch_cluster_table', launch=-1)()

        # CREATE WIDGETS
        datepicker_start = DatePicker(title="Start",
                                      min_date=first_date_range,
                                      max_date=last_date_range,
                                      value=first_date)

        datepicker_end = DatePicker(title="End",
                                    min_date=first_date_range,
                                    max_date=last_date_range,
                                    value=last_date)

        datepicker_start.on_change('value', update)
        datepicker_end.on_change('value', update)

        # PLOTS
        hv_cluster_table = hv.DynamicMap(thistab.cluster_table,
                                         streams=[stream_launch_cluster_table])
        cluster_table = renderer.get_plot(hv_cluster_table)

        # COMPOSE LAYOUT
        # put the controls in a single element
        controls = WidgetBox(datepicker_start, datepicker_end)

        # create the dashboards

        grid = gridplot([[thistab.notification_div['top']],
                         [Spacer(width=20, height=70)],
                         [thistab.information_div(), controls],
                         [thistab.section_headers['Crypto families']],
                         [Spacer(width=20, height=30)], [cluster_table.state],
                         [thistab.notification_div['bottom']]])

        # Make a tab with the layout
        tab = Panel(child=grid, title=panel_title)
        return tab

    except Exception:
        logger.error('crypto:', exc_info=True)
        return tab_error_flag(panel_title)
示例#14
0
def load_file(index_file_name):
    index = json.loads(read_file(index_file_name, "r"))

    def date_range_change(is_start, attr, old, new):
        print(is_start, new)
        start = startDate.value
        start = datetime(start.year, start.month, start.day, 0, 0, 0)
        end = endDate.value
        end = datetime(end.year, end.month, end.day, 23, 59, 59)
        print(type(start), end)
        filter_func = lambda ds: any(
            map(lambda dt: start <= dt and dt <= end, ds["_time_values"]))
        dsTable.filter_datasets(filter_func)

    def name_filter_changed(attr, old, new):
        regex = re.compile(new)
        filter_func = lambda ds: regex.search(ds["id"])
        dsTable.filter_datasets(filter_func)

    class DatasetsTable:
        def __init__(self, index):
            self.meta_data = DotDict(index["meta"])
            self.datasets = index["datasets"]
            self.meta_variables = self.meta_data.variables
            self._fill_variables_table()
            self._fill_datasets_table()

        def _fill_variables_table(self):
            self.vars_short_names = list(self.meta_variables.keys())

            vars_names = list(map(self.to_long_name, self.vars_short_names))
            vars_dims = list(
                map(
                    lambda v: ", ".join(
                        list(map(self.to_long_name, v["shape"]))),
                    self.meta_variables.values()))
            self.vars_data = dict(
                names=vars_names,
                dims=vars_dims,
            )
            self.vars_source = ColumnDataSource(self.vars_data)
            vars_columns = [
                TableColumn(field="names", title="Variable name"),
                TableColumn(field="dims", title="Variable dimensions"),
            ]
            self.vars_table = DataTable(source=self.vars_source,
                                        columns=vars_columns,
                                        width=600,
                                        height=350,
                                        selectable=True)
            # vars_source.selected.on_change('indices', table_changed)

        def to_long_name(self, name, show_units=False):

            if name not in self.meta_variables:
                return name

            attr = self.meta_variables[name]["attributes"]

            if "long_name" in attr:
                name = attr["long_name"]["value"]
            elif "standard_name" in attr:
                name = attr["standard_name"]["value"]

            if show_units and "units" in attr:
                return "%s [%s]" % (name, attr["units"]["value"])
            return name

        def filter_datasets(self, filter_func):
            self.filtered_datasets = list(filter(filter_func, self.datasets))
            self.datasets_source.data.update(
                self._populate_datasets_table_data(self.filtered_datasets))

        def _fill_datasets_table(self):
            # preprocess the time values
            max_date = min_date = None

            for ds in self.datasets:
                ds["_time_values"] = list(
                    map(lambda t: np.datetime64(t).astype(datetime),
                        ds["data"]["time"]))
                mx = max(ds["_time_values"])
                mn = min(ds["_time_values"])
                if max_date is None or mx > max_date:
                    max_date = mx
                if min_date is None or mn < min_date:
                    min_date = mn

            self.filtered_datasets = self.datasets
            self.datasets_min_date = min_date
            self.datasets_max_date = max_date
            self.datasets_source = ColumnDataSource(
                self._populate_datasets_table_data(self.filtered_datasets))
            datasets_columns = [
                TableColumn(field="names", title="Name", width=600),
                TableColumn(field="dates", title="Date", width=600)
            ]
            self.datasets_table = DataTable(source=self.datasets_source,
                                            columns=datasets_columns,
                                            width=600,
                                            height=350,
                                            selectable=True)

            # datasets_source.selected.on_change('indices', _single_selection(datasets_source, lambda index: update_available_vars(
            #    filtered_datasets[index]["meta"])))

        def _populate_datasets_table_data(self, datasets):
            ds_names = []
            ds_dates = []
            for ds in datasets:
                ds_names.append(ds["id"])
                ds_dates.append(ds["data"]["time"][0])  # todo format date
            return {"names": ds_names, "dates": ds_dates}

        def get_plot_infos(self):
            vars_index = self.vars_source.selected.indices
            ds_index = self.datasets_source.selected.indices
            if not vars_index or not ds_index:
                log("Nothing selected!")
                return
            var_name = self.vars_short_names[vars_index[0]]
            var = self.meta_variables[var_name]
            ds = self.filtered_datasets[ds_index[0]]
            return (ds, var_name, var["shape"])

    def gen_plot():
        infos = dsTable.get_plot_infos()
        if infos is None:
            return
        ds, var_name, shape = infos
        ds_uri = ds["id"]
        timestamp = ds["data"]["time"][0]
        file_name = ds_uri.split("/")[-1]
        kdims = shape
        vdims = [var_name]

        lon_key, lat_key = None, None
        for key in dsTable.meta_variables:
            entry = dsTable.meta_variables[key]
            if "attributes" not in entry or "standard_name" not in entry[
                    "attributes"]:
                continue
            if entry["attributes"]["standard_name"]["value"] == "longitude":
                lon_key = key

            if entry["attributes"]["standard_name"]["value"] == "latitude":
                lat_key = key

        if lat_key not in kdims or lon_key not in kdims:
            log("'lat' and 'lon' are required dimensions!")
            return
        full_url = index["opendap_url"] + ds_uri
        log("Opening dataset: " + full_url)
        btn_plot_lonXlat.disabled = True
        try:
            print("Opening : " + full_url)
            dataset = xr.open_dataset(full_url)
            log("Dataset successfully opened. Loading data...")
            kdimsSingularValue = list(
                filter(lambda dim: dataset[dim].size == 1, kdims))
            kdimsMultipleValues = list(
                filter(lambda dim: dataset[dim].size > 1, kdims))
            indexers = {
                key: dataset[key].values[0]
                for key in kdimsSingularValue
            }
            print(indexers)
            dataset = dataset.sel(indexers=indexers)
            print(kdimsMultipleValues, kdimsSingularValue)

            xr_dataset = gv.Dataset(
                dataset[var_name],
                group=dsTable.to_long_name(var_name, True) + "  ",
                crs=ccrs.PlateCarree())
            image = xr_dataset.to(gv.Image, [lon_key, lat_key], dynamic=True)

            graph = image.options(colorbar=True,
                                  tools=['hover'],
                                  cmap="viridis",
                                  width=800,
                                  height=640,
                                  colorbar_position="right",
                                  toolbar="below") * gf.coastline()
            renderer = hv.renderer('bokeh')
            hover = HoverTool(
                tooltips=[
                    ("(x,y)", "(@lon{%0.1f}, @lat{%0.1f})"),
                    ('desc', '@' + var_name),
                ],
                formatters={
                    'y': 'printf',  # use 'datetime' formatter for 'date' field
                    'x':
                    'printf',  # use 'printf' formatter for 'adj close' field
                    # use default 'numeral' formatter for other fields
                })
            plot = renderer.get_plot(graph)

            if len(kdimsMultipleValues) > 2:

                # callback_policy="mouseup" for slider in plots

                print(plot)
                plot = renderer.get_widget(plot, "server")
                bokeh_layout = plot.init_layout()
                print(bokeh_layout)

                latFull = dsTable.meta_variables[lat_key]["attributes"][
                    "standard_name"]["value"]
                lonFull = dsTable.meta_variables[lon_key]["attributes"][
                    "standard_name"]["value"]

                bk_plot = bokeh_layout.children[0]
                #bk_plot.add_tools(hover)
                bk_slider = bokeh_layout.children[1].children[1]
                print(bk_slider.callback_policy)
                bk_slider.callback_policy = "mouseup"
                bk_plot.xaxis.axis_label = lonFull
                bk_plot.yaxis.axis_label = latFull
                print(lonFull, latFull)
                # bk_plot.xaxis[0].formatter = NumeralTickFormatter(format="0.0")

                # bk_plot.yaxis[0].formatter = NumeralTickFormatter(format="$0")
            else:
                bokeh_layout = plot.state

            tab = Panel(title=timestamp, child=bokeh_layout)
            plotTabs.tabs.append(tab)

            log("Data successfully loaded!")
        except Exception as e:
            log("Failed to open or process dataset: %s" % full_url, e)
        finally:
            btn_plot_lonXlat.disabled = False

    dsTable = DatasetsTable(index)
    btn_plot_lonXlat = Button(
        label="Plot variable over 'lon'x'lat' (this may take some time)")
    btn_plot_lonXlat.on_click(gen_plot)

    startDate = DatePicker(title="Start date",
                           min_date=dsTable.datasets_min_date,
                           max_date=dsTable.datasets_max_date,
                           value=dsTable.datasets_min_date)
    endDate = DatePicker(title="End date",
                         min_date=dsTable.datasets_min_date,
                         max_date=dsTable.datasets_max_date,
                         value=dsTable.datasets_max_date)
    startDate.on_change("value", partial(date_range_change, True))
    endDate.on_change("value", partial(date_range_change, False))

    plotTabs = Tabs(
        tabs=[],
        width=1000,
        height=640,
    )

    plotLayout = column(plotTabs, name="plotLayout")
    mainLayout = column(Div(height=50, style={"height": 50}),
                        row(startDate, endDate),
                        dsTable.datasets_table,
                        dsTable.vars_table,
                        btn_plot_lonXlat,
                        plotLayout,
                        status_bar,
                        name='mainLayout')

    doc.remove_root(loadLayout)
    doc.add_root(mainLayout)
示例#15
0
def modify_doc(doc):
    
    def startdate_update(attrname, old, new):
        rs.start.year = new.year
        rs.start.month = new.month
        rs.start.day = new.day

    def enddate_update(attrname, old, new):
        rs.end.year = new.year
        rs.end.month = new.month
        rs.end.day = new.day

    def starttime_update(attrname, old, new):
        h,m,s = map(int, new.split(':'))
        rs.start.hour = h
        rs.start.minute = m
        rs.start.second = s

    def endtime_update(attrname, old, new):
        h,m,s = map(int, new.split(':'))
        rs.end.hour = h
        rs.end.minute = m
        rs.end.second = s

    def update_target(attrname, old, new):
        rs.target = new.lower()
        dm.event()

    @gen.coroutine
    @without_document_lock
    def load():
        global text
        if rs.start > rs.end:
            msg = "Start time is later than end time.\n"
            text.append(msg)
            return
        if (rs.end - rs.start) > 3600.:
            msg = "Length of requested data exceeds 1 hour.\n" 
            text.append(msg)
            return
        executor = ThreadPoolExecutor(max_workers=4)
        msg = "Loading data for {:s} between {:s} and {:s}\n".format(rs.target, str(rs.start), str(rs.end))
        text.append(msg)
        rs.reset()
        for _type in ['seismic', 'acoustic']:
            for slc in stations[rs.target][_type]:
                s, l, c = slc
                msg = "Downloading {:s}.{:s}.{:s}\n".format(s, l, c)
                text.append(msg)
                tr = yield executor.submit(get_data, s, l, c, rs.start, rs.end)
                if tr is not None:
                    rs.streams[_type] += tr
                else:
                    msg = 'No data for {}.{}.{}\n'.format(s, l, c)
                    text.append(msg)
                    time.sleep(0.1)
                    continue
                # The sleep has to be added to prevent some kind of racing condition
                # in the underlying bokeh implementation
                time.sleep(0.1)
                doc.add_next_tick_callback(update_plot)
        text.append("LOADING FINISHED.\n")

    def reset():
        global text
        if rs.start > rs.end:
            msg = "Start time is later than end time.\n"
            text.append(msg)
            return
        if (rs.end - rs.start) > 3600.:
            msg = "Length of requested data exceeds 1 hour.\n" 
            text.append(msg)
            return
        dm.event(replot=True)

    def update_traces(attr, old, new):
        rs.plot_seismic = False
        rs.plot_acoustic = False
        rs.plot_labels = False
        for _c in new:
            if _c == 0:
                rs.plot_seismic = True
            if _c == 1:
                rs.plot_acoustic = True
            if _c == 2:
                rs.plot_labels = True
        dm.event()

    def update_red_vel(attr, old, new):
        if len(new) < 1:
            rs.red_vel = 0
        else:
           rs.red_vel = 330. 
        dm.event()

    sdateval = "{:d}-{:d}-{:d}".format(rs.start.year,
                                       rs.start.month,
                                       rs.start.day)
    date_start = DatePicker(title='Start date', value=sdateval)
    date_start.on_change('value', startdate_update)

    edateval = "{:d}-{:d}-{:d}".format(rs.end.year,
                                       rs.end.month,
                                       rs.end.day)
    date_end = DatePicker(title='End date', value=edateval)
    date_end.on_change('value', enddate_update)
    
    stimeval = "{:02d}:{:02d}:{:02d}".format(rs.start.hour,
                                             rs.start.minute,
                                             rs.start.second)
    starttime = TextInput(title='Start time', value=stimeval)
    starttime.on_change('value', starttime_update)

    etimeval = "{:02d}:{:02d}:{:02d}".format(rs.end.hour,
                                             rs.end.minute,
                                             rs.end.second)
    endtime = TextInput(title='End time', value=etimeval)
    endtime.on_change('value', endtime_update)
    
    loadb = Button(label='Load', button_type='success')
    loadb.on_click(load)
    
    resetb = Button(label='Reset', button_type='success')
    resetb.on_click(reset)
    
    select_target = Select(title='Volcano', value="Ruapehu",
                           options=["Te Maari", "Ruapehu", "Ngauruhoe", "Red Crater"])
    select_target.on_change('value', update_target)

    cg = CheckboxGroup(labels=["Seismic", "Acoustic", "Station name"], active=[1, 1, 0])
    cg.on_change('active', update_traces)
    
    rvel = CheckboxGroup(labels=['Reduction velocity (330 m/s)'], active=[])
    rvel.on_change('active', update_red_vel)

    div = Div(text="""<a target="_blank" href="https://wiki.geonet.org.nz/display/volcano/Record+Section+Plot
">Documentation</a>""", width=80, height=30)
    # Create HoloViews plot and attach the document
    hvplot = renderer.get_plot(dm, doc)
    doc.add_root(layout([[hvplot.state, widgetbox(pre)], 
                         [widgetbox(date_start, starttime, div), 
                          widgetbox(date_end, endtime),
                          widgetbox(select_target, loadb, resetb, cg, rvel)]], sizing_mode='fixed'))
    return doc
示例#16
0
race_table = raceTable(data_race, t_source, t_last_update)

tableData = dict(
    race=['Asian', 'Black', 'Cdph-other', 'Latino', 'White', 'Other'],
    confirmed_cases_percent=race_table.confirm,
    deaths_percent=race_table.death,
    population_percent=race_table.percent,
    last_update=race_table.last_update,
    source=race_table.source)
source2 = ColumnDataSource1(tableData)
columns = [
    TableColumn(field="race", title="Race"),
    TableColumn(field='confirmed_cases_percent',
                title="Confirmed_cases_percent"),
    TableColumn(field="deaths_percent", title="Deaths_percent"),
    TableColumn(field="population_percent", title="Population_percent")
]
# TableColumn(field="last_update",title="Last_update"),TableColumn(field="source",title="Source")
table = DataTable(source=source2, columns=columns, width=width, height=500)
date_picker = DatePicker(title='Select a date',
                         value="2020-10-26",
                         min_date="2020-05-01",
                         max_date="2020-11-02")
date_picker.on_change('value', update1)
pre3 = TextAreaInput(value='Source: ' + t_source + '\nLast update: ' +
                     t_last_update,
                     rows=3)
layout = column(pre, dropdown_state, textbox, p, pre2, date_picker, pre3,
                table)
curdoc().add_root(layout)

def callback2(attr, old, new):
    date = pd.to_datetime(date_picker2.value)
    data_case.data = get_data(date, data_race, "confirmed_cases_total")
    data_death.data = get_data(date, data_race, "deaths_total")
    data_num.data = get_data(date, data_race, "population_percent")


plot2.axis.axis_label = None
plot2.axis.visible = False
plot2.grid.grid_line_color = None
plot3.axis.axis_label = None
plot3.axis.visible = False
plot3.grid.grid_line_color = None
plot4.axis.axis_label = None
plot4.axis.visible = False
plot4.grid.grid_line_color = None

date_picker.on_change("value", callback)
date_picker2.on_change("value", callback2)

title2 = Div(
    text="""Comparison of COVID-19 case/death to its population by race""",
    width=1000,
    height=120)
title2.default_size = 50

curdoc().add_root(
    layout([[title1], [plot1, date_picker], [plot2, plot3],
            [plot4, date_picker2]]))
示例#18
0
def tab2():
    data = pd.read_csv('cdph-race-ethnicity.csv')
    data['date_time'] = pd.to_datetime(data['date'])
    max_date = data['date'].iloc[0]
    data = data[(data['age'] == 'all')]
    percentages = ['confirmed cases', 'general population']
    regions = ['asian', 'black', "cdph-other", 'latino', 'other', 'white']
    x = [(race, percent) for race in regions for percent in percentages]

    def create_dataset(df):
        counts = sum(
            zip(df['confirmed_cases_percent'], df['population_percent']),
            ())  # like an hstack
        source = ColumnDataSource(data=dict(x=x, counts=counts))
        return source

    def create_plot(source):
        p = figure(
            x_range=FactorRange(*x),
            title=
            'Comparison of the persent of cases by race to the general population',
            y_axis_label='Persentage')
        palette = ["#CAB2D6", "#e84d60"]
        p.vbar(x='x',
               top='counts',
               width=0.9,
               source=source,
               line_color="white",
               fill_color=factor_cmap('x',
                                      palette=palette,
                                      factors=percentages,
                                      start=1,
                                      end=2))
        p.y_range.start = 0
        p.x_range.range_padding = 0.1
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        p.x_range.range_padding = 0.1
        p.xgrid.grid_line_color = None
        p.legend.location = "top_left"
        p.legend.orientation = "horizontal"
        p.xgrid.grid_line_color = None
        p.add_tools(
            HoverTool(tooltips=[('Race, category', "@x"),
                                ('Percentage', "@counts")], ))
        mytext = Label(
            x=20,
            y=-150,
            x_units='screen',
            text=
            f"Source of data: coming from a continual Times survey of California's 58 county health\n "
            " agencies and three city agencieas, pubished on https://www.latimes.com/projects/california-coronavirus-cases-tracking-outbreak/"
            " , access from Github repository https://github.com/datadesk/california-coronavirus-data/blob/master/cdph-race-ethnicity.csv"
            f"  Date of last update: 2020-11-04",
            render_mode='css',
            y_units='screen',
            border_line_color='black',
            border_line_alpha=1.0,
            background_fill_color='white',
            background_fill_alpha=1.0,
        )
        p.add_layout(mytext)
        return p

    def callback(attr, old, new):
        new_src = create_dataset(
            data[(data['date_time'] == date_picker.value)])
        src.data.update(new_src.data)

    # Initial Plot
    src = create_dataset(data[(data['date_time'] == '2020-10-01')])
    p = create_plot(src)
    date_picker = DatePicker(
        title='Click to choose a date (blank means no data)',
        min_date="2020-05-14",
        max_date=date.today())
    date_picker.on_change('value', callback)
    controls = WidgetBox(date_picker)
    layout = row(controls, p)
    tab = Panel(child=layout, title='Percentage of confirmed cases by race')
    return tab
示例#19
0
class UIClass:
    def __init__(self):
        self.input_df = pd.DataFrame({
            'x': ['2010-01-01'] * DF_NUM_PREVIEW_ROWS,
            'y': [0] * DF_NUM_PREVIEW_ROWS
        })
        self.forecasted_df = None
        self.datefmt = DateFormatter(format='%m-%d-%Y')
        self.inputs = None
        self.x_range = [0, 10]
        self.demand_plot = figure(
            x_range=self.x_range,
            x_axis_type="datetime",
            tools=["pan", 'wheel_zoom'])  #,wheel_zoom,box_zoom,reset,resize")

        self.plot_data_source = ColumnDataSource(
            data=self.input_df)  #dict(x=[0], y=[0])
        self.line1 = self.demand_plot.line(x='x',
                                           y='y',
                                           source=self.plot_data_source,
                                           line_color='blue',
                                           name='line1')
        self.demand_plot.xaxis.formatter = DatetimeTickFormatter(
            days="%d %b %Y", hours="")
        self.demand_plot.axis.minor_tick_line_color = None
        self.demand_plot.xaxis[
            0].ticker.desired_num_ticks = 10  #num_minor_ticks = 0
        self.demand_plot.xaxis.major_label_orientation = radians(
            30)  # from math import radians

        # Set up widgets
        self.data_source_selector = Select(
            title='Step 1/5: Select Data',
            value='Not Selected',
            options=['Not Selected', 'Use Example Data', 'Upload Data'])
        self.file_input = FileInput(accept='.csv,.xlsx')
        self.data_table = DataTable(
            height=DATATABLE_PREVIEW_HEIGHT,
            width=DATATABLE_PREVIEW_WIDTH,
            fit_columns=False,
            index_position=None,
            margin=(0, 15, 0, 15),  #aspect_ratio=0.5,
            #default_size=50
        )
        self.data_preview_paragraph = Paragraph(text='Data Preview:',
                                                margin=(0, 15, 0, 15))
        self.values_col_selector = Select(
            title='Step 2/5: Select column with demand values',
            value='Not Selected',
            options=['Not Selected'])
        self.product_id_col_selector = Select(
            title='Step 3/5: Select column with product ID',
            value='Not Selected',
            options=['Not Selected'])
        self.date_col_selector = Select(title="Step 4/5: Select date column",
                                        value='Not Selected',
                                        options=['Not Selected'])
        self.last_date_picker = DatePicker(
            title='Select the date of last observation',
            max_date=datetime.datetime.date(pd.to_datetime("today")),
            value=datetime.datetime.date(pd.to_datetime("today")))
        self.workdays_checkboxgroup = CheckboxGroup(
            labels=["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"],
            active=[],
            inline=True,
            margin=(0, 15, 0, 0))
        self.workdays_apply_button = Button(label='Select Business Days',
                                            button_type='primary')
        self.product_selector_plotting = Select(
            title='Select Product to Display',
            value='v1',
            options=['v1', 'v2'])
        self.prediction_button = Button(
            label='Forecast Demand for Selected Product ID',
            button_type='primary')
        self.default_info_msg = 'This window will contain additional information,\nas you interact with the app.'
        self.info_paragraph = PreText(
            text='Details:\n{}'.format(self.default_info_msg))
        # self.text = TextInput(title='title', value='my sine wave')
        # self.offset = Slider(title='offset', value=0.0, start=-5.0, end=5.0, step=0.1)

        self.widgets = {
            'data_source_selector': self.data_source_selector,
            'file_input': self.file_input,
            'values_col_selector': self.values_col_selector,
            'product_id_col_selector': self.product_id_col_selector,
            'data_preview_paragraph': self.data_preview_paragraph,
            'data_table': self.data_table,
            'product_selector': self.product_selector_plotting,
            'demand_plot': self.demand_plot,
            'date_col_selector': self.date_col_selector,
            'last_date_picker': self.last_date_picker,
            'workdays_checkboxgroup': self.workdays_checkboxgroup,
            'workdays_apply_button': self.workdays_apply_button,
            'prediction_button': self.prediction_button,
            #'': self.,
        }

        self.values_colname = None
        self.product_id_colname = None
        self.date_colname = None
        self.product_ids = []

########## WIDGETS VISIBILITY CONTROLS ##########

    def _change_widgets_visibility(self, names, names_show_or_hide='show'):
        displaying = True if names_show_or_hide == 'show' else False
        for widget_name in self.widgets:
            if widget_name in names:
                self.widgets[widget_name].visible = displaying
            else:
                self.widgets[widget_name].visible = not displaying

    def display_all_widgets_except(self, widgets=[]):
        self._change_widgets_visibility(widgets, 'hide')

    def hide_all_widgets_except(self, widgets=[]):
        self._change_widgets_visibility(widgets, 'show')

########## LOGIC ##########

    def set_widget_to_default_value(self,
                                    widget_names,
                                    default_val='Not Selected'):
        for widget_name in widget_names:
            self.widgets[widget_name].value = default_val

    def prepare_values_col_selection(self):
        self.values_col_selector.options = ['Not Selected'
                                            ] + self.input_df.columns.tolist()

    def get_additional_cols_to_show(self):
        return ['file_input'
                ] if self.data_source_selector.value == 'Upload Data' else []

    def update_details_msg(self, msg):
        self.info_paragraph.text = "Details:\n{}".format(msg)

    def preview_input_df(self):
        # https://stackoverflow.com/questions/40942168/how-to-create-a-bokeh-datatable-datetime-formatter
        columns = [
            TableColumn(field=Ci, title=Ci, width=DATATABLE_PREVIEW_COL_WIDTH)
            for Ci in self.input_df.columns
        ]
        self.data_table.update(columns=columns)
        self.data_table.update(
            source=ColumnDataSource(self.input_df.head(DF_NUM_PREVIEW_ROWS)))
        self.data_table.visible = True
        self.data_preview_paragraph.visible = True

    def upload_fit_data(self, attr, old, new):
        print('fit data upload succeeded')
        self.update_details_msg(msg='Step 1/5: Uploading data')
        base64_message = self.file_input.value
        base64_bytes = base64_message.encode('ascii')
        message_bytes = base64.b64decode(base64_bytes)
        message = message_bytes.decode('ascii')
        self.input_df = pd.read_csv(StringIO(message), sep=',')
        self.update_details_msg(
            msg='Step 1/5: Data has been successfully uploaded!')
        print('Input DF shape: {}'.format(self.input_df.shape))
        self.prepare_values_col_selection()
        self.hide_all_widgets_except(
            ['data_source_selector', 'file_input', 'values_col_selector'])
        self.preview_input_df()

    def replace_selector_options(self, selector, old_value, new_options):
        selector.options = [old_value] + new_options
        selector.value = new_options[0]
        selector.options = new_options

    def date_col_integrity(self, date_colname):
        if not isinstance(self.input_df[date_colname][0], str):
            self.input_df[date_colname] = self.input_df[date_colname].astype(
                str)
        if '-' in self.input_df[date_colname][0]:
            sep = '-'
        elif '/' in self.input_df[date_colname][0]:
            sep = '/'
        else:
            return 'no separator found'
        date_parts = self.input_df[date_colname].apply(lambda x: x.split(sep))
        if (date_parts.apply(lambda x: len(x)) == 3).all():
            try:
                self.input_df[date_colname] = pd.to_datetime(
                    self.input_df[date_colname])
                return 'ok'
            except:
                return 'error converting to datetime'
        else:
            return 'not all dates have exactly 3 components'

    def display_preview_plot(self):
        self.replace_selector_options(self.product_selector_plotting, 'v1',
                                      self.product_ids)
        self.product_selector_plotting.visible = True
        self.prediction_button.visible = True
        self.demand_plot.renderers.remove(self.line1)
        self.plot_data_source = None
        self.plot_data_source = ColumnDataSource(data=self.input_df[
            self.input_df[self.product_id_colname] == self.product_ids[0]])
        self.line1 = self.demand_plot.line(x=self.date_colname,
                                           y=self.values_colname,
                                           source=self.plot_data_source,
                                           line_color='blue',
                                           name='line1')
        self.update_plot(None, None, self.product_ids[0])
        self.demand_plot.visible = True

    def generate_dates(self, end_date: datetime.datetime, work_days: list,
                       num_periods: int):
        work_days = ' '.join(work_days)  # 'Sun Mon Tue Wed Fri'
        freq = pd.offsets.CustomBusinessDay(weekmask=work_days)
        return pd.date_range(end=end_date, periods=num_periods, freq=freq)

    def clean_df(self):
        """
        Modifies self.input_df:
        1) Removing duplicates based on [self.date_colname, self.product_id_colname]
        2) Sorting based on self.date_colname
        :return: void
        """
        self.input_df = self.input_df[~self.input_df.duplicated(
            subset=[self.date_colname, self.product_id_colname], keep='first')]
        self.input_df.sort_values(by=self.date_colname, inplace=True)
        print('===RESULTED INPUT_DF SHAPE AFTER CLEANING: ',
              self.input_df.shape)

########## WIDGETS ON_CHANGE METHODS ##########

    def select_data_source(self, attrname, old_val, new_val):
        self.set_widget_to_default_value([
            'values_col_selector', 'product_id_col_selector',
            'date_col_selector'
        ])
        if new_val == 'Upload Data':
            self.update_details_msg(
                msg=
                'Step 1/5: Please upload data in one of the\nfollowing formats: .CSV or .XLSX'
            )
            self.hide_all_widgets_except(
                ['data_source_selector', 'file_input'])
        elif new_val == 'Use Example Data':
            self.update_details_msg(
                msg=
                'Step 1/5: Using a sample toy data. You can use it\nto test the functionality of this app.'
            )
            self.input_df = pd.read_csv('default_table.csv')
            self.prepare_values_col_selection()
            self.preview_input_df()
            self.hide_all_widgets_except([
                'data_source_selector', 'values_col_selector',
                'data_preview_paragraph', 'data_table'
            ])
        else:  # Not Selected
            self.update_details_msg(msg=self.default_info_msg)
            self.hide_all_widgets_except(['data_source_selector'])

    def select_values_colname(self, attrname, old_val, new_val):
        self.update_details_msg(
            msg=
            'Step 2/5: Please select a column that contains\nthe demand values. Note, that all the values in\nthis column should be numerical.'
        )
        self.set_widget_to_default_value(
            ['product_id_col_selector', 'date_col_selector'])
        self.hide_all_widgets_except([
            'data_source_selector', 'values_col_selector',
            'data_preview_paragraph', 'data_table'
        ] + self.get_additional_cols_to_show())
        if new_val == 'Not Selected':
            pass
        else:
            self.values_colname = new_val
            try:
                self.input_df[self.values_colname] = self.input_df[
                    self.values_colname].astype(float)
                available_cols = set(self.input_df.columns)
                available_cols.remove(self.values_colname)
                if self.date_colname in available_cols:
                    available_cols.remove(self.date_colname)
                self.product_id_col_selector.options = [
                    'Not Selected'
                ] + list(available_cols)
                self.product_id_col_selector.visible = True
            except:
                self.update_details_msg(
                    msg=
                    'WARNING! Step 2/5: Not all the values\nin selected column are numerical!'
                )

    def select_product_id_colname(self, attrname, old_val, new_val):
        self.update_details_msg(
            msg=
            "Step 3/5: Please select a column that contains products' identifiers."
        )
        self.set_widget_to_default_value(['date_col_selector'])
        self.hide_all_widgets_except([
            'data_source_selector', 'values_col_selector',
            'data_preview_paragraph', 'data_table', 'product_id_col_selector'
        ] + self.get_additional_cols_to_show())
        if new_val == 'Not Selected':
            pass
        else:
            self.product_id_colname = new_val
            self.product_ids = self.input_df[
                self.product_id_colname].unique().astype(str).tolist()
            available_cols = set(self.input_df.columns)
            for colname in [self.values_colname, self.product_id_colname]:
                available_cols.remove(colname)
            if self.date_colname in available_cols:
                available_cols.remove(self.date_colname)
            self.date_col_selector.options = ['Not Selected'
                                              ] + list(available_cols)
            self.date_col_selector.visible = True
            self.last_date_picker.visible = True
            self.workdays_checkboxgroup.visible = True
            self.workdays_apply_button.visible = True

    def select_date_column(self, attrname, old_val, new_val):
        self.update_details_msg(
            msg="Step 4/5: If there is a date column, please select it's name.\n"
            "Note: Dates should be in one of the following formats:\n"
            "yyyy-mm-dd OR mm-dd-yyyy OR yyyy/mm/dd OR mm/dd/yyyy\n"
            "If there is no such column, use 'Not Selected' option.")
        self.hide_all_widgets_except([
            'data_source_selector', 'values_col_selector',
            'data_preview_paragraph', 'data_table', 'product_id_col_selector',
            'date_col_selector'
        ] + self.get_additional_cols_to_show())
        if new_val == 'Not Selected':
            self.last_date_picker.visible = True
            self.workdays_checkboxgroup.visible = True
            self.workdays_apply_button.visible = True
        else:
            self.date_colname = new_val
            date_col_integrity_status = self.date_col_integrity(
                self.date_colname)
            if date_col_integrity_status == 'ok':
                self.clean_df()
                self.display_preview_plot()
            else:
                print('date_col_integrity_status: ', date_col_integrity_status)
                self.update_details_msg(
                    msg=
                    "ERROR: selected date column doesn't satisfy specified requirements:\n"
                    "Dates should be in one of the following formats:\n"
                    "yyyy-mm-dd OR mm-dd-yyyy OR yyyy/mm/dd OR mm/dd/yyyy\n"
                    "If there is no such column, use 'Not Selected' option.")

    def select_last_date(self, attrname, old_val, new_val):
        self.update_details_msg(
            msg="Alright, dates will be automatically generated for you!\n"
            "Select days when your business works.")
        self.workdays_checkboxgroup.visible = True
        self.workdays_apply_button.visible = True

    def workdays_button_pressed(self, new):
        if len(self.workdays_checkboxgroup.active) == 0:
            self.update_details_msg(
                msg="Please select at least one business day.")
        else:
            self.update_details_msg(msg="Generating dates.")
            if 'generated_dates' in self.input_df.columns:
                self.update_details_msg(
                    msg="Please rename the generated_dates column in you table."
                )
            else:
                self.date_colname = 'generated_date'
                self.input_df[self.date_colname] = ''
                for product_id in self.product_ids:
                    inds = self.input_df[self.product_id_colname] == product_id
                    self.input_df.loc[
                        inds, self.date_colname] = self.generate_dates(
                            end_date=self.last_date_picker.value,
                            work_days=np.array(
                                self.workdays_checkboxgroup.labels)[
                                    self.workdays_checkboxgroup.active],
                            num_periods=inds.sum())
                self.input_df[self.date_colname] = pd.to_datetime(
                    self.input_df[self.date_colname])
                self.clean_df()
                self.display_preview_plot()
                #self.preview_input_df() # https://stackoverflow.com/questions/40942168/how-to-create-a-bokeh-datatable-datetime-formatter

    def prediction_button_pressed(self, new):
        train_dataset = pd.DataFrame()
        print('Preparing forecast for product: ',
              self.product_selector_plotting.value)
        inds = self.input_df[
            self.product_id_colname] == self.product_selector_plotting.value
        train_dataset['ds'] = self.input_df.loc[inds, self.date_colname]
        train_dataset['y'] = self.input_df.loc[inds, self.values_colname]
        # train_dataset = train_dataset[train_dataset.duplicated(subset=['ds'],keep='first')]
        #train_dataset.sort_values(by=self.date_colname, inplace=True)
        print('Train Dataset shape: ', train_dataset.shape)
        for q in self.make_predictions(train_dataset):
            if q[0] == 'msg':
                print('Message: ', q[1])
            else:
                self.forecasted_df = q[1]
                self.forecasted_df.columns = ['ds', 'y']
                print('Done; shape: ', self.forecasted_df.shape)
                #self.demand_plot.line(x='ds', y='yhat', source=ColumnDataSource(data=self.forecasted_df, name='line2'))
                #print(self.forecasted_df.tail(30))

                #combined_dataset = train_dataset.append(self.forecasted_df.tail(30), ignore_index=True)
                d = {
                    'ds':
                    train_dataset['ds'].append(
                        self.forecasted_df.tail(30)['ds']),
                    'y':
                    train_dataset['y'].append(
                        self.forecasted_df.tail(30)['y'])
                }
                combined_dataset = pd.DataFrame(d)

                try:
                    while len(self.demand_plot.legend[0].items) > 0:
                        self.demand_plot.legend[0].items.pop()
                except:
                    print(
                        'FAIL: popping legends in prediction_button_pressed()')

                self.demand_plot.renderers.remove(self.line1)
                try:
                    self.demand_plot.renderers.remove(self.line2)
                except:
                    pass

                self.plot_data_source = None
                self.plot_data_source = ColumnDataSource(data=combined_dataset)
                self.line1 = self.demand_plot.line(x=train_dataset['ds'],
                                                   y=train_dataset['y'],
                                                   line_color='blue',
                                                   name='line1',
                                                   legend_label='Historical')
                self.line2 = self.demand_plot.line(
                    x=train_dataset['ds'].tail(1).append(
                        self.forecasted_df['ds'].tail(30)),
                    y=train_dataset['y'].tail(1).append(
                        self.forecasted_df['y'].tail(30)),
                    line_color='red',
                    name='line2',
                    legend_label='Forecast')
                #print('QQQ ', self.demand_plot.select(name="line2"))
                self.demand_plot.legend.location = "top_left"

                self.demand_plot.x_range.start = combined_dataset['ds'].min()
                self.demand_plot.x_range.end = combined_dataset['ds'].max()
                self.demand_plot.y_range.start = combined_dataset['y'].min()
                self.demand_plot.y_range.end = combined_dataset['y'].max()

                self.demand_plot.visible = True

########## OTHER ##########

    def dates_diff_count(self, df, product_name):
        days_diffs = (
            df[1:][self.date_colname].values -
            df[:-1][self.date_colname].values) / 1000000000 / 60 / 60 / 24
        unique_diffs, diffs_counts = np.unique(days_diffs, return_counts=True)
        msg = 'Product: {}:\n# Days Delta ; Count\n'.format(product_name)
        for value, count in zip(unique_diffs, diffs_counts):
            msg += '{:10} ; {}\n'.format(value, count)
        msg += 'If there is more than one unique value\nit can make forecast less accurate'
        self.update_details_msg(msg=msg)

    # https://facebook.github.io/prophet/docs/non-daily_data.html
    def make_predictions(self, df, days_ahead=30):
        yield ['msg', 'training model']
        prophet = Prophet(weekly_seasonality=False, daily_seasonality=False)
        prophet.fit(df)
        yield ['msg', 'making predictions']
        future = prophet.make_future_dataframe(periods=days_ahead)
        forecast = prophet.predict(future)
        yield ['results', forecast[['ds', 'yhat']]]

    def update_plot(self, attrname, old, new):
        try:
            while len(self.demand_plot.legend[0].items) > 0:
                self.demand_plot.legend[0].items.pop()
        except:
            print('FAIL: popping legends in update_plot()')
        try:
            self.demand_plot.renderers.remove(self.line2)
        except:
            pass

        sub_df = self.input_df[self.input_df[self.product_id_colname] == new]
        self.dates_diff_count(sub_df, new)
        self.demand_plot.renderers.remove(self.line1)
        self.plot_data_source = None
        self.plot_data_source = ColumnDataSource(data=sub_df)
        self.line1 = self.demand_plot.line(x=self.date_colname,
                                           y=self.values_colname,
                                           source=self.plot_data_source,
                                           line_color='blue',
                                           legend_label='Historical',
                                           name='line1')
        self.demand_plot.legend.location = "top_left"
        self.demand_plot.x_range.start = sub_df[self.date_colname].min()
        self.demand_plot.x_range.end = sub_df[self.date_colname].max()
        self.demand_plot.y_range.start = sub_df[self.values_colname].min()
        self.demand_plot.y_range.end = sub_df[self.values_colname].max()


########## MAIN ##########

    def display(self):
        self.file_input.on_change('value', self.upload_fit_data)
        self.plot = figure(plot_height=400,
                           plot_width=400,
                           title='my sine wave',
                           tools='crosshair,pan,reset,save,wheel_zoom')

        # Set up layouts and add to document
        self.inputs = column(self.data_source_selector, self.file_input,
                             self.values_col_selector,
                             self.product_id_col_selector,
                             self.date_col_selector, self.last_date_picker,
                             self.workdays_checkboxgroup,
                             self.workdays_apply_button)

        #self.data_source_selector.visible = True
        self.hide_all_widgets_except(['data_source_selector'])
        self.data_source_selector.on_change('value', self.select_data_source)
        self.values_col_selector.on_change('value', self.select_values_colname)
        self.product_id_col_selector.on_change('value',
                                               self.select_product_id_colname)
        self.product_selector_plotting.on_change('value', self.update_plot)
        self.date_col_selector.on_change('value', self.select_date_column)
        self.last_date_picker.on_change('value', self.select_last_date)
        self.workdays_apply_button.on_click(self.workdays_button_pressed)
        self.prediction_button.on_click(self.prediction_button_pressed)

        #self.col_left = self.inputs

        columns = [
            TableColumn(field=Ci, title=Ci, width=DATATABLE_PREVIEW_COL_WIDTH)
            for Ci in self.input_df.columns
        ]
        self.data_table.columns = columns
        self.data_table.source = ColumnDataSource(
            self.input_df.head(DF_NUM_PREVIEW_ROWS))

        self.col_middle = column(self.data_preview_paragraph, self.data_table)
        #self.col_info = column()

        #self.col_left.width = 300
        #self.col_right.max_width = 500
        #self.col_right.sizing_mode = 'scale_width'

        #self.row_data_input = row(self.col_left, self.col_right, self.info_paragraph)
        #self.row_data_input.sizing_mode = 'scale_width'

        #self.row_demand_plot = row(self.product_selector_plotting)#, self.demand_plot)

        #self.layout = column(self.row_data_input, self.row_demand_plot)

        self.layout = column(
            row(
                column(
                    self.data_source_selector,
                    self.file_input,
                    self.values_col_selector,
                    self.product_id_col_selector,
                ), column(self.data_preview_paragraph, self.data_table),
                self.info_paragraph),
            row(
                column(self.date_col_selector, self.last_date_picker,
                       self.workdays_checkboxgroup, self.workdays_apply_button,
                       self.product_selector_plotting, self.prediction_button),
                self.demand_plot))

        curdoc().add_root(self.layout)
        curdoc().title = 'Demand Forecasting'
示例#20
0
from bokeh.models import DatePicker, HBox
from bokeh.io import curdoc

from datetime import datetime

beginning = DatePicker(title="Begin Date", min_date=datetime(2014,11,1),
                       max_date=datetime.now(),
                       value=datetime(datetime.now().year,1,1))

def cb(attr, old, new):
  print(new)

beginning.on_change('value', cb)

curdoc().add_root(HBox(children=[beginning]))

示例#21
0
                            row(clm_in_div, clm_input_dir_text_input),
                            row(cases_div, cases_dir_text_input),
                            row(output_div, output_dir_text_input))

###############################################################################
"""
Simulation period
"""
###############################################################################
period_div = Div(text="<h2>Simulation period</h2>",
                 sizing_mode="stretch_width")
start_date_picker = DatePicker(title='Select simulation start date:',
                               value="2000-01-01",
                               min_date="1900-01-01",
                               max_date="2020-12-30")
start_date_picker.on_change('value', _input_changed)

end_date_picker = DatePicker(title='Select simulation end date:',
                             value="2001-01-01",
                             min_date="1900-01-02",
                             max_date="2020-12-31")
end_date_picker.on_change('value', _input_changed)

dates_section = column(period_div, row(start_date_picker, end_date_picker))


def _check_dates():
    global start_date, end_date
    start_date = datetime.strptime(start_date_picker.value, '%Y-%m-%d')
    end_date = datetime.strptime(end_date_picker.value, '%Y-%m-%d')
示例#22
0
          "src='https://portal.rockgympro.com/portal/public/"
          "4f7e4c65977f6cd9be6d61308c7d7cc2/occupancy?&iframeid"
          "=occupancyCounter&fId=' height=250 width=100% "
          "scrolling='no' style='border:0px;'></iframe>")
DF = load_data()
GYMS = list(DF["gym"].unique())
D_MIN = DF["scrape_time"].min().date()
D_MAX = DF["scrape_time"].max().date()
WEEKS = math.ceil((D_MAX - D_MIN).days / 7)
PLURAL = {1: ""}.get(WEEKS, "s")

gyms = Select(value=GYM, options=GYMS)
gyms.on_change("value", update_gym)

date = DatePicker(value=str(D_MAX), min_date=str(D_MIN), max_date=str(D_MAX))
date.on_change("value", update_date)

reload_str = f"Dataset last loaded <i>{datetime.now().strftime('%Y-%m-%d %H:%M')}</i>"
last_refresh = Div(text=reload_str)

reload_btn = Button(label="Reload Counter", button_type="primary")
reload_btn.on_click(reload_counter)
_iframe = Div(text=IFRAME)
INFO_TEXT = f"""Dataset file: <pre>{DATASET}</pre>"""
#     <br>Remote file: <pre>{REMOTE_DATASET}</pre>
#     <br>Hashes: <pre>{HASH}<br>{REMOTE_HASH}</pre>
#     <br>If the hashes do not match the dataset has been updated on GitHub"""
dataset_info = Div(text=INFO_TEXT)

extras = row(column(_iframe), column(dataset_info), column(reload_btn))