# Example no. 1 (score: 0)
def render_api_login_form(api_choice, user_id):
    """Return the login UI for *api_choice*, based on connection state."""
    # A non-None redis entry means this user previously connected to the
    # API: index 1 of the mapping is the "connected" UI, index 0 the
    # initial login form.
    already_connected = r.get(f"{user_id}_{api_choice}_api") is not None
    return logins_ui_mapping[api_choice][1 if already_connected else 0]
 def __init__(self, dataset=None):
     """Load *dataset* from redis, or keep None when no dataset is given."""
     if dataset == "*":
         # Wildcard selection is not supported yet.
         raise NotImplementedError()
     # No dataset key: store None; otherwise unpickle the stored object.
     self.dataset = None if dataset is None else pickle.loads(r.get(dataset))
# Example no. 3 (score: 0)
def show_schema(api_choice, user_id):
    """Render the inferred schema table for the selected dataset."""

    # Guard clauses: nothing selected, or no data available for the choice.
    if api_choice is None:
        return [html.H4("Nothing selected.")]

    df = get_data(api_choice, user_id)
    if df is None:
        return [html.H4("Nothing to display")]

    stored = r.get(f"{user_id}_{api_choice}_schema")
    if stored is not None:
        # Reuse the schema cached for this user/dataset pair.
        cached = dill.loads(stored)
        types, subtypes = cached["types"], cached["subtypes"]
    else:
        # Infer types from a small bootstrap sample and cache the result.
        sample = df.sample(n=50, replace=True).dropna()
        types, subtypes = infer_types(sample, is_sample=True)
        r.set(f"{user_id}_{api_choice}_schema",
              dill.dumps({"types": types, "subtypes": subtypes}))

    return [
        html.Br(),
        dcc.ConfirmDialog(id="schema_confirmation"),
        html.Button("Update schema", id="update_schema"),
        schema_table(df[:500], types, subtypes)
    ]
# Example no. 4 (score: 0)
def Pipeline_Options(options, user_id):
    """
    Generate the layout of the dashboard.

    Args:
        options (list(dict)): Available datasets as options for `dcc.Dropdown`.

    Returns:
        A Dash element or list of elements.
    """

    # Session ids generated by python carry a prefix; keep only the tail.
    if user_id.startswith("python_generated_ssid"):
        user_id = user_id.split("-")[-1]

    available_pipelines = {key.decode(): r.get(key)
                           for key in r.keys(f'{user_id}_pipeline_*')}

    pipeline_options = [{'label': f'Pipeline --> {pipe_name}',
                         'value': pipe_name}
                        for pipe_name in available_pipelines]

    # Dropdown to pick one of the user's stored pipelines.
    algo_dropdown = html.Div(create_dropdown("Choose algorithm type",
                                             options=pipeline_options,
                                             multi=False,
                                             id="algo_choice_pipeline"),
                             className="horizontal_dropdowns")

    # Hidden placeholders so the fitting callback has all of its inputs
    # even before real choices are rendered.
    variable_choices = html.Div(
        id="variable_choices_pipeline",
        children=[
            # Debuggers
            html.Button("Fit model",
                        id="fit_model",
                        n_clicks=0,
                        style={"display": "none"}),
            html.Div(create_dropdown("", [],
                                     multi=True,
                                     id="xvars_pipeline",
                                     style={"display": "none"})),
            html.Div(create_dropdown("", [],
                                     multi=True,
                                     id="yvars_pipeline",
                                     style={"display": "none"})),
        ])

    return html.Div(children=[
        # Dataset is defined in the ModelBuilder
        algo_dropdown,
        variable_choices,
        # The results
        html.Div(id="training_results_pipeline"),
    ])
 def __init__(self,
              func_name="",
              cols=None,
              dataset_choice=None,
              user_id=None):
     """Store the redis-fetched query plus the user's current selections."""
     # Identity transform by default; presumably replaced later — TODO confirm.
     self.func = lambda x: x
     self.query = r.get(func_name)
     self.cols = cols
     self.dataset_choice = dataset_choice
     self.user_id = user_id
# Example no. 6 (score: 0)
def get_users_tweets(n_clicks, acc_name, user_id):
    """Fetch (and cache) the latest tweets for *acc_name* and render them."""

    # Button not pressed yet: nothing to do.
    if n_clicks <= 0:
        raise PreventUpdate()

    # Get the API handle
    api = pickle.loads(r.get(f"{user_id}_twitter_api_handle"))

    # TODO: This is a cache so consider a better implementation.
    cache_key = f"{user_id}_twitter_data_{acc_name}"
    cached = r.get(cache_key)
    if cached is not None:
        timeline = pickle.loads(cached)
    else:
        # TODO: Consider saving for future use / as a dataset.
        timeline = api.GetUserTimeline(screen_name=acc_name)

        # Expire the retrieved tweets cache in one hour
        r.set(cache_key, pickle.dumps(timeline), ex=3600)

    return [html.P(str(status.text)) for status in timeline]
# Example no. 7 (score: 0)
def cache_data_retrieve(key):
    """Return cached data for *key* if fresh and ETag-valid, else refetch."""
    data = json.loads(r.get(key))

    def fetch_live_data():
        print('Cache is invalid')
        return retrieve_data_from_hive(key)

    # Entries older than 24 hours are stale regardless of their ETag.
    is_fresh = int(time.time()) - data["cached_on"] <= 86400
    # Short-circuit: the ETag check only runs on fresh entries that have one,
    # matching the original nested-if behavior.
    if is_fresh and 'ETag' in data and cache_valid_check(key, data['ETag']):
        print('Returning Cached Data')
        return data
    return fetch_live_data()
# Example no. 8 (score: 0)
def fulfil_request(url, etag=None):
    """
    Serve *url* from the cache when possible, falling back to a live fetch.

    Args:
        url (str): Resource identifier, also used as the cache key.
        etag (str): Client-supplied ETag for conditional requests.

    Returns:
        Response: 304 when the client's ETag is current, 200 with the JSON
        payload on success, or 500 when retrieval fails.
    """
    # Conditional request: short-circuit with 304 when the cached ETag matches.
    if etag and cache_data_exists(url):
        data = json.loads(r.get(url))
        if data['ETag'] == etag:
            return Response('', status=304)
    try:
        if cache_data_exists(url):
            resp = cache_data_retrieve(url)
        else:
            resp = retrieve_data_from_hive(url)

        return Response(json.dumps(resp['data']),
                        headers={'ETag': resp['ETag']},
                        mimetype='application/json')
    except Exception as error:
        logger.info({
            'error': str(error),
        })
        # BUG FIX: previously this handler fell through and the function
        # returned None, which is not a valid WSGI/Flask response; surface
        # the failure as an explicit 500 instead.
        return Response('', status=500)
# Example no. 9 (score: 0)
def compare_torrents(name, files, sites):
    """
    Compare local *files* against cached/DB torrent records for *name*.

    Args:
        name (str): Torrent name, also used as the redis cache key.
        files (dict|int): Mapping of path -> size, or a single total length.
        sites: Site filter forwarded to `format_sites`.

    Returns:
        dict: `name` plus 'cmp_success' (full matches) and 'cmp_warning'
        (majority matches), each entry holding the torrent id and sites.
    """
    torrents = r.get(name)
    if not torrents:
        torrents = db.select_torrent(name)
        r.set(name, json.dumps(torrents))
    else:
        torrents = json.loads(str(torrents, encoding='utf-8'))

    # Normalize Windows path separators ONCE, without mutating the caller's
    # dict.  (Previously the keys were popped/re-inserted in place on every
    # loop iteration, a side effect on the input argument.)
    if isinstance(files, dict):
        files = {key.replace('\\', '/'): size for key, size in files.items()}

    cmp_success = []
    cmp_warning = []
    for t in torrents:
        success_count = failure_count = 0
        # SECURITY NOTE: eval() on database-stored content is dangerous if
        # the DB can ever hold untrusted data; consider storing JSON and
        # using json.loads here instead.
        torrent_files = eval(t['files'])
        result_site = format_sites(t['sites_existed'], sites)
        if not result_site:
            continue
        if len(torrent_files):
            # Per-file comparison only makes sense with a file mapping.
            if type(files) is int:
                continue

            # Count files whose size is within +/-5% of the recorded size.
            for k, v in torrent_files.items():
                if v * 0.95 < files.get(k, -1) < v * 1.05:
                    success_count += 1
                else:
                    failure_count += 1
            if failure_count:
                if success_count > failure_count:
                    db.hit(t['id'])
                    cmp_warning.append({'id': t['id'], 'sites': result_site})
            else:
                db.hit(t['id'])
                cmp_success.append({'id': t['id'], 'sites': result_site})
        else:
            # No file list recorded: compare total length instead.
            if type(files) is not int:
                continue
            if t['length'] * 0.95 < files < t['length'] * 1.05:
                db.hit(t['id'])
                cmp_success.append({'id': t['id'], 'sites': result_site})
    return {'name': name, 'cmp_success': cmp_success, 'cmp_warning': cmp_warning}
# Example no. 10 (score: 0)
def display_reddit_posts(n_clicks, subreddit_choice, user_id):
    """
    For the Reddit API, allow the user to specify a subreddit \
    to get data from.

    Args:
        n_clicks (int): Number of times button was clicked.
        subreddit_choice (str): The name of the subreddit.
        user_id (str): Session/user id.

    Returns:
        list: A list of dash components.
    """

    # Guard clauses: button not pressed yet, or no subreddit given.
    if n_clicks is None or n_clicks < 1:
        return [html.H4("No reddit data to display.")]
    if subreddit_choice is None:
        return [html.H4("No subreddit choice")]

    api = pickle.loads(r.get(f"{user_id}_reddit_api_handle"))
    subreddit = api.subreddit(subreddit_choice)

    def render_post(post):
        # One card per post: header with title + permalink, body with
        # author/score and the post's markdown text.
        return html.Div([
            dbc.Card([
                dbc.CardHeader([
                    html.H4(post.title),
                    html.A("view at reddit", href=post.permalink),
                ]),
                dbc.CardBody([
                    dbc.CardTitle(f"Written by {post.author.name}, "
                                  f"score: {post.score}"),
                    dbc.CardText(dcc.Markdown(post.selftext), ),
                ]),
            ]),
            html.Br(),
        ])

    return [render_post(post) for post in subreddit.hot(limit=5)]
# Example no. 11 (score: 0)
def get_users_ganalytics(n_clicks, metrics, user_id):
    """Request the selected Google Analytics metrics and cache them per user."""

    # Guard clauses: need chosen metrics and at least one button press.
    if metrics is None:
        raise PreventUpdate()
    if n_clicks <= 0:
        raise PreventUpdate()

    # TODO: Why have this requester here if below you do your own request?!
    # Get the API handle
    requester = dill.loads(r.get(f"{user_id}_ganalytics_api_handle"))

    # Normalize a single metric into a one-element list.
    metrics = metrics if isinstance(metrics, list) else [metrics]

    # requests response object
    response = requests.get(
        f"http://127.0.0.1:5000/{user_id}/{','.join(metrics)}")

    # parse the results and reform them
    results = json.loads(response.text)

    for metric in metrics:

        # Metric names appear to carry a 3-char prefix that the results
        # dict omits — TODO confirm this assumption.
        data = results["data"][metric[3:]]

        # TODO: This signifies duplication of storage. The other
        #       server already stores the results in a redis cache
        #       but we cannot remove this because other parts of the
        #       code depend on this storage. Consider reworking the
        #       REST API, but using the same database for 2 servers
        #       is an anti-pattern for micro-services architectures.
        r.set(f"{user_id}_ganalytics_data_{metric}",
              pickle.dumps(data),
              ex=3600)

    return [html.Br(), html.P(str(results))]
# Example no. 12 (score: 0)
def render_variable_choices_pipeline(algo_choice_pipeline, user_id):
    """
    Create a menu of dcc components to select pipeline and variables.

    Args:
        algo_choice_pipeline (str): Choice among (pre)defined pipelines.
        user_id (str): Session/user id.

    Returns:
        list: Dash elements.
    """

    # Make sure all variables have a value before returning choices
    if algo_choice_pipeline is None:
        return [html.H4("Select a pipeline first.")]

    # NOTE(review): dill.loads on redis-stored bytes — assumes the pipeline
    # store only ever contains trusted, self-produced data.
    model = dill.loads(r.get(algo_choice_pipeline))

    # Resolve the pipeline's input node and terminal (estimator) node; the
    # layout below depends on the types of both.
    input_node = pipeline_creator.find_pipeline_node(
        pipeline_classes.BaseInput)(model)

    terminal_node = pipeline_creator.find_pipeline_node(
        pipeline_classes.TerminalNode)(model)

    # defaults
    layout = []
    options = []

    if isinstance(input_node, pipeline_classes.GenericInput):
        try:
            dataset_choice = input_node.dataset
        except AttributeError:
            return [html.H4("Something went wrong with the input")]

        if isinstance(input_node, pipeline_classes.TwitterAPI):
            # Twitter input needs no X-variable choice: show only the fit
            # button plus a hidden xvars placeholder so downstream callbacks
            # still find their inputs.
            layout += [
                html.Button("Fit model", id="fit_model", n_clicks=0),

                # Debugger
                html.Div(
                    dcc.Dropdown(options=[],
                                 multi=True,
                                 id="xvars_pipeline",
                                 style={"display": "none"})),
            ]

        else:
            # Tabular input: offer the dataframe's columns as X variables.
            df = get_data(dataset_choice, user_id)

            # Truncate labels so they don't fill the whole dropdown
            options = [{'label': col[:35], 'value': col} for col in df.columns]

            layout += [
                html.Div(create_dropdown("X variable(s)",
                                         options,
                                         multi=True,
                                         id="xvars_pipeline"),
                         className="horizontal_dropdowns"),

                # to debug the next callback
                html.Button("Fit model",
                            id="fit_model",
                            n_clicks=0,
                            style={"display": "none"})
            ]

    else:
        # Non-generic input: emit only hidden placeholders.
        layout += [
            # Debuggers
            html.Button("Fit model",
                        id="fit_model",
                        n_clicks=0,
                        style={"display": "none"}),
            html.Div(
                dcc.Dropdown(options=[],
                             multi=True,
                             id="xvars_pipeline",
                             style={"display": "none"})),
        ]

    # A target-variable dropdown only makes sense for supervised learners.
    if not isinstance(terminal_node, pipeline_classes.UnsupervisedLearner):
        layout += [
            html.Div(dcc.Dropdown(options=options,
                                  multi=False,
                                  id="yvars_pipeline"),
                     className="horizontal_dropdowns")
        ]

    else:
        # Only existing for debugging the next callback, no yvars needed
        layout += [
            dcc.Dropdown(options=[],
                         id="yvars_pipeline",
                         style={"display": "none"})
        ]

    return layout
# Example no. 13 (score: 0)
def fit_pipeline_model(xvars, yvars, fit_model, algo_choice_pipeline, user_id):
    """
    Take user choices and, if all are present, fit the appropriate model.

    Args:
        xvars (list(str)): predictor variables.
        yvars (str): target variable.
        fit_model (int): Click count of the fit button (0 means never pressed).
        algo_choice_pipeline (str): Choice among (pre)defined pipelines.
        user_id: Session/user id.

    Returns:
        list: Dash element(s) with the results of model fitting.
    """

    if algo_choice_pipeline is None:
        raise PreventUpdate()

    # We have the dictionary that maps keys to models so use that
    model = dill.loads(r.get(algo_choice_pipeline))

    # Resolve the pipeline's input node and terminal (estimator) node.
    input_node = pipeline_creator.find_pipeline_node(
        pipeline_classes.GenericInput)(model)

    terminal_node = pipeline_creator.find_pipeline_node(
        pipeline_classes.TerminalNode)(model)

    if isinstance(input_node, pipeline_classes.GenericInput):
        if isinstance(input_node, pipeline_classes.TwitterAPI):
            if fit_model == 0:
                # Don't fit
                raise PreventUpdate()

            # Twitter pipelines build their own features; no dataframe here.
            X = []

        else:
            try:
                dataset_choice = input_node.dataset
            except AttributeError:
                return [html.H4("Something went wrong with the input")]

            df = get_data(dataset_choice, user_id)

            ## Make sure all variables have a value before fitting
            if any(x is None for x in [xvars, df, dataset_choice]):
                raise PreventUpdate()

            # if we used df[xvars] directly the ordering of variables that the user
            # gave would actually affect the model. This forces those variables to
            # be in the order of the original df. It matters only here since the
            # user might have defined a FeatureMaker that depends on this.
            xvars = [xvar for xvar in df.columns if xvar in xvars]
            X = df[xvars]

    else:
        X = []

    if isinstance(terminal_node, pipeline_classes.UnsupervisedLearner):
        model.fit(X)
        return [html.H4(str(model.predict(X)))]

    else:

        if yvars is None:
            raise PreventUpdate()

        # NOTE(review): when X == [] (TwitterAPI or non-generic input) this
        # branch references `df`, which was never assigned, and would raise
        # UnboundLocalError — presumably those inputs are always paired with
        # unsupervised terminal nodes; confirm and guard if not.
        model.fit(X, df[yvars])

        # TODO: Implement score function for all models.
        return [
            html.H4(
                f"Pipeline model scored: {model.score(df[xvars], df[yvars])}")
        ]
# Example no. 14 (score: 0)
def render_table(api_choice, user_id):
    """
    Create a display for the chosen dataset.

    Args:
        api_choice (str): Value from the dropdown.
        user_id (str): Session/user id.

    Returns:
        list: A list of dash components.
    """

    if api_choice is None:
        return [html.H4("Nothing selected.")]

    if api_choice == "twitter_api":
        api = pickle.loads(r.get(f"{user_id}_{api_choice}_handle"))

        return pretty_print_tweets(api, 5)

    elif api_choice == "reddit_api":
        # No need to get the api here
        # TODO: But maybe this change?

        return [
            html.H4("Write the name of a subreddit:"),
            dcc.Input(
                id="subreddit_choice",
                type="text",
                value="",
            ),
            html.Button("Gimme dem reddits", id="reddit_submit"),
            html.Br(),
            html.Br(),
            html.Div(id="subreddit_posts"),
        ]

    elif api_choice == "spotify_api":
        spotify = pickle.loads(r.get(f"{user_id}_{api_choice}_handle"))
        top_playlists = spotify.category_playlists(
            "toplists")["playlists"]["items"]

        # One card per top playlist: name + Spotify link, owner, track count.
        posts = [
            html.Div([
                dbc.Card([
                    dbc.CardHeader([
                        html.H4(playlist["name"]),
                        html.A("listen on Spotify",
                               href=playlist["external_urls"]["spotify"]),
                    ]),
                    dbc.CardBody([
                        dbc.CardTitle(
                            f"Owner: {playlist['owner']['display_name']}"),
                        dbc.CardText(
                            f"{playlist['tracks']['total']} total tracks"),
                    ]),
                ]),
                html.Br(),
            ]) for playlist in top_playlists
        ]
        return posts

    else:
        # Covers quandl_api and every other tabular source; a separate
        # quandl branch previously duplicated this exact call verbatim.
        df = get_data(api_choice, user_id)

    if df is None:
        return [html.H4("Nothing to display")]

    # Keep the rendered table narrow: only the first 10 columns.
    df = df[df.columns[:10]]
    return [
        html.Br(),
        create_table(df),
    ]