def convert_model(n_clicks, close, elements, layout, user_id, is_open):
    if user_id.startswith("python_generated_ssid"):
        # Trim id
        user_id = user_id.split("-")[-1]

    if n_clicks is None:
        return [False, [html.H5("No specs defined yet")]]

    else:
        # Keep elements that are either edges (have a source)
        # or elements that have a parent (nodes, not groups)
        elements = [
            elem for elem in elements
            if (("source" in elem["data"]) or ("parent" in elem["data"]))
        ]

        pipelines, classifiers = pipeline_creator.create_pipelines(
            elements, node_options)

        # Save pipelines to Redis (to be used in other modules)
        for pipe, clf in zip(pipelines, classifiers):
            r.set(f"{user_id}_pipeline_{clf}", dill.dumps(pipe))

        # TODO: Make this a modal
        # https://dash-bootstrap-components.opensource.faculty.ai/l/components/modal
        return [
            not is_open,
            [
                html.P(f"{i+1}) {str(pipeline)}")
                for (i, pipeline) in enumerate(pipelines)
            ]
        ]
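# Illustrative sketch (not part of the app): how another module might read back a
# pipeline that `convert_model` stored above. The standalone Redis client and the
# helper name are assumptions for demonstration; only the f"{user_id}_pipeline_{clf}"
# key format and the dill serialization come from the function above.
import dill
import redis


def load_stored_pipeline(user_id, clf_name, redis_client=None):
    """Fetch and deserialize a pipeline saved under f"{user_id}_pipeline_{clf_name}"."""
    client = redis_client or redis.Redis(host="localhost", port=6379, db=0)
    raw = client.get(f"{user_id}_pipeline_{clf_name}")
    if raw is None:
        return None
    return dill.loads(raw)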
def show_schema(api_choice, user_id):
    if api_choice is None:
        return [html.H4("Nothing selected.")]

    else:
        df = get_data(api_choice, user_id)

        if df is None:
            return [html.H4("Nothing to display")]

        schema = r.get(f"{user_id}_{api_choice}_schema")
        if schema is None:
            sample = df.sample(n=50, replace=True).dropna()
            types, subtypes = infer_types(sample, is_sample=True)

            r.set(f"{user_id}_{api_choice}_schema", dill.dumps({
                "types": types,
                "subtypes": subtypes
            }))
        else:
            schema = dill.loads(schema)
            types, subtypes = schema["types"], schema["subtypes"]

        return [
            html.Br(),
            dcc.ConfirmDialog(id="schema_confirmation"),
            html.Button("Update schema", id="update_schema"),
            schema_table(df[:500], types, subtypes)
        ]
def serve_layout():
    """
    The layout of our app needs to be inside a function \
    so that a new session_id is generated every time \
    a new session starts.
    """

    session_id = f"python_generated_ssid_{uuid.uuid4()}"

    # TODO: This should probably be moved to `utils.startup`
    # Load some example data for all users
    for file in os.listdir("../data"):
        if file.endswith("csv"):
            df = pd.read_csv("../data/" + file)
            r.set(f"{session_id}_user_data_example_{file[:-4]}",
                  pickle.dumps(df))

    return html.Div(children=[
        html.H2(session_id, id="user_id", style={"display": "none"}),

        html.Div([
            # Sidebar / menu
            html.Div(children=SideBar, className="col-sm-4 col-md-3 col-xl-2",
                     id="sidebar", style={"display": "inline-block"}),

            # Main Div
            html.Div(children=MainMenu, className="col-sm-8 col-md-9 col-xl-10",
                     id="mainmenu", style={"display": "inline-block"}),

            # Secondary sidebar (drawer)
            html.Div(children=[
                sd_material_ui.Drawer(SideBar2, id="drawer", open=True,
                                      docked=True, openSecondary=True),
            ], className="", id="sidebar2", style={"display": "inline-block"}),

        ], className="row", id="main_content"),

    ], className="container", style={"display": "inline"}, id="main_page")
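# Illustrative wiring sketch (assumption, not shown in this excerpt): Dash calls a
# function assigned to `app.layout` on every page load, which is what makes the
# per-session session_id above work. The `app` object name is a placeholder.
import dash

app = dash.Dash(__name__)
# Assign the function itself (no parentheses) so that a fresh session_id is
# generated for each new page load, instead of once at server startup.
app.layout = serve_layout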
def update_schema(n_clicks, table_colnames, row_types, row_subtypes,
                  dataset_choice, user_id):
    """
    Update the dataset schema. This function takes the html elements \
    from the table head (containing column names) and its first two \
    rows (containing dropdowns with the data types/subtypes), parses \
    them and stores them in redis.

    Args:
        n_clicks (int): Number of button clicks.
        table_colnames (dict): The head (`html.Thead`) of the table, \
                               as a Dash dict.
        row_types (dict): The first table row (`html.Tr`) containing \
                          the Dash dropdown dict with the data types.
        row_subtypes (dict): The second table row (`html.Tr`) containing \
                             the Dash dropdown dict with the data subtypes.
        dataset_choice (str): Name of dataset.
        user_id (str): Session/user id.

    Returns:
        list(str, bool): A message and a boolean for a browser alert.
    """

    types = {}
    for col_name, col in zip(table_colnames, row_types):
        dropdown = col["props"]["children"]
        dropdown_value = dropdown["props"]["value"]

        col_name = col_name["props"]["children"]
        types[col_name] = dropdown_value

    subtypes = {}
    for col_name, col in zip(table_colnames, row_subtypes):
        dropdown = col["props"]["children"]
        dropdown_value = dropdown["props"]["value"]

        col_name = col_name["props"]["children"]
        subtypes[col_name] = dropdown_value

    r.set(f"{user_id}_{dataset_choice}_schema", dill.dumps({
        "types": types,
        "subtypes": subtypes
    }))

    return "Updated", True
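# Illustrative sketch: the nested Dash dicts that `update_schema` parses. The column
# names and type values here are made up for demonstration; only the
# {"props": {"children": ...}} nesting mirrors how Dash serializes components.
_example_colnames = [{"props": {"children": "age"}},
                     {"props": {"children": "income"}}]
_example_row_types = [
    {"props": {"children": {"props": {"value": "integer"}}}},  # dropdown for "age"
    {"props": {"children": {"props": {"value": "float"}}}},    # dropdown for "income"
]

_types = {}
for _col_name, _col in zip(_example_colnames, _example_row_types):
    _types[_col_name["props"]["children"]] = _col["props"]["children"]["props"]["value"]
# _types == {"age": "integer", "income": "float"}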
def compare_torrents(name, files, sites):
    torrents = r.get(name)
    if not torrents:
        torrents = db.select_torrent(name)
        r.set(name, json.dumps(torrents))
    else:
        torrents = json.loads(str(torrents, encoding='utf-8'))

    cmp_success = []
    cmp_warning = []
    for t in torrents:
        success_count = failure_count = 0
        torrent_files = eval(t['files'])
        result_site = format_sites(t['sites_existed'], sites)
        if not result_site:
            continue

        if len(torrent_files):
            # The database entry lists individual files: compare per-file sizes
            if type(files) is int:
                continue
            keys = list(files.keys())
            for key in keys:
                # On Windows, change '\\' to '/' to match the paths in the database
                files[key.replace('\\', '/')] = files.pop(key)
            for k, v in torrent_files.items():
                if v * 0.95 < files.get(k, -1) < v * 1.05:
                    success_count += 1
                else:
                    failure_count += 1
            if failure_count:
                if success_count > failure_count:
                    db.hit(t['id'])
                    cmp_warning.append({'id': t['id'], 'sites': result_site})
            else:
                db.hit(t['id'])
                cmp_success.append({'id': t['id'], 'sites': result_site})
        else:
            # No file list stored: compare the total torrent length
            if type(files) is not int:
                continue
            if t['length'] * 0.95 < files < t['length'] * 1.05:
                db.hit(t['id'])
                cmp_success.append({'id': t['id'], 'sites': result_site})

    return {'name': name, 'cmp_success': cmp_success, 'cmp_warning': cmp_warning}
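# Illustrative sketch: the ±5% size-tolerance rule used above, pulled out as a
# standalone check. The helper name and the sample sizes are made up for demonstration.
def sizes_match(expected_size, actual_size, tolerance=0.05):
    """True when actual_size lies strictly within ±tolerance of expected_size."""
    return expected_size * (1 - tolerance) < actual_size < expected_size * (1 + tolerance)


assert sizes_match(1_000_000, 980_000)       # within 5%  -> counted as a success
assert not sizes_match(1_000_000, 900_000)   # off by 10% -> counted as a failure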
def api_connect(api_choice, user_id, *args, **kwargs):
    """
    Connect to the selected API. A function that serves as the front \
    end to all others, abstracting them away. Also stores the API \
    handle in Redis for later usage.

    Args:
        api_choice (str): A key in `connectors_mapping`.
        user_id (str): Session/user id.
        *args: Arguments to be passed to the appropriate API connector.
        **kwargs: Keyword arguments to be passed to the appropriate \
                  API connector.

    Returns:
        bool: Whether the connection succeeded (False if an exception \
              was raised).
    """

    if any(x is None for x in args):
        return False

    func = connectors_mapping[api_choice]

    if api_choice == "ganalytics":
        # Google analytics needs the user_id too
        kwargs.update({"user_id": user_id})

    try:
        api_handle = func(*args, **kwargs)
        # TODO: Maybe add a timeout here as well?

        # Store in Redis that the API connected, and its handle(s)
        r.set(f"{user_id}_{api_choice}_api", "true")
        r.set(f"{user_id}_{api_choice}_api_handle", dill.dumps(api_handle))
        return True

    except Exception as e:
        print(e)
        return False
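# Illustrative usage sketch (assumption, not app code): connecting an API and later
# recovering its handle. The "ganalytics" choice and the credentials argument are
# placeholders; the Redis key format, the module-level client `r`, and the dill
# round-trip come from `api_connect` above.
connected = api_connect("ganalytics", "example_user", "path/to/credentials.json")
if connected:
    api_handle = dill.loads(r.get("example_user_ganalytics_api_handle"))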
def get_users_ganalytics(n_clicks, metrics, user_id):
    if metrics is None:
        raise PreventUpdate()

    if n_clicks > 0:
        # TODO: Why have this requester here if below you do your own request?!
        # Get the API handle
        requester = dill.loads(r.get(f"{user_id}_ganalytics_api_handle"))

        if not isinstance(metrics, list):
            metrics = [metrics]

        # requests response object
        response = requests.get(
            f"http://127.0.0.1:5000/{user_id}/{','.join(metrics)}")

        # parse the results and reform them
        results = json.loads(response.text)
        for metric in metrics:
            data = results["data"][metric[3:]]

            # TODO: This signifies duplication of storage. The other
            #       server already stores the results in a redis cache
            #       but we cannot remove this because other parts of the
            #       code depend on this storage. Consider reworking the
            #       REST API, but using the same database for 2 servers
            #       is an anti-pattern for micro-services architectures.
            r.set(f"{user_id}_ganalytics_data_{metric}", pickle.dumps(data),
                  ex=3600)

        return [html.Br(), html.P(str(results))]

    else:
        raise PreventUpdate()
def get_users_tweets(n_clicks, acc_name, user_id):
    if n_clicks > 0:
        # Get the API handle
        api = pickle.loads(r.get(f"{user_id}_twitter_api_handle"))

        # TODO: This is a cache so consider a better implementation.
        query = r.get(f"{user_id}_twitter_data_{acc_name}")
        if query is None:
            # TODO: Consider saving for future use / as a dataset.
            query = api.GetUserTimeline(screen_name=acc_name)

            # Expire the retrieved tweets cache in one hour
            r.set(f"{user_id}_twitter_data_{acc_name}", pickle.dumps(query),
                  ex=3600)
        else:
            query = pickle.loads(query)

        return [html.P(str(status.text)) for status in query]

    else:
        raise PreventUpdate()
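# Illustrative sketch: the cache-aside pattern used by the two callbacks above,
# factored into a reusable helper. The helper name is an assumption; `fetch` stands
# in for any expensive call (e.g. the Twitter API), and the one-hour TTL mirrors
# the ex=3600 used above.
import pickle


def cached_fetch(redis_client, key, fetch, ttl=3600):
    """Return the cached value for `key` if present; otherwise fetch, cache, return."""
    raw = redis_client.get(key)
    if raw is not None:
        return pickle.loads(raw)
    value = fetch()
    redis_client.set(key, pickle.dumps(value), ex=ttl)
    return value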
def modify_graph(remove_clicked_time, connect_selected_time, modify_node_time,
                 load_prebuilt_time, *add_nodes):

    # This is necessary since Python cannot accept *args in the middle
    # of the function parameter list. The tapped node is used only for
    # altering parameters on the last-clicked node, while the selected
    # is used for connecting nodes. The modify_node_attribute refers to
    # the dropdown (sklearn kwarg) and modify_node_params is the value
    (elems, to_be_deleted, selected, modify_node_attribute,
     modify_node_params, tapped, user_text, mapping_store, pipeline_options,
     user_id) = add_nodes[-10:]
    add_nodes = add_nodes[:-10]

    if all(x is None for x in [remove_clicked_time, connect_selected_time,
                               modify_node_time, *add_nodes]):
        if elems is not None:
            return elems
        else:
            return []

    G = Graph(elems)

    # Create list of tuples, e.g.: (time_clicked, add_xgb)
    add_node_list = [(add_node, f"add_{model}")
                     for (add_node, model) in zip(add_nodes, node_options)]

    # Sort buttons based on clicked time (most recent first)
    buttons_and_clicks = sorted([
        (remove_clicked_time, "remove"),
        (connect_selected_time, "connect"),
        (modify_node_time, "modify"),
        (load_prebuilt_time, "prebuilt"),
    ] + add_node_list, reverse=True)

    # Graph operations
    if buttons_and_clicks[0][1] == "remove":
        G.node_collection.remove_node(to_be_deleted)

    elif buttons_and_clicks[0][1] == "connect":
        G.edge_collection.add_edges(selected)

    elif buttons_and_clicks[0][1].startswith("add_"):
        # e.g.: (time_clicked, add_xgb) --> xgb
        G.node_collection.add_node(buttons_and_clicks[0][1][4:])

    elif buttons_and_clicks[0][1] == "prebuilt":
        pipeline_steps = prebuilt_pipelines[pipeline_options]
        return GraphUtils(pipeline_steps).render_graph()

    elif buttons_and_clicks[0][1] == "modify":
        if tapped is not None:
            # Iterate over all the nodes to find the appropriate one
            # TODO: The fact that this is necessary means that `Graph`
            #       should implement item lookup (e.g. a __getitem__ method)
            for node in G.node_collection.nodes:
                if node.id == tapped["id"]:
                    if node.node_type == "feat_maker":
                        try:
                            dataset_choice = pipeline_creator.find_input_node(
                                elems).dataset
                        except AttributeError:
                            raise PreventUpdate()

                        # Get the mapping symbols
                        # These are the same now but will be changed later
                        user_columns = list(mapping_store["selected_columns"])
                        user_symbols = list(
                            mapping_store["selected_columns"].values())

                        # left- and right-hand side
                        lhs = ','.join(user_symbols)
                        rhs = ' '.join(user_symbols)

                        # TODO: Make sure that these symbols are defined in the
                        #       correct order and that this order is preserved
                        #       when passed to the func inside the pipeline.
                        #       Line 183 probably fixes this but we need to
                        #       double check.
                        exec_commands = [
                            f"{lhs} = sympy.symbols('{rhs}')",
                            f"f = {user_text}",
                            f"lambdify( ({lhs}), f)",
                        ]

                        func_name = f"{user_id}_feat_{'-'.join(user_columns)}"

                        # Store the func to Redis, and save only the
                        # key. This is due to python functions not
                        # being JSON serializable.
                        r.set(func_name, dill.dumps(exec_commands))

                        # TODO: This needs improvement, e.g. with adding
                        #       variables in the edges and passing data
                        #       through there. The current implementation
                        #       is forced to load the dataset twice.
                        params = {
                            "func_name": func_name,
                            "cols": user_columns,
                            "dataset_choice": dataset_choice,
                            "user_id": user_id
                        }
                        node.options["data"]["func_params"].update(params)

                    else:
                        node.options["data"]["func_params"].update(
                            {modify_node_attribute: modify_node_params})

    return G.render_graph()
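# Illustrative sketch: the command strings built above for a hypothetical feat_maker
# node where the user mapped columns {"height": "x", "weight": "y"} and typed the
# expression "x / y**2". The pipeline code that later executes these strings (with
# sympy and lambdify in scope) is not shown in this excerpt.
_example_exec_commands = [
    "x,y = sympy.symbols('x y')",
    "f = x / y**2",
    "lambdify( (x,y), f)",
]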