def predict():
    req = request.get_json()
    if "params" in req:
        data = req["params"]["data"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
    else:
        return apierrors.NoData()

    P = Projects(user_id, project_id)
    project = P.read(id=project_id)
    P.addDataset(data)
    dataset = P.getDataset()

    # Reshape the stored dataset into a dataframe-like dict of columns.
    X = {}
    if isinstance(dataset[0]["data"], str):
        dataset[0]["data"] = json.loads(dataset[0]["data"])
    for k in dataset[0]["data"]:
        X[k] = []
    for i in dataset:
        if i["data"] == "data":
            continue
        if isinstance(i["data"], str):
            obj = json.loads(i["data"])
        else:
            obj = json.loads(json.dumps(i["data"]))
        for k in obj:
            X[k].append(obj[k])

    # Convert the column dict to a pandas DataFrame and preprocess it.
    rawX = pd.DataFrame(X)
    X = preProcess(dataset=rawX)

    pkl_file = get_pickle(user_id + "/" + project_id + "/pickle.pkl")
    if pkl_file is None:
        return apierrors.ErrorMessage(
            "No pickle file found, maybe you should train the model first")
    model = pickle.load(pkl_file)

    # Fit the nearest-neighbours estimator and look up the neighbours of every row.
    nbrs = model.fit(X)
    distances, indices = nbrs.kneighbors(X)

    # Debug: neighbours of the first preprocessed row.
    print(nbrs.kneighbors([X[0]]))

    return json.dumps({
        "data": json.loads(rawX.to_json()),
        "indexes": json.loads(pd.DataFrame(indices).to_json())
    })
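# Standalone sketch (not part of the endpoint above): how the unfitted
# NearestNeighbors estimator stored in pickle.pkl is expected to behave once
# re-fitted on a preprocessed numeric matrix. The data is synthetic and
# "n_neighbors=3" is an assumption; the service may pickle the estimator with
# different settings.
def _demo_nearest_neighbors():
    import numpy as np
    from sklearn.neighbors import NearestNeighbors

    X = np.random.RandomState(0).rand(20, 4)       # 20 rows, 4 numeric features
    nbrs = NearestNeighbors(n_neighbors=3).fit(X)   # fit() returns the estimator itself
    distances, indices = nbrs.kneighbors(X)         # per-row neighbour distances/indices
    print(indices[:3])                              # neighbours of the first three rows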
def fit():
    # Default hyperparameters, overridable per request.
    nu = 0.3
    kernel = "rbf"
    gamma = 0.1
    degree = 3
    coef0 = 0.0
    tol = 0.001
    shrinking = True

    req = request.get_json()
    if "params" in req:
        if "nu" in req["params"]:
            nu = req["params"]["nu"]
        if "kernel" in req["params"]:
            kernel = req["params"]["kernel"]
        if "gamma" in req["params"]:
            gamma = req["params"]["gamma"]
        if "degree" in req["params"]:
            degree = req["params"]["degree"]
        if "coef0" in req["params"]:
            coef0 = req["params"]["coef0"]
        if "tol" in req["params"]:
            tol = req["params"]["tol"]
        if "shrinking" in req["params"]:
            shrinking = req["params"]["shrinking"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    if dataset is None:
        return apierrors.ErrorMessage("dataset not found")

    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)
    # Train on the first 66% of the rows, then predict on the full set.
    X_train = X[0:int(len(X) * 0.66)]

    # Fit the one-class SVM.
    clf = svm.OneClassSVM(degree=int(degree), coef0=float(coef0), tol=float(tol),
                          shrinking=bool(shrinking), nu=float(nu),
                          kernel=kernel, gamma=float(gamma))
    clf.fit(X_train)
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X)
    # Rows labelled -1 are treated as outliers.
    n_error_train = y_pred_train[y_pred_train == -1].size
    n_error_test = y_pred_test[y_pred_test == -1].size

    # Persist the trained model for later predictions.
    s = pickle.dumps(clf)
    write_file(user_id, project_id, "pickle.pkl", s)

    return json.dumps({
        "dataset": json.loads(rawX.to_json()),
        "labels": y_pred_test.tolist()
    })
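# Standalone sketch of the one-class SVM workflow used by fit() above, on
# synthetic data instead of the project's CSV/preProcess pipeline. The 0.66
# train split and the nu/kernel/gamma defaults mirror the endpoint; the
# dataset shape is illustrative only.
def _demo_one_class_svm():
    import numpy as np
    from sklearn import svm

    rng = np.random.RandomState(0)
    X = 0.3 * rng.randn(100, 2)                  # mostly "normal" 2-D points
    X_train = X[:int(len(X) * 0.66)]

    clf = svm.OneClassSVM(nu=0.3, kernel="rbf", gamma=0.1)
    clf.fit(X_train)
    labels = clf.predict(X)                      # +1 = inlier, -1 = outlier
    print("outliers:", int((labels == -1).sum()))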
def predict():
    req = request.get_json()
    if "params" in req:
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        data = req["params"]["data"]
    else:
        return apierrors.ErrorMessage("You need to specify parameters to load")

    P = Projects(user_id, project_id)
    project = P.read(id=project_id)
    P.addDataset(data)
    dataset = P.getDataset()

    # Collect the stored rows into a list of plain records.
    X = []
    for i in dataset:
        X.append(json.loads(json.dumps(i["data"])))

    X = pd.DataFrame(X)
    X = preProcess(dataset=X)

    pkl_file = get_pickle(user_id + "/" + project_id + "/pickle.pkl")
    if pkl_file is None:
        return apierrors.ErrorMessage(
            "No pickle file found, maybe you should train the model first")
    model = pickle.load(pkl_file)

    # Re-fit DBSCAN on the submitted data (DBSCAN has no separate predict step).
    db = model.fit(X)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    # Number of clusters, ignoring the noise label (-1).
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    resultObj = {
        "clusters": n_clusters_,
        "dataset": X.tolist(),
        "labels": labels.tolist()
    }
    return json.dumps(resultObj)
def fit():
    req = request.get_json()
    eps = 0.7
    min_samples = 4
    if "params" in req:
        eps = float(req["params"]["eps"])
        min_samples = int(req["params"]["min"])
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
        if "max" in req["params"]:
            max_samples = req["params"]["max"]
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    if dataset is None:
        return apierrors.ErrorMessage("dataset not found")

    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)

    # Persist the unfitted estimator so predict() can re-fit it on new data,
    # then cluster the training set.
    DB = DBSCAN(eps=eps, min_samples=min_samples)
    s = pickle.dumps(DB)
    write_file(user_id, project_id, "pickle.pkl", s)
    db = DB.fit(X)

    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    # Number of clusters, ignoring the noise label (-1).
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    resultObj = {
        "clusters": n_clusters_,
        "dataset": json.loads(rawX.to_json()),
        "labels": json.loads(pd.DataFrame(labels).to_json())
    }
    return json.dumps(resultObj)
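# Standalone sketch of the DBSCAN clustering done by fit()/predict() above,
# with synthetic blobs instead of the uploaded CSV. The eps/min_samples values
# match the endpoint defaults; note that DBSCAN labels noise points as -1,
# which is why the endpoints subtract one cluster when -1 is present.
def _demo_dbscan():
    import numpy as np
    from sklearn.cluster import DBSCAN
    from sklearn.datasets import make_blobs

    X, _ = make_blobs(n_samples=200, centers=3, random_state=0)
    db = DBSCAN(eps=0.7, min_samples=4).fit(X)
    labels = db.labels_
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    print("clusters:", n_clusters, "noise points:", int(np.sum(labels == -1)))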
def predict():
    req = request.get_json()
    if "params" in req:
        data = req["params"]["data"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
    else:
        return apierrors.NoData()

    P = Projects(user_id, project_id)
    project = P.read(id=project_id)
    P.addDataset(data)
    dataset = P.getDataset()

    # Reshape the stored dataset into a dataframe-like dict of columns.
    X = {}
    if isinstance(dataset[0]["data"], str):
        dataset[0]["data"] = json.loads(dataset[0]["data"])
    for k in dataset[0]["data"]:
        X[k] = []
    for i in dataset:
        if isinstance(i["data"], str):
            obj = json.loads(i["data"])
        else:
            obj = json.loads(json.dumps(i["data"]))
        for k in obj:
            X[k].append(obj[k])

    # Convert the column dict to a pandas DataFrame and preprocess it.
    rawX = pd.DataFrame(X)
    X = preProcess(dataset=rawX)

    pkl_file = get_pickle(user_id + "/" + project_id + "/pickle.pkl")
    if pkl_file is None:
        return apierrors.ErrorMessage(
            "No pickle file found, maybe you should train the model first")
    model = pickle.load(pkl_file)

    # Label every row with the trained one-class SVM (-1 marks an outlier).
    labels = pd.DataFrame(model.predict(X)).to_json()
    return json.dumps({
        "dataset": json.loads(rawX.to_json()),
        "labels": json.loads(labels)
    })
def fit():
    req = request.get_json()
    dimensions = 2
    if "params" in req:
        dimensions = req["params"]["dimensions"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    if dataset is None:
        return apierrors.ErrorMessage("dataset not found")

    # Parse and preprocess the CSV, then project it onto the requested
    # number of principal components.
    rawX = pd.read_csv(StringIO(dataset.decode('utf-8')))
    X = preProcess(dataset=rawX)
    pca = PCA(n_components=int(dimensions))
    transformed = pca.fit_transform(X)

    resultObj = {
        "original": json.loads(rawX.to_json()),
        "transformed": transformed.tolist()
    }
    return json.dumps(resultObj)
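# Standalone sketch of the PCA projection performed by fit() above, assuming a
# numeric matrix like the one preProcess() is expected to return. The 2-D
# target matches the endpoint's default "dimensions" value; the data itself is
# synthetic and illustrative only.
def _demo_pca():
    import numpy as np
    from sklearn.decomposition import PCA

    X = np.random.RandomState(0).rand(50, 6)     # 50 rows, 6 numeric features
    pca = PCA(n_components=2)
    transformed = pca.fit_transform(X)           # shape (50, 2)
    print(pca.explained_variance_ratio_)         # variance captured per component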
def analyse():
    from pandas.plotting import scatter_matrix

    req = request.get_json()
    user_id = req["params"]["user_id"]
    project_id = req["params"]["project_id"]
    filename = req["params"]["filename"]

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset_file = read_file(fullPath)
    if dataset_file is None:
        return apierrors.ErrorMessage("dataset not found")

    file = StringIO(dataset_file.decode('utf-8'))
    dataset = pd.read_csv(file)
    if "label_encode" in req:
        # Re-read everything as strings and label-encode every column.
        file.seek(0)
        dataset = pd.read_csv(file, dtype="unicode")
        dataset = dataset.apply(le().fit_transform)
    dataset = dataset.fillna(0)

    # Build the descriptive plots: histogram, density, box plot and scatter matrix.
    hp = plt.subplot()
    dataset.hist(ax=hp, figsize=(12, 12))
    dp = dataset.plot(kind='density')
    bp = dataset.plot(kind='box')
    sm = scatter_matrix(dataset, figsize=(12, 12))

    resultset = {
        "plot": write_base64_img(user_id, project_id, "plot.png", plot(dataset.plot())),
        "hp_plot": write_base64_img(user_id, project_id, "hp.png", plot(hp)),
        "dp_plot": write_base64_img(user_id, project_id, "dp.png", plot(dp)),
        "bp_plot": write_base64_img(user_id, project_id, "bp.png", plot(bp)),
        "sm_plot": write_base64_img(user_id, project_id, "sm.png", plot(sm[0][0]))
    }
    return json.dumps(resultset)
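# Hypothetical sketch of what the plot()/write_base64_img() helpers used above
# might do internally: render a matplotlib Axes to an in-memory PNG and
# base64-encode it. The helper names and behaviour are assumptions not shown in
# this file; only the matplotlib/base64 calls themselves are standard.
def _demo_axes_to_base64(ax):
    import base64
    from io import BytesIO

    buf = BytesIO()
    ax.get_figure().savefig(buf, format="png")   # render the Axes' parent figure
    buf.seek(0)
    return base64.b64encode(buf.read()).decode("ascii")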
def fit():
    req = request.get_json()
    if "params" in req:
        eps = req["params"]["eps"]
        min_samples = req["params"]["min"]
        user_id = req["params"]["user_id"]
        project_id = req["params"]["project_id"]
        filename = req["params"]["filename"]
        if user_id is None or project_id is None or filename is None:
            return apierrors.NoData()
        if "max" in req["params"]:
            max_samples = req["params"]["max"]
    else:
        return apierrors.NoData()

    fullPath = user_id + "/" + project_id + "/" + filename
    dataset = read_file(fullPath)
    if dataset is None:
        return apierrors.ErrorMessage("dataset not found")

    le = preprocessing.LabelEncoder()
    y = pd.read_csv(StringIO(dataset.decode('utf-8')))
    y = y.fillna(y.bfill())

    # Generate all combinations of (p, d, q) triplets, plus their seasonal
    # variants with a period of 12.
    p = d = q = range(0, 2)
    pdq = list(itertools.product(p, d, q))
    seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]

    # Grid-search every (p, d, q) x seasonal combination and record its AIC.
    warnings.filterwarnings("ignore")  # SARIMAX emits many convergence warnings
    pdqparams = []
    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                mod = sm.tsa.statespace.SARIMAX(
                    y,
                    order=param,
                    seasonal_order=param_seasonal,
                    enforce_stationarity=False,
                    enforce_invertibility=False)
                results = mod.fit()
                pdqparams.append([param, param_seasonal, results.aic])
            except Exception:
                continue

    # Pick the combination with the lowest AIC.
    minval = None
    minparams = None
    for i in pdqparams:
        if minval is None or i[2] < minval:
            minval = i[2]
            minparams = i

    # Re-fit with the best parameters and persist the model.
    mod = sm.tsa.statespace.SARIMAX(
        y,
        order=minparams[0],
        seasonal_order=minparams[1],
        enforce_stationarity=False,
        enforce_invertibility=False)
    results = mod.fit()

    s = pickle.dumps(mod)
    write_file(user_id, project_id, "pickle.pkl", s)
    return ""
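# Standalone sketch of the SARIMAX AIC grid search used by fit() above, on a
# short synthetic monthly series. The (p, d, q) ranges and the seasonal period
# of 12 mirror the endpoint; the series itself is illustrative only.
def _demo_sarimax_grid_search():
    import itertools
    import warnings
    import numpy as np
    import pandas as pd
    import statsmodels.api as sm

    rng = np.random.RandomState(0)
    y = pd.Series(rng.randn(48).cumsum(),
                  index=pd.date_range("2015-01-01", periods=48, freq="MS"))

    p = d = q = range(0, 2)
    pdq = list(itertools.product(p, d, q))
    seasonal_pdq = [(a, b, c, 12) for a, b, c in pdq]

    warnings.filterwarnings("ignore")
    best = None                                   # (aic, order, seasonal_order)
    for order in pdq:
        for seasonal_order in seasonal_pdq:
            try:
                res = sm.tsa.statespace.SARIMAX(
                    y, order=order, seasonal_order=seasonal_order,
                    enforce_stationarity=False,
                    enforce_invertibility=False).fit(disp=False)
            except Exception:
                continue
            if best is None or res.aic < best[0]:
                best = (res.aic, order, seasonal_order)
    print("best AIC / order / seasonal order:", best)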