def two_axis_plot_compute():
    """Produce the two-axis plot, or the HTML context that embeds it.

    All inputs come from the ``request`` global:

    * ``request.args`` -- ids converted with ``ObjectId`` and used both in
      the ``category`` filter given to ``mongo2pandas`` and, in the non-PCA
      image branch, as the x (first) and y (second) columns of the plot.
    * ``request.vars.study`` -- forwarded to ``mongo2pandas``.
    * ``request.vars.pca`` -- when truthy, the plot data is a 2-component
      PCA projection of ``d * w`` instead of per-category ratings.
    * ``request.vars.cluster`` -- when truthy, K-means labels are stored in
      the ``'c'`` column; ``request.vars.n_clusters`` gives the cluster
      count and ``request.vars.use_categories`` (non-PCA only) makes the
      clustering run on the per-category ratings rather than on ``d * w``.
    * ``request.vars.type`` -- ``'html'`` or ``'png'``.

    Returns:
        For ``'html'``: ``dict(img=...)``, plus ``ctable`` (a DataFrame
        with one column per cluster listing its sorted members, padded
        with ``''``) when clustering was requested.
        For ``'png'``: whatever ``mplt.two_axis_plot`` returns.
        For any other type the function falls through and returns None.

    NOTE(review): the visible source defines ``two_axis_plot_compute``
    twice; the later definition replaces this one when the module runs.
    """
    category_ids = [ObjectId(el) for el in request.args]
    d, w, _, cats = mongo2pandas(dbm, request.vars.study,
                                 {'category': {'$in': category_ids}})

    if request.vars.pca:
        # Project the weighted data onto its first two principal components.
        weighted = (d * w).values
        pca = PCA(n_components=2)
        pca.fit(weighted)
        pdata = pandas.DataFrame(index=d.index, columns=['x', 'y', 'c'])
        pdata[['x', 'y']] = pca.transform(weighted)
    else:
        columns = list(cats.keys())
        columns.append('c')
        pdata = pandas.DataFrame(index=d.index, columns=columns)
        # Weighted rating per category.
        for _cid in cats:
            mask = cats[_cid]
            cw = w[mask]
            # NOTE(review): normalises by the row-sum entry selected by [0];
            # presumably every row of w carries the same weights -- confirm.
            cw = cw / cw.sum(axis=1)[0]
            pdata[_cid] = (d[mask] * cw).sum(axis=1)
    # Default colour/cluster value; overwritten below when clustering.
    pdata['c'] = 0

    if request.vars.cluster:
        # K-means clustering
        k_means = KMeans(n_clusters=int(request.vars.n_clusters))
        if request.vars.use_categories and not request.vars.pca:
            rating_columns = [c for c in pdata.columns if c != 'c']
            k_means.fit(pdata[rating_columns].values)
        else:
            k_means.fit((d * w).values)
        pdata['c'] = k_means.labels_

    if request.vars.type == 'html':
        # The embedded image is served by this same action with type=png.
        request.vars['type'] = 'png'
        img = IMG(_src=URL(r=request, c='analyse',
                           f='two_axis_plot_compute',
                           args=request.args, vars=request.vars))
        if not request.vars.cluster:
            return dict(img=img)

        # Table with one column per cluster, for display next to the plot.
        labels = k_means.labels_
        cluster_ids = np.unique(labels)
        largest = np.bincount(labels).max()
        ctable = pandas.DataFrame('', index=np.arange(largest),
                                  columns=np.arange(len(cluster_ids)),
                                  dtype=object)
        for col, cluster_id in enumerate(cluster_ids):
            companies = sorted(d.index[labels == cluster_id])
            # Positional assignment writes into ctable itself; the column
            # labels are 0..k-1, so position and label coincide.
            ctable.iloc[:len(companies), col] = companies
        return dict(img=img, ctable=ctable)

    elif request.vars.type == 'png':
        if request.vars.pca:
            return mplt.two_axis_plot(
                pdata,
                xlabel=u'Проекция 1',
                ylabel=u'Проекция 2',
                title=u'Визуализация методом главных компонентов')

        cats_db = list(dbm.categories.find({'_id': {'$in': category_ids}}))

        def _title(arg):
            # List comprehension instead of filter(...)[0], which is not
            # subscriptable on Python 3; still IndexError when not found.
            wanted = ObjectId(arg)
            return [c for c in cats_db if c['_id'] == wanted][0]['title']

        return mplt.two_axis_plot(xval=pdata[request.args[0]],
                                  yval=pdata[request.args[1]],
                                  c=pdata['c'],
                                  xlabel=_title(request.args[0]),
                                  ylabel=_title(request.args[1]))
def two_axis_plot_compute():
    """Produce the two-axis plot, or the HTML context that embeds it.

    All inputs come from the ``request`` global:

    * ``request.args`` -- ids converted with ``ObjectId`` and used both in
      the ``category`` filter given to ``mongo2pandas`` and, in the non-PCA
      image branch, as the x (first) and y (second) columns of the plot.
    * ``request.vars.study`` -- forwarded to ``mongo2pandas``.
    * ``request.vars.pca`` -- when truthy, the plot data is a 2-component
      PCA projection of ``d * w`` instead of per-category ratings.
    * ``request.vars.cluster`` -- when truthy, K-means labels are stored in
      the ``'c'`` column; ``request.vars.n_clusters`` gives the cluster
      count and ``request.vars.use_categories`` (non-PCA only) makes the
      clustering run on the per-category ratings rather than on ``d * w``.
    * ``request.vars.type`` -- ``'html'`` or ``'png'``.

    Returns:
        For ``'html'``: ``dict(img=...)``, plus ``ctable`` (a DataFrame
        with one column per cluster listing its sorted members, padded
        with ``''``) when clustering was requested.
        For ``'png'``: whatever ``mplt.two_axis_plot`` returns.
        For any other type the function falls through and returns None.

    NOTE(review): the visible source contains an earlier definition with
    the same name; this later one is what stays bound after the module
    runs, so the earlier copy is dead code.
    """
    category_ids = [ObjectId(el) for el in request.args]
    d, w, _, cats = mongo2pandas(dbm, request.vars.study,
                                 {'category': {'$in': category_ids}})

    if request.vars.pca:
        # Project the weighted data onto its first two principal components.
        weighted = (d * w).values
        pca = PCA(n_components=2)
        pca.fit(weighted)
        pdata = pandas.DataFrame(index=d.index, columns=['x', 'y', 'c'])
        pdata[['x', 'y']] = pca.transform(weighted)
    else:
        columns = list(cats.keys())
        columns.append('c')
        pdata = pandas.DataFrame(index=d.index, columns=columns)
        # Weighted rating per category.
        for _cid in cats:
            mask = cats[_cid]
            cw = w[mask]
            # NOTE(review): normalises by the row-sum entry selected by [0];
            # presumably every row of w carries the same weights -- confirm.
            cw = cw / cw.sum(axis=1)[0]
            pdata[_cid] = (d[mask] * cw).sum(axis=1)
    # Default colour/cluster value; overwritten below when clustering.
    pdata['c'] = 0

    if request.vars.cluster:
        # K-means clustering
        k_means = KMeans(n_clusters=int(request.vars.n_clusters))
        if request.vars.use_categories and not request.vars.pca:
            rating_columns = [c for c in pdata.columns if c != 'c']
            k_means.fit(pdata[rating_columns].values)
        else:
            k_means.fit((d * w).values)
        pdata['c'] = k_means.labels_

    if request.vars.type == 'html':
        # The embedded image is served by this same action with type=png.
        request.vars['type'] = 'png'
        img = IMG(_src=URL(r=request, c='analyse',
                           f='two_axis_plot_compute',
                           args=request.args, vars=request.vars))
        if not request.vars.cluster:
            return dict(img=img)

        # Table with one column per cluster, for display next to the plot.
        labels = k_means.labels_
        cluster_ids = np.unique(labels)
        largest = np.bincount(labels).max()
        ctable = pandas.DataFrame('', index=np.arange(largest),
                                  columns=np.arange(len(cluster_ids)),
                                  dtype=object)
        for col, cluster_id in enumerate(cluster_ids):
            companies = sorted(d.index[labels == cluster_id])
            # Positional assignment writes into ctable itself; the column
            # labels are 0..k-1, so position and label coincide.
            ctable.iloc[:len(companies), col] = companies
        return dict(img=img, ctable=ctable)

    elif request.vars.type == 'png':
        if request.vars.pca:
            return mplt.two_axis_plot(
                pdata,
                xlabel=u'Проекция 1',
                ylabel=u'Проекция 2',
                title=u'Визуализация методом главных компонентов')

        cats_db = list(dbm.categories.find({'_id': {'$in': category_ids}}))

        def _title(arg):
            # List comprehension instead of filter(...)[0], which is not
            # subscriptable on Python 3; still IndexError when not found.
            wanted = ObjectId(arg)
            return [c for c in cats_db if c['_id'] == wanted][0]['title']

        return mplt.two_axis_plot(xval=pdata[request.args[0]],
                                  yval=pdata[request.args[1]],
                                  c=pdata['c'],
                                  xlabel=_title(request.args[0]),
                                  ylabel=_title(request.args[1]))
def bar_plot_compute():
    """Return the bar plot for the columns named in ``request.args``.

    Loads the study given by ``request.vars.study`` through
    ``mongo2pandas``, selects the columns listed in ``request.args`` from
    the first returned frame, and hands that selection together with the
    third returned value (``labels``) to ``mplt.bar_plot``.
    """
    # The second and fourth values returned by mongo2pandas are not needed.
    frame, _weights, labels, _cats = mongo2pandas(dbm, request.vars.study)
    selected_columns = list(request.args)
    selection = frame[selected_columns]
    return mplt.bar_plot(selection, labels)