def _get_train_costs(self):
    """Load this experiment's training losses from MongoDB.

    Queries the ``train_scores`` collection of ``self.mongo_db`` on
    ``self.mongo_host`` for documents whose ``experiment_id`` equals
    ``self.experiment_id``.

    Returns:
        A pandas DataFrame with the columns ``loss`` (requested as
        float32) and ``source_id`` (requested as int8), indexed by
        ``iteration`` (requested as int32) and sorted by that index.
    """
    # Scope the connection with a context manager so it is closed as soon
    # as the query finishes (even on error) instead of whenever the Monary
    # object happens to be garbage collected.
    with Monary(host=self.mongo_host) as monary:
        iterations, loss, source_id = monary.query(
            db=self.mongo_db,
            coll='train_scores',
            query={'experiment_id': self.experiment_id},
            fields=['iteration', 'loss', 'source_id'],
            types=['int32', 'float32', 'int8'])

    scores_df = pd.DataFrame(
        {'loss': loss, 'source_id': source_id}, index=iterations)
    return scores_df.sort_index()
def _get_train_costs(self):
    """Load this experiment's training losses from MongoDB.

    Queries the ``train_scores`` collection of ``self.mongo_db`` on
    ``self.mongo_host`` for documents whose ``experiment_id`` equals
    ``self.experiment_id``.

    Returns:
        A pandas DataFrame with the columns ``loss`` (requested as
        float32) and ``source_id`` (requested as int8), indexed by
        ``iteration`` (requested as int32) and sorted by that index.
    """
    # Scope the connection with a context manager so it is closed as soon
    # as the query finishes (even on error) instead of whenever the Monary
    # object happens to be garbage collected.
    with Monary(host=self.mongo_host) as monary:
        iterations, loss, source_id = monary.query(
            db=self.mongo_db,
            coll='train_scores',
            query={'experiment_id': self.experiment_id},
            fields=['iteration', 'loss', 'source_id'],
            types=['int32', 'float32', 'int8'])

    scores_df = pd.DataFrame(
        {'loss': loss, 'source_id': source_id}, index=iterations)
    return scores_df.sort_index()
def _plot_validation_scores_for_source_and_fold(self, ax, source_id, fold,
                                                show_axes_labels, show_scales):
    """Plot every validation metric for one source and fold.

    Reads the ``validation_scores`` documents matching
    ``self.experiment_id``, ``source_id`` and ``fold``, then draws one line
    per entry of ``self.validation_metric_names``.  The first metric is
    drawn on ``ax``; each further metric gets its own ``ax.twinx()`` so
    that every metric has an independent y-axis.

    Args:
        ax: Axes to draw on.
        source_id: Value matched against the ``source_id`` field.
        fold: Value matched against the ``fold`` field.
        show_axes_labels: When true (and ``show_scales`` is also true),
            label each y-axis with the cleaned-up metric name.
        show_scales: When true, draw the y scales on the right with one
            offset spine per extra metric; otherwise hide the tick labels
            and the right spine.

    Returns:
        The line objects returned by ``axis.plot``, one per metric, in the
        order of ``self.validation_metric_names``.

    Side effects:
        Stores the last iteration returned by the query in
        ``self._last_iteration_processed['validation']`` (left untouched
        when the query returns no rows).
    """
    metric_names = self.validation_metric_names
    fields = ['iteration'] + ['scores.' + name for name in metric_names]

    # Scope the connection so it is closed as soon as the query finishes.
    with Monary(host=self.mongo_host) as monary:
        result = monary.query(
            db=self.mongo_db,
            coll='validation_scores',
            query={
                'experiment_id': self.experiment_id,
                'source_id': source_id,
                'fold': fold
            },
            fields=fields,
            types=['int32'] + ['float32'] * len(metric_names))

    # result[0] is the iteration column; result[1:] follow metric order.
    index = result[0]
    data = {name: result[i + 1] for i, name in enumerate(metric_names)}
    df = pd.DataFrame(data, index=index)
    df = df.sort_index()
    df = self._downsample(df)

    # Create multiple independent axes. Adapted from Joe Kington's answer:
    # http://stackoverflow.com/a/7734614

    # Colours
    colors = get_colors(len(metric_names))

    # Twin the x-axis to make independent y-axes.
    axes = [ax] + [ax.twinx() for _ in metric_names[1:]]

    # Horizontal gap (in axes coordinates) between successive y-axes.
    sep = 0.2
    if show_scales:
        for i, axis in enumerate(axes):
            axis.yaxis.tick_right()
            if i != 0:
                # To make the border of the right-most axis visible,
                # we need to turn the frame on. This hides the other plots,
                # however, so we need to turn its fill off.
                axis.set_frame_on(True)
                axis.patch.set_visible(False)
                # Move the extra y-axes spines over to the right.
                axis.spines['right'].set_position(('axes', 1 + (sep * i)))
    else:
        for axis in axes:
            axis.tick_params(labelright=False, labelleft=False)
            axis.yaxis.set_ticks_position('none')
            axis.spines['right'].set_visible(False)

    for axis in axes:
        for spine in ['top', 'left', 'bottom']:
            axis.spines[spine].set_visible(False)
        axis.xaxis.set_ticks_position('none')

    # Applied in order: strip the metric-family prefixes, then turn every
    # separator into a line break so the label stacks vertically.
    label_replacements = (
        ("regression.", ""),
        ("classification_", ""),
        ("_", " "),
        (".", " "),
        (" ", "\n"),
    )

    lines = []
    for i, (axis, metric_name, color) in enumerate(
            zip(axes, metric_names, colors)):
        axis.tick_params(axis='y', colors=color, direction='out')

        label = metric_name
        for old, new in label_replacements:
            label = label.replace(old, new)

        line, = axis.plot(
            df.index, df[metric_name].values, color=color, label=label)

        if show_axes_labels and show_scales:
            axis.set_ylabel(
                label, color=color, rotation=0, fontsize=8, va='bottom')
            # For i == 0 the offset is zero, so one formula covers the
            # primary axis as well as the shifted twins.
            axis.yaxis.set_label_coords(1.05 + (sep * i), 1.1)

        lines.append(line)

    # NOTE(review): `index` is in query order, not sorted like `df`, so
    # this is the last row returned rather than necessarily the largest
    # iteration -- confirm that is the intended meaning.
    # Guard against an empty result: index[-1] would raise IndexError when
    # no validation scores have been stored yet.
    if len(index):
        self._last_iteration_processed['validation'] = index[-1]
    return lines
# Benchmark script: time how long it takes to bulk-load a handful of numeric
# parcel properties from MongoDB with Monary into a pandas DataFrame, then
# print the elapsed seconds and summary statistics.
import time

import numpy as np
import pandas as pd
from monary import Monary

mon = Monary()

columns = [
    'properties.total_residential_units',
    'properties.total_job_spaces',
    'properties.parcel_id',
    'properties.max_dua',
    'properties.max_far',
]

t1 = time.time()
numpy_arrays = mon.query(
    'togethermap',
    'places',
    {'collectionId': 'ZC7yyAyA8jkDFnRtf'},
    columns,
    ['float32'] * len(columns))

# Monary returns one masked array per column, with documents that lack the
# field masked out.  Stacking them through np.matrix discarded the masks and
# leaked the undefined underlying values into the statistics; filling with
# NaN keeps missing values missing (describe() skips NaN).
df = pd.DataFrame(
    dict((name, np.ma.filled(values, np.nan))
         for name, values in zip(columns, numpy_arrays)),
    columns=columns)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(time.time() - t1)
print(df.describe())

mon.close()
# Benchmark script: time how long it takes to bulk-load a handful of numeric
# parcel properties from MongoDB with Monary into a pandas DataFrame, then
# print the elapsed seconds and summary statistics.
import time

import numpy as np
import pandas as pd
from monary import Monary

mon = Monary()

columns = [
    'properties.total_residential_units',
    'properties.total_job_spaces',
    'properties.parcel_id',
    'properties.max_dua',
    'properties.max_far',
]

t1 = time.time()
numpy_arrays = mon.query(
    'togethermap',
    'places',
    {'collectionId': 'ZC7yyAyA8jkDFnRtf'},
    columns,
    ['float32'] * len(columns))

# Monary returns one masked array per column, with documents that lack the
# field masked out.  Stacking them through np.matrix discarded the masks and
# leaked the undefined underlying values into the statistics; filling with
# NaN keeps missing values missing (describe() skips NaN).
df = pd.DataFrame(
    dict((name, np.ma.filled(values, np.nan))
         for name, values in zip(columns, numpy_arrays)),
    columns=columns)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(time.time() - t1)
print(df.describe())

mon.close()
def _plot_validation_scores_for_source_and_fold(self, ax, source_id, fold,
                                                show_axes_labels, show_scales):
    """Plot every validation metric for one source and fold.

    Reads the ``validation_scores`` documents matching
    ``self.experiment_id``, ``source_id`` and ``fold``, then draws one line
    per entry of ``self.validation_metric_names``.  The first metric is
    drawn on ``ax``; each further metric gets its own ``ax.twinx()`` so
    that every metric has an independent y-axis.

    Args:
        ax: Axes to draw on.
        source_id: Value matched against the ``source_id`` field.
        fold: Value matched against the ``fold`` field.
        show_axes_labels: When true (and ``show_scales`` is also true),
            label each y-axis with the cleaned-up metric name.
        show_scales: When true, draw the y scales on the right with one
            offset spine per extra metric; otherwise hide the tick labels
            and the right spine.

    Returns:
        The line objects returned by ``axis.plot``, one per metric, in the
        order of ``self.validation_metric_names``.

    Side effects:
        Stores the last iteration returned by the query in
        ``self._last_iteration_processed['validation']`` (left untouched
        when the query returns no rows).
    """
    metric_names = self.validation_metric_names
    fields = ['iteration'] + ['scores.' + name for name in metric_names]

    # Scope the connection so it is closed as soon as the query finishes.
    with Monary(host=self.mongo_host) as monary:
        result = monary.query(
            db=self.mongo_db,
            coll='validation_scores',
            query={
                'experiment_id': self.experiment_id,
                'source_id': source_id,
                'fold': fold
            },
            fields=fields,
            types=['int32'] + ['float32'] * len(metric_names))

    # result[0] is the iteration column; result[1:] follow metric order.
    index = result[0]
    data = {name: result[i + 1] for i, name in enumerate(metric_names)}
    df = pd.DataFrame(data, index=index)
    df = df.sort_index()
    df = self._downsample(df)

    # Create multiple independent axes. Adapted from Joe Kington's answer:
    # http://stackoverflow.com/a/7734614

    # Colours
    colors = get_colors(len(metric_names))

    # Twin the x-axis to make independent y-axes.
    axes = [ax] + [ax.twinx() for _ in metric_names[1:]]

    # Horizontal gap (in axes coordinates) between successive y-axes.
    sep = 0.2
    if show_scales:
        for i, axis in enumerate(axes):
            axis.yaxis.tick_right()
            if i != 0:
                # To make the border of the right-most axis visible,
                # we need to turn the frame on. This hides the other plots,
                # however, so we need to turn its fill off.
                axis.set_frame_on(True)
                axis.patch.set_visible(False)
                # Move the extra y-axes spines over to the right.
                axis.spines['right'].set_position(('axes', 1 + (sep * i)))
    else:
        for axis in axes:
            axis.tick_params(labelright=False, labelleft=False)
            axis.yaxis.set_ticks_position('none')
            axis.spines['right'].set_visible(False)

    for axis in axes:
        for spine in ['top', 'left', 'bottom']:
            axis.spines[spine].set_visible(False)
        axis.xaxis.set_ticks_position('none')

    # Applied in order: strip the metric-family prefixes, then turn every
    # separator into a line break so the label stacks vertically.
    label_replacements = (
        ("regression.", ""),
        ("classification_", ""),
        ("_", " "),
        (".", " "),
        (" ", "\n"),
    )

    lines = []
    for i, (axis, metric_name, color) in enumerate(
            zip(axes, metric_names, colors)):
        axis.tick_params(axis='y', colors=color, direction='out')

        label = metric_name
        for old, new in label_replacements:
            label = label.replace(old, new)

        line, = axis.plot(
            df.index, df[metric_name].values, color=color, label=label)

        if show_axes_labels and show_scales:
            axis.set_ylabel(
                label, color=color, rotation=0, fontsize=8, va='bottom')
            # For i == 0 the offset is zero, so one formula covers the
            # primary axis as well as the shifted twins.
            axis.yaxis.set_label_coords(1.05 + (sep * i), 1.1)

        lines.append(line)

    # NOTE(review): `index` is in query order, not sorted like `df`, so
    # this is the last row returned rather than necessarily the largest
    # iteration -- confirm that is the intended meaning.
    # Guard against an empty result: index[-1] would raise IndexError when
    # no validation scores have been stored yet.
    if len(index):
        self._last_iteration_processed['validation'] = index[-1]
    return lines
# Script to save a CSV with comment data: lists the fields of one sample
# document from hp.comStats, bulk-loads three numeric columns with Monary and
# writes them to pandasTest.csv as tab-separated values.
import pymongo
import urllib3
from pymongo import MongoClient
from hpfunctions import getUrl, stripWhite, stripWhiteList
from lxml import html

import pandas
import numpy
from monary import Monary

client = MongoClient()
db = client['hp']
mon = Monary()

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('available columns')
# find_one() returns None for an empty collection, whereas find()[0] raised
# IndexError; fall back to an empty dict so nothing is listed in that case.
sample_doc = db.comStats.find_one()
for field_name in (sample_doc or {}):
    print(field_name)

columns = ['typos', 'avSyllables', 'nPunct']
# NOTE(review): 'int32:20' carries a size suffix, which Monary normally uses
# for string/binary types -- confirm it is intended for an integer column.
numpy_arrays = mon.query('hp', 'comStats', {}, columns,
                         ['int32', 'int32', 'int32:20'])

# Stack the per-column arrays into a (rows x columns) table.  A plain
# ndarray holds the same values as the numpy.matrix used previously, without
# relying on the discouraged matrix class.
table = numpy.array(numpy_arrays).transpose()
df = pandas.DataFrame(table, columns=columns)

print('starting to write file pandasTest.csv')
df.to_csv('pandasTest.csv', sep='\t')

mon.close()