def __init__(self, colormap=None, **kwds):
    """Create a Heatmap module.

    Parameters
    ----------
    colormap : optional
        Colormap used when rendering images; ``None`` selects the
        default rendering (presumably grayscale — confirm in renderer).
    **kwds
        Forwarded to the ``TableModule`` constructor.
    """
    # Declare the mandatory 'array' input slot before the base class
    # consumes the keyword arguments.
    self._add_slots(kwds, 'input_descriptors',
                    [SlotDescriptor('array', type=Table)])
    super(Heatmap, self).__init__(table_slot='heatmap', **kwds)
    self.colormap = colormap
    # One image per run step: rendering is constant time per step.
    self.default_step_size = 1
    name = self.generate_table_name('Heatmap')
    # Removed: unused local `params` and dead commented-out filename
    # defaulting code.
    self._table = Table(name, dshape=Heatmap.schema, create=True)
def __init__(self, **kwds: Any) -> None:
    """Set up the Input module with an empty anonymous backing table.

    All keyword arguments are passed through to the base constructor.
    """
    super(Input, self).__init__(**kwds)
    self.tags.add(self.TAG_INPUT)
    # Anonymous table that will accumulate every pushed message.
    backing = Table(name=None, dshape=Input.schema, create=True)
    self.result = backing
    # Row count at construction time; used to detect newly pushed rows.
    self._last = len(backing)
    self.default_step_size = 1000000
def __init__(self, colormap: Any = None, **kwds: Any) -> None:
    """Create a Heatmap visualization module.

    Parameters
    ----------
    colormap:
        Colormap used when rendering images; ``None`` selects the
        default rendering.  The original annotation ``colormap: None``
        claimed only ``None`` was acceptable, contradicting the
        parameter's purpose — widened to ``Any``.
    **kwds:
        Forwarded to the ``TableModule`` constructor.
    """
    super(Heatmap, self).__init__(output_required=False, **kwds)
    self.tags.add(self.TAG_VISUALIZATION)
    self.colormap = colormap
    # One image per run step: rendering is constant time per step.
    self.default_step_size = 1
    name = self.generate_table_name("Heatmap")
    self.result = Table(name, dshape=Heatmap.schema, create=True)
def test_cmp_query(self):
    """Run a CmpQueryLast module over a RandomTable and print the result."""
    scheduler = self.scheduler()
    random_table = RandomTable(10, rows=10000, scheduler=scheduler)
    cmp_module = CmpQueryLast(scheduler=scheduler)
    # Comparison threshold: a one-row table with column '_1' = 0.5.
    threshold = Table("cmp_table", data={'_1': [0.5]})
    constant = Constant(threshold, scheduler=scheduler)
    cmp_module.input.cmp = constant.output.table
    cmp_module.input.table = random_table.output.table
    printer = Print(proc=self.terse, scheduler=scheduler)
    printer.input.df = cmp_module.output.select
    scheduler.start()
    scheduler.join()
class Input(TableModule):
    """Module that accepts values pushed from outside the scheduler loop."""

    parameters = [('history', np.dtype(int), 3)]
    schema = '{input: string}'

    def __init__(self, **kwds):
        """Create the module backed by an empty anonymous table."""
        super(Input, self).__init__(**kwds)
        self._table = Table(name=None, dshape=Input.schema, create=True)
        # Row count seen at the last run; rows beyond it mean pending work.
        self._last = len(self._table)
        self.default_step_size = 1000000

    def is_ready(self):
        """Return True when rows arrived since the previous run step."""
        pending = len(self._table) > self._last
        return pending

    def run_step(self, run_number, step_size, howlong):
        """Acknowledge all pending rows, then block until more input."""
        self._last = len(self._table)
        return self._return_run_step(self.state_blocked, steps_run=0)

    def from_input(self, msg):
        """Append *msg* to the table; scalars are wrapped as {'input': msg}."""
        if isinstance(msg, (list, dict)):
            self._table.add(msg)
        else:
            self._table.add({'input': msg})

    def is_input(self):
        """This module is an input module."""
        return True
# PREFIX = 'https://storage.googleapis.com/tlc-trip-data/2015/'
# SUFFIX = ''
PREFIX = '../nyc-taxi/'
SUFFIX = '.bz2'

# Six months of 2015 NYC yellow-taxi trip files.
URLS = [
    PREFIX + 'yellow_tripdata_2015-01.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-02.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-03.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-04.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-05.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-06.csv' + SUFFIX,
]

filenames = pd.DataFrame({'filename': URLS})
cst = Constant(Table('filenames', data=filenames), scheduler=s)
csv = CSVLoader(index_col=False, skipinitialspace=True,
                usecols=['dropoff_longitude', 'dropoff_latitude'],
                filter_=filter_, scheduler=s)
csv.input.filenames = cst.output.table
# Renamed from `min`/`max`: do not shadow the builtins.
min_mod = Min(scheduler=s)
min_mod.input.table = csv.output.table
max_mod = Max(scheduler=s)
max_mod.input.table = csv.output.table
# 2-D histogram of dropoff coordinates, bounded by the running min/max.
histogram2d = Histogram2D('dropoff_longitude', 'dropoff_latitude',
                          xbins=RESOLUTION, ybins=RESOLUTION, scheduler=s)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min_mod.output.table
histogram2d.input.max = max_mod.output.table
# Render the histogram into PNG images, keeping the last 5.
heatmap = Heatmap(filename='nyc_dropoff_yellow%d.png', history=5, scheduler=s)
heatmap.input.array = histogram2d.output.table

if __name__ == '__main__':
    s.start()
def __init__(self, **kwds) -> None:
    """Create the Input module backed by an empty anonymous table.

    All keyword arguments are forwarded unchanged to the base
    constructor.
    """
    super(Input, self).__init__(**kwds)
    # Anonymous (unnamed) table that receives pushed input rows.
    self._table = Table(name=None, dshape=Input.schema, create=True)
    # Row count at construction; presumably compared later to detect
    # newly pushed rows -- confirm against the enclosing class.
    self._last = len(self._table)
    # Very large step size: consume all pending input in one run step.
    self.default_step_size = 1000000
# Pick the compressed-file suffix by interpreter version.
# NOTE(review): rationale for bz2-on-PY3 vs gz-on-PY2 is not visible
# here -- confirm against the data files actually present.
if six.PY3:
    SUFFIX = '.bz2'
else:
    SUFFIX = '.gz'
# Six months of 2015 NYC yellow-taxi trip files.
URLS = [
    PREFIX + 'yellow_tripdata_2015-01.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-02.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-03.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-04.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-05.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-06.csv' + SUFFIX,
]
FILENAMES = pd.DataFrame({'filename': URLS})
# Constant module feeding the file list to the CSV loader.
CST = Constant(Table('filenames', data=FILENAMES), scheduler=s)
CSV = CSVLoader(index_col=False, skipinitialspace=True,
                usecols=['pickup_longitude', 'pickup_latitude'],
                filter_=_filter, scheduler=s)
CSV.input.filenames = CST.output.table
PR = Every(scheduler=s)
PR.input.df = CSV.output.table
# Progressive multiclass scatterplot over pickup coordinates.
SCATTERPLOT = MCScatterPlot(scheduler=s,
                            classes=[('Scatterplot',
                                      'pickup_longitude',
                                      'pickup_latitude')],
                            approximate=True)
SCATTERPLOT.create_dependent_modules(CSV, 'table')
# NOTE(review): this call continues past this chunk of the file.
s.set_interaction_opts(starving_mods=SCATTERPLOT.get_starving_mods(),
class Heatmap(TableModule):
    """Render a 2-D histogram slot into PNG images, progressively.

    Each run step reads the latest histogram from the 'array' input
    slot, renders it to a PNG (saved to disk when a filename parameter
    is set, otherwise embedded as a data: URL) and records the result
    in the output table ``{filename, time}``.
    """
    parameters = [('cmax', np.dtype(float), np.nan),
                  ('cmin', np.dtype(float), np.nan),
                  ('high', np.dtype(int), 65536),
                  ('low', np.dtype(int), 0),
                  ('filename', np.dtype(object), None),
                  ('history', np.dtype(int), 3)]
    schema = "{filename: string, time: int64}"

    def __init__(self, colormap=None, **kwds):
        """Create the module; *colormap* tunes rendering (None = default)."""
        # Declare the mandatory 'array' input slot before the base class
        # consumes the keyword arguments.
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('array', type=Table)])
        super(Heatmap, self).__init__(table_slot='heatmap', **kwds)
        self.colormap = colormap
        # One image per run step: rendering is constant time per step.
        self.default_step_size = 1
        name = self.generate_table_name('Heatmap')
        self._table = Table(name, dshape=Heatmap.schema, create=True)

    def predict_step_size(self, duration):
        """Always 1: rendering one image is (supposedly) constant time."""
        _ = duration
        return 1

    def run_step(self, run_number, step_size, howlong):
        """Render the latest histogram and record the image reference."""
        dfslot = self.get_input_slot('array')
        input_df = dfslot.data()
        dfslot.update(run_number)
        # Consume created rows first, then updated rows; nothing pending
        # means we stay blocked.
        indices = dfslot.created.next()
        steps = indices_len(indices)
        if steps == 0:
            indices = dfslot.updated.next()
            steps = indices_len(indices)
            if steps == 0:
                return self._return_run_step(self.state_blocked, steps_run=1)
        with dfslot.lock:
            histo = input_df.last()['array']
        if histo is None:
            return self._return_run_step(self.state_blocked, steps_run=1)
        params = self.params
        # NaN parameters mean "let the renderer pick the bound".
        cmax = params.cmax
        if np.isnan(cmax):
            cmax = None
        cmin = params.cmin
        if np.isnan(cmin):
            cmin = None
        high = params.high
        low = params.low
        try:
            # Cube root compresses the dynamic range before rendering.
            # NOTE: sp.misc.toimage is deprecated (removed in SciPy 1.2);
            # consider PIL.Image.fromarray when upgrading.
            image = sp.misc.toimage(sp.special.cbrt(histo),
                                    cmin=cmin, cmax=cmax,
                                    high=high, low=low, mode='I')
            image = image.transpose(Image.FLIP_TOP_BOTTOM)
            filename = params.filename
        except Exception:
            # Best-effort rendering: log instead of swallowing silently.
            logger.warning('Heatmap rendering failed', exc_info=True)
            image = None
            filename = None
        if image is None:
            # Rendering failed; nothing to save or encode this run.
            return self._return_run_step(self.state_blocked, steps_run=1)
        if filename is not None:
            try:
                # Expand a %d-style placeholder with the run number.
                if re.search(r'%(0[\d])?d', filename):
                    filename = filename % (run_number)
                filename = self.storage.fullname(self, filename)
                # TODO should do it atomically since it will be called
                # 4 times with the same fn
                image.save(filename, format='PNG')  # , bits=16)
                logger.debug('Saved image %s', filename)
                image = None
            except Exception:
                logger.error('Cannot save image %s', filename)
                raise
        else:
            # No filename configured: embed the PNG as a data: URL.
            buffered = six.BytesIO()
            image.save(buffered, format='PNG', bits=16)
            res = base64.b64encode(buffered.getvalue())
            if six.PY3:
                # Encode once, decode to text (the original re-encoded).
                res = str(res, "ascii")
            filename = "data:image/png;base64," + res
        # Record at most one row per run number.
        if len(self._table) == 0 or self._table.last()['time'] != run_number:
            values = {'filename': filename, 'time': run_number}
            with self.lock:
                self._table.add(values)
        return self._return_run_step(self.state_blocked, steps_run=1,
                                     reads=1, updates=1)

    def is_visualization(self):
        """Heatmap is a visualization module."""
        return True

    def get_visualization(self):
        """Return the visualization kind identifier."""
        return "heatmap"

    def to_json(self, short=False):
        """Serialize module state; the long form includes image/bounds."""
        json = super(Heatmap, self).to_json(short)
        if short:
            return json
        return self.heatmap_to_json(json, short)

    def heatmap_to_json(self, json, short):
        """Fill *json* with histogram columns, bounds and the last image."""
        dfslot = self.get_input_slot('array')
        histo = dfslot.output_module
        json['columns'] = [histo.x_column, histo.y_column]
        with dfslot.lock:
            histo_df = dfslot.data()
            if histo_df is not None and len(histo_df) != 0:
                row = histo_df.last()
                if not (np.isnan(row['xmin']) or np.isnan(row['xmax'])
                        or np.isnan(row['ymin']) or np.isnan(row['ymax'])):
                    json['bounds'] = {
                        'xmin': row['xmin'],
                        'ymin': row['ymin'],
                        'xmax': row['xmax'],
                        'ymax': row['ymax']
                    }
        with self.lock:
            df = self._table
            if df is not None and self._last_update != 0:
                row = df.last()
                json['image'] = row['filename']
        return json

    def get_image(self, run_number=None):
        """Return the image filename for *run_number* (latest if None).

        Falls back to the most recent image when the requested run
        number is newer than, or absent from, the recorded history.
        """
        if self._table is None or len(self._table) == 0:
            return None
        last = self._table.last()
        if run_number is None or run_number >= last['time']:
            run_number = last['time']
            filename = last['filename']
        else:
            time = self._table['time']
            idx = np.where(time == run_number)[0]
            if len(idx) == 0:
                filename = last['filename']
            else:
                filename = self._table['filename'][idx[0]]
        return filename
#PREFIX= 'https://storage.googleapis.com/tlc-trip-data/2015/' #SUFFIX= '' PREFIX = '../nyc-taxi/' SUFFIX = '.bz2' URLS = [ PREFIX + 'yellow_tripdata_2015-01.csv' + SUFFIX, PREFIX + 'yellow_tripdata_2015-02.csv' + SUFFIX, PREFIX + 'yellow_tripdata_2015-03.csv' + SUFFIX, PREFIX + 'yellow_tripdata_2015-04.csv' + SUFFIX, PREFIX + 'yellow_tripdata_2015-05.csv' + SUFFIX, PREFIX + 'yellow_tripdata_2015-06.csv' + SUFFIX, ] filenames = pd.DataFrame({'filename': URLS}) cst = Constant(Table('filenames', data=filenames), scheduler=s) csv = CSVLoader(index_col=False, skipinitialspace=True, usecols=['pickup_longitude', 'pickup_latitude'], filter_=filter_, scheduler=s) csv.input.filenames = cst.output.table #min = Min(scheduler=s) #min.input.df = csv.output.df #max = Max(scheduler=s) #max.input.df = csv.output.df min = Constant(table=Table('bounds_min', data=pd.DataFrame([bounds_min])), scheduler=s) max = Constant(table=Table('bounds_min', data=pd.DataFrame([bounds_max])), scheduler=s) histogram2d = Histogram2D('pickup_longitude',