示例#1
0
    def __init__(self, colormap=None, **kwds):
        """Create the heatmap module and its output table.

        Args:
            colormap: stored unchanged on ``self.colormap``.
            **kwds: keyword arguments handed to the parent constructor; the
                ``array`` input slot descriptor is registered through them
                first.
        """
        # The 'array' input slot (a Table) is declared through kwds before
        # the parent constructor consumes them.
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('array', type=Table)])
        super(Heatmap, self).__init__(table_slot='heatmap', **kwds)
        self.colormap = colormap
        self.default_step_size = 1

        # NOTE: params.filename is not defaulted here; the former default of
        # name + '%d.png' is currently disabled.
        name = self.generate_table_name('Heatmap')
        self._table = Table(name, dshape=Heatmap.schema, create=True)
示例#2
0
 def __init__(self, **kwds: Any) -> None:
     """Initialise the input module and publish its table as the result.

     All keyword arguments are forwarded to the parent constructor.
     """
     super(Input, self).__init__(**kwds)
     self.tags.add(self.TAG_INPUT)
     # Unnamed table built from the class-level schema.
     storage = Table(name=None, dshape=Input.schema, create=True)
     self.result = storage
     # Table length recorded at construction time.
     self._last = len(storage)
     self.default_step_size = 1000000
示例#3
0
 def __init__(self, colormap: None = None, **kwds: Any) -> None:
     """Initialise the heatmap module and create its result table.

     Args:
         colormap: stored unchanged on ``self.colormap``.
         **kwds: forwarded to the parent constructor, which is also told
             that the output is not required.
     """
     super(Heatmap, self).__init__(output_required=False, **kwds)
     self.tags.add(self.TAG_VISUALIZATION)
     self.colormap = colormap
     self.default_step_size = 1
     # The result table gets a generated name and the class-level schema.
     table_name = self.generate_table_name("Heatmap")
     self.result = Table(table_name, dshape=Heatmap.schema, create=True)
 def test_cmp_query(self):
     """Run a CmpQueryLast fed by a random table and a constant table."""
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, scheduler=sched)
     query = CmpQueryLast(scheduler=sched)
     # Single-row table holding the value the query compares against.
     reference = Table("cmp_table", data={'_1': [0.5]})
     constant = Constant(reference, scheduler=sched)
     query.input.cmp = constant.output.table
     query.input.table = source.output.table
     # Print the selection produced by the query as it is computed.
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = query.output.select
     sched.start()
     sched.join()
示例#5
0
class Input(TableModule):
    """Module whose table is filled with messages passed to ``from_input``."""
    parameters = [('history', np.dtype(int), 3)]
    schema = '{input: string}'

    def __init__(self, **kwds):
        """Create the module with an unnamed table following ``schema``.

        All keyword arguments are forwarded to the parent constructor.
        """
        super(Input, self).__init__(**kwds)
        self._table = Table(name=None, dshape=Input.schema, create=True)
        # Table length seen at the last run step (or at construction).
        self._last = len(self._table)
        self.default_step_size = 1000000

    def is_ready(self):
        """Return True when the table grew since the last recorded length."""
        return self._last < len(self._table)

    def run_step(self, run_number, step_size, howlong):
        """Record the current table length and report the blocked state.

        None of the three arguments is used.
        """
        self._last = len(self._table)
        return self._return_run_step(self.state_blocked, steps_run=0)

    def from_input(self, msg):
        """Add ``msg`` to the table.

        Lists and dicts are added as they are; any other value is wrapped
        in a dict under the ``input`` key first.
        """
        if isinstance(msg, (list, dict)):
            payload = msg
        else:
            payload = {'input': msg}
        self._table.add(payload)

    def is_input(self):
        """Identify this module as an input module."""
        return True
示例#6
0
# Alternative remote location of the data set (disabled):
#PREFIX= 'https://storage.googleapis.com/tlc-trip-data/2015/'
#SUFFIX= ''
PREFIX = '../nyc-taxi/'
SUFFIX = '.bz2'

# Monthly yellow-cab trip files, January to June 2015.
URLS = [
    PREFIX + 'yellow_tripdata_2015-01.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-02.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-03.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-04.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-05.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-06.csv' + SUFFIX,
]

# The list of files is served to the CSV loader as a constant table.
filenames = pd.DataFrame({'filename': URLS})
cst = Constant(Table('filenames', data=filenames), scheduler=s)
csv = CSVLoader(index_col=False,
                skipinitialspace=True,
                usecols=['dropoff_longitude', 'dropoff_latitude'],
                filter_=filter_,
                scheduler=s)
csv.input.filenames = cst.output.table

# NOTE(review): `min` and `max` shadow the builtins of the same name for the
# rest of this module; the names are kept because later statements use them.
min = Min(scheduler=s)
min.input.table = csv.output.table
max = Max(scheduler=s)
max.input.table = csv.output.table

# The 2D histogram reads the loaded table together with the Min/Max outputs.
histogram2d = Histogram2D('dropoff_longitude',
                          'dropoff_latitude',
                          xbins=RESOLUTION,
                          ybins=RESOLUTION,
                          scheduler=s)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min.output.table
histogram2d.input.max = max.output.table

# The heatmap renders the histogram output.
heatmap = Heatmap(filename='nyc_dropoff_yellow%d.png',
                  history=5,
                  scheduler=s)
heatmap.input.array = histogram2d.output.table

if __name__ == '__main__':
    s.start()
示例#7
0
 def __init__(self, **kwds):
     """Create the module with an unnamed table following ``Input.schema``.

     All keyword arguments are forwarded to the parent constructor.
     """
     super(Input, self).__init__(**kwds)
     self.default_step_size = 1000000
     self._table = Table(name=None, dshape=Input.schema, create=True)
     # Table length recorded at construction time.
     self._last = len(self._table)
# The archive suffix depends on the running Python major version.
SUFFIX = '.bz2' if six.PY3 else '.gz'

# Monthly yellow-cab trip files, January to June 2015.
URLS = [
    PREFIX + 'yellow_tripdata_2015-01.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-02.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-03.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-04.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-05.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-06.csv' + SUFFIX,
]

# The list of files is served to the CSV loader as a constant table.
FILENAMES = pd.DataFrame({'filename': URLS})
CST = Constant(Table('filenames', data=FILENAMES), scheduler=s)
CSV = CSVLoader(
    index_col=False,
    skipinitialspace=True,
    usecols=['pickup_longitude', 'pickup_latitude'],
    filter_=_filter,
    scheduler=s,
)
CSV.input.filenames = CST.output.table

# An Every module is attached to the loader output.
PR = Every(scheduler=s)
PR.input.df = CSV.output.table

# Single-class scatterplot over the pickup coordinates.
SCATTERPLOT = MCScatterPlot(
    scheduler=s,
    classes=[('Scatterplot', 'pickup_longitude', 'pickup_latitude')],
    approximate=True,
)
SCATTERPLOT.create_dependent_modules(CSV, 'table')
s.set_interaction_opts(starving_mods=SCATTERPLOT.get_starving_mods(),
示例#9
0
class Heatmap(TableModule):
    """Heatmap module.

    Renders the latest histogram received on the ``array`` input slot as a
    PNG image and records, per run, where that image can be found (a file
    name or an inline ``data:`` URI) in the module's table.
    """
    parameters = [('cmax', np.dtype(float), np.nan),
                  ('cmin', np.dtype(float), np.nan),
                  ('high', np.dtype(int), 65536), ('low', np.dtype(int), 0),
                  ('filename', np.dtype(object), None),
                  ('history', np.dtype(int), 3)]

    # One row per recorded image: its location and the run that produced it.
    schema = "{filename: string, time: int64}"

    def __init__(self, colormap=None, **kwds):
        """Create the heatmap module and its output table.

        Args:
            colormap: stored unchanged on ``self.colormap``.
            **kwds: keyword arguments handed to the parent constructor; the
                ``array`` input slot descriptor is registered through them
                first.
        """
        self._add_slots(kwds, 'input_descriptors',
                        [SlotDescriptor('array', type=Table)])
        super(Heatmap, self).__init__(table_slot='heatmap', **kwds)
        self.colormap = colormap
        self.default_step_size = 1

        # NOTE: params.filename is not defaulted here; when it is None,
        # run_step embeds the image as a data URI instead of saving a file.
        name = self.generate_table_name('Heatmap')
        self._table = Table(name, dshape=Heatmap.schema, create=True)

    def predict_step_size(self, duration):
        """Return 1 whatever ``duration`` is (it is not used)."""
        # Module sample is constant time (supposedly)
        return 1

    def run_step(self, run_number, step_size, howlong):
        """Render the latest histogram and record the resulting image.

        Args:
            run_number: identifier of the current run; substituted into a
                ``%d``-style ``filename`` parameter and stored in the
                ``time`` column of the table.
            step_size: unused.
            howlong: unused.

        Returns:
            The result of ``_return_run_step`` for the blocked state. When
            the input has no created/updated rows, holds no histogram, or
            the image cannot be created, no row is added to the table.

        Raises:
            Exception: whatever saving the image to a file raises, after it
                has been logged.
        """
        dfslot = self.get_input_slot('array')
        input_df = dfslot.data()
        dfslot.update(run_number)
        # Created rows are checked first, then updated ones; with neither
        # there is nothing to render.
        indices = dfslot.created.next()
        steps = indices_len(indices)
        if steps == 0:
            indices = dfslot.updated.next()
            steps = indices_len(indices)
            if steps == 0:
                return self._return_run_step(self.state_blocked, steps_run=1)
        with dfslot.lock:
            histo = input_df.last()['array']
        if histo is None:
            return self._return_run_step(self.state_blocked, steps_run=1)

        params = self.params
        # NaN is the "unset" value of cmin/cmax; toimage gets None instead.
        cmax = params.cmax
        if np.isnan(cmax):
            cmax = None
        cmin = params.cmin
        if np.isnan(cmin):
            cmin = None
        high = params.high
        low = params.low
        try:
            # NOTE(review): scipy.misc.toimage is absent from recent SciPy
            # releases - confirm the SciPy version this module targets.
            image = sp.misc.toimage(sp.special.cbrt(histo),
                                    cmin=cmin,
                                    cmax=cmax,
                                    high=high,
                                    low=low,
                                    mode='I')
            image = image.transpose(Image.FLIP_TOP_BOTTOM)
        except Exception:
            # Without an image there is nothing to save or encode. The
            # previous code carried on with image=None and then failed on
            # image.save() with an unrelated AttributeError.
            logger.exception('Cannot create heatmap image')
            return self._return_run_step(self.state_blocked, steps_run=1)

        filename = params.filename
        if filename is not None:
            try:
                # Substitute the run number when the name has a %d / %0Nd.
                if re.search(r'%(0[\d])?d', filename):
                    filename = filename % run_number
                filename = self.storage.fullname(self, filename)
                # TODO should do it atomically since it will be called
                # 4 times with the same fn
                image.save(filename, format='PNG')  # , bits=16)
                logger.debug('Saved image %s', filename)
            except Exception:
                logger.error('Cannot save image %s', filename)
                raise
        else:
            # No file name configured: embed the PNG as a base64 data URI.
            buffered = six.BytesIO()
            image.save(buffered, format='PNG', bits=16)
            encoded = base64.b64encode(buffered.getvalue())
            if six.PY3:
                # b64encode returns bytes on Python 3; a str is needed for
                # the concatenation below.
                encoded = str(encoded, "ascii")
            filename = "data:image/png;base64," + encoded

        # At most one row is recorded per run number.
        if len(self._table) == 0 or self._table.last()['time'] != run_number:
            values = {'filename': filename, 'time': run_number}
            with self.lock:
                self._table.add(values)
        return self._return_run_step(self.state_blocked,
                                     steps_run=1,
                                     reads=1,
                                     updates=1)

    def is_visualization(self):
        """Identify this module as a visualization."""
        return True

    def get_visualization(self):
        """Return the visualization kind, ``"heatmap"``."""
        return "heatmap"

    def to_json(self, short=False):
        """Return the parent JSON description, extended unless ``short``.

        Args:
            short: when true, the parent's description is returned as is;
                otherwise it is completed by ``heatmap_to_json``.
        """
        result = super(Heatmap, self).to_json(short)
        if short:
            return result
        return self.heatmap_to_json(result, short)

    def heatmap_to_json(self, json, short):
        """Add heatmap-specific entries to ``json`` and return it.

        Sets ``columns`` from the module connected to the ``array`` slot,
        ``bounds`` when the last input row has no NaN bound, and ``image``
        from the last recorded row once the module has been updated.

        Args:
            json: dict to complete in place.
            short: unused.
        """
        dfslot = self.get_input_slot('array')
        histo = dfslot.output_module
        json['columns'] = [histo.x_column, histo.y_column]
        with dfslot.lock:
            histo_df = dfslot.data()
            if histo_df is not None and len(histo_df) != 0:
                row = histo_df.last()
                if not (np.isnan(row['xmin']) or np.isnan(row['xmax'])
                        or np.isnan(row['ymin']) or np.isnan(row['ymax'])):
                    json['bounds'] = {
                        'xmin': row['xmin'],
                        'ymin': row['ymin'],
                        'xmax': row['xmax'],
                        'ymax': row['ymax']
                    }
        with self.lock:
            df = self._table
            if df is not None and self._last_update != 0:
                row = df.last()
                json['image'] = row['filename']
                #"/progressivis/module/image/%s?run_number=%d"%(self.name, row['time'])
        return json

    def get_image(self, run_number=None):
        """Return the image location recorded for ``run_number``.

        Args:
            run_number: run whose image is wanted. None, or a value at or
                beyond the last recorded run, selects the latest image.

        Returns:
            The ``filename`` value of the matching row, the latest one when
            no row matches ``run_number``, or None when nothing has been
            recorded yet.
        """
        table = self._table
        if table is None or len(table) == 0:
            return None
        last = table.last()
        if run_number is None or run_number >= last['time']:
            return last['filename']
        matches = np.where(table['time'] == run_number)[0]
        if len(matches) == 0:
            # No image recorded for that run: fall back to the latest one.
            return last['filename']
        return table['filename'][matches[0]]
# Alternative remote location of the data set (disabled):
#PREFIX= 'https://storage.googleapis.com/tlc-trip-data/2015/'
#SUFFIX= ''
PREFIX = '../nyc-taxi/'
SUFFIX = '.bz2'

# Monthly yellow-cab trip files, January to June 2015.
URLS = [
    PREFIX + 'yellow_tripdata_2015-01.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-02.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-03.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-04.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-05.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-06.csv' + SUFFIX,
]

# The list of files is served to the CSV loader as a constant table.
filenames = pd.DataFrame({'filename': URLS})
cst = Constant(Table('filenames', data=filenames), scheduler=s)
csv = CSVLoader(index_col=False,
                skipinitialspace=True,
                usecols=['pickup_longitude', 'pickup_latitude'],
                filter_=filter_,
                scheduler=s)
csv.input.filenames = cst.output.table

# Bounds computed from the data (disabled in favour of the constants below):
#min = Min(scheduler=s)
#min.input.df = csv.output.df
#max = Max(scheduler=s)
#max.input.df = csv.output.df

# Fixed bounds served as one-row constant tables. Each table gets its own
# name: the upper-bounds table used to be created as 'bounds_min' too.
# NOTE(review): `min` and `max` shadow the builtins of the same name; the
# names are kept because the statements that follow may rely on them.
min = Constant(table=Table('bounds_min', data=pd.DataFrame([bounds_min])),
               scheduler=s)
max = Constant(table=Table('bounds_max', data=pd.DataFrame([bounds_max])),
               scheduler=s)
histogram2d = Histogram2D('pickup_longitude',