    def _execute(self, sources, alignment_stream, interval):
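        # Pair experiment start (trigger == 1) and end triggers by their Mongo
        # document id, then yield each completed experiment document at its end
        # time, provided its interval falls inside the extended window.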
        max_interval = TimeInterval(MIN_DATE, interval.end)
        exp_list = {}
        for timestamp, value in sources[0].window(max_interval, force_calculation=True):
            if value['tier'] != "Experiment":
                continue
            d = deepcopy(value)
            mongo_id = d.pop('_id')
            trigger = d.pop('trigger')
            if trigger == 1:
                u = {'start': timestamp}
            else:
                u = {'end': timestamp}

            if mongo_id in exp_list:
                if next(iter(u)) in exp_list[mongo_id]:
                    raise ValueError("Duplicate {} triggers found for timestamp {}".format(trigger, timestamp))
                exp_list[mongo_id].update(u)
            else:
                d.update(u)
                exp_list[mongo_id] = d

        for i, doc in enumerate(exp_list.values()):
            if TimeInterval(doc['start'], doc['end']) in max_interval:
                yield StreamInstance(doc['end'], doc)
Example #2
def run(house, wearables, delete_existing_workflows=True, loglevel=logging.INFO):
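    # Build (or reload) the localisation model prediction workflow for this
    # house, execute it over the last minute, and display the predictions.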
    from hyperstream import HyperStream, TimeInterval, StreamNotFoundError
    from workflows.asset_splitter import split_sphere_assets
    from workflows.deploy_localisation_model import create_workflow_localisation_predict
    # from workflows.deploy_localisation_model_new_api import create_workflow_localisation_predict

    hyperstream = HyperStream(loglevel=loglevel, file_logger=None)
    D = hyperstream.channel_manager.mongo
    A = hyperstream.channel_manager.assets

    experiment_ids = A.find_stream(name="experiments_selected", house=house).window(
        TimeInterval.up_to_now()).last().value

    experiment_ids_str = '_'.join(experiment_ids)
    workflow_id0 = "asset_splitter"
    workflow_id1 = "lda_localisation_model_predict_"+experiment_ids_str

    if delete_existing_workflows:
        hyperstream.workflow_manager.delete_workflow(workflow_id0)
        hyperstream.workflow_manager.delete_workflow(workflow_id1)

    split_sphere_assets(hyperstream, house)

    try:
        w = hyperstream.workflow_manager.workflows[workflow_id1]
    except KeyError:
        w = create_workflow_localisation_predict(hyperstream, house=house, experiment_ids=experiment_ids, safe=False)
        hyperstream.workflow_manager.commit_workflow(workflow_id1)

    # def safe_purge(channel, stream_id):
    #     try:
    #         channel.purge_stream(stream_id)
    #     except StreamNotFoundError:
    #         pass

    # A.purge_node("wearables_by_house")
    # A.purge_node("access_points_by_house")
    # D.purge_node("predicted_locations_broadcasted")

    # for h in [1, 2, 1176, 1116]:
    #     safe_purge(A, StreamId(name="wearables_by_house", meta_data=(('house', h),)))
    #     safe_purge(A, StreamId(name="access_points_by_house", meta_data=(('house', h),)))
    #     for w in wearables:
    #         safe_purge(D, StreamId(name="predicted_locations_broadcasted", meta_data=(('house', h), ('wearable', w))))

    ti0 = TimeInterval.up_to_now()
    ti1 = TimeInterval.now_minus(minutes=1)

    # ti0 = TimeInterval(MIN_DATE, parse("2016-12-02 17:14:25.075Z"))
    # ti1 = TimeInterval(start=ti0.end - timedelta(minutes=1), end=ti0.end)

    w.execute(ti1)

    print('number of non_empty_streams: {}'.format(
        len(hyperstream.channel_manager.memory.non_empty_streams)))

    from display_localisation_predictions import display_predictions
    display_predictions(hyperstream, ti1, house, wearables=wearables)
Example #3
    def test_time_interval(self):
        i1 = TimeIntervals([
            TimeInterval(now, now + hour),
            TimeInterval(now + 2 * hour, now + 3 * hour),
        ])

        i2 = TimeIntervals([
            TimeInterval(now + 30 * minute, now + 30 * minute + 2 * hour),
        ])

        # print(i1)
        assert (i1 == TimeIntervals(intervals=[TimeInterval(start=datetime(2016, 1, 1, 0, 0),
                                                            end=datetime(2016, 1, 1, 1, 0)),
                                               TimeInterval(start=datetime(2016, 1, 1, 2, 0),
                                                            end=datetime(2016, 1, 1, 3, 0))]))

        # print(i2)
        # print()
        s = i1 + i2
        assert (s == TimeIntervals(intervals=[TimeInterval(start=datetime(2016, 1, 1, 0, 0),
                                                           end=datetime(2016, 1, 1, 3, 0))]))

        d = i1 - i2

        assert (d == TimeIntervals(intervals=[TimeInterval(start=datetime(2016, 1, 1, 0, 0),
                                                           end=datetime(2016, 1, 1, 0, 30)),
                                              TimeInterval(start=datetime(2016, 1, 1, 2, 30),
                                                           end=datetime(2016, 1, 1, 3, 0))]))
Example #4
    def _execute(self, source, splitting_stream, interval, output_plate):
        # The development of this tool has not been finished
        raise NotImplementedError
        if splitting_stream is None:
            raise ValueError("Splitting stream required for this tool")

        if isinstance(splitting_stream, AssetStream):
            time_interval = TimeInterval(MIN_DATE, interval.end)
            splitter = splitting_stream.window(time_interval,
                                               force_calculation=True).last()
        else:
            splitter = splitting_stream.window(interval,
                                               force_calculation=True).last()

        if not splitter:
            logging.debug(
                "No assets found for source {} and splitter {}".format(
                    source.stream_id, splitting_stream.stream_id))
            return

        mapping = splitter.value

        for timestamp, value in source.window(interval,
                                              force_calculation=True):
            for key in value.keys():
                if key not in mapping:
                    logging.warn(
                        "Unknown value {} for meta data in SplitterOfDictFromStream"
                        .format(key))
                    continue
                plate_value = mapping[key]
                yield StreamMetaInstance(
                    (timestamp, value[key]),
                    (output_plate.meta_data_id, plate_value))
Example #5
    def test_tool_channel_new_api(self):
        with HyperStream(file_logger=False,
                         console_logger=False,
                         mqtt_logger=None) as hs:
            M = hs.channel_manager.memory

            # new way of loading tools
            clock_new = hs.tools.clock()

            # old way of loading tools
            clock_old = hs.channel_manager.tools["clock"].window(
                (MIN_DATE, utcnow())).last().value()

            # TODO: NOTE THAT IF WE DO IT THE OLD WAY FIRST, THEN THE NEW WAY FAILS WITH:
            # TypeError: super(type, obj): obj must be an instance or subtype of type
            # which possibly relates to:
            # https://stackoverflow.com/questions/9722343/python-super-behavior-not-dependable

            ticker_old = M.get_or_create_stream("ticker_old")
            ticker_new = M.get_or_create_stream("ticker_new")

            now = utcnow()
            before = (now - timedelta(seconds=30)).replace(tzinfo=UTC)
            ti = TimeInterval(before, now)

            clock_old.execute(sources=[], sink=ticker_old, interval=ti)
            clock_new.execute(sources=[], sink=ticker_new, interval=ti)

            self.assertListEqual(ticker_old.window().values(),
                                 ticker_new.window().values())
Example #6
    def test_simple_workflow(self):
        # Create a simple one step workflow for querying
        w = hyperstream.create_workflow(
            workflow_id="simple_query_workflow",
            name="Simple query workflow",
            owner="TD",
            description="Just a test of creating workflows")

        time_interval = TimeInterval(t1, t1 + 1 * minute)

        # Create some streams (collected in a node)
        node = w.create_node(stream_name="environmental",
                             channel=S,
                             plate_ids=["H1"])  # .window((t1, t1 + 1 * minute))

        # Create a factor to produce some data
        w.create_multi_output_factor(
            tool=dict(name="sphere", parameters=dict(modality="environmental")),
            source=None,
            splitting_node=None,
            sink=node)

        # Execute the workflow
        w.execute(time_interval)

        # Check the values
        assert (node.streams[(('house', '1'),)].window(time_interval).first().value ==
                {u'electricity': 0.0, 'uid': u'04063'})
    def test_basic_aggregator(self):
        """
        An average of RSS values per location
        """
        # access_points_by_wearable_and_house().execute(TimeInterval.up_to_now())

        w = basic_workflow(sys._getframe().f_code.co_name)

        N = w.nodes
        w.create_factor(
            tool=channels.get_tool(
                name="aggregate",
                parameters=dict(func=online_average, aggregation_meta_data="wearable")
            ),
            sources=[N["rss"]],
            sink=N["rss_dev_avg"]
        )

        time_interval = TimeInterval(scripted_experiments[0].start, scripted_experiments[0].start + 2 * minute)
        w.execute(time_interval)

        print_head(w, "rss", h1 + wA, locs, time_interval, 10, print)
        print_head(w, "rss_dev_avg", h1, locs, time_interval, 10, print)

        assert all(list(N["rss_dev_avg"].streams[k].window(time_interval).head(10)) == v
                   for k, v in RSS_DEV_AVG.items())
    def test_off_plate_aggregator(self):
        """
        This is a test for aggregation where the aggregate is up the tree, but the destination plate is not in the
        ancestry. For example:
         source plate: H1.L.W
         aggregate:    L
         destination:  H1.W

        Note here that H1.W is not an ancestor of H1.L.W (only H1 and H1.L are), so we have to figure out that H1.W is
        a valid destination, based on the fact that all but one of the meta data id's are shared.
        """
        w = basic_workflow(sys._getframe().f_code.co_name)

        aggregate_loc = channels.get_tool(
            name="aggregate",
            parameters=dict(func=online_average, aggregation_meta_data="location")
        )

        N = w.nodes
        w.create_factor(
            tool=aggregate_loc,
            sources=[N["rss"]],
            sink=N["rss_loc_avg"]
        )

        time_interval = TimeInterval(scripted_experiments[0].start, scripted_experiments[0].start + 2 * minute)
        w.execute(time_interval)

        print_head(w, "rss", h1 + wA, locs, time_interval, 10, print)
        print_head(w, "rss_loc_avg", h1, wA, time_interval, 10, print)

        assert all(list(N["rss_loc_avg"].streams[k].window(time_interval).head(10)) == v
                   for k, v in RSS_LOC_AVG.items())
    def test_index_of_by_stream(self):
        w = basic_workflow(sys._getframe().f_code.co_name)

        aggregate_loc = channels.get_tool(
            name="index_of_by_stream",
            parameters=dict(index="kitchen")
        )

        # Create a stream with the single value "location" in it
        w.create_node(stream_name="selector_meta_data", channel=A, plate_ids=None)

        A.write_to_stream(stream_id=StreamId(name="selector_meta_data"),
                          data=StreamInstance(timestamp=utcnow(), value="location"))

        N = w.nodes
        w.create_factor(
            tool=aggregate_loc,
            sources=[N["selector_meta_data"], N["rss"]],
            sink=N["rss_kitchen"]
        )

        time_interval = TimeInterval(scripted_experiments[0].start, scripted_experiments[0].start + 2 * minute)
        w.execute(time_interval)

        key = h1 + (('location', 'kitchen'),) + wA

        assert all(a == b for a, b in zip(N['rss_kitchen'].streams[h1 + wA].window(time_interval).head(10),
                                          N['rss'].streams[key].window(time_interval).head(10)))
    def test_save_workflow(self):
        workflow_id = sys._getframe().f_code.co_name

        # hyperstream.logger.setLevel(logging.WARN)

        # First delete the workflow if it's there
        hyperstream.workflow_manager.delete_workflow(workflow_id)

        w1 = basic_workflow(workflow_id)

        time_interval = TimeInterval(
            scripted_experiments[0].start,
            scripted_experiments[0].start + 2 * minute)
        w1.execute(time_interval)

        hyperstream.workflow_manager.commit_workflow(workflow_id)

        # Now remove it from the workflow manager
        del hyperstream.workflow_manager.workflows[workflow_id]

        # And then reload it
        w2 = hyperstream.workflow_manager.load_workflow(workflow_id)

        # print_head(w, "rss", h1 + wA, locs, time_interval, 10, print)
        # print_head(w, "rss_dev_avg", h1, locs, time_interval, 10, print)

        assert all(
            list(w1.nodes["rss_dev_avg"].streams[k].window(time_interval).head(
                10)) == v for k, v in RSS_DEV_AVG.items())

        assert all(
            list(w2.nodes["rss_dev_avg"].streams[k].window(time_interval).head(
                10)) == v for k, v in RSS_DEV_AVG.items())
    def _execute(self, source, splitting_stream, interval, output_plate):
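        # Split each source document onto the output plate: the field named by
        # self.element supplies the meta data value, which is mapped to a plate
        # value via the last value of the splitting stream.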
        if splitting_stream is None:
            raise ValueError("Splitting stream required for this tool")

        if isinstance(splitting_stream, AssetStream):
            time_interval = TimeInterval(MIN_DATE, interval.end)
            splitter = splitting_stream.window(time_interval,
                                               force_calculation=True).last()
        else:
            splitter = splitting_stream.window(interval,
                                               force_calculation=True).last()

        if not splitter:
            logging.debug(
                "No assets found for source {} and splitter {}".format(
                    source.stream_id, splitting_stream.stream_id))
            return

        mapping = splitter.value

        for timestamp, value in source.window(interval,
                                              force_calculation=True):
            if self.element not in value:
                logging.debug("Mapping element {} not in instance".format(
                    self.element))
                continue
            value = deepcopy(value)
            meta_data = str(value.pop(self.element))
            if meta_data not in mapping:
                logging.warn("Unknown value {} for meta data {}".format(
                    meta_data, self.element))
                continue
            plate_value = mapping[meta_data]
            yield StreamMetaInstance((timestamp, value),
                                     (output_plate.meta_data_id, plate_value))
def run(delete_existing_workflows=True, loglevel=logging.INFO):
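    # Create (or reload) the coord3d plate creation workflow and the periodic
    # summaries workflow, then execute each over the last minute.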
    from hyperstream import HyperStream, TimeInterval
    from workflows.deploy_summariser import create_workflow_coord_plate_creation, create_workflow_summariser
    from sphere_connector_package.sphere_connector import SphereConnector

    hyperstream = HyperStream(loglevel=loglevel, file_logger=None)

    if not globs['sphere_connector']:
        globs['sphere_connector'] = SphereConnector(
            config_filename='config.json',
            include_mongo=True,
            include_redcap=False,
            sphere_logger=None)

    workflow_id = "coord3d_plate_creation"
    if delete_existing_workflows:
        hyperstream.workflow_manager.delete_workflow(workflow_id)
    try:
        w = hyperstream.workflow_manager.workflows[workflow_id]
    except KeyError:
        w = create_workflow_coord_plate_creation(hyperstream, safe=False)
        hyperstream.workflow_manager.commit_workflow(workflow_id)

    time_interval = TimeInterval.now_minus(minutes=1)
    w.execute(time_interval)

    workflow_id = "periodic_summaries"
    if delete_existing_workflows:
        hyperstream.workflow_manager.delete_workflow(workflow_id)
    try:
        w = hyperstream.workflow_manager.workflows[workflow_id]
    except KeyError:

        w = create_workflow_summariser(hyperstream,
                                       env_window_size=1 * 60 * 60.0,
                                       rss_window_size=4 * 60 * 60.0,
                                       acc_window_size=4 * 60 * 60.0,
                                       vid_window_size=4 * 60 * 60.0,
                                       pred_window_size=4 * 60 * 60.0,
                                       safe=False)
        hyperstream.workflow_manager.commit_workflow(workflow_id)

    time_interval = TimeInterval.now_minus(minutes=1)
    w.execute(time_interval)

    print('number of non_empty_streams: {}'.format(
        len(hyperstream.channel_manager.memory.non_empty_streams)))
Example #13
    def _execute(self, source, splitting_stream, interval, output_plate):
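        # Same splitting pattern as the earlier splitter example, except that
        # the mapping may be a plain list (keys map to themselves) and, when
        # self.element is None, every key of the document is split onto the
        # output plate.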
        if splitting_stream is None:
            raise ValueError("Splitting stream required for this tool")

        if isinstance(splitting_stream, AssetStream):
            time_interval = TimeInterval(MIN_DATE, interval.end)
            splitter = splitting_stream.window(time_interval,
                                               force_calculation=True).last()
        else:
            splitter = splitting_stream.window(interval,
                                               force_calculation=True).last()

        if not splitter:
            logging.debug(
                "No assets found for source {} and splitter {}".format(
                    source.stream_id, splitting_stream.stream_id))
            return

        mapping = splitter.value

        try:  # try if mapping is a dict
            if len(mapping.keys()) == 0:
                logging.warn(
                    "The mapping provided to splitter_from_stream by the last element of the splitting stream is empty"
                )
            if self.use_mapping_keys_only:
                mapping = dict([(x, x) for x in mapping.keys()])
        except AttributeError:  # mapping has no keys(); assume it is a list
            mapping = dict([(x, x) for x in mapping])

        for timestamp, value in source.window(interval,
                                              force_calculation=True):
            if self.element is None:
                for meta_data, sub_value in value.items():
                    if meta_data not in mapping:
                        logging.warn(
                            "Unexpected splitting value {}".format(meta_data))
                        continue
                    plate_value = mapping[meta_data]
                    yield StreamMetaInstance(
                        (timestamp, sub_value),
                        (output_plate.meta_data_id, plate_value))
            else:
                if self.element not in value:
                    logging.debug("Mapping element {} not in instance".format(
                        self.element))
                    continue
                value = deepcopy(value)
                meta_data = str(value.pop(self.element))
                if meta_data not in mapping:
                    logging.warn("Unknown value {} for meta data {}".format(
                        meta_data, self.element))
                    continue
                plate_value = mapping[meta_data]
                yield StreamMetaInstance(
                    (timestamp, value),
                    (output_plate.meta_data_id, plate_value))
    def _execute(self, sources, alignment_stream, interval):
        sliding_window = sources[0].window(interval, force_calculation=True)
        result = sliding_window.first()
        if result is None:
            return
        sources[1].window(TimeInterval(interval.start, interval.end),
                          force_calculation=True).first()
        return
        yield  # required to make this function into a generator
    def _execute(self, sources, alignment_stream, interval):
        data = sources[0].window(interval, force_calculation=True)
        mappings = []
        for x in data:
            experiment_interval = TimeInterval(x.value['start'],
                                               x.value['end'])
            experiment_id = construct_experiment_id(experiment_interval)
            if experiment_id in self.experiment_ids:
                mappings.append((experiment_id, experiment_interval))
        yield StreamInstance(interval.end, mappings)
Example #16
    def test_database_channel(self):
        # Simple querying
        ti = TimeInterval(t1, t1 + minute)

        # Get or create the stream that lives in the database
        env = D.get_or_create_stream(
            stream_id=StreamId('environmental_db', (("house", "1"), )))

        D.purge_stream(env.stream_id)

        env_tool = channels.get_tool(
            "sphere",
            dict(modality="environmental", rename_keys=True, dedupe=True))

        env_tool.execute(source=None,
                         splitting_stream=None,
                         sinks=[env],
                         interval=ti,
                         input_plate_value=None,
                         output_plate=hyperstream.plate_manager.plates["H"])

        # Create stream whose source will be the above database stream
        elec = M.create_stream(StreamId('electricity'))

        env_tool = channels.get_tool(
            "sphere",
            dict(modality="environmental", rename_keys=True, dedupe=True))
        elec_tool = T[component].window(
            (MIN_DATE, utcnow())).last().value(key='electricity-04063')

        env_tool.execute(source=None,
                         splitting_stream=None,
                         sinks=[env],
                         interval=ti,
                         input_plate_value=None,
                         output_plate=hyperstream.plate_manager.plates["H"])

        elec_tool.execute(sources=[env],
                          sink=elec,
                          interval=ti,
                          alignment_stream=None)

        q1 = "\n".join("=".join(map(str, ee)) for ee in elec.window(ti))

        # print(q1)
        # print(edl)

        assert (q1 == '2016-04-28 20:00:00.159000+00:00=0.0\n'
                '2016-04-28 20:00:06.570000+00:00=0.0\n'
                '2016-04-28 20:00:12.732000+00:00=0.0\n'
                '2016-04-28 20:00:25.125000+00:00=0.0\n'
                '2016-04-28 20:00:31.405000+00:00=0.0\n'
                '2016-04-28 20:00:50.132000+00:00=0.0')

        assert (elec.window(ti).values() == [0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
Example #17
def main(dataset, classifier, epochs, seed, batchsize):
    hs = HyperStream(loglevel=30)
    print(hs)
    print([p.channel_id_prefix for p in hs.config.plugins])

    M = hs.channel_manager.memory

    data = getattr(datasets, 'load_{}'.format(dataset))()
    data_tool = hs.plugins.sklearn.tools.dataset(data,
                                                 shuffle=True,
                                                 epochs=epochs,
                                                 seed=seed)
    data_stream = M.get_or_create_stream('dataset')

    model = getattr(linear_model, classifier)()
    classifier_tool = hs.plugins.sklearn.tools.classifier(model)
    classifier_stream = M.get_or_create_stream('classifier')

    now = datetime.utcnow().replace(tzinfo=UTC)
    now = (now - timedelta(hours=1)).replace(tzinfo=UTC)
    before = datetime.utcfromtimestamp(0).replace(tzinfo=UTC)
    ti = TimeInterval(before, now)

    data_tool.execute(sources=[], sink=data_stream, interval=ti)

    print("Example of a data stream")
    key, value = next(iter(data_stream.window()))
    print('[%s]: %s' % (key, value))

    mini_batch_tool = hs.plugins.sklearn.tools.minibatch(batchsize=batchsize)
    mini_batch_stream = M.get_or_create_stream('mini_batch')
    mini_batch_tool.execute(sources=[data_stream],
                            sink=mini_batch_stream,
                            interval=ti)

    classifier_tool.execute(sources=[mini_batch_stream],
                            sink=classifier_stream,
                            interval=ti)

    scores = []
    for key, value in classifier_stream.window():
        scores.append(value['score'])

    # The data is repeated for the given number of epochs. This makes the
    # mini-batches cycle and contain data from both the beginning and the end of
    # the dataset, so the number of scores may not be divisible by the number of
    # epochs.
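    # For example (hypothetical numbers): a 150-sample dataset repeated for 10
    # epochs gives 1500 instances, so batchsize=2 would yield 750 mini-batches
    # and hence 750 scores.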
    if batchsize == 1:
        print("Test scores per epoch")
        scores = np.array(scores).reshape(epochs, -1)
        print(scores.mean(axis=1).round(decimals=2))
    else:
        scores = np.array(scores).reshape(1, -1)
        print("Test scores per minibatch (cyclic)")
        print(scores.round(decimals=2))
def run(delete_existing_workflows=True, loglevel=logging.INFO):
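    # Run the summaries_to_csv workflow over a fixed time interval and write the
    # collected environment summaries out as a tab-separated CSV file.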
    from hyperstream import HyperStream, TimeInterval
    from workflows.summaries_to_csv import create_workflow_summaries_to_csv
    from sphere_connector_package.sphere_connector import SphereConnector

    if not globs['sphere_connector']:
        globs['sphere_connector'] = SphereConnector(
            config_filename='config.json',
            include_mongo=True,
            include_redcap=False,
            sphere_logger=None)

    hyperstream = HyperStream(loglevel=loglevel, file_logger=None)

    workflow_id = "summaries_to_csv"
    if delete_existing_workflows:
        hyperstream.workflow_manager.delete_workflow(workflow_id)
    try:
        w = hyperstream.workflow_manager.workflows[workflow_id]
    except KeyError:
        # percentile_results = []
        # w = create_workflow_summaries_to_csv(hyperstream,percentile_results=percentile_results,safe=False)
        w = create_workflow_summaries_to_csv(hyperstream, safe=False)
        hyperstream.workflow_manager.commit_workflow(workflow_id)

    day_str = "2016_12_15_23_00"
    t1 = parse("2016-12-15T19:58:25Z")
    t2 = parse("2016-12-15T20:01:05Z")
    t1 = parse("2016-12-15T22:58:25Z")
    t2 = parse("2016-12-15T23:01:05Z")
    t1 = parse("2017-02-24T08:01:00Z")
    t2 = parse("2017-02-24T08:04:00Z")

    t_1_2 = TimeInterval(start=t1, end=t2)
    # w.factors[0].execute(t_1_2)
    w.execute(t_1_2)

    env_results = w.factors[0].tool.global_result_list

    csv_string = pd.DataFrame(env_results).to_csv(sep="\t", header=False)

    with open("mk/visualise_summaries/env_summaries_{}.csv".format(day_str),
              "w") as text_file:
        text_file.write(csv_string)

    # print(env_results)
    # print(percentile_results)


#    time_interval = TimeInterval.now_minus(minutes=1)
#    w.execute(time_interval)

    print('number of non_empty_streams: {}'.format(
        len(hyperstream.channel_manager.memory.non_empty_streams)))
    def _execute(self, sources, alignment_stream, interval):
        data = list(sources[0].window(interval, force_calculation=True))
        flattened = map(lambda x: dict(dict(
            experiment_id=construct_experiment_id(TimeInterval(x.value['start'], x.value['end'])),
            start=x.value['start'],
            end=x.value['end'],
            annotator=x.value['annotator']
        ), **(x.value['notes'])), data)
        df = pd.DataFrame(flattened)
        df['id'] = range(1, len(df) + 1)
        yield StreamInstance(interval.end, df)
    def _execute(self, sources, alignment_stream, interval):
        sliding_window = sources[0].window(interval, force_calculation=True)
        first_window = sliding_window.first()
        if first_window is None:
            return
        try:
            last_window = list(sliding_window)[-1]
        except IndexError:
            last_window = first_window
        sources[1].window(TimeInterval(first_window.value.start, last_window.value.end), force_calculation=True).first()
        return
        yield  # required to make this function into a generator
Example #21
def main(dataset, model, epochs, seed, batchsize):
    hs = HyperStream(loglevel=30)
    print(hs)
    print([p.channel_id_prefix for p in hs.config.plugins])

    M = hs.channel_manager.memory

    data = getattr(datasets, 'load_{}'.format(dataset))()
    data_tool = hs.plugins.sklearn.tools.dataset(data,
                                                 shuffle=True,
                                                 epochs=epochs,
                                                 seed=seed)
    data_stream = M.get_or_create_stream('dataset')

    anomaly_detector_tool = hs.plugins.sklearn.tools.anomaly_detector(model)
    anomaly_detector_stream = M.get_or_create_stream('anomaly_detector')

    now = datetime.utcnow()
    now = (now - timedelta(hours=1))
    before = datetime.utcfromtimestamp(0)
    ti = TimeInterval(before, now)

    data_tool.execute(sources=[], sink=data_stream, interval=ti)

    print("Example of a data stream")
    key, value = next(iter(data_stream.window()))
    print('[%s]: %s' % (key, value))

    mini_batch_tool = hs.plugins.sklearn.tools.minibatch(batchsize=batchsize)
    mini_batch_stream = M.get_or_create_stream('mini_batch')
    mini_batch_tool.execute(sources=[data_stream],
                            sink=mini_batch_stream,
                            interval=ti)

    anomaly_detector_tool.execute(sources=[mini_batch_stream],
                                  sink=anomaly_detector_stream,
                                  interval=ti)

    probas = []
    for key, value in anomaly_detector_stream.window():
        probas.append(value['proba'])

    # The data is repeated for the given number of epochs. This makes the
    # mini-batches cycle and contain data from both the beginning and the end of
    # the dataset, so the number of scores may not be divisible by the number of
    # epochs.
    probas = np.array(probas)
    print(probas.shape)
    means = np.array([np.nanmean(aux) for aux in probas])
    print(means.shape)
    print("Test probabilities per minibatch (cyclic)")
    print(means.round(decimals=2))
Example #22
def stream(channel,
           name,
           meta_data,
           mimetype,
           func,
           parameters=None,
           start=None,
           end=None):
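    # Look up the requested stream, take a window over the optional time range,
    # apply the named function (e.g. head, last, values) with any parameters,
    # and serialise the result according to the requested mimetype.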
    try:
        stream = hs.channel_manager[channel].find_stream(name=name,
                                                         **meta_data)
        if start and end:
            ti = TimeInterval(start, end)
            window = stream.window(ti)
        else:
            window = stream.window()

    except (KeyError, TypeError, MultipleStreamsFoundError,
            StreamNotFoundError, StreamNotAvailableError) as e:
        return exception_json(
            e,
            dict(channel=channel,
                 name=name,
                 meta_data=meta_data,
                 start=start,
                 end=end))

    try:
        if hasattr(window, func):
            if parameters:
                data = getattr(window, func)(*(KNOWN_TYPES[p[0]](p[1])
                                               for p in parameters))
            else:
                data = getattr(window, func)()
            from collections import deque
        else:
            return jsonify({
                'exception': "Function not available",
                "message": func
            })
    except (KeyError, TypeError) as e:
        return exception_json(e, (func, parameters))

    try:
        return ENDPOINTS[mimetype](data)
    except KeyError as e:
        # FIXME is the error message informative?
        # The previous message was: 'Endpoint not found'
        return exception_json(e, mimetype)
    except TypeError as e:
        return exception_json(e, (func, parameters, str(list(data))))
Example #23
    def test_data_generators(self):
        M = self.hs.channel_manager.memory
        T = self.hs.plugins.sklearn.tools

        data = load_iris()
        epochs = 10
        seed = 42
        batchsize = 2

        data_tool = T.dataset(data, shuffle=True, epochs=epochs, seed=seed)
        data_stream = M.get_or_create_stream('dataset')
        model = 'Gaussian'

        anomaly_detector_tool = T.anomaly_detector(model)
        anomaly_detector_stream = M.get_or_create_stream('anomaly_detector')

        now = datetime.utcnow()
        now = (now - timedelta(hours=1))
        before = datetime.utcfromtimestamp(0)
        ti = TimeInterval(before, now)

        data_tool.execute(sources=[], sink=data_stream, interval=ti)

        print("Example of a data stream")
        key, value = next(iter(data_stream.window()))
        print('[%s]: %s' % (key, value))

        mini_batch_tool = T.minibatch(batchsize=batchsize)
        mini_batch_stream = M.get_or_create_stream('mini_batch')
        mini_batch_tool.execute(sources=[data_stream],
                                sink=mini_batch_stream,
                                interval=ti)

        anomaly_detector_tool.execute(sources=[mini_batch_stream],
                                      sink=anomaly_detector_stream,
                                      interval=ti)

        probas = []
        for key, value in anomaly_detector_stream.window():
            probas.append(value['proba'])

        # The data is repeated for the given number of epochs. This makes the
        # mini-batches cycle and contain data from both the beginning and the
        # end of the dataset, so the number of scores may not be divisible by
        # the number of epochs.
        probas = np.array(probas)
        print(probas.shape)
        means = np.array([np.nanmean(aux) for aux in probas])
        np.testing.assert_almost_equal(true_means, means, decimal=2)
        print(means.shape)
        print("Test probabilities per minibatch (cyclic)")
        print(means.round(decimals=2))
Example #24
    def test_sessions(self):
        hs = HyperStream(loglevel=logging.CRITICAL)
        print_sessions(hs)
        # hs.clear_sessions(inactive_only=False, clear_history=True)

        # TODO: this needs to clear stream definitions as well
        hs.clear_sessions(clear_history=True)
        print("after clearing")
        print_sessions(hs)
        assert (len(hs.sessions) == 0)
        del hs

        with HyperStream(loglevel=logging.CRITICAL) as hs:
            print("enter ...")
            print_sessions(hs)
            assert (len(hs.sessions) == 1)
            assert hs.current_session.active

            M = hs.channel_manager.memory
            dg = hs.plugins.data_generators

            ticker = M.get_or_create_stream("ticker")
            random = M.get_or_create_stream("random")

            ti = TimeInterval(t1, t1 + minute)

            hs.tools.clock().execute(sources=[], sink=ticker, interval=ti)
            dg.tools.random().execute(sources=[],
                                      sink=random,
                                      interval=ti,
                                      alignment_stream=ticker)

            history = hs.current_session.history
            for item in history:
                print(item)

            assert (history[0].value['tool'] == 'clock')
            assert (history[1].value['tool'] == 'random')
            assert (history[0].value['document_count'] == 60)
            assert (history[1].value['document_count'] == 60)

        print("exit ...")

        hs = HyperStream(loglevel=logging.CRITICAL)

        assert hs.current_session is None
        print_sessions(hs)
        assert (len(hs.sessions) == 1)
        assert hs.sessions[0].end is not None
        assert not hs.sessions[0].active
def run(house, wearables, loglevel=logging.CRITICAL):
    from hyperstream import HyperStream, TimeInterval

    if not globs['hyperstream']:
        globs['hyperstream'] = HyperStream(loglevel=loglevel, file_logger=None)

    display_predictions(globs['hyperstream'],
                        TimeInterval.now_minus(minutes=1), house, wearables)
    print()

    from display_access_points import display_access_points

    display_access_points(house=house)
    print()
    def _execute(self, sources, alignment_stream, interval):
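        # Generate sliding windows of fixed width, aligned to self.first and
        # advanced by self.increment, yielding one TimeInterval per window that
        # closes within the requested interval.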
        if interval.start < self.first:
            interval.start = self.first

        n_widths = int((interval.start - self.first).total_seconds() //
                       self.width.total_seconds())

        lower = self.first + n_widths * self.width
        upper = lower + self.width

        while upper <= interval.end:
            yield StreamInstance(upper, TimeInterval(lower, upper))

            lower += self.increment
            upper += self.increment
    def _execute(self, sources, alignment_stream, interval):
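        # Map each experiment in the first source to an (experiment_id, interval)
        # pair, keeping only ids present in the selected-experiments list taken
        # from the second source.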
        data = sources[0].window(interval, force_calculation=True)
        try:
            experiment_ids = sources[1].window(
                interval, force_calculation=True).last().value
        except AttributeError:
            return

        mappings = []
        for x in data:
            experiment_interval = TimeInterval(x.value['start'],
                                               x.value['end'])
            experiment_id = construct_experiment_id(experiment_interval)
            if experiment_id in experiment_ids:
                mappings.append((experiment_id, experiment_interval))
        yield StreamInstance(interval.end, mappings)
Example #28
    def test_iris(self):
        M = self.hs.channel_manager.memory
        T = self.hs.plugins.sklearn.tools

        data = load_iris()
        epochs = 10
        seed = 42
        batchsize = 2

        data_tool = T.dataset(data, shuffle=True, epochs=epochs, seed=seed)
        data_stream = M.get_or_create_stream('dataset')

        now = datetime.utcnow()
        now = (now - timedelta(hours=1))
        before = datetime.utcfromtimestamp(0)
        ti = TimeInterval(before, now)

        data_tool.execute(sources=[], sink=data_stream, interval=ti)

        print("Example of a data stream")
        key, value = next(iter(data_stream.window()))
        print('[%s]: %s' % (key, value))

        mini_batch_tool = T.minibatch(batchsize=batchsize)
        mini_batch_stream = M.get_or_create_stream('mini_batch')
        mini_batch_tool.execute(sources=[data_stream], sink=mini_batch_stream,
                                interval=ti)

        key, value = mini_batch_stream.window().items()[0]

        assert(key == datetime(1970, 1, 1, 0, 2, tzinfo=UTC))

        expected_value = {'x_te': np.array([[ 5.6,  2.8,  4.9,  2. ],
                                           [ 7.3,  2.9,  6.3,  1.8]]),
                         'x_tr': np.array([[ 6. ,  2.2,  5. ,  1.5],
                                           [ 5. ,  2. ,  3.5,  1. ]]),
                         'y_te': np.array([[0, 0, 1], [0, 0, 1]]),
                         'y_tr': np.array([[0, 0, 1], [0, 1, 0]])}

        for e_key, e_value in expected_value.items():
            assert(e_key in value)
            np.testing.assert_equal(e_value, value[e_key])
Example #29
    def test_plugins(self):
        with HyperStream(file_logger=False,
                         console_logger=False,
                         mqtt_logger=None) as hs:
            M = hs.channel_manager.memory

            clock_tool = hs.tools.clock()
            dummy_tool = hs.plugins.example.tools.dummy()

            ticker = M.get_or_create_stream("ticker")
            ticker_copy = M.get_or_create_stream("ticker_copy")

            before = now - timedelta(seconds=30)
            ti = TimeInterval(before, now)

            clock_tool.execute(sources=[], sink=ticker, interval=ti)
            dummy_tool.execute(sources=[ticker], sink=ticker_copy, interval=ti)

            assert (all(
                map(lambda pair: pair[0].value == pair[1].value,
                    zip(ticker.window(), ticker_copy.window()))))
Example #30
    def test_save_workflow(self):
        hs = HyperStream(file_logger=False,
                         console_logger=False,
                         mqtt_logger=None)
        workflow_id = sys._getframe().f_code.co_name

        # First delete the workflow if it's there
        hs.workflow_manager.delete_workflow(workflow_id)

        w = basic_workflow(hs, workflow_id)

        time_interval = TimeInterval(t1, t2)
        w.execute(time_interval)

        hs.workflow_manager.commit_workflow(workflow_id)

        # Now remove it from the workflow manager
        del hs.workflow_manager.workflows[workflow_id]

        # And then reload it
        hs.workflow_manager.load_workflow(workflow_id)