Example no. 1
    def _execute(self, sources, alignment_stream, interval):
        s0 = sources[0].window(interval, force_calculation=True).items()

        first = True
        for dt, value in s0:
            # TODO consider the case with training or test
            x_tr = value['x_tr']
            x_te = value['x_te']
            if len(x_tr.shape) == 1:
                raise ValueError(
                    'Anomaly detector needs more than one sample')

            if first:
                if hasattr(self.model, 'partial_fit'):
                    self.fit = self.model.partial_fit
                else:
                    self.fit = self.model.fit

                if hasattr(self.model, 'predict_proba'):
                    self.predict_proba = self.model.predict_proba
                elif hasattr(self.model, 'predict'):
                    self.predict_proba = self.model.predict
                elif hasattr(self.model, 'likelihood'):
                    self.predict_proba = self.model.likelihood

                first = False

            self.fit(x_tr, **self.fit_arguments)
            proba = self.predict_proba(x_te)
            yield StreamInstance(dt, dict(proba=proba))
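
The tool above picks its fit and scoring methods by duck typing: any model exposing partial_fit (or fit) plus one of predict_proba, predict or likelihood can be plugged in. A minimal sketch of a compatible model, assuming empty fit_arguments; the class and its scoring rule below are illustrative, not part of hyperstream:

import numpy as np

class RunningMeanAnomalyModel(object):
    """Toy anomaly scorer: distance of each test sample from a running mean."""

    def __init__(self):
        self.mean = None
        self.n_batches = 0

    def partial_fit(self, x_tr):
        # Update the running mean of the training batches seen so far
        batch_mean = np.mean(x_tr, axis=0)
        self.n_batches += 1
        if self.mean is None:
            self.mean = batch_mean
        else:
            self.mean += (batch_mean - self.mean) / self.n_batches

    def predict(self, x_te):
        # Larger distance from the running mean means more anomalous
        return np.linalg.norm(np.atleast_2d(x_te) - self.mean, axis=1)
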
Example no. 2
    def _execute(self, sources, alignment_stream, interval):
        if alignment_stream is None:
            raise ToolExecutionError("Alignment stream expected")

        for ti, _ in alignment_stream.window(interval, force_calculation=True):
            yield StreamInstance(
                ti, random.gammavariate(alpha=self.alpha, beta=self.beta))
Example no. 3
    def writer(document_collection):
        if stream.stream_id not in self.data:
            raise RuntimeError(
                "Data slot does not exist for {}, perhaps create_stream was not used?"
                .format(stream))
        if isinstance(document_collection, StreamInstance):
            try:
                self.data[stream.stream_id].append(document_collection)
            except KeyError as e:
                # Deal with the duplicate error by adding microseconds to the time until we succeed
                # logging.debug(e.message)
                doc = StreamInstance(
                    timestamp=document_collection.timestamp +
                    timedelta(microseconds=1000),
                    # MK used 1000 here when storing derived streams later back to mongo
                    # TODO: come up with a better solution
                    value=document_collection.value)
                return writer(doc)
        elif isinstance(document_collection, list):
            for d in document_collection:
                writer(d)
        else:
            raise TypeError(
                'Expected: [StreamInstance, list<StreamInstance>], got {}. '
                .format(type(document_collection)))
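
On a duplicate key the writer above nudges the timestamp forward by 1000 microseconds (one millisecond) and retries recursively; a quick illustration of that bump, with made-up values:

from datetime import datetime, timedelta

original = datetime(2017, 1, 1, 12, 0, 0)
bumped = original + timedelta(microseconds=1000)
print(bumped)  # 2017-01-01 12:00:00.001000
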
Example no. 4
    def _execute(self, sources, alignment_stream, interval):
        if alignment_stream is None:
            raise ToolExecutionError("Alignment stream expected")

        for ti, _ in alignment_stream.window(interval, force_calculation=True):
            yield StreamInstance(ti, random.gauss(mu=self.mu,
                                                  sigma=self.sigma))
Example no. 5
    def _execute(self, sources, alignment_stream, interval):
        data = sources[0].window(interval, force_calculation=True)

        for timestamp, value in data:
            y_est = perceptron_predict(np.array(value["x"]), self.weights)
            self.weights = perceptron_update(np.array(value["x"]), self.weights, value["y"])
            yield StreamInstance(timestamp, y_est)
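
perceptron_predict and perceptron_update are not shown in this snippet; a plausible sketch of such helpers, with signatures inferred from the call sites above (an assumption, not the original implementation):

import numpy as np

def perceptron_predict(x, weights):
    # Threshold the linear score to get a binary prediction
    return 1.0 if np.dot(x, weights) >= 0 else 0.0

def perceptron_update(x, weights, y_true):
    # Classic perceptron rule: shift the weights by the prediction error
    y_est = perceptron_predict(x, weights)
    error = float(np.ravel(y_true)[0]) - y_est
    return np.asarray(weights) + error * x
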
Example no. 6
    def _execute(self, sources, alignment_stream, interval):
        # Combine the data, apply the mapping and sort (inefficient!)
        results = dict()
        for source in sources:
            data = source.window(interval, force_calculation=True)
            meta_data = source.stream_id.meta_data
            d = dict(meta_data)
            v = d.pop(self.aggregation_meta_data)
            for timestamp, value in data:
                if timestamp not in results:
                    results[timestamp] = StreamInstance(timestamp, dict())
                results[timestamp].value[v] = self.func(value)
        return sorted(results.values(), key=lambda x: x.timestamp)
Example no. 7
    def _execute(self, sources, alignment_stream, interval):

        start_time = datetime(2017,1,1,0,0).replace(tzinfo=UTC)

        raw = numpy.genfromtxt(self.data_path + self.script_ID + '/acceleration.csv',
                               delimiter=',', skip_header=1)

        for line in raw:
            dt = start_time + timedelta(seconds=line[0])
            if dt in interval:
                yield StreamInstance(dt, line[1:])
Example no. 8
 def get_results(self, stream, time_interval):
     """
     Get the results for a given stream
     :param time_interval: The time interval
     :param stream: The stream object
     :return: A generator over stream instances
     """
     query = stream.stream_id.as_raw()
     query['datetime'] = {'$gt': time_interval.start, '$lte': time_interval.end}
     with switch_db(SummaryInstanceModel, 'hyperstream'):
         for instance in SummaryInstanceModel.objects(__raw__=query):
             yield StreamInstance(timestamp=instance.datetime, value=instance.value)
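
The __raw__ query above merges the fields of StreamId.as_raw() with a half-open datetime range; a hedged sketch of the resulting document (the stream-identifier key is an assumption, only the datetime clause comes from the code):

from datetime import datetime

query = {
    'stream_id': 'experiments_dataframe',  # assumed output of StreamId.as_raw()
    'datetime': {
        '$gt': datetime(2017, 1, 1),   # exclusive lower bound
        '$lte': datetime(2017, 1, 2),  # inclusive upper bound
    },
}
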
Example no. 9
    def _execute(self, source, splitting_stream, interval, meta_data_id,
                 output_plate_values):

        start_time = datetime(2017, 1, 1, 0, 0).replace(tzinfo=UTC)

        raw = numpy.genfromtxt(self.data_path + self.script_ID + '/pir.csv',
                               delimiter=',',
                               skip_header=1)

        for pv in output_plate_values:
            if len(pv) > 1:
                raise NotImplementedError(
                    "Nested plates not supported for this tool")

            ((meta_data_id, plate_value), ) = pv

            for line in raw:
                if str(int(line[3])) == plate_value:
                    dt_1 = start_time + timedelta(seconds=line[0])
                    dt_0 = start_time + timedelta(seconds=line[1])

                    if dt_1 in interval:
                        instance_1 = StreamInstance(dt_1, 1)
                        yield StreamMetaInstance(instance_1,
                                                 (meta_data_id, plate_value))

                    if dt_0 in interval:
                        instance_0 = StreamInstance(dt_0, 0)
                        yield StreamMetaInstance(instance_0,
                                                 (meta_data_id, plate_value))
Example no. 10
    def _execute(self, sources, alignment_stream, interval):
        s0 = sources[0].window(interval, force_calculation=True).items()

        for group in grouper(self.batchsize, s0):
            values = {}
            for stream in group:
                for key, value in stream.value.items():
                    if key in values:
                        values[key].append(value)
                    else:
                        values[key] = [value]
            for key in values.keys():
                values[key] = np.concatenate(values[key])
            yield StreamInstance(group[-1].timestamp, values)
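
grouper is not defined in this snippet; a common itertools-based sketch that yields successive fixed-size batches (this particular implementation is an assumption, the original helper may differ):

from itertools import islice

def grouper(batchsize, iterable):
    # Yield lists of up to `batchsize` consecutive items from `iterable`
    iterator = iter(iterable)
    while True:
        batch = list(islice(iterator, batchsize))
        if not batch:
            return
        yield batch
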
Example no. 11
    def _execute(self, sources, alignment_stream, interval):

        # Let's make the assumption that the first field is the timestamp

        first = True

        with open(self.filename, 'rU') as f:
            for line in f.readlines():
                if first:
                    first = False
                    continue
                elements = line.split(',')
                dt = parse(elements[0])
                if dt in interval:
                    yield StreamInstance(dt, map(float, elements[1:]))
Example no. 12
    def _execute(self, source, splitting_stream, interval, meta_data_id,
                 output_plate_values):

        # Let's make the assumption that the first field is the timestamp

        for pv in output_plate_values:
            count = 0

            if len(pv) > 1:
                raise NotImplementedError(
                    "Nested plates not supported for this tool")

            # Unpack the plate value
            ((meta_data_id, plate_value), ) = pv

            filename = self.filename_template.format(plate_value)

            if not os.path.isfile(filename):
                continue

            with open(filename, 'rU') as f:
                for line in f.readlines():
                    if count == 0:
                        if self.header:
                            colnames = [
                                name.replace('\n', '')
                                for name in line.split(',')
                            ]
                            del colnames[self.datetime_column]
                        count += 1
                        continue
                    count += 1
                    if count < self.skip_rows + 1:
                        continue
                    elements = line.split(',')
                    dt = self.datetime_parser(elements[self.datetime_column])
                    del elements[self.datetime_column]
                    if dt in interval:
                        if self.header:
                            values = dict(
                                zip(colnames, map(my_float, elements)))
                        else:
                            values = map(float, elements)
                        instance = StreamInstance(dt, values)
                        yield StreamMetaInstance(instance,
                                                 (meta_data_id, plate_value))
Example no. 13
    def _execute(self, sources, alignment_stream, interval):
        """
        It expects at least one source of streams, each stream with a
        dictionary with training and test data in the form:
        x_tr: array of float
            Training values for the given data stream
        y_tr: array of int
            Training binary label corresponding to the given data stream
        x_te: array of float
            Test values for the given data stream
        y_te: array of int
            Test binary label corresponding to the given data stream
        """
        s0 = sources[0].window(interval, force_calculation=True).items()

        first = True
        for dt, value in s0:
            x_tr = value['x_tr']
            x_te = value['x_te']
            if len(x_tr.shape) == 1:
                x_tr = x_tr.reshape(1, -1)
                x_te = x_te.reshape(1, -1)
            y_tr = np.argmax(value['y_tr'], axis=1).reshape(-1, 1)
            y_te = np.argmax(value['y_te'], axis=1).reshape(-1, 1)

            if first:
                self.classes = range(value['y_tr'].shape[1])
                if hasattr(self.model, 'partial_fit'):
                    self.fit = self.model.partial_fit
                else:
                    self.fit = self.model.fit

                self.fit(x_tr, y_tr, self.classes)

                if hasattr(self.model, 'predict_proba'):
                    self.predict_proba = self.model.predict_proba
                else:
                    self.predict_proba = self.model.predict

                first = False
            else:
                self.fit(x_tr, y_tr)
            proba_tr = self.predict_proba(x_tr)
            score = self.model.score(x_te, y_te)
            yield StreamInstance(dt, dict(proba=proba_tr, score=score))
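
Any scikit-learn style estimator with partial_fit fits this pattern; for example sklearn.linear_model.SGDClassifier accepts the classes argument on its first partial_fit call and exposes predict_proba when trained with a logistic loss. A hedged wiring sketch (the loss name and other parameters are illustrative):

from sklearn.linear_model import SGDClassifier

# Illustrative only: 'log_loss' is the logistic loss name in recent scikit-learn
# releases (older releases call it 'log'); it enables predict_proba.
model = SGDClassifier(loss='log_loss', random_state=0)
# First batch:   model.partial_fit(x_tr, y_tr, classes)
# Later batches: model.partial_fit(x_tr, y_tr)
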
Example no. 14
    def _execute(self, sources, alignment_stream, interval):
        startdate = interval[0].strftime("%Y-%m-%d")
        enddate = interval[1].strftime("%Y-%m-%d")

        url = "https://environment.data.gov.uk/flood-monitoring/id/stations/{}/readings".format(self.station)
        values = {'startdate' : startdate,
                  'enddate' : enddate}
        url_parameters = urllib.urlencode(values)

        full_url = url + '?' + url_parameters
        response = urllib2.urlopen(full_url)
        data = json.load(response)

        for item in data['items']:
            dt = parse(item.get('dateTime'))
            if dt in interval:
                value = float(item.get('value'))
                yield StreamInstance(dt, value)
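
The snippet above uses the Python 2 urllib/urllib2 modules; on Python 3 the same request is built with urllib.parse and urllib.request. A sketch with placeholder dates and station id (only the endpoint is copied from the code above):

import json
from urllib.parse import urlencode
from urllib.request import urlopen

station = 'some-station-id'  # placeholder
url = ("https://environment.data.gov.uk/flood-monitoring/id/stations/{}/readings"
       .format(station))
params = urlencode({'startdate': '2017-01-01', 'enddate': '2017-01-02'})
with urlopen(url + '?' + params) as response:
    data = json.load(response)
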
Example no. 15
    def _execute(self, sources, alignment_stream, interval):
        data = sources[0].window(interval, force_calculation=True)

        for dt, value in data:
            # self.weights exist
            if hasattr(self, 'weights'):
                break
            self.weights = [0] * len(np.array(value["x"]))
            # self.weights = np.random.rand(len(np.array(value["x"])))

        for dt, value in data:
            x = map(float, value["x"])
            y = map(float, value["y"])
            # y_est = passiveaggressivethree_predict(np.array(value["x"]), self.weights)
            y_est = passiveaggressivethree_predict(np.array(x), self.weights)
            error = y[0] - y_est
            self.weights = passiveaggressivethree_update(np.array(x), self.weights, error)
            # self.weights = passiveaggressivethree_update(np.array(value["x"]), self.weights, value["y"])
            yield StreamInstance(dt, y_est)
Example no. 16
    def _execute(self, sources, alignment_stream, interval):
        """
        Processes the input data and produces streamed data

        Yields
        ======
        stream : with date and dictionary with following entries
            x_tr: array of float
                Training values for the given data stream
            y_tr: array of int
                Training binary label corresponding to the given data stream
            x_te: array of float
                Test values for the given data stream
            y_te: array of int
                Test binary label corresponding to the given data stream
        """
        x = self.dataset.data
        y = self.dataset.target
        # Binarize data
        classes = np.unique(y)
        y = label_binarize(y, classes)

        j = 0
        start_dt = datetime.utcfromtimestamp(0).replace(tzinfo=UTC)
        for i in range(self.epochs):
            X_tr, X_te, Y_tr, Y_te = train_test_split(x,
                                                      y,
                                                      shuffle=self.shuffle,
                                                      train_size=0.5,
                                                      stratify=y,
                                                      random_state=self.seed)
            for x_tr, y_tr in zip(X_tr, Y_tr):
                x_te, y_te = X_te[j % len(X_te)], Y_te[j % len(Y_te)]
                j += 1
                dt = (start_dt + timedelta(minutes=j)).replace(tzinfo=UTC)
                yield StreamInstance(
                    dt,
                    dict(x_tr=x_tr.reshape(1, -1),
                         x_te=x_te.reshape(1, -1),
                         y_tr=y_tr.reshape(1, -1),
                         y_te=y_te.reshape(1, -1)))
Example no. 17
    def _execute(self, sources, alignment_stream, interval):

        if self.dateparser is None:
            self.dateparser = parse
        # Let's make the assumption that the first field is the timestamp
        first = True

        with open(self.filename, 'rU') as f:
            for line in f.readlines():
                if first:
                    if self.header:
                        colnames = [name.replace('\n', '') for name in line.split(',')]
                    first = False
                    continue
                elements = line.split(',')
                dt = self.dateparser(elements[0])
                if dt in interval:
                    if self.header:
                        values = dict(zip(colnames[1:], map(float, elements[1:])))
                    else:
                        values = map(float, elements[1:])
                    yield StreamInstance(dt, values)
Example no. 18
    def _execute(self, sources, alignment_stream, interval):
        data = sources[0].window(interval, force_calculation=True)

        for dt, value in data:
            #self.weights exist
            if hasattr(self, 'weights'):
                break
            self.weights = [0] * len(np.array(value["x"]))
            #self.weights = np.random.rand(len(np.array(value["x"])))
        count = 0
        for dt, value in data:
            count += 1
            x = map(float, value["x"])
            y = map(float, value["y"])
            #y_est = OGDone_predict(np.array(value["x"]), self.weights)
            y_est = OGDone_predict(np.array(x), self.weights)
            error = y[0] - y_est
            learning_rate = 1 / math.sqrt(count)
            self.weights = OGDone_update(np.array(x), self.weights, error,
                                         learning_rate)
            #self.weights = OGDone_update(np.array(value["x"]), self.weights, value["y"])
            yield StreamInstance(dt, y_est)
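
OGDone_predict and OGDone_update are not shown either; a plausible sketch of online gradient descent helpers matching the call sites above (an assumption about the original implementation):

import numpy as np

def OGDone_predict(x, weights):
    # Linear prediction: dot product of features and weights
    return float(np.dot(x, weights))

def OGDone_update(x, weights, error, learning_rate):
    # One gradient step on the squared error of the linear model
    return np.asarray(weights) + learning_rate * error * x
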
Example no. 19
    def _execute(self, sources, alignment_stream, interval):
        data = sources[0].window(interval, force_calculation=True)

        for dt, value in data:
            # self.weights exist
            if hasattr(self, 'weights'):
                break
            self.weights = [0] * len(np.array(value["x"]))
            # self.weights = np.random.rand(len(np.array(value["x"])))

        totalweights = 0
        count = 0
        for dt, value in data:
            x = map(float, value["x"])
            y = map(float, value["y"])
            y_est = avgperceptron_predict(np.array(x), self.weights)
            error = y[0] - y_est
            self.weights = avgperceptron_update(np.array(x), self.weights, error)
            totalweights += self.weights
            count += 1
            self.weights = totalweights / count
            yield StreamInstance(dt, y_est)
Example no. 20
    def _execute(self, sources, alignment_stream, interval):
        """
        It expects at least one source of streams, each stream with a
        dictionary with training and test data in the form:
        x_tr: array of float
            Training values for the given data stream
        x_te: array of float
            Test values for the given data stream
        """
        s0 = sources[0].window(interval, force_calculation=True).items()

        first = True
        for dt, value in s0:
            x_tr = value['x_tr']
            x_te = value['x_te']
            if len(x_tr.shape) == 1:
                x_tr = x_tr.reshape(1, -1)
                x_te = x_te.reshape(1, -1)

            if first:
                if hasattr(self.model, 'partial_fit'):
                    self.fit = self.model.partial_fit
                else:
                    self.fit = self.model.fit

                self.fit(x_tr)

                first = False
            else:
                self.fit(x_tr)

            h_te = self.model.transform(x_te)
            pred_te = self.model.inverse_transform(h_te)

            score = self.model.score(x_te, pred_te)

            yield StreamInstance(dt, dict(score=score, reduced_x=h_te))
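
The model here only needs fit (or partial_fit), transform, inverse_transform and score; scikit-learn's PCA exposes all four, so a hedged choice of model could be (the component count is illustrative):

from sklearn.decomposition import PCA

# Illustrative only. Note that PCA.score(X, y=None) ignores its second argument,
# so score(x_te, pred_te) above evaluates the average log-likelihood of x_te
# under the fitted model rather than comparing x_te with its reconstruction.
model = PCA(n_components=2)
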
Example no. 21
def run(house,
        selection,
        delete_existing_workflows=True,
        loglevel=logging.INFO):
    from hyperstream import HyperStream, StreamId, TimeInterval
    from workflows.display_experiments import create_workflow_list_technicians_walkarounds
    from workflows.learn_localisation_model import create_workflow_lda_localisation_model_learner
    from hyperstream.utils import StreamNotFoundError, reconstruct_interval

    hyperstream = HyperStream(loglevel=loglevel, file_logger=None)
    M = hyperstream.channel_manager.memory
    D = hyperstream.channel_manager.mongo
    A = hyperstream.channel_manager.assets

    workflow_id0 = "list_technicians_walkarounds"

    if delete_existing_workflows:
        hyperstream.workflow_manager.delete_workflow(workflow_id0)

    try:
        w0 = hyperstream.workflow_manager.workflows[workflow_id0]
    except KeyError:
        w0 = create_workflow_list_technicians_walkarounds(hyperstream,
                                                          house=house,
                                                          safe=False)
        hyperstream.workflow_manager.commit_workflow(workflow_id0)
    time_interval = TimeInterval.up_to_now()
    w0.execute(time_interval)

    # from datetime import timedelta
    # time_interval.end += timedelta(milliseconds=1)
    df = M[StreamId('experiments_dataframe',
                    (('house', house), ))].window().values()[0]
    experiment_ids = set([df['experiment_id'][i - 1] for i in selection])

    experiment_ids_str = '_'.join(experiment_ids)

    create_selected_localisation_plates(hyperstream)

    # Ensure the model is overwritten if it's already there
    for model_name in ('lda', 'svm', 'room_rssi_hmm'):
        model_id = StreamId(name="location_prediction",
                            meta_data=(('house', house), ('localisation_model',
                                                          model_name)))
        try:
            hyperstream.channel_manager.mongo.purge_stream(model_id)
        except StreamNotFoundError:
            pass

    workflow_id1 = "lda_localisation_model_learner_" + experiment_ids_str

    if delete_existing_workflows:
        hyperstream.workflow_manager.delete_workflow(workflow_id1)

    try:
        w1 = hyperstream.workflow_manager.workflows[workflow_id1]
    except KeyError:
        w1 = create_workflow_lda_localisation_model_learner(
            hyperstream,
            house=house,
            experiment_ids=experiment_ids,
            safe=False)
        hyperstream.workflow_manager.commit_workflow(workflow_id1)

    # Put the experiments selected into an asset stream
    from hyperstream import StreamInstance
    from hyperstream.utils import utcnow

    A.write_to_stream(stream_id=StreamId(name="experiments_selected",
                                         meta_data=(('house', house), )),
                      data=StreamInstance(timestamp=utcnow(),
                                          value=list(experiment_ids)))

    time_interval = TimeInterval.up_to_now()
    w1.execute(time_interval)

    print('number of non_empty_streams: {}'.format(
        len(hyperstream.channel_manager.memory.non_empty_streams)))

    for model_name in ('lda', 'svm', 'room_rssi_hmm'):
        print("Model: {}".format(model_name))
        model_id = StreamId(name="location_prediction",
                            meta_data=(('house', house), ('localisation_model',
                                                          model_name)))
        try:
            model = D[model_id].window().last().value
        except (AttributeError, KeyError):
            print(
                "No {} model was learnt - not requested or no data recorded?".
                format(model_name))
            continue

        for experiment_id in list(experiment_ids):
            print("Experiment id: {}".format(experiment_id))
            print("Time interval: {}".format(
                reconstruct_interval(experiment_id)))
            print("Accuracy: {}".format(
                pformat(model['performance'][experiment_id]['accuracy'])))
            print("Macro F1: {}".format(
                pformat(
                    model['performance'][experiment_id]['f1_score_macro'])))
            print("Micro F1: {}".format(
                pformat(
                    model['performance'][experiment_id]['f1_score_micro'])))
            print("Confusion Matrix:")
            pprint(model['performance'][experiment_id]['confusion_matrix'])
            print("")
    return True
Example no. 22
    def _execute(self, sources, alignment_stream, interval):
        if alignment_stream is None:
            raise ToolExecutionError("Alignment stream expected")

        for ti, _ in alignment_stream.window(interval, force_calculation=True):
            yield StreamInstance(ti, random.triangular(low=self.low, high=self.high, mode=self.mode))
Example no. 23
    def _execute(self, sources, alignment_stream, interval):
        if alignment_stream is None:
            raise ToolExecutionError("Alignment stream expected")

        for ti, _ in alignment_stream.window(interval, force_calculation=True):
            yield StreamInstance(ti, random.randrange(start=self.start, stop=self.stop, step=self.step))
Example no. 24
def create_workflow_rssi_distributions_per_room(hyperstream,
                                                house,
                                                experiment_indices,
                                                experiment_ids,
                                                safe=True):
    experiment_ids_str = '_'.join(experiment_ids)
    # Create a simple one step workflow for querying
    workflow_id = "rssi_distributions_per_room_" + experiment_ids_str

    houses = hyperstream.plate_manager.plates["H"]
    selected_experiments = hyperstream.plate_manager.plates[
        "H.SelectedLocalisationExperiment"]
    models = hyperstream.plate_manager.plates["LocalisationModels"]

    with hyperstream.create_workflow(
            workflow_id=workflow_id,
            name="RSSI distributions per room",
            owner="MK",
            description=
            "RSSI distributions per room, based on technician walkarounds",
            online=False,
            safe=safe) as w:

        # Various channels
        M = hyperstream.channel_manager.memory
        S = hyperstream.channel_manager.sphere
        T = hyperstream.channel_manager.tools
        D = hyperstream.channel_manager.mongo
        A = hyperstream.channel_manager.assets

        nodes = (
            ("experiments_list", M,
             [houses]),  # Current annotation data in 2s windows
            ("experiments_mapping", M,
             [houses]),  # Current annotation data in 2s windows
            ("rss_raw", S, [houses]),  # Raw RSS data
            ("rss_time", S, [selected_experiments
                             ]),  # RSS data split by experiment
            ("annotation_raw_locations", S, [houses]),  # Raw annotation data
            ("annotation_time", S, [selected_experiments
                                    ]),  # RSS data split by experiment
            ("every_2s", M, [selected_experiments
                             ]),  # sliding windows one every minute
            ("annotation_state_location", M,
             [selected_experiments]),  # Annotation data in 2s windows
            ("annotation_state_2s_windows", M, [selected_experiments]),
            ("rss_2s", M, [selected_experiments
                           ]),  # max(RSS) per AP in past 2s of RSS
            ("merged_2s", M, [selected_experiments
                              ]),  # rss_2s with annotation_state_2s
            ("merged_2s_flat_" + experiment_ids_str, M,
             [houses]),  # flattened version of merged_2s
            ("dataframe_" + experiment_ids_str, M, [houses]),
            ("csv_string_" + experiment_ids_str, M, [houses]),
            ("pdf_" + experiment_ids_str, M, [houses]),
            ("experiments_selected", A, [houses]))

        # Create all of the nodes
        N = dict((stream_name, w.create_node(stream_name, channel, plate_ids))
                 for stream_name, channel, plate_ids in nodes)

        # TODO: Perhaps we want to do this same
        A.write_to_stream(stream_id=StreamId(name="experiments_selected",
                                             meta_data=(('house', house), )),
                          data=StreamInstance(timestamp=utcnow(),
                                              value=list(experiment_ids)))

        w.create_factor(
            tool=hyperstream.channel_manager.get_tool(
                name="experiments_mapping_builder", parameters={}),
            sources=[N["experiments_list"], N["experiments_selected"]],
            sink=N["experiments_mapping"])

        w.create_multi_output_factor(tool=hyperstream.channel_manager.get_tool(
            name="sphere",
            parameters=dict(modality="wearable", elements={"rss"})),
                                     source=None,
                                     splitting_node=None,
                                     sink=N["rss_raw"])

        w.create_multi_output_factor(tool=hyperstream.channel_manager.get_tool(
            name="splitter_time_aware_from_stream",
            parameters=dict(meta_data_id="localisation-experiment")),
                                     source=N["rss_raw"],
                                     splitting_node=N["experiments_mapping"],
                                     sink=N["rss_time"])

        w.create_multi_output_factor(tool=hyperstream.channel_manager.get_tool(
            name="sphere",
            parameters=dict(modality="annotations",
                            annotators=[0],
                            elements={"Location"},
                            filters={})),
                                     source=None,
                                     splitting_node=None,
                                     sink=N["annotation_raw_locations"])

        w.create_multi_output_factor(tool=hyperstream.channel_manager.get_tool(
            name="splitter_time_aware_from_stream",
            parameters=dict(meta_data_id="localisation-experiment")),
                                     source=N["annotation_raw_locations"],
                                     splitting_node=N["experiments_mapping"],
                                     sink=N["annotation_time"])

        w.create_factor(tool=hyperstream.channel_manager.get_tool(
            name="sliding_window",
            parameters=dict(lower=-2.0, upper=0.0, increment=2.0)),
                        sources=None,
                        sink=N["every_2s"])

        w.create_factor(tool=hyperstream.channel_manager.get_tool(
            name="annotation_state_location", parameters=dict()),
                        sources=[N["every_2s"], N["annotation_time"]],
                        sink=N["annotation_state_location"])

        w.create_factor(tool=hyperstream.channel_manager.get_tool(
            name="aligning_window", parameters=dict(lower=-2.0, upper=0.0)),
                        sources=[N["annotation_state_location"]],
                        sink=N["annotation_state_2s_windows"])

        def component_wise_max(init_value=None,
                               id_field='aid',
                               value_field='wearable-rss'):
            if init_value is None:
                init_value = {}

            def func(data):
                result = init_value.copy()
                for (time, value) in data:
                    if value[id_field] in result:
                        result[value[id_field]] = max(result[value[id_field]],
                                                      value[value_field])
                    else:
                        result[value[id_field]] = value[value_field]
                return result

            return func

        w.create_factor(
            tool=hyperstream.channel_manager.get_tool(
                name="sliding_apply",
                parameters=dict(func=component_wise_max())),
            sources=[N["annotation_state_2s_windows"], N["rss_time"]],
            sink=N["rss_2s"])

        w.create_factor(tool=hyperstream.channel_manager.get_tool(
            name="aligned_merge",
            parameters=dict(names=["annotations", "rssi"])),
                        sources=[N["annotation_state_location"], N["rss_2s"]],
                        sink=N["merged_2s"])

        w.create_factor(tool=hyperstream.channel_manager.get_tool(
            name="aggregate_plate",
            parameters=dict(aggregation_meta_data="localisation-experiment")),
                        sources=[N["merged_2s"]],
                        sink=N["merged_2s_flat_" + experiment_ids_str])

        w.create_factor(tool=hyperstream.channel_manager.get_tool(
            name="dallan_data_frame_builder", parameters=dict()),
                        sources=[N["merged_2s_flat_" + experiment_ids_str]],
                        sink=N["dataframe_" + experiment_ids_str])

        w.create_factor(tool=hyperstream.channel_manager.get_tool(
            name="data_frame_to_csv_string", parameters=dict()),
                        sources=[N["dataframe_" + experiment_ids_str]],
                        sink=N["csv_string_" + experiment_ids_str])

        w.create_factor(tool=hyperstream.channel_manager.get_tool(
            name="r_rssi_comparison_plot",
            parameters=dict(
                output_path=hyperstream.config.output_path,
                filename_suffix="_rssi_comparison_plot_{}.pdf".format('_'.join(
                    map(str, experiment_indices))))),
                        sources=[N["csv_string_" + experiment_ids_str]],
                        sink=N["pdf_" + experiment_ids_str])

        #   w.create_factor(
        #        tool=hyperstream.channel_manager.get_tool(
        #             name="localisation_model_learn",
        #             parameters=dict(nan_value=-110.0)
        #         ),
        #         sources=[N["merged_2s_flat_"+experiment_ids_str]],
        #         sink=N["location_prediction_lda_"+experiment_ids_str])
        #
        return w
Example no. 25

from hyperstream import HyperStream, StreamId, StreamInstance, TimeInterval
from hyperstream.utils import utcnow, UTC
from datetime import timedelta
import os


os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), ".."))


if __name__ == '__main__':
    hs = HyperStream(loglevel=20)
    M = hs.channel_manager.memory
    T = hs.channel_manager.tools
    A = hs.channel_manager.assets
    test_assets = StreamId(name="test_assets")
    A.get_or_create_stream(test_assets)
    A.write_to_stream(test_assets, StreamInstance(utcnow(), {'a', 'b', 'c'}))
    print(list(A[test_assets].window().tail(5)))