示例#1
0
def cmTreePlot(tree):
    """Split a merged interval tree by source and plot both parts together.

    Each interval of ``tree`` must carry a ``data`` object exposing ``R`` and
    ``P`` attributes. Intervals whose ``data.R`` is set are copied into one
    tree, intervals whose ``data.P`` is set into another (an interval with
    both set ends up in both), and the two trees are passed to
    ``plotJoinTree`` in that order.

    NOTE(review): ``R`` / ``P`` presumably stand for real and predicted
    activities -- confirm against the code that builds ``tree``.
    """
    with_r = IntervalTree()
    with_p = IntervalTree()
    for interval in tree:
        payload = interval.data
        if payload.R is not None:
            with_r[interval.begin:interval.end] = interval
        if payload.P is not None:
            with_p[interval.begin:interval.end] = interval

    # Imported only when a plot is actually requested.
    from result_analyse.visualisation import plotJoinTree
    plotJoinTree(with_r, with_p)
示例#2
0
    def precompute(self, dataset):
        """Index the activity events of ``dataset`` for later lookups.

        Sets two attributes on ``self``:

        * ``a_events_tree`` -- an ``IntervalTree`` with one interval per row
          of ``dataset.a_events``, spanning ``StartTime.value`` to
          ``EndTime.value`` and carrying the row itself as data.
        * ``acts`` -- the list ``[0, 1, ..., max(Activity)]``.

        :param dataset: object exposing ``a_events`` (presumably a pandas
            DataFrame -- it is iterated with ``iterrows``) with ``StartTime``,
            ``EndTime`` and ``Activity`` columns.
        """
        self.a_events_tree = IntervalTree()
        # NOTE(review): a row whose StartTime equals its EndTime yields an
        # empty interval, which intervaltree presumably rejects -- confirm
        # whether such rows can occur here.
        for _, act in dataset.a_events.iterrows():
            self.a_events_tree[act.StartTime.value:act.EndTime.value] = act

        self.acts = list(range(dataset.a_events.Activity.max() + 1))
示例#3
0
    def train(self, datasetdscr, data, acts):
        """Train one weighted strategy per activity group, then fuse them.

        The activities are partitioned by ``self.groupize``. For every group a
        ``SimpleStrategy`` is trained on the full ``data`` with a weight
        vector that is 1 everywhere except for the activities of the group,
        which get ``self.alpha``. Each strategy's info is persisted through
        ``utils.saveState`` and the per-group results are combined by
        ``self.fusion`` in training mode.

        :param datasetdscr: dataset description; its ``indx`` attribute is
            overwritten with the index of the group being trained.
        :param data: training data; ``data.a_events`` is passed to the fusion
            step as the real events.
        :param acts: activity ids; also used to size the weight vector, so
            every id in a group must be a valid index into it.
        :return: whatever ``self.fusion`` returns for the training results.
        """
        self.gacts = self.groupize(datasetdscr, acts)
        self.acts = acts
        self.strategies = {}
        self.acts_name = {}
        self.train_quality = {}
        train_results = {}

        for indx, tacts in enumerate(self.gacts):
            logger.info("=======================working on activties " +
                        str(tacts) + "=========")
            # Emphasise (or damp) only the activities of the current group.
            weight = np.ones(len(acts))
            for a in tacts:
                weight[a] = self.alpha
            datasetdscr.indx = indx

            strategy = ml_strategy.Simple.SimpleStrategy()
            self.strategies[indx] = strategy
            strategy.train(datasetdscr, data, acts, weight)
            # Reuse the result stored by the optimiser when there is one,
            # otherwise evaluate the freshly trained strategy.
            if 'result' in strategy.bestOpt.result:
                result = strategy.bestOpt.result['result']
            else:
                result = strategy.test(data)

            utils.saveState(strategy.get_info(), 'wgroupact',
                            'n-%d' % (indx))

            train_results[indx] = result

        utils.saveState(
            [self.strategies[key].get_info() for key in self.strategies],
            'wgroupact', 'n-all')
        return self.fusion(train_results, data.a_events, True)
示例#4
0
    def train(self, datasetdscr, data, acts):
        """Train one strategy per activity group, then fuse the results.

        The activities are partitioned by ``self.groupize``. For every group
        the data is first passed through ``self.justifySet`` and a
        ``SimpleStrategy`` is trained on the outcome, using the local class
        ids ``0 .. len(group) - 1``. Each strategy's info is persisted through
        ``utils.saveState`` and the per-group results are combined by
        ``self.fusion`` in training mode.

        :param datasetdscr: dataset description; ``datasetdscr.activities`` is
            indexed with each group to record the group's activity names in
            ``self.acts_name``.
        :param data: training data; ``data.a_events`` is passed to the fusion
            step as the real events.
        :param acts: activity ids to be grouped.
        :return: whatever ``self.fusion`` returns for the training results.
        """
        self.gacts = self.groupize(datasetdscr, acts)
        self.acts = acts
        self.strategies = {}
        self.acts_name = {}
        self.train_quality = {}
        train_results = {}

        for indx, tacts in enumerate(self.gacts):
            logger.info("=======================working on activties " +
                        str(tacts) + "=========")
            # NOTE(review): justifySet presumably restricts / relabels the
            # data to the activities of this group -- confirm in its source.
            Tdata = self.justifySet(tacts, data, False)

            self.acts_name[indx] = datasetdscr.activities[tacts]

            strategy = ml_strategy.Simple.SimpleStrategy()
            self.strategies[indx] = strategy
            strategy.train(datasetdscr, Tdata, list(range(len(tacts))))
            # Reuse the result stored by the optimiser when there is one,
            # otherwise evaluate the freshly trained strategy.
            if 'result' in strategy.bestOpt.result:
                result = strategy.bestOpt.result['result']
            else:
                result = strategy.test(Tdata)

            utils.saveState(strategy.get_info(), 'groupact', str(indx))

            train_results[indx] = result

        utils.saveState(
            [self.strategies[key].get_info() for key in self.strategies],
            'groupact', 'all')
        return self.fusion(train_results, data.a_events, True)
def load_memory_mappings(avatar, target, forward=False, update=True):
    """
    Load memory maps from the specified target
    :param avatar:  Object whose add_memory_range() is called once per
                    mapping; its memory_ranges attribute is replaced when
                    update is True
    :param target:  The GDBTarget whose mappings are queried
    :param forward: Enable forwarding of memory to that target
    :param update:  If true, replaces avatars memory_ranges with the loaded ones
    :return:        An Intervaltree object containing the mappings
    :raises TypeError: If target is not a GDBTarget
    """
    if not isinstance(target, GDBTarget):
        raise TypeError("The memory mapping can be loaded ony from GDBTargets")

    _, resp = target.protocols.execution.get_mappings()
    # The first four lines of the response are dropped (presumably the header
    # printed before the table -- confirm against the protocol output).
    rows = (line.split() for line in resp.split("\n")[4:])
    # Each remaining row is: start, end, size, offset (all hex) and the
    # backing object. Blank rows, e.g. from a trailing newline, are skipped
    # instead of failing with an IndexError.
    mappings = [{
        "start": int(x[0], 16),
        "end": int(x[1], 16),
        "size": int(x[2], 16),
        "offset": int(x[3], 16),
        "obj": x[4],
    } for x in rows if x]
    memory_ranges = IntervalTree()

    for m in mappings:
        avatar.add_memory_range(
            m["start"],
            m["size"],
            name=m["obj"],
            forwarded=forward,
            forwarded_to=target if forward else None,
            interval_tree=memory_ranges,
        )
    # Only the literal True triggers the replacement, as before.
    if update is True:
        avatar.memory_ranges = memory_ranges
    return memory_ranges
示例#6
0
 def _calculate_activity(self):
     """Normalise the activity table and build its interval index.

     Steps, all performed on attributes of ``self``:

     1. sort ``activity_events`` by ``StartTime`` then ``EndTime``;
     2. print, sort and prefix ``activities`` with ``'None'`` so that id 0
        is the ``'None'`` entry;
     3. build ``activities_map`` (id -> name) and
        ``activities_map_inverse`` (name -> id);
     4. replace the ``Activity`` column by the numeric ids and add a
        ``Duration`` column (``EndTime - StartTime``);
     5. fill ``activity_events_tree`` with one interval per event.
     """
     self.activity_events = self.activity_events.sort_values(
         ['StartTime', 'EndTime'])
     print(self.activities)
     self.activities.sort()
     self.activities = np.insert(self.activities, 0, 'None')

     self.activities_map_inverse = {
         name: idx
         for idx, name in enumerate(self.activities)
     }
     self.activities_map = dict(enumerate(self.activities))

     name_to_id = self.activities_map_inverse
     self.activity_events.Activity = self.activity_events.Activity.apply(
         lambda name: name_to_id[name])
     self.activity_events['Duration'] = (self.activity_events.EndTime -
                                         self.activity_events.StartTime)

     index = IntervalTree()
     self.activity_events_tree = index
     for _, act in self.activity_events.iterrows():
         begin = act.StartTime.value
         finish = act.EndTime.value
         if begin == finish:
             # Zero-length events are widened by one unit so that they
             # still occupy a non-empty interval.
             finish = begin + 1
         index[begin:finish] = {
             'StartTime': act.StartTime,
             'EndTime': act.EndTime,
             'Activity': act.Activity
         }
示例#7
0
    def combine2(self, times, act_data):
        """Turn per-window class scores into a table of activity events.

        The predicted class of window ``i`` is the argmax of ``act_data[i]``.
        Windows of the same class are merged when they overlap, the merged
        intervals of all classes are then split at every boundary, and
        segments that end up covered by several classes keep the candidate
        whose ``EndTime`` is the earliest (ties keep the first one seen).

        :param times: indexable of windows; ``times[i][0]`` is the start and
            ``times[i][1]`` the end of window ``i``.
        :param act_data: 2-D scores, one row per window, one column per class.
        :return: DataFrame with ``Activity``, ``StartTime`` and ``EndTime``
            columns sorted by ``StartTime`` with a fresh 0..n-1 index. The
            times are the boundaries of the final tree intervals, so they
            include the one-second extension added below.
        """
        predicted = np.argmax(act_data, axis=1)
        # Every window is stretched by one second at its end, presumably so
        # that back-to-back windows overlap and get merged -- confirm.
        epsilon = pd.to_timedelta('1s')

        # One interval tree per predicted class.
        ptree = {}
        for i in range(len(times)):
            start = times[i][0]
            end = times[i][1]
            pclass = predicted[i]

            if pclass not in ptree:
                ptree[pclass] = IntervalTree()
            ptree[pclass][start:end + epsilon] = {
                'Activity': pclass, 'StartTime': start, 'EndTime': end
            }
            # A gap-filling step between consecutive windows of the same
            # class used to live here behind an `and False` guard; it could
            # never run and has been dropped.

        def datamerger(x, y):
            # Merged intervals of one class span the union of both events.
            start = min(x['StartTime'], y['StartTime'])
            end = max(x['EndTime'], y['EndTime'])
            return {'Activity': x['Activity'], 'StartTime': start, 'EndTime': end}

        tree = IntervalTree()
        for class_tree in ptree.values():
            class_tree.merge_overlaps(data_reducer=datamerger)
            tree |= class_tree

        tree.split_overlaps()

        def data_reducer(x, y):
            # Among candidates for the same segment keep the one ending first.
            if x['EndTime'] > y['EndTime']:
                return y
            return x

        tree.merge_equals(data_reducer=data_reducer)

        events = pd.DataFrame([
            {'Activity': inv.data['Activity'], 'StartTime': inv.begin, 'EndTime': inv.end}
            for inv in tree
        ])
        events = events.sort_values(['StartTime'])
        events = events.reset_index()
        events = events.drop(['index'], axis=1)
        return events
示例#8
0
def merge_split_overlap_IntervalTree(p_acts, r_acts):
    """Align two activity tables on a common set of time segments.

    Every row of ``p_acts`` and ``r_acts`` whose ``Activity`` is not 0 is
    inserted into one interval tree (keyed by the ``.value`` of its
    ``StartTime`` / ``EndTime``). The tree is then split at every interval
    boundary and intervals with identical bounds are merged, so each
    resulting interval carries a ``Data`` object with:

    * ``P`` -- ``{'Activity', 'StartTime', 'EndTime'}`` of the ``p_acts``
      event covering the segment, or ``None``;
    * ``R`` -- the same for ``r_acts``.

    When several events of the same table cover a segment, the one with the
    earliest ``EndTime`` is kept.

    :param p_acts: table with ``Activity``, ``StartTime``, ``EndTime``
        columns (its ``.values`` rows are read via ``column_index``).
    :param r_acts: table with the same columns.
    :return: the merged ``IntervalTree``.
    """
    tree = IntervalTree()

    def insert_events(acts, label, slot):
        # Add every non-zero activity of `acts` to the tree, storing the
        # event under attribute `slot` ('P' or 'R') of a fresh Data object.
        act_col = column_index(acts, 'Activity')
        start_col = column_index(acts, 'StartTime')
        end_col = column_index(acts, 'EndTime')

        for row in acts.values:
            if row[act_col] == 0:
                continue
            start = row[start_col]
            end = row[end_col]
            startv = start.value
            endv = end.value
            if startv == endv:
                # Widen zero-length events by one unit so that they still
                # occupy a non-empty interval.
                startv = startv - 1
            d = Data(label)
            d.P = None
            d.R = None
            setattr(d, slot, {
                'Activity': row[act_col],
                'StartTime': start,
                'EndTime': end
            })
            tree[startv:endv] = d

    insert_events(p_acts, 'P-act', 'P')
    insert_events(r_acts, 'R-act', 'R')

    tree.split_overlaps()

    def data_reducer(x, y):
        # Combine two payloads of the same segment into a new object; for
        # each side keep the event that ends first.
        res = Data('merge')
        res.R = x.R
        res.P = x.P
        if y.P is not None:
            if (res.P is None) or y.P['EndTime'] < res.P['EndTime']:
                res.P = y.P
        if y.R is not None:
            if (res.R is None) or y.R['EndTime'] < res.R['EndTime']:
                res.R = y.R
        return res

    tree.merge_equals(data_reducer=data_reducer)

    return tree
示例#9
0
    def __show_diff_scatter(self):
        """Save one scatter plot per pair of tools showing where their intervals differ.

        For every file in ``self.input`` an interval tree is built from two
        of its columns and stored under the matching ``self.toolnames``
        entry. For every pair of tool names the ``^`` combination of the two
        trees is plotted and written to ``peaks_covdif_<tool1>_<tool2>``.

        NOTE(review): this block is Python 2 code (``xrange``, ``print``
        statement) and relies on ``DataFrame.ix``, which recent pandas
        releases no longer provide -- confirm the target environment before
        porting.
        """
        intervaltrees_dict = dict()
        # Running extremes over all input files; only printed below.
        min_coord = 1e10
        max_coord = -1

        for idx in xrange(len(self.input)):
            # Row 0 of the file is skipped; `.ix[:, 1:3]` keeps the interval
            # columns (presumably start and end coordinate -- confirm
            # against the input format).
            data = pandas.read_table(self.input[idx], skiprows=[0]).ix[:, 1:3]
            itree = IntervalTree()

            curmin = min(data.ix[:, 0])
            if curmin < min_coord:
                min_coord = curmin

            curmax = max(data.ix[:, 1])
            if curmax > max_coord:
                max_coord = curmax

            for line in xrange(len(data)):
                start = data.ix[line, 0]
                # One is added to the end value before it is used as the
                # upper bound; the stored tuple holds the incremented value.
                end = data.ix[line, 1] + 1
                itree[start:end] = (start, end)
            intervaltrees_dict[self.toolnames[idx]] = itree

        print min_coord, max_coord

        for out1, out2 in combinations(self.toolnames, 2):
            # Only differs from "always true" when toolnames has duplicates.
            if out1 != out2:
                # `^` on two trees: presumably the intervals present in
                # exactly one of them (symmetric difference) -- confirm.
                simm_diff = intervaltrees_dict[out1] ^ intervaltrees_dict[out2]
                tree_size = len(simm_diff)
                x = np.zeros(tree_size)
                s = np.zeros(tree_size)
                items = list(simm_diff.items())
                for i in xrange(tree_size):
                    it = items[i]
                    # x is the midpoint of the interval; the marker size s
                    # is derived from that coordinate.
                    x[i] = it.begin + (it.end - it.begin) / float(2)
                    s[i] = x[i]**0.27

                # Both axes use x, so every point lies on the diagonal.
                # NOTE(review): no figure is created or cleared between
                # pairs, so points of earlier pairs presumably remain on
                # later saved images -- confirm whether that is intended.
                plt.scatter(x, x, s=s, c='r', alpha=0.3, edgecolors='none')
                plt.ylabel('Genome coordinates')
                plt.xlabel('Genome coordinates')
                plt.title("Coverage difference for tools: " + out1 + " vs " +
                          out2)
                plt.savefig("peaks_covdif_%s_%s" % (out1, out2))
def merge_split_overlap_IntervalTree(p_acts, r_acts):
    """Align two event sequences on a common set of time segments.

    Every event of ``p_acts`` (except those with ``Activity == 0``) and every
    event of ``r_acts`` is inserted into one interval tree, keyed by the
    ``.value`` of its ``StartTime`` / ``EndTime``. The tree is then split at
    every interval boundary and intervals with identical bounds are merged,
    so each resulting interval carries a ``Data`` object whose ``P`` is the
    ``p_acts`` event covering the segment (or ``None``) and whose ``R`` is
    the ``r_acts`` event covering it (or ``None``). When several events of
    the same sequence cover a segment, the one with the earliest ``EndTime``
    is kept.

    :param p_acts: iterable of events supporting ``act['Activity']``,
        ``act['StartTime']`` and ``act['EndTime']``.
    :param r_acts: iterable of events supporting ``act['StartTime']`` and
        ``act['EndTime']``.
    :return: the merged ``IntervalTree``.
    """
    tree = IntervalTree()

    for act in p_acts:
        if act['Activity'] == 0:
            continue
        start = act['StartTime'].value
        end = act['EndTime'].value
        if start == end:
            # Widen zero-length events by one unit so that they still
            # occupy a non-empty interval.
            start = start - 1
        d = Data('P-act')
        d.P = act
        d.R = None
        tree[start:end] = d

    # NOTE(review): unlike p_acts, r_acts events with Activity == 0 are not
    # skipped -- confirm that this asymmetry is intended.
    for act in r_acts:
        start = act['StartTime'].value
        end = act['EndTime'].value
        if start == end:
            start = start - 1
        # Labelled 'R-act'; the previous 'P-act' label was a copy-paste slip.
        d = Data('R-act')
        d.P = None
        d.R = act
        tree[start:end] = d

    tree.split_overlaps()

    def data_reducer(x, y):
        # Build a new payload instead of mutating `x`: after the split the
        # pieces of one original interval share the same data object, so an
        # in-place update would leak into unrelated segments.
        res = Data('merge')
        res.P = x.P
        res.R = x.R
        if y.P is not None:
            if (res.P is None) or y.P['EndTime'] < res.P['EndTime']:
                res.P = y.P
        if y.R is not None:
            if (res.R is None) or y.R['EndTime'] < res.R['EndTime']:
                res.R = y.R
        return res

    tree.merge_equals(data_reducer=data_reducer)

    return tree
示例#11
0
    def fusion(self, results, real_events, isTrain):
        """Fuse the per-group predictions with a small dense network.

        1. Segmentation: the windows of every group result are put into one
           interval tree, which is split at every boundary so that all groups
           share the same time segments.
        2. Feature extraction: for each segment the class-probability vectors
           of the groups covering it are written side by side into one
           feature row; groups whose training f1 is below 0.1 are left at 0.
        3. Training (only when ``isTrain``): a new Keras model is fitted on
           the features and stored in ``self.fusion_model``.
        4. Evaluation: the model predicts a class per segment and the
           predictions are scored against ``real_events``.

        :param results: mapping group index -> result object exposing
            ``Sdata`` (``set_window``, ``s_event_list``, ``label``),
            ``predicted``, ``predicted_classes``, ``quality``,
            ``shortrunname`` and ``functions``.
        :param real_events: ground-truth events, stored on the returned
            object and passed to ``event_confusion_matrix``.
        :param isTrain: when true, ``self.train_quality`` is refreshed and the
            fusion model is (re)built and fitted; otherwise both must already
            exist from an earlier training call.
        :return: ``Data('result')`` with ``results``, ``predicted``,
            ``predicted_classes``, ``shortrunname``, ``times``,
            ``pred_events``, ``real_events``, ``event_cm``, ``quality`` and
            ``functions`` set.
        """
        intree = IntervalTree()
        logger.info("\n=======================fusion activties ========")
        # Segmentation ###########################
        for indx, _ in enumerate(self.gacts):
            result = results[indx]

            for i in range(0, len(result.Sdata.set_window)):
                idx = result.Sdata.set_window[i]
                # Column 1 of the first / last event of the window bounds
                # the window (presumably the event timestamp -- confirm).
                start = result.Sdata.s_event_list[idx[0], 1]
                end = result.Sdata.s_event_list[idx[-1], 1]
                rcls = result.Sdata.label[i]
                pcls = result.predicted_classes[i]
                fullprob = result.predicted[i]
                if end == start:
                    # Zero-length windows cannot be stored as intervals.
                    continue
                d = Data(str(i))
                d.real = rcls
                d.pred = pcls
                d.pred_prob = fullprob
                if isTrain:
                    self.train_quality[indx] = result.quality

                d.gindx = indx
                intree[start:end] = d

        intree.split_overlaps()
        segments = defaultdict(dict)
        for item in intree.items():
            # Pack both bounds into a single integer identifying the segment.
            key = (item.begin.value << 64) | item.end.value
            seg = segments[key]
            seg['begin'] = item.begin
            seg['end'] = item.end
            seg[item.data.gindx] = item.data

        # Feature Extraction ###########################
        nacts = len(self.acts)
        f = np.zeros((len(segments), len(self.gacts) * nacts))
        label = np.zeros(len(segments))
        times = []
        for iseg, seg in enumerate(segments.values()):
            times.append({'begin': seg['begin'], 'end': seg['end']})
            for indx in range(len(self.gacts)):
                if indx not in seg:
                    continue
                # The last group covering the segment decides its label.
                label[iseg] = seg[indx].real
                if self.train_quality[indx]['f1'] < 0.1:
                    # Groups that trained poorly contribute no features.
                    continue
                f[iseg, indx * nacts:(indx + 1) * nacts] = seg[indx].pred_prob

        # TRAIN #######################
        if isTrain:
            inputsize = (len(f[0]), )
            outputsize = len(self.acts)
            self.fusion_model = tf.keras.models.Sequential(
                [
                    tf.keras.layers.Dense(128, input_shape=inputsize),
                    tf.keras.layers.Dense(512, activation=tf.nn.relu),
                    tf.keras.layers.Dropout(0.2),
                    tf.keras.layers.Dense(outputsize,
                                          activation=tf.nn.softmax)
                ],
                name='fusion')
            if np.max(label) == 0:
                cw = np.ones(len(self.acts))
            else:
                # Keyword arguments: newer scikit-learn releases no longer
                # accept `classes` and `y` positionally.
                cw = compute_class_weight("balanced",
                                          classes=self.acts,
                                          y=label)

            self.fusion_model.summary()
            self.fusion_model.compile(
                optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=[
                    tf.keras.metrics.SparseCategoricalAccuracy(name='acc')
                ])
            # NOTE(review): `cw` is an array while Keras documents
            # `class_weight` as a dict {class index: weight} -- confirm that
            # the weights are actually applied.
            self.fusion_model.fit(f, label, epochs=10, class_weight=cw)

        # EVALUATE #######################
        result = Data('result')
        result.results = results
        result.predicted = self.fusion_model.predict(f)
        # argmax over the softmax output replaces `predict_classes`, which
        # newer Keras releases removed; it also avoids a second forward pass.
        result.predicted_classes = np.argmax(result.predicted, axis=-1)

        pred_events = pd.DataFrame([{
            'Activity': pclass,
            'StartTime': seg_time['begin'],
            'EndTime': seg_time['end']
        } for seg_time, pclass in zip(times, result.predicted_classes)])
        pred_events = pred_events.sort_values(['StartTime'])
        pred_events = pred_events.reset_index()
        pred_events = pred_events.drop(['index'], axis=1)

        result.shortrunname = "fusion model" + str(
            {r: results[r].shortrunname
             for r in results})
        result.times = times
        result.pred_events = pred_events
        result.real_events = real_events

        result.event_cm = event_confusion_matrix(result.real_events,
                                                 result.pred_events, self.acts)
        result.quality = CMbasedMetric(result.event_cm, 'macro')
        result.functions = {r: results[r].functions for r in results}
        logger.debug('Evalution quality is %s' % result.quality)

        return result
示例#12
0
    def fusion(self, results, real_events, isTrain):
        """Fuse the per-group predictions by (weighted) voting.

        1. Segmentation: the windows of every group result are put into one
           interval tree, which is split at every boundary so that all groups
           share the same time segments.
        2. Voting: for each segment the groups covering it add to a score
           vector over ``self.acts`` according to ``self.mode``:

           * ``1`` -- the group's probability vector scaled by its training
             f1 and divided by the number of groups;
           * ``2`` -- one vote for the group's most probable class;
           * anything else -- the group's training f1 added to its most
             probable class.

           The class with the highest score wins.
        3. Evaluation: the winning classes are scored against
           ``real_events``.

        :param results: mapping group index -> result object exposing
            ``Sdata`` (``set_window``, ``s_event_list``, ``label``),
            ``predicted``, ``predicted_classes``, ``quality``,
            ``shortrunname`` and ``functions``.
        :param real_events: ground-truth events, stored on the returned
            object and passed to ``event_confusion_matrix``.
        :param isTrain: when true, ``self.train_quality`` is refreshed from
            ``results``; otherwise the stored values are used.
        :return: ``Data('result')`` with ``results``, ``predicted`` (the
            score matrix), ``predicted_classes``, ``shortrunname``,
            ``times``, ``pred_events``, ``real_events``, ``event_cm``,
            ``quality`` and ``functions`` set.
        """
        intree = IntervalTree()
        logger.info("\n=======================fusion activties ========")
        # Segmentation ###########################
        for indx, _ in enumerate(self.gacts):
            result = results[indx]

            for i in range(0, len(result.Sdata.set_window)):
                idx = result.Sdata.set_window[i]
                # Column 1 of the first / last event of the window bounds
                # the window (presumably the event timestamp -- confirm).
                start = result.Sdata.s_event_list[idx[0], 1]
                end = result.Sdata.s_event_list[idx[-1], 1]
                rcls = result.Sdata.label[i]
                pcls = result.predicted_classes[i]
                fullprob = result.predicted[i]
                if end == start:
                    # Zero-length windows cannot be stored as intervals.
                    continue
                d = Data(str(i))
                d.real = rcls
                d.pred = pcls
                d.pred_prob = fullprob
                if isTrain:
                    self.train_quality[indx] = result.quality

                d.gindx = indx
                intree[start:end] = d

        intree.split_overlaps()
        segments = defaultdict(dict)
        for item in intree.items():
            # Pack both bounds into a single integer identifying the segment.
            key = (item.begin.value << 64) | item.end.value
            seg = segments[key]
            seg['begin'] = item.begin
            seg['end'] = item.end
            seg[item.data.gindx] = item.data

        # Voting ###########################
        nacts = len(self.acts)
        ngroups = len(self.gacts)
        probs = np.zeros((len(segments), nacts))
        times = []
        for iseg, seg in enumerate(segments.values()):
            times.append({'begin': seg['begin'], 'end': seg['end']})
            for indx in range(ngroups):
                if indx not in seg:
                    continue
                if self.mode == 1:
                    probs[iseg, :] += np.array(
                        seg[indx].pred_prob
                    ) * self.train_quality[indx]['f1'] / ngroups
                elif self.mode == 2:
                    vote = np.zeros(nacts)
                    vote[np.argmax(seg[indx].pred_prob)] = 1
                    probs[iseg, :] += vote
                else:
                    vote = np.zeros(nacts)
                    vote[np.argmax(
                        seg[indx].pred_prob)] = self.train_quality[indx]['f1']
                    probs[iseg, :] += vote
        plabel = np.argmax(probs, 1)

        # EVALUATE #######################
        result = Data('result')
        result.results = results
        result.predicted = probs
        result.predicted_classes = plabel

        pred_events = pd.DataFrame([{
            'Activity': pclass,
            'StartTime': seg_time['begin'],
            'EndTime': seg_time['end']
        } for seg_time, pclass in zip(times, result.predicted_classes)])
        pred_events = pred_events.sort_values(['StartTime'])
        pred_events = pred_events.reset_index()
        pred_events = pred_events.drop(['index'], axis=1)

        result.shortrunname = "fusion model" + str(
            {r: results[r].shortrunname
             for r in results})
        result.times = times
        result.pred_events = pred_events
        result.real_events = real_events

        result.event_cm = event_confusion_matrix(result.real_events,
                                                 result.pred_events, self.acts)
        result.quality = CMbasedMetric(result.event_cm, 'macro')
        result.functions = {r: results[r].functions for r in results}
        logger.debug('Evalution quality is %s' % result.quality)

        return result