def cmTreePlot(tree):
    """Split a combined interval tree by side and plot both halves together.

    Every interval whose payload has a non-``None`` ``R`` attribute is copied
    into one tree and every interval whose payload has a non-``None`` ``P``
    attribute into another; an interval carrying both ends up in both trees.
    The stored value is the source interval itself, not its payload. The two
    trees are then passed to ``plotJoinTree`` in ``(R, P)`` order.

    :param tree: iterable of intervals exposing ``begin``, ``end`` and a
        ``data`` object with ``R`` and ``P`` attributes.
    """
    p_side = IntervalTree()
    r_side = IntervalTree()
    for interval in tree:
        span = slice(interval.begin, interval.end)
        if interval.data.R is not None:
            r_side[span] = interval
        if interval.data.P is not None:
            p_side[span] = interval

    # Imported here, not at module level, so the plotting module is only
    # loaded when a plot is actually requested.
    from result_analyse.visualisation import plotJoinTree
    plotJoinTree(r_side, p_side)
def precompute(self, dataset):
    """Index the dataset's activity events by time span and list the activity ids.

    Sets ``self.a_events_tree`` to an ``IntervalTree`` holding one interval
    per row of ``dataset.a_events``, keyed by the ``.value`` of the row's
    ``StartTime`` and ``EndTime`` with the row itself as payload, and sets
    ``self.acts`` to ``[0, 1, ..., max(Activity)]``.

    :param dataset: object whose ``a_events`` supports ``iterrows()`` and has
        ``StartTime``, ``EndTime`` and ``Activity`` columns.
    """
    self.a_events_tree = IntervalTree()
    for _, act in dataset.a_events.iterrows():
        begin = act.StartTime.value
        end = act.EndTime.value
        if begin == end:
            # IntervalTree refuses empty intervals (begin == end) with a
            # ValueError; widen instantaneous events by one unit of
            # ``.value`` so they are still indexed.
            end = begin + 1
        self.a_events_tree[begin:end] = act
    self.acts = list(range(dataset.a_events.Activity.max() + 1))
def train(self, datasetdscr, data, acts):
    """Train one activity-weighted strategy per group and fuse their results.

    For every group returned by ``self.groupize`` a weight vector of ones
    (one entry per element of ``acts``) is built, the entries indexed by the
    group's members are set to ``self.alpha``, and a fresh ``SimpleStrategy``
    is trained on the full ``data`` with that weight vector. The group's
    result is ``bestOpt.result['result']`` when that key exists, otherwise
    the outcome of ``strategy.test(data)``. Strategy info is persisted with
    ``utils.saveState`` after each group and once more for all groups.

    :param datasetdscr: dataset description; its ``indx`` attribute is
        overwritten with the index of the group being trained.
    :param data: training data; ``data.a_events`` is forwarded to
        ``self.fusion`` as the reference events.
    :param acts: activities; group members are used as integer indices into
        a vector of length ``len(acts)``.
    :return: the value returned by
        ``self.fusion(train_results, data.a_events, True)``.
    """
    self.gacts = self.groupize(datasetdscr, acts)
    self.acts = acts
    self.strategies = {}
    self.acts_name = {}
    self.train_quality = {}
    train_results = {}

    for indx, tacts in enumerate(self.gacts):
        logger.info(
            "=======================working on activties %s=========", tacts)

        weight = np.ones(len(acts))
        for a in tacts:
            weight[a] = self.alpha

        datasetdscr.indx = indx
        strategy = ml_strategy.Simple.SimpleStrategy()
        # Registered before training so the strategy stays reachable through
        # self.strategies even if training raises.
        self.strategies[indx] = strategy
        strategy.train(datasetdscr, data, acts, weight)

        best = strategy.bestOpt.result
        if 'result' in best:
            result = best['result']
        else:
            result = strategy.test(data)

        utils.saveState(strategy.get_info(), 'wgroupact', 'n-%d' % indx)
        train_results[indx] = result

    utils.saveState(
        [strategy.get_info() for strategy in self.strategies.values()],
        'wgroupact', 'n-all')
    return self.fusion(train_results, data.a_events, True)
def train(self, datasetdscr, data, acts):
    """Train one strategy per activity group on a group-specific dataset and fuse.

    For every group returned by ``self.groupize`` the data is reduced with
    ``self.justifySet(group, data, False)``, the group's activity names are
    stored in ``self.acts_name``, and a fresh ``SimpleStrategy`` is trained
    on the reduced data with the local activity ids ``0 .. len(group) - 1``.
    The group's result is ``bestOpt.result['result']`` when that key exists,
    otherwise the outcome of ``strategy.test`` on the reduced data. Strategy
    info is persisted with ``utils.saveState`` after each group and once more
    for all groups.

    :param datasetdscr: dataset description; ``datasetdscr.activities`` is
        indexed with each group.
    :param data: training data; ``data.a_events`` is forwarded to
        ``self.fusion`` as the reference events.
    :param acts: activities handed to ``self.groupize`` and kept in
        ``self.acts``.
    :return: the value returned by
        ``self.fusion(train_results, data.a_events, True)``.
    """
    self.gacts = self.groupize(datasetdscr, acts)
    self.acts = acts
    self.strategies = {}
    self.acts_name = {}
    self.train_quality = {}
    train_results = {}

    for indx, tacts in enumerate(self.gacts):
        logger.info(
            "=======================working on activties %s=========", tacts)

        Tdata = self.justifySet(tacts, data, False)
        self.acts_name[indx] = datasetdscr.activities[tacts]

        strategy = ml_strategy.Simple.SimpleStrategy()
        # Registered before training so the strategy stays reachable through
        # self.strategies even if training raises.
        self.strategies[indx] = strategy
        strategy.train(datasetdscr, Tdata, list(range(len(tacts))))

        best = strategy.bestOpt.result
        if 'result' in best:
            result = best['result']
        else:
            result = strategy.test(Tdata)

        utils.saveState(strategy.get_info(), 'groupact', str(indx))
        train_results[indx] = result

    utils.saveState(
        [strategy.get_info() for strategy in self.strategies.values()],
        'groupact', 'all')
    return self.fusion(train_results, data.a_events, True)
def load_memory_mappings(avatar, target, forward=False, update=True):
    """
    Load memory maps from the specified target

    :param avatar: Object whose ``add_memory_range`` registers each mapping
                   and whose ``memory_ranges`` is replaced when ``update``
                   is True
    :param target: The GDBTarget whose mappings are queried
    :param forward: Enable forwarding of memory to that target
    :param update: If true, replaces avatars memory_ranges with the loaded ones
    :return: An Intervaltree object containing the mappings
    :raises TypeError: If ``target`` is not a GDBTarget
    """
    if not isinstance(target, GDBTarget):
        raise TypeError("The memory mapping can be loaded ony from GDBTargets")

    _ret, resp = target.protocols.execution.get_mappings()

    # The first four lines of the response are skipped (presumably a header
    # -- confirm against get_mappings()). Blank lines, e.g. after a trailing
    # newline, split into an empty list and are dropped instead of raising
    # IndexError.
    rows = [line.split() for line in resp.split("\n")[4:]]
    mappings = [
        {
            "start": int(x[0], 16),
            "end": int(x[1], 16),
            "size": int(x[2], 16),
            "offset": int(x[3], 16),
            # Rows with only four columns have no object name.
            # NOTE(review): such rows are registered with name=None --
            # confirm add_memory_range accepts that.
            "obj": x[4] if len(x) > 4 else None,
        }
        for x in rows
        if x
    ]

    memory_ranges = IntervalTree()
    for m in mappings:
        avatar.add_memory_range(
            m["start"],
            m["size"],
            name=m["obj"],
            forwarded=forward,
            forwarded_to=target if forward else None,
            interval_tree=memory_ranges,
        )

    if update is True:
        avatar.memory_ranges = memory_ranges
    return memory_ranges
def _calculate_activity(self):
    """Normalise the activity table and build its lookup structures.

    In order, this method:

    * sorts ``self.activity_events`` by ``StartTime`` then ``EndTime``;
    * prints, then sorts, ``self.activities`` and prepends the placeholder
      name ``'None'`` so that it gets index 0;
    * builds ``self.activities_map`` (index -> name) and
      ``self.activities_map_inverse`` (name -> index);
    * replaces the ``Activity`` column by the activity index and adds a
      ``Duration`` column (``EndTime - StartTime``);
    * fills ``self.activity_events_tree`` with one interval per event, keyed
      by the ``.value`` of its start and end, whose payload is a dict with
      ``StartTime``, ``EndTime`` and ``Activity``.
    """
    self.activity_events = self.activity_events.sort_values(
        ['StartTime', 'EndTime'])
    print(self.activities)
    self.activities.sort()
    # np.insert converts the inserted value to the dtype of the existing
    # array, so with fixed-width string names shorter than four characters
    # 'None' would be silently truncated. Concatenation promotes to a dtype
    # wide enough for both. np.ravel mirrors the flattening np.insert did.
    self.activities = np.concatenate((['None'], np.ravel(self.activities)))

    self.activities_map_inverse = {
        name: index for index, name in enumerate(self.activities)
    }
    self.activities_map = dict(enumerate(self.activities))

    self.activity_events.Activity = self.activity_events.Activity.apply(
        lambda x: self.activities_map_inverse[x])
    self.activity_events['Duration'] = (
        self.activity_events.EndTime - self.activity_events.StartTime)

    self.activity_events_tree = IntervalTree()
    for _, act in self.activity_events.iterrows():
        begin = act.StartTime.value
        end = act.EndTime.value
        if begin == end:
            # An empty interval cannot be stored in the tree; give
            # instantaneous events a width of one unit of ``.value``.
            end = begin + 1
        self.activity_events_tree[begin:end] = {
            'StartTime': act.StartTime,
            'EndTime': act.EndTime,
            'Activity': act.Activity,
        }
def combine2(self, times, act_data):
    """Turn per-window class scores into a table of non-overlapping events.

    Each window is assigned the class with the highest score. Windows of the
    same class that overlap (after padding every window's end by one second)
    are merged; the merged intervals of all classes are then split at every
    boundary, and where several classes cover the same piece the entry whose
    recorded ``EndTime`` is earliest is kept.

    :param times: sequence of windows; element ``[0]`` is the start and
        element ``[1]`` the end of each window.
    :param act_data: 2-D array-like of per-class scores, one row per window.
    :return: ``DataFrame`` with columns ``Activity``, ``StartTime`` and
        ``EndTime``, sorted by ``StartTime`` with a fresh integer index.
        ``StartTime``/``EndTime`` are the bounds of the resulting intervals,
        so ends include the one-second padding. Empty ``times`` yields an
        empty frame with the same columns.
    """
    columns = ['Activity', 'StartTime', 'EndTime']
    if len(times) == 0:
        # Nothing to combine: argmax/sort_values would fail on empty input.
        return pd.DataFrame(columns=columns)

    predicted = np.argmax(act_data, axis=1)
    epsilon = pd.to_timedelta('1s')

    # One interval tree per predicted class.
    ptree = {}
    for i, window in enumerate(times):
        start = window[0]
        end = window[1]
        pclass = predicted[i]
        if pclass not in ptree:
            ptree[pclass] = IntervalTree()
        # The end is padded so that back-to-back windows of the same class
        # overlap and get merged below.
        ptree[pclass][start:end + epsilon] = {
            'Activity': pclass,
            'StartTime': start,
            'EndTime': end
        }
        # NOTE: the original also carried a gap-filling step between
        # consecutive windows of the same class, but it was disabled with a
        # hard-coded ``and False`` and has been removed as unreachable.

    def datamerger(x, y):
        start = min(x['StartTime'], y['StartTime'])
        end = max(x['EndTime'], y['EndTime'])
        return {'Activity': x['Activity'], 'StartTime': start, 'EndTime': end}

    tree = IntervalTree()
    for class_tree in ptree.values():
        class_tree.merge_overlaps(data_reducer=datamerger)
        tree |= class_tree
    tree.split_overlaps()

    def data_reducer(x, y):
        # Of two classes covering the same piece keep the one ending first.
        if x['EndTime'] > y['EndTime']:
            return y
        return x

    tree.merge_equals(data_reducer=data_reducer)

    events = pd.DataFrame([
        {
            'Activity': inv.data['Activity'],
            'StartTime': inv.begin,
            'EndTime': inv.end
        }
        for inv in tree
    ])
    return events.sort_values(['StartTime']).reset_index(drop=True)
def merge_split_overlap_IntervalTree(p_acts, r_acts):
    """Merge two activity tables into one interval tree split at every boundary.

    Rows of ``p_acts`` and ``r_acts`` whose ``Activity`` is 0 are ignored.
    Every other row becomes an interval keyed by the ``.value`` of its
    ``StartTime``/``EndTime`` (a zero-length row is extended one unit to the
    left) whose payload is a ``Data`` object carrying the row as a dict in
    ``P`` (for ``p_acts``) or ``R`` (for ``r_acts``) and ``None`` in the other
    attribute. The tree is then split at every boundary, and pieces with
    identical bounds are merged into a single ``Data`` whose ``P`` and ``R``
    are, per side, the candidate with the earliest ``EndTime``.

    :param p_acts: table exposing ``.values`` rows with ``Activity``,
        ``StartTime`` and ``EndTime`` columns (presumably the predicted
        activities -- confirm with callers).
    :param r_acts: same layout as ``p_acts`` (presumably the real activities
        -- confirm with callers).
    :return: the resulting ``IntervalTree``.
    """
    tree = IntervalTree()

    def add_events(acts, label, side):
        """Insert every non-zero activity row of *acts* under attribute *side*."""
        act_col = column_index(acts, 'Activity')
        start_col = column_index(acts, 'StartTime')
        end_col = column_index(acts, 'EndTime')
        for row in acts.values:
            if row[act_col] == 0:
                continue
            start = row[start_col]
            end = row[end_col]
            startv = start.value
            endv = end.value
            if startv == endv:
                # Empty intervals cannot be stored; extend to the left.
                startv = startv - 1
            d = Data(label)
            d.P = None
            d.R = None
            setattr(d, side, {
                'Activity': row[act_col],
                'StartTime': start,
                'EndTime': end
            })
            tree[startv:endv] = d

    add_events(p_acts, 'P-act', 'P')
    add_events(r_acts, 'R-act', 'R')

    tree.split_overlaps()

    def data_reducer(x, y):
        # Build a fresh object: the pieces produced by split_overlaps share
        # their payload with sibling pieces, so x and y must not be mutated.
        res = Data('merge')
        res.R = x.R
        res.P = x.P
        if y.P is not None:
            if (res.P is None) or y.P['EndTime'] < res.P['EndTime']:
                res.P = y.P
        if y.R is not None:
            if (res.R is None) or y.R['EndTime'] < res.R['EndTime']:
                res.R = y.R
        return res

    tree.merge_equals(data_reducer=data_reducer)
    return tree
def __show_diff_scatter(self):
    """Save one scatter plot of the interval differences for every pair of tools.

    For each file in ``self.input`` the second and third columns are read as
    inclusive ``start``/``end`` coordinates and stored in an interval tree
    under the matching entry of ``self.toolnames``. The overall minimum start
    and maximum end are printed. For every pair of tools the symmetric
    difference of their trees is plotted (one point per interval, placed at
    the interval midpoint on both axes, marker size ``midpoint ** 0.27``) and
    saved as ``peaks_covdif_<tool1>_<tool2>``.

    NOTE: this method targets Python 2 (``xrange``) and a pandas version that
    still provides ``.ix`` (removed in pandas 1.0).
    """
    intervaltrees_dict = dict()
    min_coord = 1e10
    max_coord = -1
    for idx in xrange(len(self.input)):
        data = pandas.read_table(self.input[idx], skiprows=[0]).ix[:, 1:3]
        itree = IntervalTree()

        curmin = min(data.ix[:, 0])
        if curmin < min_coord:
            min_coord = curmin
        curmax = max(data.ix[:, 1])
        if curmax > max_coord:
            max_coord = curmax

        for line in xrange(len(data)):
            start = data.ix[line, 0]
            # +1 turns the inclusive end into the exclusive bound the tree
            # expects.
            end = data.ix[line, 1] + 1
            itree[start:end] = (start, end)
        intervaltrees_dict[self.toolnames[idx]] = itree

    # Same text as the former ``print min_coord, max_coord`` statement, in a
    # form that parses under both Python 2 and Python 3.
    print("%s %s" % (min_coord, max_coord))

    for out1, out2 in combinations(self.toolnames, 2):
        if out1 == out2:
            continue
        simm_diff = intervaltrees_dict[out1] ^ intervaltrees_dict[out2]
        tree_size = len(simm_diff)
        x = np.zeros(tree_size)
        s = np.zeros(tree_size)
        items = list(simm_diff.items())
        for i in xrange(tree_size):
            it = items[i]
            x[i] = it.begin + (it.end - it.begin) / float(2)
            s[i] = x[i]**0.27

        # Draw every pair on its own figure. Without this all pairs share the
        # implicit current figure, so each saved image would also contain the
        # points of every previously plotted pair.
        fig = plt.figure()
        plt.scatter(x, x, s=s, c='r', alpha=0.3, edgecolors='none')
        plt.ylabel('Genome coordinates')
        plt.xlabel('Genome coordinates')
        plt.title("Coverage difference for tools: " + out1 + " vs " + out2)
        plt.savefig("peaks_covdif_%s_%s" % (out1, out2))
        plt.close(fig)
def merge_split_overlap_IntervalTree(p_acts, r_acts):
    """Merge two activity lists into one interval tree split at every boundary.

    Every entry becomes an interval keyed by the ``.value`` of its
    ``StartTime``/``EndTime`` (a zero-length entry is extended one unit to the
    left) whose payload is a ``Data`` object holding the entry in ``P`` (for
    ``p_acts``) or ``R`` (for ``r_acts``) and ``None`` in the other attribute.
    The tree is then split at every boundary, and pieces with identical
    bounds are merged into a single ``Data`` whose ``P`` and ``R`` are, per
    side, the candidate with the earliest ``EndTime``.

    :param p_acts: iterable of mappings with ``Activity``, ``StartTime`` and
        ``EndTime``; entries whose ``Activity`` is 0 are skipped.
    :param r_acts: iterable of mappings with ``StartTime`` and ``EndTime``.
    :return: the resulting ``IntervalTree``.
    """
    tree = IntervalTree()

    for act in p_acts:
        if act['Activity'] == 0:
            continue
        start = act['StartTime'].value
        end = act['EndTime'].value
        if start == end:
            # Empty intervals cannot be stored; extend to the left.
            start = start - 1
        d = Data('P-act')
        d.P = act
        d.R = None
        tree[start:end] = d

    # NOTE(review): unlike the p_acts loop, r_acts entries with
    # Activity == 0 are NOT skipped here -- confirm this is intended.
    for act in r_acts:
        start = act['StartTime'].value
        end = act['EndTime'].value
        if start == end:
            start = start - 1
        # Was labelled 'P-act' as well, which looks like a copy-paste slip.
        d = Data('R-act')
        d.P = None
        d.R = act
        tree[start:end] = d

    tree.split_overlaps()

    def data_reducer(x, y):
        # Build a fresh object instead of mutating x: after split_overlaps
        # all pieces cut from one interval share the same payload, so
        # writing into x would leak the merged P/R into neighbouring pieces
        # that do not overlap the other side at all.
        res = Data('merge')
        res.P = x.P
        res.R = x.R
        if y.P is not None:
            if (res.P is None) or y.P['EndTime'] < res.P['EndTime']:
                res.P = y.P
        if y.R is not None:
            if (res.R is None) or y.R['EndTime'] < res.R['EndTime']:
                res.R = y.R
        return res

    tree.merge_equals(data_reducer=data_reducer)
    return tree
def fusion(self, results, real_events, isTrain):
    """Fuse the per-group classifier outputs with a small neural network.

    Steps:

    1. Segmentation -- every window of every group result is inserted into an
       interval tree (zero-length windows are skipped) and the tree is split
       at every boundary, so each resulting segment is covered by at most one
       window per group.
    2. Feature extraction -- each segment gets a feature row made of one
       block of ``len(self.acts)`` values per group, filled with that group's
       class probabilities. Blocks of groups whose training ``f1`` is below
       0.1 are left at zero. The segment label is the ``real`` class of the
       last group covering it.
    3. Training (only when ``isTrain``) -- a dense network is built, compiled
       and fitted on the features, using balanced class weights unless all
       labels are 0.
    4. Evaluation -- the model predicts a class per segment, and the
       predictions are turned into an event table and scored against
       ``real_events``.

    :param results: mapping from group index to that group's result object
        (uses ``Sdata.set_window``, ``Sdata.s_event_list``, ``Sdata.label``,
        ``predicted``, ``predicted_classes``, ``quality``, ``shortrunname``
        and ``functions``).
    :param real_events: reference events handed to
        ``event_confusion_matrix``.
    :param isTrain: when true, record each group's quality in
        ``self.train_quality`` and (re)build and fit ``self.fusion_model``.
    :return: ``Data('result')`` carrying ``results``, ``predicted``,
        ``predicted_classes``, ``shortrunname``, ``times``, ``pred_events``,
        ``real_events``, ``event_cm``, ``quality`` and ``functions``.
    """
    intree = IntervalTree()
    logger.info("\n=======================fusion activties ========")

    # Segmentation ###########################
    for indx in range(len(self.gacts)):
        result = results[indx]
        if isTrain:
            # Does not depend on the window, so it is set once per group.
            self.train_quality[indx] = result.quality
        for i, idx in enumerate(result.Sdata.set_window):
            start = result.Sdata.s_event_list[idx[0], 1]
            end = result.Sdata.s_event_list[idx[-1], 1]
            if end == start:
                continue
            d = Data(str(i))
            d.real = result.Sdata.label[i]
            d.pred = result.predicted_classes[i]
            d.pred_prob = result.predicted[i]
            d.gindx = indx
            intree[start:end] = d
    intree.split_overlaps()

    # Group the split pieces by their (begin, end) pair; each group index
    # contributes at most one entry per segment.
    segments = defaultdict(dict)
    for item in intree.items():
        seg = segments[item.begin.value << 64 | item.end.value]
        seg['begin'] = item.begin
        seg['end'] = item.end
        seg[item.data.gindx] = item.data

    # Feature Extraction ###########################
    n_groups = len(self.gacts)
    n_acts = len(self.acts)
    f = np.zeros((len(segments), n_groups * n_acts))
    label = np.zeros(len(segments))
    times = []
    for iseg, seg in enumerate(segments.values()):
        times.append({'begin': seg['begin'], 'end': seg['end']})
        for indx in range(n_groups):
            if indx not in seg:
                continue
            label[iseg] = seg[indx].real
            if self.train_quality[indx]['f1'] < 0.1:
                # Poorly trained group: leave its feature block at zero.
                continue
            f[iseg, indx * n_acts:(indx + 1) * n_acts] = seg[indx].pred_prob

    # TRAIN #######################
    if isTrain:
        inputsize = (f.shape[1], )
        outputsize = n_acts
        self.fusion_model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(128, input_shape=inputsize),
            tf.keras.layers.Dense(512, activation=tf.nn.relu),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(outputsize, activation=tf.nn.softmax)
        ], name='fusion')

        if np.max(label) == 0:
            cw = np.ones(n_acts)
        else:
            # Keyword arguments: newer scikit-learn releases no longer accept
            # ``classes`` and ``y`` positionally.
            cw = compute_class_weight("balanced", classes=self.acts, y=label)

        self.fusion_model.summary()
        self.fusion_model.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='acc')])
        # Keras documents class_weight as a {class index: weight} mapping;
        # position i of cw is the weight of class i, as before.
        self.fusion_model.fit(
            f, label, epochs=10, class_weight=dict(enumerate(cw)))

    # EVALUATE #######################
    result = Data('result')
    result.results = results
    result.predicted = self.fusion_model.predict(f)
    # argmax over the softmax output is what Sequential.predict_classes
    # computed for a multi-class model; doing it here avoids a second forward
    # pass and also works on TensorFlow versions that removed that method.
    result.predicted_classes = np.argmax(result.predicted, axis=-1)

    pred_events = pd.DataFrame([
        {
            'Activity': pclass,
            'StartTime': span['begin'],
            'EndTime': span['end']
        }
        for span, pclass in zip(times, result.predicted_classes)
    ])
    pred_events = pred_events.sort_values(['StartTime'])
    pred_events = pred_events.reset_index(drop=True)

    result.shortrunname = "fusion model" + str(
        {r: results[r].shortrunname for r in results})
    result.times = times
    result.pred_events = pred_events
    result.real_events = real_events
    result.event_cm = event_confusion_matrix(result.real_events,
                                             result.pred_events, self.acts)
    result.quality = CMbasedMetric(result.event_cm, 'macro')
    result.functions = {r: results[r].functions for r in results}
    logger.debug('Evalution quality is %s', result.quality)
    return result
def fusion(self, results, real_events, isTrain):
    """Fuse the per-group classifier outputs by (weighted) voting.

    Steps:

    1. Segmentation -- every window of every group result is inserted into an
       interval tree (zero-length windows are skipped) and the tree is split
       at every boundary, so each resulting segment is covered by at most one
       window per group.
    2. Voting -- for each segment the covering groups add to a score vector
       of length ``len(self.acts)`` according to ``self.mode``:

       * ``1``: the group's full probability vector, scaled by its training
         ``f1`` and divided by the number of groups;
       * ``2``: one vote for the group's most probable class;
       * anything else: the group's training ``f1`` for its most probable
         class.

       The predicted class of a segment is the argmax of its score vector.
    3. Evaluation -- predictions are turned into an event table and scored
       against ``real_events``.

    :param results: mapping from group index to that group's result object
        (uses ``Sdata.set_window``, ``Sdata.s_event_list``, ``Sdata.label``,
        ``predicted``, ``predicted_classes``, ``quality``, ``shortrunname``
        and ``functions``).
    :param real_events: reference events handed to
        ``event_confusion_matrix``.
    :param isTrain: when true, record each group's quality in
        ``self.train_quality``.
    :return: ``Data('result')`` carrying ``results``, ``predicted`` (the
        score matrix), ``predicted_classes``, ``shortrunname``, ``times``,
        ``pred_events``, ``real_events``, ``event_cm``, ``quality`` and
        ``functions``.
    """
    intree = IntervalTree()
    logger.info("\n=======================fusion activties ========")

    # Segmentation ###########################
    for indx in range(len(self.gacts)):
        result = results[indx]
        if isTrain:
            # Does not depend on the window, so it is set once per group.
            self.train_quality[indx] = result.quality
        for i, idx in enumerate(result.Sdata.set_window):
            start = result.Sdata.s_event_list[idx[0], 1]
            end = result.Sdata.s_event_list[idx[-1], 1]
            if end == start:
                continue
            d = Data(str(i))
            d.real = result.Sdata.label[i]
            d.pred = result.predicted_classes[i]
            d.pred_prob = result.predicted[i]
            d.gindx = indx
            intree[start:end] = d
    intree.split_overlaps()

    # Group the split pieces by their (begin, end) pair; each group index
    # contributes at most one entry per segment.
    segments = defaultdict(dict)
    for item in intree.items():
        seg = segments[item.begin.value << 64 | item.end.value]
        seg['begin'] = item.begin
        seg['end'] = item.end
        seg[item.data.gindx] = item.data

    # Voting ###########################
    n_groups = len(self.gacts)
    probs = np.zeros((len(segments), len(self.acts)))
    times = []
    for iseg, seg in enumerate(segments.values()):
        times.append({'begin': seg['begin'], 'end': seg['end']})
        for indx in range(n_groups):
            if indx not in seg:
                continue
            pred_prob = seg[indx].pred_prob
            if self.mode == 1:
                probs[iseg, :] += np.array(
                    pred_prob) * self.train_quality[indx]['f1'] / n_groups
            elif self.mode == 2:
                probs[iseg, np.argmax(pred_prob)] += 1
            else:
                probs[iseg, np.argmax(
                    pred_prob)] += self.train_quality[indx]['f1']
    plabel = np.argmax(probs, 1)

    # EVALUATE #######################
    result = Data('result')
    result.results = results
    result.predicted = probs
    result.predicted_classes = plabel

    pred_events = pd.DataFrame([
        {
            'Activity': pclass,
            'StartTime': span['begin'],
            'EndTime': span['end']
        }
        for span, pclass in zip(times, result.predicted_classes)
    ])
    pred_events = pred_events.sort_values(['StartTime'])
    pred_events = pred_events.reset_index(drop=True)

    result.shortrunname = "fusion model" + str(
        {r: results[r].shortrunname for r in results})
    result.times = times
    result.pred_events = pred_events
    result.real_events = real_events
    result.event_cm = event_confusion_matrix(result.real_events,
                                             result.pred_events, self.acts)
    result.quality = CMbasedMetric(result.event_cm, 'macro')
    result.functions = {r: results[r].functions for r in results}
    logger.debug('Evalution quality is %s', result.quality)
    return result