def fw_init(self, datapath):
    parser = datapath.ofproto_parser
    ofproto = datapath.ofproto
    self.logger.info("FW Initialization started (dpid: %d)...", datapath.id)
    self.fileLoader = FileLoader()
    topology = self.fileLoader.getTopology()
    listofmatches = self.fileLoader.getFWRulesMatches(parser, datapath.id)
    self.logger.info("Topology loaded...\nFile with rules loaded...\nApplying %s rules...",
                     len(listofmatches))
    for match in listofmatches:
        self.add_flow(datapath, 5, self.IDLE_TIMEOUTS, self.HW_TABLE_ID, match,
                      [parser.OFPActionOutput(ofproto.OFPP_NORMAL)])
class ConverterTest(unittest.TestCase):
    def setUp(self):
        self.loader = FileLoader()
        self.converter = Converter()
        self.extractor = Extractor()
        self.loader.load_file("logs_old/JOURNAL00.TXT")
        self.converter.read_entries(self.loader.lines)

    def test_read_all_lines(self):
        self.assertTrue(len(self.converter.entries) > 0)
        self.assertEqual(len(self.converter.entries), len(self.loader.lines))

    def test_converted_entries_are_valid(self):
        for i in xrange(len(self.loader.lines)):
            self.extractor.extract_data(self.loader.lines[i])
            self.assertEqual(self.converter.entries[i].instrumentname, self.extractor.instrumentname)
            self.assertEqual(self.converter.entries[i].runnumber, self.extractor.runnumber)
            self.assertEqual(self.converter.entries[i].username, self.extractor.username)
            self.assertEqual(self.converter.entries[i].experimenttitle, self.extractor.experimenttitle)
            self.assertEqual(self.converter.entries[i].startdate, self.extractor.startdate)
            self.assertEqual(self.converter.entries[i].starttime, self.extractor.starttime)
            self.assertEqual(self.converter.entries[i].charge, self.extractor.charge)
    return False


if __name__ == "__main__":
    import multiprocessing
    from localization import Localization
    from planner import Planner
    from mapper import Mapper
    from file_loader import FileLoader
    from enums import Action, Sign
    import properties
    import time

    filename = properties.file_name
    f_loader = FileLoader()
    f_loader.read_map(filename)
    f_loader.generate_undirected_graph()
    f_loader.generate_directed_graph()
    f_loader.estimate_distances()

    location = f_loader.starts[0]
    goals = f_loader.goals
    max_col = f_loader.max_cols
    max_row = f_loader.max_rows
    graph = f_loader.directed_graph
    node_distance = f_loader.node_distance
    walls = f_loader.walls
    keys = f_loader.keys
    signals = {}
def __init__(self): train = 0 dev = 1 test = 0 load_processed_doc = 1 load_doc_from_pkl = 1 load_train_qs_from_pkl = 1 load_dev_qs_from_pkl = 1 load_test_qs_from_pkl = 1 train_sens_embedding = 0 tr = Trainer() tr.load_dummy() tr.run() self.data = Data() self.config = Config() self.fileLoader = FileLoader(self.config, self.data) self.bdp = BasicDataProcessorForTrain(self.config, self.data) self.fileLoader.load_doc() if load_processed_doc: if load_doc_from_pkl: with open(self.config.doc_processed_path, 'rb') as f: self.data.doc_processed = pickle.load(f) else: self.data.doc_processed = self.bdp.process_docs( self.data.doc_texts) with open(self.config.doc_processed_path, 'wb') as f: pickle.dump(self.data.doc_processed, f) if train: self.fileLoader.load_training_data() if load_train_qs_from_pkl: with open(self.config.train_qs_processed_path, 'rb') as f: self.data.train_qs_processed = pickle.load(f) else: self.data.train_qs_processed = self.bdp.preprocess_questions( self.data.train_questions) with open(self.config.train_qs_processed_path, 'wb') as f: pickle.dump(self.data.train_qs_processed, f) if train_sens_embedding: self.bdp.generate_training_embeddings() if dev: self.fileLoader.load_dev_data() if load_dev_qs_from_pkl: with open(self.config.dev_qs_processed_path, 'rb') as f: self.data.dev_qs_processed = pickle.load(f) else: self.data.dev_qs_processed = self.bdp.preprocess_questions( self.data.dev_questions) with open(self.config.dev_qs_processed_path, 'wb') as f: pickle.dump(self.data.dev_qs_processed, f) if test: self.fileLoader.load_test_data() if load_test_qs_from_pkl: with open(self.config.test_qs_processed_path, 'rb') as f: self.data.test_qs_processed = pickle.load(f) else: self.data.test_qs_processed = self.bdp.preprocess_questions( self.data.test_questions) with open(self.config.test_qs_processed_path, 'wb') as f: pickle.dump(self.data.test_qs_processed, f) tr = Trainer() tr.load_dummy() tr.run() dev_question_vectors, dev_qs = self.bdp.generate_dev_qs_embeddings() self.trn.predict_data(dev_question_vectors, dev_qs)
class SimpleSwitch13(app_manager.RyuApp): OFP_VERSIONS = [ofproto_v1_3.OFP_VERSION] _CONTEXTS = { 'wsgi' : WSGIApplication } #VARIABLES to set IDLE_TIMEOUTS = 180 #TODO Set idle_timeouts to 0 = infinity (180 only for testing purposes) DENY_RULES_IDLE_TIMEOUT = 30 #How long unallowed traffic will be blocked HW_TABLE_ID = 100 #Set id of the flow table (100 = HP switches) SWITCH_POLL_TIMER = 1 #How often are switches queried (in seconds) PACKET_HISTORY_BUFFER_SIZE = 10 #In seconds MAC_SPOOFPROT_MAX_PPS = 100 #Maximum number of packets, which can be sent per second flowtablesdict = {} #Flow Tables of all switches trafficdict = {} #DPIDS, Array of captured traffic - dicts #fileloader #datapathdict def __init__(self, *args, **kwargs): super(SimpleSwitch13, self).__init__(*args, **kwargs) self.mac_to_port = {} self.datapathdict = {} #for storing datapaths wsgi = kwargs['wsgi'] wsgi.register(SGController, {sg_controller_instance_name : self}) #Thread for periodic polling of information from switches switchPoll = SwitchPoll() pollThread = Thread(target=switchPoll.run, args=(self.SWITCH_POLL_TIMER,self.datapathdict)) pollThread.start() @set_ev_cls(ofp_event.EventOFPSwitchFeatures, CONFIG_DISPATCHER) def switch_features_handler(self, ev): datapath = ev.msg.datapath ofproto = datapath.ofproto parser = datapath.ofproto_parser self.datapathdict[datapath.id] = datapath # install table-miss flow entry # # We specify NO BUFFER to max_len of the output action due to # OVS bug. At this moment, if we specify a lesser number, e.g., # 128, OVS will send Packet-In with invalid buffer_id and # truncated packet data. In that case, we cannot output packets # correctly. The bug has been fixed in OVS v2.1.0. match = parser.OFPMatch() actions = [parser.OFPActionOutput(ofproto.OFPP_CONTROLLER, ofproto.OFPCML_NO_BUFFER)] action_normal = [parser.OFPActionOutput(ofproto.OFPP_NORMAL)] action_copy = [parser.OFPActionOutput(ofproto.OFPP_NORMAL), parser.OFPActionOutput(ofproto.OFPP_CONTROLLER)] #LLDP frames self.add_flow(datapath, 10, self.IDLE_TIMEOUTS, self.HW_TABLE_ID, parser.OFPMatch(eth_type=0x88cc), actions) #Hybrid SDN Config ----------------------------------------- #BDDP frames self.add_flow(datapath, 10, self.IDLE_TIMEOUTS, self.HW_TABLE_ID, parser.OFPMatch(eth_type=0x8999), actions) #ARP frames - send in normal self.add_flow(datapath, 10, self.IDLE_TIMEOUTS, self.HW_TABLE_ID, parser.OFPMatch(eth_type=2054), action_normal) #Deny everything else - send it to the controller self.add_flow(datapath, 1, self.IDLE_TIMEOUTS, self.HW_TABLE_ID, match, actions) self.fw_init(datapath) def fw_init(self, datapath): parser = datapath.ofproto_parser ofproto = datapath.ofproto self.logger.info("FW Initialization started (dpid: %d)...", datapath.id) self.fileLoader = FileLoader() topology = self.fileLoader.getTopology() listofmatches = self.fileLoader.getFWRulesMatches(parser, datapath.id) self.logger.info("Topology loaded... \nFile with rules loaded... 
\nApplying %s rules...", len(listofmatches)) for match in listofmatches: self.add_flow(datapath, 5, self.IDLE_TIMEOUTS, self.HW_TABLE_ID, match, [parser.OFPActionOutput(ofproto.OFPP_NORMAL)]) def add_flow(self, datapath, priority, idle_timeout, table_id, match, actions, buffer_id=None): ofproto = datapath.ofproto parser = datapath.ofproto_parser inst = [parser.OFPInstructionActions(ofproto.OFPIT_APPLY_ACTIONS, actions)] if buffer_id: mod = parser.OFPFlowMod(datapath=datapath, buffer_id=buffer_id, priority=priority, match=match, idle_timeout=idle_timeout, instructions=inst, table_id=table_id, flags=ofproto.OFPFF_SEND_FLOW_REM) else: mod = parser.OFPFlowMod(datapath=datapath, priority=priority, match=match, idle_timeout=idle_timeout, instructions=inst, table_id=table_id, flags=ofproto.OFPFF_SEND_FLOW_REM) datapath.send_msg(mod) self.logger.info("Added flow: %s, %s", datapath, match) @set_ev_cls(ofp_event.EventOFPPortStatus, MAIN_DISPATCHER) def _port_status_handler(self, ev): self.logger.info("F: OFPPortStatus Message Received! ") msg = ev.msg datapath = msg.datapath port = ev.msg.desc number = port.port_no reason = ev.msg.reason if port.state == 2: link_blocked_flg = 1 else: link_blocked_flg = 0 self.logger.info("F: Port: %s, reason: %s, blocked: %s, datapath: %s", number,reason,link_blocked_flg, datapath) if link_blocked_flg: self.logger.info("F: Link blocked. ") else: self.logger.info("F: Link not blocked. ") @set_ev_cls(ofp_event.EventOFPPacketIn, MAIN_DISPATCHER) def _packet_in_handler(self, ev): # If you hit this you might want to increase # the "miss_send_length" of your switch if ev.msg.msg_len < ev.msg.total_len: self.logger.debug("packet truncated: only %s of %s bytes", ev.msg.msg_len, ev.msg.total_len) msg = ev.msg datapath = msg.datapath ofproto = datapath.ofproto parser = datapath.ofproto_parser in_port = msg.match['in_port'] pkt = packet.Packet(msg.data) eth = pkt.get_protocols(ethernet.ethernet)[0] eth_vlan = pkt.get_protocols(vlan.vlan)[0] #self.logger.info("F: protocol: %s, %s", eth, eth_vlan) #test = packet.Packet(array.array('B', ev.msg.data)) #for p in test.protocols: # self.logger.info("Protocols: %s", p) allow_traffic = 0 allow_reason = "" if eth.ethertype == ether_types.ETH_TYPE_8021Q: #self.logger.info("F: Received 802.1Q frame!") if eth_vlan.ethertype == 2054: allow_reason = "ARP packet (in 802.1Q)... " #allow_traffic = 1 if eth_vlan.ethertype == 35020: self.logger.info("Received LLDP frame (in 802.1Q). Exiting... ") return if eth.ethertype == ether_types.ETH_TYPE_LLDP: self.logger.info("Received LLDP frame! Exiting... ") return #if eth.dst == '01:80:c2:00:00:0e': #self.logger.info("LLDP Multicast Destination found... ") #return dst = eth.dst src = eth.src dpid = datapath.id self.mac_to_port.setdefault(dpid, {}) #self.logger.info("packet in %s %s %s %s", dpid, src, dst, in_port) self.captureTraffic(ev) # learn a mac address to avoid FLOOD next time. 
self.mac_to_port[dpid][src] = in_port if dst in self.mac_to_port[dpid]: out_port = self.mac_to_port[dpid][dst] else: out_port = ofproto.OFPP_FLOOD actions = [parser.OFPActionOutput(out_port)] action_normal = [parser.OFPActionOutput(ofproto.OFPP_NORMAL)] # install a flow to avoid packet_in next time #if out_port != ofproto.OFPP_FLOOD: #match = parser.OFPMatch(in_port=in_port, eth_dst=dst, eth_src=src) # verify if we have a valid buffer_id, if yes avoid to send both # flow_mod & packet_out #if msg.buffer_id != ofproto.OFP_NO_BUFFER: #self.add_flow(datapath, 1, match, action_normal, msg.buffer_id) #return #else: #self.add_flow(datapath, 1, match, action_normal) data = None if msg.buffer_id == ofproto.OFP_NO_BUFFER: data = msg.data out = parser.OFPPacketOut(datapath=datapath, buffer_id=msg.buffer_id, in_port=in_port, actions=actions, data=data) if allow_traffic == 1: self.logger.info("Traffic allowed, reason: " + allow_reason) datapath.send_msg(out) elif out_port == ofproto.OFPP_FLOOD: self.logger.info("Flooding not allowed anymore... ") self.logger.info("Deny rule inserted to block this traffic... ") self.add_flow(datapath, 3, self.DENY_RULES_IDLE_TIMEOUT, self.HW_TABLE_ID, parser.OFPMatch (eth_dst = dst, eth_src = src, eth_type = eth_vlan.ethertype), []) #datapath.send_msg(out) else: self.logger.info("Traffic blocked by Controller... ") match = parser.OFPMatch(eth_dst = dst, eth_src = src, eth_type = eth_vlan.ethertype) self.add_flow(datapath, 3, self.DENY_RULES_IDLE_TIMEOUT, self.HW_TABLE_ID, match, []) def captureTraffic(self, ev): msg = ev.msg datapath = msg.datapath pkt = packet.Packet(msg.data) eth = pkt.get_protocols(ethernet.ethernet)[0] eth_type = eth.ethertype #if encapsulated in VLAN - 802.1Q if eth.ethertype == ether_types.ETH_TYPE_8021Q: eth_vlan = pkt.get_protocols(vlan.vlan)[0] eth_type = eth_vlan.ethertype capturedTraffic = {} #message = eth.src + " -> " + eth.dst + ", proto: " + str(eth.ethertype) capturedTraffic['eth_src'] = eth.src capturedTraffic['eth_dst'] = eth.dst capturedTraffic['eth_type'] = eth_type capturedTraffic['priority'] = 3 allTraffic = [] if datapath.id in self.trafficdict: allTraffic = self.trafficdict[datapath.id] if capturedTraffic in allTraffic: self.logger.info('Traffic already captured... ') return allTraffic.append(capturedTraffic) self.trafficdict[datapath.id] = allTraffic self.logger.info('New traffic captured... ') def deleteTraffic(self, dpid, matchString): if dpid in self.trafficdict: allTraffic = self.trafficdict[dpid] if matchString in allTraffic: self.logger.info('Deleting existing traffic' ) allTraffic.remove(deleted) self.trafficdict[dpid] = allTraffic return 1 return 0 @set_ev_cls(ofp_event.EventOFPFlowRemoved, MAIN_DISPATCHER) def flow_removed_handler(self, ev): self.logger.info('Flow_removed notification received... ') msg = ev.msg dp = msg.datapath ofp = dp.ofproto matchfields = msg.match #OFPMatch(oxm_fields={'eth_src': 'fa:16:3e:30:cc:04', 'eth_dst': 'fa:16:3e:57:f6:e8', 'eth_type': 2054}) eth_src = 0 eth_dst = 0 eth_type = 0 for the_key, value in matchfields.iteritems(): if the_key == "eth_src": eth_src = value if the_key == "eth_dst": eth_dst = value if the_key == "eth_type": eth_type = value deleted = {} deleted['eth_src'] = eth_src deleted['eth_dst'] = eth_dst deleted['eth_type'] = eth_type self.deleteTraffic(dp.id, deleted) @set_ev_cls(ofp_event.EventOFPFlowStatsReply, MAIN_DISPATCHER) def flow_stats_reply_handler(self, ev): self.logger.info('Flow_stats_reply received... 
') datapath = ev.msg.datapath flows = [] flowdict = {} for stat in ev.msg.body: flowdict = {} flowdict['table_id'] = stat.table_id flowdict['priority'] = stat.priority flowdict['duration_sec'] = stat.duration_sec flowdict['idle_timeout'] = stat.idle_timeout flowdict['packet_count'] = stat.packet_count flowdict['match'] = str(stat.match) flowdict['instructions'] = str(stat.instructions) matchdict = self.createMatchDict(str(stat.match)) flowdict['matchdict'] = matchdict match = stat.match #self.logger.info(match.OFPMatch) previous_flowdict = self.flow_exists(datapath.id, flowdict) if previous_flowdict != 0: flowdict = self.add_packet_count_history(previous_flowdict, flowdict) #self.logger.info('FlowHistory: %s', flowdict['packet_count_history']) flows.append(flowdict) #self.logger.info('FlowStats: %s', flows) self.flowtablesdict[datapath.id] = flows self.check_mac_spoofing(ev.msg) def flow_exists(self, dpid, newflowdict): if dpid in self.flowtablesdict: for flowdict in self.flowtablesdict[dpid]: if newflowdict['table_id'] == flowdict['table_id'] and newflowdict['priority'] == flowdict['priority'] and newflowdict['idle_timeout'] == flowdict['idle_timeout'] and newflowdict['match'] == flowdict['match'] and newflowdict['instructions'] == flowdict['instructions']: return flowdict #self.logger.info('No match found...') return 0 def add_packet_count_history(self, old_flow, new_flow): history = [] if 'packet_count_history' not in old_flow: history.append(new_flow['packet_count']) new_flow['packet_count_history'] = history return new_flow #Already exists packet_count_history = old_flow['packet_count_history'] if len(packet_count_history) >= self.PACKET_HISTORY_BUFFER_SIZE: packet_count_history.pop(0) packet_count_history.append(new_flow['packet_count']) new_flow['packet_count_history'] = packet_count_history return new_flow def check_mac_spoofing(self, msg): #TODO self.logger.info("Checking MAC address spoofing... ") for dpid in self.flowtablesdict: flows = self.flowtablesdict[dpid] for flow in flows: if 'match' in flow: match = flow['matchdict'] if 'packet_count_history' in flow: history = copy.deepcopy(flow['packet_count_history']) if len(history) >= 2: if history.pop() - history.pop() >= self.MAC_SPOOFPROT_MAX_PPS: self.logger.info("Number of PPS exceeded. 
Enabling MAC spoofing protection!") self.disable_flow(msg, match) #self.logger.info("Count: %s, match: %s ", count, match) def disable_flow(self, msg, match): match = copy.deepcopy(match) typehex = int(match['eth_type']) #typehex = '{0:x}'.format(int(typehex)) typehex = hex(typehex) self.logger.info("Match proto: %s", str(typehex)) match['eth_type'] = typehex self.deleteExistingRule(match) self.logger.info("Flow deleted") return 1 @set_ev_cls(ofp_event.EventOFPAggregateStatsReply, MAIN_DISPATCHER) def aggregate_stats_reply_handler(self, ev): body = ev.msg.body self.logger.info('AggregateStats: packet_count=%d byte_count=%d ' 'flow_count=%d', body.packet_count, body.byte_count, body.flow_count) def createMatchDict(self, matchstring): #sepparated = matchstring.split(" ") single = matchstring.replace("'","") #single = single.replace("'","") single = re.split('{|, | ',matchstring) #self.logger.info('---') matchdict = {} previous = "" for w in single: w = w.replace("'","") w = w.replace("}","") w = w.replace(")","") #self.logger.info('match: %s', w) if previous == "eth_dst:": matchdict['eth_dst'] = w if previous == "eth_src:": matchdict['eth_src'] = w if previous == "eth_type:": matchdict['eth_type'] = w previous = w #self.logger.info('---') return matchdict def getFlows(self, dpid): if int(dpid) not in self.flowtablesdict: self.logger.info('DPID not found... ' ) return 0 else: return self.flowtablesdict[int(dpid)] def getTraffic(self, dpid, allowed): traffic = [] flows = self.getFlows(dpid) if flows == 0: return 0 for flow in flows: if 'match' in flow: newMatchDict = flow['matchdict'] if 'packet_count_history' in flow: newMatchDict['packet_count_history'] = flow['packet_count_history'] else: newMatchDict['packet_count_history'] = 0 if allowed == 1 and flow['priority'] == 3: continue if allowed == 0 and flow['priority'] != 3: continue newMatchDict['priority'] = flow['priority'] traffic.append(newMatchDict) return traffic #For traffic visualization def getTrafficVis(self): traffic = [] for dpid in self.flowtablesdict: flows = self.flowtablesdict[dpid] for flow in flows: if 'match' in flow: newMatchDict = flow['matchdict'] if flow['priority'] == 3: continue newMatchDict['priority'] = flow['priority'] traffic.append(newMatchDict) return self.fileLoader.createVisualizationData(traffic) def deleteExistingRule(self, data): self.logger.info('New request for deleting FW rule received... %s', data['eth_type'] ) rule = self.fileLoader.createANewRule(data) for dpid in self.datapathdict: datapath = self.datapathdict[dpid] match = self.fileLoader.createMatch(rule, datapath.ofproto_parser, dpid) self.logger.info('Match: %s ', match ) if match == 0: continue else: self.deleteRule(datapath, match, int(rule.rulepriority)) if rule.ruletype == 2 and rule.dst != str('ff:ff:ff:ff:ff:ff'): rule = self.fileLoader.swapRuleSrcDst(rule) match = self.fileLoader.createMatch(rule, datapath.ofproto_parser, dpid) self.deleteRule(datapath, match, int(rule.rulepriority)) return 200 def setNewFWRule(self, data): self.logger.info('New FW rule received... ' ) rule = self.fileLoader.createANewRule(data) for dpid in self.datapathdict: datapath = self.datapathdict[dpid] match = self.fileLoader.createMatch(rule, datapath.ofproto_parser, dpid) self.logger.info('Destination MAC: %s', rule.dst) if match == 0: self.logger.info('Match couldnt be created! 
DPID: %s', dpid) continue else: self.deleteRule(datapath, match, 3) self.applyNewFWRule(datapath, match, int(rule.rulepriority)) if rule.ruletype == 2 and rule.dst != str('ff:ff:ff:ff:ff:ff'): rule2 = self.fileLoader.swapRuleSrcDst(rule) match2 = self.fileLoader.createMatch(rule2, datapath.ofproto_parser, dpid) self.deleteRule(datapath, match2, 3) self.applyNewFWRule(datapath, match2, int(rule.rulepriority)) else: self.logger.info('One way rule only or destination broadcast! ') return 200 def applyNewFWRule(self, datapath, match, priority): parser = datapath.ofproto_parser ofproto = datapath.ofproto self.add_flow(datapath, priority, self.IDLE_TIMEOUTS, self.HW_TABLE_ID, match, [parser.OFPActionOutput(ofproto.OFPP_NORMAL)]) self.logger.info('New FW rule applied... ' ) def deleteRule(self, datapath, match, priority): ofproto = datapath.ofproto parser = datapath.ofproto_parser #inst = [parser.OFPInstructionActions(ofproto.OFPIT_APPLY_ACTIONS,actions)] mod = parser.OFPFlowMod(datapath=datapath, match=match, priority = priority, table_id = self.HW_TABLE_ID, out_port = ofproto.OFPP_ANY, out_group = ofproto.OFPG_ANY, command = ofproto.OFPFC_DELETE) datapath.send_msg(mod) self.logger.info('Duplicated deny rule deleted... ' ) @set_ev_cls(ofp_event.EventOFPErrorMsg, [HANDSHAKE_DISPATCHER, CONFIG_DISPATCHER, MAIN_DISPATCHER]) def error_msg_handler(self, ev): msg = ev.msg self.logger.info('OFPErrorMsg received: type=0x%02x code=0x%02x ' 'message=%s', msg.type, msg.code, utils.hex_array(msg.data))
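# --- Illustration (not part of the controller above) -----------------------
# A minimal, standalone sketch of the rate check that check_mac_spoofing()
# performs on packet_count_history: counters are sampled once per
# SWITCH_POLL_TIMER second, and a flow is flagged when the difference between
# the two newest samples reaches MAC_SPOOFPROT_MAX_PPS. The constants mirror
# the class attributes above; the function name is hypothetical.
MAC_SPOOFPROT_MAX_PPS = 100   # maximum allowed packets per second
SWITCH_POLL_TIMER = 1         # seconds between FlowStats polls


def exceeds_pps_limit(packet_count_history):
    """Return True when the newest counter grew too fast over the previous one."""
    if len(packet_count_history) < 2:
        return False
    newest, previous = packet_count_history[-1], packet_count_history[-2]
    pps = (newest - previous) / float(SWITCH_POLL_TIMER)
    return pps >= MAC_SPOOFPROT_MAX_PPS


# Example: 40 pps is tolerated, 140 pps trips the protection.
assert exceeds_pps_limit([10, 50]) is False
assert exceeds_pps_limit([10, 150]) is True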
class TestLambda(unittest.TestCase):
    def setUp(self):
        with open('data/lambda/event1.json') as inf:
            self.event = json.load(inf)
        uri = 'bolt://localhost:7687'
        user = '******'
        password = os.environ['NEO_PASSWORD']
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        props = Props('../config/props-icdc.yml')
        self.schema = ICDC_Schema(['data/icdc-model.yml', 'data/icdc-model-props.yml'], props)
        config = BentoConfig('../config/config.ini')
        self.processor = FileLoader('', self.driver, self.schema, config,
                                    'ming-icdc-file-loader', 'Final/Data_loader/Manifests')
        self.loader = DataLoader(self.driver, self.schema)
        self.file_list = [
            "data/Dataset/COP-program.txt",
            "data/Dataset/NCATS-COP01-case.txt",
            "data/Dataset/NCATS-COP01-diagnosis.txt",
            "data/Dataset/NCATS-COP01_cohort_file.txt",
            "data/Dataset/NCATS-COP01_study_file.txt"
        ]

    def test_join_path(self):
        self.assertEqual(self.processor.join_path(), '')
        self.assertEqual(self.processor.join_path('abc'), 'abc')
        self.assertEqual(self.processor.join_path('/abc'), '/abc')
        self.assertEqual(self.processor.join_path('/abc/'), '/abc')
        self.assertEqual(self.processor.join_path('abd/def', 'ghi.zip'), 'abd/def/ghi.zip')
        self.assertEqual(self.processor.join_path('abd/def/', 'ghi.zip'), 'abd/def/ghi.zip')
        self.assertEqual(self.processor.join_path('abd/def//', '//ghi.zip'), 'abd/def/ghi.zip')
        self.assertEqual(self.processor.join_path('http://abd/def//', '//ghi.zip//'), 'http://abd/def/ghi.zip')

        # Test multiple paths joining
        self.assertEqual(self.processor.join_path('abd/def', 'xy/z', 'ghi.zip'), 'abd/def/xy/z/ghi.zip')
        self.assertEqual(self.processor.join_path('abd/def/', '/xy/z/', 'ghi.zip'), 'abd/def/xy/z/ghi.zip')
        self.assertEqual(self.processor.join_path('abd/def/', '///xy/z///', '///ghi.zip'), 'abd/def/xy/z/ghi.zip')

    def test_lambda(self):
        load_result = self.loader.load(self.file_list, True, False, 'upsert', False, 1)
        self.assertIsInstance(load_result, dict, msg='Load data failed!')
        self.assertTrue(self.processor.handler(self.event))
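# --- Illustration (not part of the test above) ------------------------------
# A hypothetical join_path() sketch that satisfies the assertions in
# test_join_path: segments are joined with single slashes, duplicate slashes
# are collapsed, a trailing slash is dropped, and a URL scheme such as
# "http://" is preserved. It is only an assumption about the real
# FileLoader.join_path, written from the expected values above.
import re


def join_path(*paths):
    segments = [p for p in paths if p]
    if not segments:
        return ''
    joined = '/'.join(segments)
    scheme = ''
    m = re.match(r'[A-Za-z][A-Za-z0-9+.-]*://', joined)
    if m:                             # keep "http://" out of the slash collapsing
        scheme = m.group(0)
        joined = joined[m.end():]
    joined = re.sub(r'/+', '/', joined).rstrip('/')
    return scheme + joined


assert join_path('abd/def//', '//ghi.zip') == 'abd/def/ghi.zip'
assert join_path('http://abd/def//', '//ghi.zip//') == 'http://abd/def/ghi.zip'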
def test_load_file(self):
    loader = FileLoader()
    try:
        loader.load_file("logs_old/JOURNAL00.TXT")
    except Exception:
        self.fail("Invalid File Path")
        if hipothesis_location in self.graph.nodes:
            if hipothesis_location in self.graph.edges[observed_location]:
                self.graph.edges[observed_location].remove(hipothesis_location)

        observed_location = (self.location[0], self.location[1] + observation, (orientation + 2) % 4)
        hipothesis_location = (self.location[0], self.location[1] + observation + 1, (orientation + 2) % 4)
        if observed_location in self.graph.edges[hipothesis_location]:
            self.graph.edges[hipothesis_location].remove(observed_location)


if __name__ == "__main__":
    from file_loader import FileLoader
    from time import sleep

    f_loader = FileLoader()
    f_loader.read_map('Mapas/With_Start/lab4.map')
    f_loader.generate_undirected_graph()
    f_loader.estimate_distances()

    location = f_loader.starts[0]
    goals = f_loader.goals
    max_col = f_loader.max_cols
    max_row = f_loader.max_rows

    mapper = Mapper(max_col, max_row, location, goals)
    mapper.init_map()
    print 'Location: ', mapper.location
    print 'Goals: ', mapper.goal
class TestGame(Widget): def init_collision_ids(self): self.ultrasound_count = 10 # self.collide_control = CollideControl(self.ultrasound_count) # self.collision_ids = self.collide_control.collision_ids self.collision_ids = { "wall": 1, "obstacle_rect": 2, "obstacle": 3, "asteroid": 5, "ultrasound_detectable": 0, "ultrasound": [50 + i for i in range(self.ultrasound_count)], "robot": 100, # let it be free after robot for num of robot create "candy": 42, } detected_names = ["wall", "obstacle", "obstacle_rect", "robot"] self.collision_ids["ultrasound_detectable"] = list( {self.collision_ids[name] for name in detected_names}) print("ultrasound_detectable") print(self.collision_ids["ultrasound_detectable"]) # ignore touch of user self.ignore_groups = [] self.ignore_groups.extend(self.collision_ids["ultrasound"]) # [ self.ignore_groups.append(self.collision_ids[key]) for key in ['robot']] def __init__(self, **kwargs): self.init_collision_ids() super(TestGame, self).__init__(**kwargs) self.gameworld.init_gameworld( [ "cymunk_physics", "poly_renderer", "rotate_poly_renderer", "rotate_renderer", #'steering_system' "rotate", "position", "cymunk_touch", ], callback=self.init_game, ) def info(self, text): self.app.info_text += "\n" + str(text) def init_game(self): # called automatically? probably self.pp = pprint.PrettyPrinter(indent=4) self.pprint = self.pp.pprint self.field_size = 800, 600 self.to_draw_obstacles = 0 self.robot = None self.robots = None self.setup_states() self.set_state() self.init_loaders() print("init_physicals") self.init_physicals() # self.init_space_constraints() self.init_properties_updater() self.init_control_logic() def init_control_logic(self): self.init_chase_candy_updater() def init_loaders(self): self.fl = FileLoader(self) def init_physicals(self): # self._entities = {} self.robot_names = ["dalek", "drWho", "k9", "kachna"] self.num_of_robots = len(self.robot_names) self.setup_collision_callbacks() self.entities = Entities(self.app) self.map = Map2D(self) self.asteroids = Asteroids(self) self.init_robots() def init_robots(self): self.robots = [ self.get_robot(name, i) for i, name in enumerate(self.robot_names) ] self.candy = Candy(self) def unused_load_robot_svg(self, robot): self.fl.load_svg(robot.path, self.gameworld) def add_robot(self): i = len(self.robots) name = f"robot_{i}" robot = self.get_robot(name, i) self.robots.append(robot) def get_robot(self, name, i): drive = "mecanum" us_count = 3 return Robot( root=self, drive=drive, robot_name=name, us_id_offset=i * us_count, robot_number=i, ) def toggle_robot_control(self, state): self.robot_controlled = state if not state: return for r in self.robots: r.add_state("INIT") r.reset_ultrasounds() def init_chase_candy_updater(self): for r in self.robots: r.chase_candy(self.candy) self.robot_controlled = False Clock.schedule_once(self.chase_candy_update) def chase_candy_update(self, dt): if self.robot_controlled: for r in self.robots: r.goto_target() Clock.schedule_once(self.chase_candy_update, 0.05) def draw_asteroids(self): self.asteroids.draw_asteroids() def setup_collision_callbacks(self): """Setup the correct collisions for the cymunk physics system manager. 
use the physics_system.add_collision_handler to define between which collision_ids the collision should happen and between which not Following handler functions are passed - begin_func - called once on collision begin - separate_func - called once on collision end """ physics_system = self.gameworld.system_manager["cymunk_physics"] def ignore_collision(na, nb): """Returns false to indicate ignoring the collision.""" return False # collide_remove_first # add robots us_detectable = self.collision_ids["ultrasound_detectable"] rob_collision_ids = [ self.collision_ids["robot"] + ct for ct in range(self.num_of_robots) ] us_detectable.extend(rob_collision_ids) self.begin_ultrasound_callback = {} # ignore_collision of ultrasound triangle with 0-1024 collision_ids # to enable the triangles to clip through other objects # ! this should be done on robot / on ultrasound creation for us_id in self.collision_ids["ultrasound"]: for index_id in range(1024): physics_system.add_collision_handler( index_id, us_id, begin_func=ignore_collision, separate_func=ignore_collision, ) # add ultrasound triangles object detection via collision # ! this should be done on robot / on ultrasound creation for us_id in self.collision_ids["ultrasound"]: for detectable in us_detectable: print("us_id", us_id) physics_system.add_collision_handler( detectable, us_id, begin_func=self.return_begin_ultrasound_callback( us_id, True), separate_func=self.return_begin_ultrasound_callback( us_id, False), ) for r_ct in rob_collision_ids: from pudb.remote import set_trace set_trace(term_size=(238, 54), host="0.0.0.0", port=6900) # noqa physics_system.add_collision_handler( self.collision_ids["candy"], r_ct, begin_func=self.begin_candy_callback, separate_func=self.begin_candy_callback, ) def candy_caught(self, robot_ent_id): print("candy eaten! 
by robot:", robot_ent_id) self.candy.reset_position() self.to_draw_obstacles = 2 def begin_candy_callback(self, space, arbiter): # self.r robot_ent_id = arbiter.shapes[1].body.data # us[us_id] = rob self.candy_caught(robot_ent_id) return False def get_robot_from_us_id(self, us_id): for r in self.robots: if r.is_this_us_mine(us_id): return r return None def get_robot_from_ent_id(self, robot_id): for r in self.robots: if r.ent == robot_id: return r return None def return_begin_ultrasound_callback(self, us_id, state): # this adds the segmentation fault on exit - but currently I am not able to simulate ultrasounds any other way than # returning def begin_ultrasound_callback(self, space, arbiter): # ent0_id = arbiter.shapes[0].body.data #detectable_object # ent1_id = arbiter.shapes[1].body.data #robot space.enable_contact_graph = True # print(space.bodies) ent0 = arbiter.shapes[0] e_id = ent0.body.data # a = ent0.body # print(len(arbiter.shapes)) con = arbiter.contacts # print(a) # print(dir(a)) # print(a.contact) rob_ent = arbiter.shapes[1].body.data if con is not None: r = self.get_robot_from_ent_id(rob_ent) # r = self.get_robot_from_us_id(us_id) r.ultrasound_detection(us_id, ent0, state) ent = self.gameworld.entities[e_id] cat = [ cat for cat, id_list in self.entities.items() if e_id in id_list ] # print('detect', cat, e_id) return False ind = 2 * us_id + int(state) self.begin_ultrasound_callback[ind] = types.MethodType( begin_ultrasound_callback, self) return self.begin_ultrasound_callback[ind] # return begin_ultrasound_callback def add_entity(self, ent, category): # add to entity counter print("added entity", category) if category not in self.entities.keys(): self.entities[category] = [] self.entities.add_item(category, ent) def set_robots_rand(self): for r in self.robots: r.set_random_position() def kick_robots(self): for r in self.robots: self.kick_robot(r) def kick_robot(self, r): rob_ent = r.ent print(rob_ent) rob_body = self.gameworld.entities[rob_ent].cymunk_physics.body im = (10000, 10000) seq = [-1, 1] imp = (choice(seq) * randint(*im), choice(seq) * randint(*im)) rob_body.apply_impulse(imp) print("impulse", imp) def init_entity( self, component_dict, component_order, category="default_category", object_info=None, ): if object_info is not None: category = object_info.get("category", category) else: object_info = {} ent = self.gameworld.init_entity(component_dict, component_order) # add to counter self.add_entity(ent, category) object_info.update({"ent": ent}) entity_info = object_info # print('@'*42) # self.pprint(entity_info) # print(Robot.cats, category in Robot.cats) # add to specific subobjects # if self.robot is not None: # self.robot.add_entity(entity_info) # if category == 'robot': # print('added robot') return ent def destroy_all_entities(self): self.destroy_entities() def destroy_entities(self, cat_list=None, skip_cat_list=None): for ent_cat, ent_list in self.entities.items(): delete = False if cat_list is None and skip_cat_list is None: delete = True else: if cat_list is None: if ent_cat not in skip_cat_list: delete = True else: if ent_cat in cat_list: delete = True if delete: prinf("Clearing entities of " + ent_cat) for ent in ent_list: self.destroy_created_entity(ent, 0) self.entities[ent_cat].clear() for r in self.robots: r.reset_ultrasounds() def destroy_created_entity(self, ent_id, dt): self.gameworld.remove_entity(ent_id) # def draw_some_stuff(self): # self.load_svg('objects.svg', self.gameworld) # self.load_svg('map.svg', self.gameworld) # self.map.draw_stuff() # 
self.load_svg('map.svg', self.gameworld) def draw_obstacles(self): self.map.draw_obstacles(5) def draw_rect_obstacles(self): self.map.draw_rect_obstacles(5) def update(self, dt): self.gameworld.update(dt) def setup_states(self): self.gameworld.add_state( state_name="main", systems_added=["poly_renderer"], systems_removed=[], systems_paused=[], systems_unpaused=["poly_renderer"], screenmanager_screen="main", ) def set_state(self): self.gameworld.state = "main" def init_properties_updater(self): Clock.schedule_once(self.update_properties) def update_properties(self, dt): self.app.ultrasound_status = "\n".join( [r.ultrasound_status() for r in self.robots]) self.app.robot_states = "\n\n".join( [str(r.states) for r in self.robots]) # self.app.robot_score = # self.r.reset_ultrasounds() if self.to_draw_obstacles > 0: self.map.draw_obstacles(self.to_draw_obstacles) self.to_draw_obstacles = 0 Clock.schedule_once(self.update_properties, 0.05)
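# --- Illustration (not part of the game class above) ------------------------
# The periodic updates above (chase_candy_update, update_properties) use a
# self-rearming Clock.schedule_once rather than schedule_interval: the
# callback does its work and then schedules itself again. A minimal standalone
# version of that pattern, with a hypothetical Ticker class:
from kivy.clock import Clock


class Ticker:
    def start(self):
        Clock.schedule_once(self.tick)

    def tick(self, dt):
        # ... periodic work would go here ...
        Clock.schedule_once(self.tick, 0.05)   # re-arm: roughly 20 updates/s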
def __init__(self, key):
    self.check_key_size(key)
    self.converter = Converter()
    self.file_loader = FileLoader()
    self.key = key        # 1 x 56 bits
    self.sub_keys = []    # 16 x 48 bits
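# --- Illustration (not part of the class above) ------------------------------
# A hypothetical sketch of the check_key_size() guard assumed by the
# constructor, under the assumption (taken from the comments above) that the
# key is handled as a 56-bit value supplied as a string of binary digits.
# The real key representation is not shown, so this is only an assumption.
def check_key_size(key):
    if len(key) != 56 or any(bit not in '01' for bit in key):
        raise ValueError('key must be a 56-bit binary string')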
def __init__(self): # switch of train, dev, test model train = 0 dev = 0 test = 1 # switch of loading data from pkl or reprocessing load_processed_doc = 1 load_doc_from_pkl = 1 # switch of testing BM25 accuracy test_BM25 = 0 self.data = Data() self.config = Config() self.fileLoader = FileLoader(self.config, self.data) self.bdp = BasicDataProcessor(self.config, self.data) self.bm25 = BM25(self.config, self.data) # not used ner tags, will merge them together with 'O' tag self.other = [ 'SET', "MISC", 'EMAIL', 'URL', 'TITLE', 'IDEOLOGY', 'CRIMINAL_CHARGE' ] self.fileLoader.load_doc() # load doc data if load_processed_doc: if load_doc_from_pkl: with open(self.config.doc_processed_path, 'rb') as f: self.data.doc_processed = pickle.load(f) else: self.data.doc_processed = self.bdp.process_docs( self.data.doc_texts) with open(self.config.doc_processed_path, 'wb') as f: pickle.dump(self.data.doc_processed, f) # load train data if train: self.fileLoader.load_training_data() if test_BM25: self.bm25.test_training_BM25_accuracy(10) return # predict answer # self.predict_with_bm25_pars_sents(0) self.predict_with_bm25_sents(0) # load dev data if dev: self.fileLoader.load_dev_data() if test_BM25: self.bm25.test_BM25_par_on_dev() return # predict answer self.predict_with_bm25_pars_sents(1) # self.predict_with_bm25_sents(1) # load test data if test: self.fileLoader.load_test_data() # predict answer # self.predict_with_bm25_pars_sents(2) self.predict_with_bm25_sents(2)
class RuleBasedQA: def __init__(self): # switch of train, dev, test model train = 0 dev = 0 test = 1 # switch of loading data from pkl or reprocessing load_processed_doc = 1 load_doc_from_pkl = 1 # switch of testing BM25 accuracy test_BM25 = 0 self.data = Data() self.config = Config() self.fileLoader = FileLoader(self.config, self.data) self.bdp = BasicDataProcessor(self.config, self.data) self.bm25 = BM25(self.config, self.data) # not used ner tags, will merge them together with 'O' tag self.other = [ 'SET', "MISC", 'EMAIL', 'URL', 'TITLE', 'IDEOLOGY', 'CRIMINAL_CHARGE' ] self.fileLoader.load_doc() # load doc data if load_processed_doc: if load_doc_from_pkl: with open(self.config.doc_processed_path, 'rb') as f: self.data.doc_processed = pickle.load(f) else: self.data.doc_processed = self.bdp.process_docs( self.data.doc_texts) with open(self.config.doc_processed_path, 'wb') as f: pickle.dump(self.data.doc_processed, f) # load train data if train: self.fileLoader.load_training_data() if test_BM25: self.bm25.test_training_BM25_accuracy(10) return # predict answer # self.predict_with_bm25_pars_sents(0) self.predict_with_bm25_sents(0) # load dev data if dev: self.fileLoader.load_dev_data() if test_BM25: self.bm25.test_BM25_par_on_dev() return # predict answer self.predict_with_bm25_pars_sents(1) # self.predict_with_bm25_sents(1) # load test data if test: self.fileLoader.load_test_data() # predict answer # self.predict_with_bm25_pars_sents(2) self.predict_with_bm25_sents(2) ''' extract wh word from questions return wh word if found otherwise return -1 ''' def extract_wh_word(self, words): for word in words: if word.lower() in self.config.WH_words or word.lower() == 'whom': return word return -1 ''' identify question types based on rules return ranked ner tags and classified type ''' def identify_question_type(self, wh, q_words): lower = self.bdp.lower_tokens(q_words) # open_words = self.dataProcessor.remove_stop_words(lower) raw_q_sent = ' '.join(lower) if 'rank' in raw_q_sent: return ['ORDINAL'], 'rank' elif 'average' in raw_q_sent: return ['NUMBER', 'MONEY'], 'average' elif wh == 'what': if 'what century' in raw_q_sent: return ['ORDINAL'], 'century' if 'what language' in raw_q_sent: return ['NATIONALITY'], 'language' if 'nationality' in raw_q_sent: return ['NATIONALITY', 'PERSON'], 'nationality' if 'length' in raw_q_sent: return ['NUMBER'], 'length' if 'what year' in raw_q_sent: return ['DATE'], 'year' if 'what date' in raw_q_sent: return ['DATE'], 'date' if 'what percent' in raw_q_sent or 'what percentage' in raw_q_sent: return ['PERCENT'], 'percentage' if 'number' in raw_q_sent: return ['NUMBER'], 'number' if 'in what place' in raw_q_sent: return ['ORDINAL'], 'order' if 'what country' in raw_q_sent: return ['COUNTRY'], 'country' if 'what city' in raw_q_sent: return ['STATE_OR_PROVINCE', 'CITY', 'LOCATION'], 'city' if 'what region' in raw_q_sent: return ['NATIONALITY'], 'region' if 'location' in raw_q_sent: return ['LOCATION'], 'place' if 'population' in raw_q_sent: return ['PERCENT', 'NUMBER'], 'population' if 'fraction' in raw_q_sent: return ['ORDINAL'], 'fraction' if 'what age' in raw_q_sent: return ['NUMBER'], 'age' if 'what decade' in raw_q_sent: return ['DATE'], 'decade' if 'temperature' in raw_q_sent: return ['NUMBER'], 'temperature' if 'abundance' in raw_q_sent: return ['PERCENT'], 'abundance' if 'capacity' in raw_q_sent: return ['NUMBER'], 'capacity' else: return ['O', 'OTHER', 'PERSON', 'LOCATION', 'NUMBER'], 'else' elif wh == 'when': return ['DATE', 'TIME', 'NUMBER'], 'time' elif 
wh == 'who' or wh == 'whom': return ['PERSON', 'ORGANIZATION', 'OTHER'], 'person' elif wh == 'where': if 'headquarter' in raw_q_sent or 'capital' in raw_q_sent: return ['CITY'], 'headquarter' return ['LOCATION', 'ORDINAL', 'OTHER'], 'location' elif wh == 'how': if 'old' in raw_q_sent or 'large' in raw_q_sent: return ['NUMBER'], 'number' elif 'how long' in raw_q_sent: return ['DURATION', 'NUMBER'], 'length' elif 'how far' in raw_q_sent or 'how fast' in raw_q_sent: return ['NUMBER', 'TIME', 'PERCENT'], 'length' elif 'how many' in raw_q_sent: return ['NUMBER'], 'times' elif 'how much money' in raw_q_sent: return ['MONEY', 'PERCENT', 'NUMBER'], 'money' elif 'how much' in raw_q_sent: return ['MONEY', 'PERCENT', 'NUMBER'], 'money' elif 'how tall' in raw_q_sent: return ['number'], 'tall' else: return ['O', 'NUMBER', 'LOCATION', 'PERSON', 'ORGANIZATION'], 'else' elif wh == 'which': if 'which language' in raw_q_sent: return ['NATIONALITY'], 'language' if 'which year' in raw_q_sent: return ['TIME', 'NUMBER'], 'year' if 'which country' in raw_q_sent: return ['COUNTRY'], 'country' if 'which city' in raw_q_sent: return ['CITY'], 'country' if 'place' in raw_q_sent or 'location' in raw_q_sent or 'site' in raw_q_sent: return ['LOCATION', 'ORGANIZATION', 'OTHER', 'PERSON'], 'place' if 'person' in raw_q_sent: return ['PERSON', 'ORGANIZATION', 'OTHER', 'LOCATION'], 'person' else: return ['O', 'OTHER', 'LOCATION', 'PERSON', 'NUMBER'], 'else' elif 'activism' in raw_q_sent or 'philosophy' in raw_q_sent or 'ideology' in raw_q_sent: return ['IDEOLOGY'], 'ideology' elif 'war' in raw_q_sent or 'blood' in raw_q_sent: return ['CAUSE_OF_DEATH'], 'war' else: return ['O', 'OTHER', 'LOCATION', 'PERSON', 'NUMBER'], 'else' def pred_answer_type(self, entities, qs_processed, possible_qs_type_rank, qs_type): # doubt!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
# remove doc entities appeared in the question not_in_qs_entities = self.remove_entity_in_qs(qs_processed, entities) # get entity dict by ner tags ner_type_to_entities_dict = self.get_ner_type_to_entities_dict( not_in_qs_entities) # get lemmatized eneities strings grouped_entities_strings_lemmatized = [ self.bdp.lemmatize_entity_name(tup[0]) for tup in entities ] if not possible_qs_type_rank: return -1, [] # iterate possible answer ner tag in likelihood order for type in possible_qs_type_rank: if len(ner_type_to_entities_dict[type]) != 0: assert ner_type_to_entities_dict[type] # get all this kind tag entities one_type_entities = ner_type_to_entities_dict[type] one_type_grouped_entities_strings = [ x[0] for x in one_type_entities ] # if the type is 'O', only get 'NN' pos tag entities if type == 'O': one_type_grouped_entities_strings = [ x[0] for x in pos_tag(one_type_grouped_entities_strings) if 'NN' in x[1] ] # distance between candidate answer entity to all question tokens # in the text distance = [] # candidate answer entity position possible_entity_pos = [] # position of question token in text qs_token_in_entity_pos = [] # position of question token in text for qs_token in qs_processed: if qs_token in grouped_entities_strings_lemmatized: for i in range( len(grouped_entities_strings_lemmatized)): entity_string = grouped_entities_strings_lemmatized[ i] if entity_string.lower() in qs_token: qs_token_in_entity_pos.append(i) # calculate distance between candidate answer entity to all question tokens # in the text for entity in one_type_grouped_entities_strings: for j in range(len(entities)): word = entities[j][0] if word.lower() == entity.lower(): sum_dist = 0 for k in qs_token_in_entity_pos: sum_dist += (abs(j - k)) distance.append(sum_dist) possible_entity_pos.append(j) break assert len(possible_entity_pos) == len(distance) if distance: # choose the entities with the minimum distance to the question tokens min_idx = np.argmin(distance) best_entity = one_type_grouped_entities_strings[min_idx] # if the question type is year, choose a 4-length-number entity with # minimum distance if qs_type == 'year': while len(best_entity) != 4 and len(distance) > 1: distance.remove(distance[min_idx]) min_idx = np.argmin(distance) best_entity = one_type_grouped_entities_strings[ min_idx] return best_entity.lower( ), one_type_grouped_entities_strings return best_entity.lower( ), one_type_grouped_entities_strings return -1, [] ''' combine neighbouring same kind of ner tag together except 'O' ''' def get_combined_entities(self, ner_par): entities = [] ner_group = [] prev_ner_type = '' for ner_tuple in ner_par: current_ner_type = ner_tuple[1] if not prev_ner_type: ner_group.append(ner_tuple) prev_ner_type = current_ner_type else: if current_ner_type == prev_ner_type: ner_group.append(ner_tuple) else: entities += self.process_combined_entity( ner_group, prev_ner_type) ner_group = [ner_tuple] prev_ner_type = current_ner_type entities += self.process_combined_entity(ner_group, prev_ner_type) return entities ''' combine neighbouring same kind of ner tag together except 'O' ''' def process_combined_entity(self, ner_group, ner_type): entities = [] if ner_type == 'O': for ner_tuple in ner_group: entities.append(ner_tuple) else: entity = [ner_tuple[0] for ner_tuple in ner_group] entity_item = [' '.join(entity), ner_type] entities.append(entity_item) return entities def remove_entity_in_qs(self, qs, entities): valid_entities = [] for entity in entities: entity_words = entity[0].split() for word in entity_words: word = 
word.lower() if self.bdp.lemmatize(word) not in qs: valid_entities.append(entity) break return valid_entities def get_ner_type_to_entities_dict(self, entities): ner_type_to_entities_dict = defaultdict(list) for entity in entities: ner_type = entity[1] ner_type_to_entities_dict[ner_type].append(entity) return ner_type_to_entities_dict ''' preprocess questions and return tokens ''' def preprocess_questions(self, raw_qs): # remove special characters raw_split = word_tokenize( raw_qs.replace("\u200b", '').replace("\u2014", '')) # remove pure punctuation tokens remove_pure_punc = [ token for token in raw_split if not self.bdp.is_pure_puncs(token) ] # remove punctuations within a token remove_punc_in_words = [ self.bdp.remove_punc_in_token(token) for token in remove_pure_punc ] lemmatized = self.bdp.lemmatize_tokens(remove_punc_in_words) return lemmatized ''' input string of text return processed combined ner tags ''' def ner_process(self, text): # get ner tags ner_par = self.bdp.nlp.ner(text) original_ner = [] for tup in ner_par: tup = list(tup) # change tags in 'OTHER' set to 'O' if tup[1] in self.other: tup[1] = 'O' # remove certain kind of punctuations ina token tup[0] = self.bdp.remove_punc_in_token_for_rule(tup[0]) original_ner.append(tup) # combine neighbouring same kind of ner tag together except 'O' original_ner = self.get_combined_entities(original_ner) # remove pure punctuation tokens original_ner = [ item for item in original_ner if not self.bdp.is_pure_puncs(item[0]) ] # remove stop word tokens original_ner = [ item for item in original_ner if item[0].lower() not in stopwords.words("english") ] return original_ner ''' predict answers by using bm25 finding answer sentence ''' def predict_with_bm25_sents(self, type): # count correctly predicted questions correct = 0 # count correctly predicted paragraphs correct_id = 0 # save already processed doc entities for reuse to improve performance doc_entity_temp = {} # save already separated sentences of docs doc_text_temp = {} doc_all = self.data.doc_texts qs_all = [] doc_id_all = [] answer_all = [] answer_par_id_all = [] if type == 0: # train qs_all = self.data.train_questions doc_id_all = self.data.train_doc_ids answer_all = self.data.train_answers answer_par_id_all = self.data.train_answer_par_ids fname = self.config.predict_train_output_path elif type == 1: # dev qs_all = self.data.dev_questions doc_id_all = self.data.dev_doc_ids answer_all = self.data.dev_answers answer_par_id_all = self.data.dev_answer_par_ids fname = self.config.predict_dev_output_path else: # test qs_all = self.data.test_questions doc_id_all = self.data.test_doc_ids test_ids = self.data.test_ids fname = self.config.predict_test_output_path total = int(len(qs_all)) with open(fname, 'wb') as csv_file: csv_writer = csv.writer(csv_file) if type == 0 or type == 1: csv_writer.writerow([ 'W/R', 'query', 'predicted_id_R/W', 'actual_id', 'predicted_answer', 'actual_answer', 'predicted_answer_type', 'predicated_candidates' ]) else: csv_writer.writerow(['id', 'answer']) for i in range(total): # for i in range(20): print(i, " / ", total) qs = qs_all[i] doc_id = doc_id_all[i] doc = doc_all[doc_id] if type == 0 or type == 1: answer = answer_all[i] answer_par_id = answer_par_id_all[i] # preprocess questions and return tokens qs_processed = self.preprocess_questions(qs) doc_processed = self.data.doc_processed[doc_id] # get doc entities saving in format of # [sentence...[entitiy...]] doc_entities = [] # get doc sentences saving in format of # [sentence1, sentence2..] 
doc_sents_text = [] if doc_id in doc_entity_temp: doc_entities = doc_entity_temp[doc_id] doc_sents_text = doc_text_temp[doc_id] else: # iterate paragraphs of that doc for par in doc: sents_text = sent_tokenize(par) doc_sents_text += sents_text # iterate sentences of the paragraph for sent in sents_text: doc_entities.append(self.ner_process(sent)) doc_entity_temp[doc_id] = doc_entities doc_text_temp[doc_id] = doc_sents_text # extract wh word wh = self.extract_wh_word(qs_processed) # identify answer ner tag ranks and question type possible_qs_type_rank, qs_type = self.identify_question_type( wh, qs_processed) pred_answer = 'unknown' # predicted answer predict_answer = 'unknown' # predicated answer ner tags answer_types = [] # predicated paragraph id pred_par_id = -1 # finded candidate answers candidate_answers = '' if possible_qs_type_rank: self.bm25.k1 = 1.2 self.bm25.b = 0.75 # tokenize sentences sent_tokens = self.bdp.preprocess_doc(doc_sents_text) # rank sentences based on bm25 scores bm25_sent_tokens_rank = self.bm25.sort_by_bm25_score( qs_processed, sent_tokens) bm25_sent_tokens_rank_ids = [ x[0] for x in bm25_sent_tokens_rank ] # iterate sentences from higher bm25 score to lower for sent_id in bm25_sent_tokens_rank_ids: # find a answer and candidate answers temp_answer, temp_candidate_answers = self.pred_answer_type( doc_entities[sent_id], qs_processed, possible_qs_type_rank, qs_type) # if find a answer, break out if temp_answer != -1: pred_answer = temp_answer answer_types = possible_qs_type_rank pred_sent_id = sent_id candidate_answers = '; '.join( temp_candidate_answers) break if type == 0 or type == 1: if pred_sent_id != -1: for par_id in range(len(doc)): if doc_sents_text[pred_sent_id] in doc[par_id]: pred_par_id = par_id break candidate_answers = '; '.join(temp_candidate_answers) types = ' '.join(answer_types) if pred_par_id == answer_par_id: correct_id += 1 if answer == pred_answer: csv_writer.writerow([ "##right##", qs, pred_par_id, answer_par_id, pred_answer, answer, types, candidate_answers ]) correct += 1 else: csv_writer.writerow([ "##wrong##", qs, pred_par_id, answer_par_id, pred_answer, answer, types, candidate_answers ]) print(answer, " ; ", pred_answer) # print "correct :", correct else: csv_writer.writerow([test_ids[i], pred_answer]) if type == 0 or type == 1: csv_writer.writerow( [str(correct), str(correct * 100.0 / total)]) csv_writer.writerow( [str(correct_id), str(correct_id * 100.0 / total)]) csv_writer.writerow([str(total)]) print(correct * 100.0 / total) print(correct_id * 100.0 / total) print("best : 19.470455279302552") ''' predict answers by using bm25 firstly finding answer paragraph then within that paragraph finding answer sentence ''' def predict_with_bm25_pars_sents(self, type): # count correctly predicted questions correct = 0 # count correctly predicted paragraphs correct_id = 0 # save already processed doc entities for reuse to improve performance doc_entity_temp = {} # save already separated doc sentences doc_text_temp = {} doc_all = self.data.doc_texts qs_all = [] doc_id_all = [] answer_all = [] answer_par_id_all = [] if type == 0: # train qs_all = self.data.train_questions doc_id_all = self.data.train_doc_ids answer_all = self.data.train_answers answer_par_id_all = self.data.train_answer_par_ids fname = self.config.predict_train_output_path elif type == 1: # dev qs_all = self.data.dev_questions doc_id_all = self.data.dev_doc_ids answer_all = self.data.dev_answers answer_par_id_all = self.data.dev_answer_par_ids fname = 
self.config.predict_dev_output_path else: # test qs_all = self.data.test_questions doc_id_all = self.data.test_doc_ids test_ids = self.data.test_ids fname = self.config.predict_test_output_path total = int(len(qs_all)) with open(fname, 'wb') as csv_file: csv_writer = csv.writer(csv_file) if type == 0 or type == 1: csv_writer.writerow([ 'W/R', 'query', 'predicted_id_R/W', 'actual_id', 'predicted_answer', 'actual_answer', 'predicted_answer_type', 'predicated_candidates' ]) else: csv_writer.writerow(['id', 'answer']) for i in range(total): # for i in range(20): print(i, " / ", total) qs = qs_all[i] doc_id = doc_id_all[i] doc = doc_all[doc_id] if type == 0 or type == 1: answer = answer_all[i] answer_par_id = answer_par_id_all[i] # preprocess questions and return tokens qs_processed = self.preprocess_questions(qs) doc_processed = self.data.doc_processed[doc_id] # get doc entities saving in format of # [paragraph...[sentence...[entitiy...]]] doc_entities = [] if doc_id in doc_entity_temp: doc_entities = doc_entity_temp[doc_id] else: # iterate paragraphs of that doc for par in doc: par_entities = [] sent_text = sent_tokenize(par) # iterate sentences of the paragraph for sent in sent_text: par_entities.append(self.ner_process(sent)) doc_entities.append(par_entities) doc_entity_temp[doc_id] = doc_entities # extract wh word wh = self.extract_wh_word(qs_processed) # identify answer ner tag ranks and question type possible_qs_type_rank, qs_type = self.identify_question_type( wh, qs_processed) # predicted answer predict_answer = 'unknown' # predicated answer ner tags answer_types = [] # predicated paragraph id pred_par_id = -1 # finded candidate answers candidate_answers = '' if possible_qs_type_rank: self.bm25.k1 = 1.2 self.bm25.b = 0.75 # rank paragraphs based on bm25 scores bm25_rank = self.bm25.sort_by_bm25_score( qs_processed, doc_processed) bm25_rank_par_ids = [x[0] for x in bm25_rank] # iterate paragraphs from higher bm25 score to lower for par_id in bm25_rank_par_ids: par_text = doc[par_id] sents_text = sent_tokenize(par_text) # tokenize sentences of the paragraph sent_tokens = self.bdp.preprocess_doc(sents_text) # rank sentences based on bm25 scores bm25_sent_tokens_rank = self.bm25.sort_by_bm25_score( qs_processed, sent_tokens) bm25_sent_tokens_rank_ids = [ x[0] for x in bm25_sent_tokens_rank ] # iterate sentences from higher bm25 score to lower for sent_id in bm25_sent_tokens_rank_ids: # find a answer and candidate answers temp_answer, temp_candidate_answers = self.pred_answer_type( doc_entities[par_id][sent_id], qs_processed, possible_qs_type_rank, qs_type) # if find a answer, break out if temp_answer != -1: predict_answer = temp_answer answer_types = possible_qs_type_rank pred_par_id = par_id candidate_answers = '; '.join( temp_candidate_answers) break # if find a answer, break out if temp_answer != -1: break if type == 0 or type == 1: types = ' '.join(answer_types) if pred_par_id == int(answer_par_id): correct_id += 1 if predict_answer == answer: csv_writer.writerow([ "##right##", qs, pred_par_id, answer_par_id, predict_answer, answer, types, candidate_answers ]) correct += 1 else: csv_writer.writerow([ "##wrong##", qs, pred_par_id, answer_par_id, predict_answer, answer, types, candidate_answers ]) print(predict_answer, " ; ", answer) # print "correct :", correct else: csv_writer.writerow([test_ids[i], predict_answer]) if type == 0 or type == 1: csv_writer.writerow( [str(correct), str(correct * 100.0 / total)]) csv_writer.writerow( [str(correct_id), str(correct_id * 100.0 / total)]) 
csv_writer.writerow([str(total)]) print(correct * 100.0 / total) print(correct_id * 100.0 / total) print("best : 19.470455279302552")
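# --- Illustration (not part of the predictor above) --------------------------
# The predictors above rank paragraphs/sentences with
# self.bm25.sort_by_bm25_score(query_tokens, doc_tokens) after setting
# k1 = 1.2 and b = 0.75. Below is a standalone sketch of what such a ranking
# is assumed to compute (textbook Okapi BM25 with a non-negative IDF variant);
# it is not the project's BM25 class.
import math
from collections import Counter


def sort_by_bm25_score(query_tokens, docs, k1=1.2, b=0.75):
    """Return (doc_index, score) pairs sorted best-first."""
    if not docs:
        return []
    avgdl = sum(len(d) for d in docs) / float(len(docs)) or 1.0
    df = Counter(t for d in docs for t in set(d))      # document frequency
    n = len(docs)
    ranking = []
    for idx, doc in enumerate(docs):
        tf = Counter(doc)                               # term frequency in this doc
        score = 0.0
        for t in query_tokens:
            if t not in tf:
                continue
            idf = math.log(1 + (n - df[t] + 0.5) / (df[t] + 0.5))
            norm = tf[t] + k1 * (1 - b + b * len(doc) / avgdl)
            score += idf * tf[t] * (k1 + 1) / norm
        ranking.append((idx, score))
    return sorted(ranking, key=lambda x: x[1], reverse=True)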
        converter_rule = ConverterRule(converter_expression)
        converter = converter_factory.create(from_type, converter_rule)
        result = converter.convert(value_from_xml)
    else:
        result = value_from_xml
    return result


def match_keys(attributes: list, config: dict):
    attributes_result: dict = dict()
    if attributes:
        attributes_dict: dict = config["attributes"]
        key_count = len(attributes_dict.keys())
        for i in range(0, key_count):
            key = list(attributes_dict.keys())[i]
            value_key = str(key).split("]")[1]
            # use the 'attributes' parameter rather than the module-level
            # 'attributes_list' so the function also works when imported
            if value_key in attributes:
                attributes_result[key] = attributes_dict[key]
        config["attributes"] = attributes_result


if __name__ == '__main__':
    fileLoader = FileLoader()
    fileStrContent = fileLoader.loadFile(sys.argv[1])
    attributes_list: list = str(sys.argv[2]).split(",")
    configObject = json.loads(fileStrContent)
    match_keys(attributes_list, configObject)
    m = Main(configObject)
    m.main()
import sys

from file_loader import FileLoader
from converter import Converter
from xml_outputter import XMLOutputter

loader = FileLoader()
converter = Converter()

try:
    path = sys.argv[1]
    loader.load_file("logs_old/" + path + ".txt")
except IOError:
    sys.exit("Invalid File Path - Usage: main.py <filename of file in logs_old> (no extension)")

converter.read_entries(loader.lines)
outputter = XMLOutputter(path)
for entry in converter.entries:
    outputter.write_line(entry)
outputter.output_to_file()
print "Saved under logs_new/" + path + ".xml"
def init_loaders(self):
    self.fl = FileLoader(self)
def setUp(self):
    self.loader = FileLoader()
    self.converter = Converter()
    self.extractor = Extractor()
    self.loader.load_file("logs_old/JOURNAL00.TXT")
    self.converter.read_entries(self.loader.lines)
class DynamicCallGraphMatrix(): def __init__(self, pathdir): self.conf = Configuration(pathdir) self.loader = FileLoader() self.total_pass_test_cases = 0 self.total_fail_test_cases = 0 self.total_runtime_of_pass_test_cases = 0 self.total_runtime_of_fail_test_cases = 0 self.test_suite_activity_matrix_density = 0 self.test_suite_matrix_diversity = 0 self.test_suite_matrix_uniqueness = 0 self.test_suite_matrix_ddu = 0 self.test_suite_matrix_sparsity = 0 self.test_suite_matrix_density = 0 self.dynamic_call_graph_metrics_list = [] self.dynamic_call_graph_metrics_math_data = [] self.dynamic_call_graph_metrics_list_chart = [] self.dynamic_call_graph_metrics_list_lang = [] self.dynamic_call_graph_metrics_list_math = [] self.dynamic_call_graph_metrics_list_time = [] self.dynamic_call_graph_metrics_list_closure = [] self.dynamic_call_graph_metrics_list_mockito = [] def get_ranking_by_spectra(self, projectName, bug_version, spectraLineNum): if spectraLineNum == None: return #print('spectraLineNum>' + str(spectraLineNum)) rankPath = os.path.join( self.conf.ROOT_PATH, 'suspiciousness_ranking/' + projectName + '_' + str(bug_version) + '.csv') with open(rankPath, 'r') as csvfile: readCSV = csv.reader(csvfile, delimiter=',') for lineno, row in enumerate(readCSV): if lineno == spectraLineNum: #print(row) return row[2] def get_featurenode_linenum_from_spectra(self, projectName, bug_version, faultMethodFeatureName): #print('project>' + projectName + ' id> ' + str(bug_version) + ' methodName> ' + faultMethodFeatureName) filepath = os.path.join( self.conf.ROOT_PATH, projectName + '/' + str(bug_version) + '/spectra') with open(filepath, 'r') as spectra: for num, line in enumerate(spectra, 1): if faultMethodFeatureName in line.strip(): return num def process_math_dynamic_metrics_from_call_graph(self): for subdir, dirs, files in sorted( os.walk(self.conf.MATH_CALL_GRPAH_PATH)): for file in sorted(files): filepath = os.path.join(subdir, file) print(filepath) buggy_version = re.findall('\d+', filepath) fname = file.replace(".dot", "") faultNodes = fname.strip() #print(faultNodes) #print('buggy_version> '+ str(buggy_version[0]) + ' dot file name: ' + fname) # load matrix file matrix_filepath = os.path.join( self.conf.ROOT_PATH, self.conf.MATH_ID + '/' + str(buggy_version[0]) + '/matrix') mat = self.loader.load_coverage_file(matrix_filepath) mat_arr = np.asarray(mat) #print(mat_arr.shape) pass_fail_col_arr = mat_arr[:, mat_arr.shape[1] - 1] test_case_result_col_arr = np.asarray(pass_fail_col_arr) unique_value, unique_counts = np.unique( test_case_result_col_arr, return_counts=True) #print(unique_value) #print(unique_counts) total_test_case_coverage_by_dynamic_call_graph = 0 # total test cases that cover/execute for this dynamic call graph getSpectraLineNum = self.get_featurenode_linenum_from_spectra( self.conf.MATH_ID, buggy_version[0], faultNodes) #print(str(getSpectraLineNum)) if getSpectraLineNum != None: fault_node_feature_col_arr = mat_arr[:, getSpectraLineNum - 1] for t in range(0, len(fault_node_feature_col_arr)): if fault_node_feature_col_arr[t] == 1: total_test_case_coverage_by_dynamic_call_graph += 1 cnt = 0 diffu_feature_modified_list = [] diffu_feature_added_list = [] #faultNodeInDegList = [] #if getSpectraLineNum != None: faultNodeInDegList = [] faultNodeOutDegList = [] faultClass_CBO = 0 faultClass_RFC_List = [] faultClass_RFC = 0 # dot file edge calculation begin> node_split = faultNodes.split('#', 1) faultNodeClassName = node_split[0] file = open(filepath, 'r') #READING DOT FILE with open(filepath, 
'r') as file: text = file.readlines() for row in text: #print(row) line_split = row.split('->', 1) #print(len(line_split)) if len(line_split) > 1: sourceNode = line_split[0].strip().replace('"', '') #sourceNode #print(sourceNode) dNode = line_split[1].strip().replace('"', '') destNode = dNode.replace(";", "") #print(destNode) if sourceNode.strip() == faultNodes: #print(sourceNode) faultNodeOutDegList.append(destNode) if destNode.strip() == faultNodes: #print(destNode) faultNodeInDegList.append(sourceNode) # calculate CBO if faultNodeClassName in sourceNode or faultNodeClassName in destNode: faultClass_CBO += 1 #print(e.to_string()) if faultNodeClassName in sourceNode and faultNodeClassName in destNode: faultClass_CBO -= 1 #calculate RFC if faultNodeClassName in sourceNode: if sourceNode not in faultClass_RFC_List: faultClass_RFC_List.append(sourceNode) if faultNodeClassName in destNode: if destNode not in faultClass_RFC_List: faultClass_RFC_List.append(destNode) #ource(text) #graph = pydotplus.Graph(filepath) #print(graph.to_string()) #edgeList = graph.get_edges() #print(edgeList) #nodeList = graph.get_nodes() #print(nodeList) outDegCount = len(faultNodeOutDegList) inDegCount = len(faultNodeInDegList) faultClass_RFC = len(faultClass_RFC_List) + outDegCount no_of_test_cases_covers_fault_node = 0 no_of_test_cases_passes_for_fault_node = 0 no_of_test_cases_fails_for_fault_node = 0 print('OutDeg> ' + str(outDegCount) + ' InDeg> ' + str(inDegCount) + ' CBO> ' + str(faultClass_CBO) + ' RFC> ' + str(faultClass_RFC)) if outDegCount != 0 and inDegCount != 0: #print('OutDeg> ' + str(outDegCount) + ' InDeg> ' + str(inDegCount) + ' CBO> ' + str(faultClass_CBO) + ' RFC> ' + str(faultClass_RFC)) # code for matrix file getSpectraLineNum = self.get_featurenode_linenum_from_spectra( self.conf.MATH_ID, buggy_version[0], faultNodes) rank = self.get_ranking_by_spectra(self.conf.MATH_ID, buggy_version[0], getSpectraLineNum) #print(str(getSpectraLineNum)) if getSpectraLineNum != None: fault_node_feature_col_arr = mat_arr[:, getSpectraLineNum - 1] # fault node test coverage pass/fail info calculation for t in range(0, len(fault_node_feature_col_arr)): if fault_node_feature_col_arr[t] == 1: no_of_test_cases_covers_fault_node += 1 if pass_fail_col_arr[t] == 1: no_of_test_cases_passes_for_fault_node += 1 else: no_of_test_cases_fails_for_fault_node += 1 #print('Total test cases for fn>', no_of_test_cases_covers_fault_node) #print('pass for fn>', no_of_test_cases_passes_for_fault_node) #print('fail for fn>', no_of_test_cases_fails_for_fault_node) output_row = [ self.conf.MATH_ID, # + '_' + str(buggy_version), int(buggy_version[0]), faultNodes, rank, inDegCount, outDegCount, inDegCount + outDegCount, faultClass_CBO, faultClass_RFC, no_of_test_cases_covers_fault_node, no_of_test_cases_passes_for_fault_node, no_of_test_cases_fails_for_fault_node, total_test_case_coverage_by_dynamic_call_graph, "/".join(faultNodeInDegList), "/".join(faultNodeOutDegList) ] diffu_feature_modified_list.append(output_row) else: #print('OutDeg> ' + str(outDegCount) + ' InDeg> ' + str(inDegCount) + ' CBO> ' + str(faultClass_CBO) + ' RFC> ' + str(faultClass_RFC)) impl_action = 'Unknown' output_row = [ self.conf.MATH_ID, int(buggy_version[0]), impl_action, 9999, 0, 0, 0, 0, 0, 0, 0, 0, 0, '', '' ] diffu_feature_added_list.append(output_row) if len(diffu_feature_modified_list) > 0: self.dynamic_call_graph_metrics_list_math.append( diffu_feature_modified_list[0]) else: self.dynamic_call_graph_metrics_list_math.append( diffu_feature_added_list[0]) 
print( '============== Printing ' + self.conf.MATH_ID + ' Projects Dynamic Call Graph Metrics Features =================') output_sorted_list = sorted(self.dynamic_call_graph_metrics_list_math, key=itemgetter(1)) self.dynamic_call_graph_metrics_math_data.extend(output_sorted_list) #print(output_sorted_list) print('============== Finished Printing ' + self.conf.MATH_ID + ' Projects Dynamic Call Graph Metrics Version ================') def print_math_dynamic_call_graph_metrics(self): result_arr = np.array(self.dynamic_call_graph_metrics_math_data) #print(result_arr.shape) with open(self.conf.ROOT_PATH + 'dynamic_call_graph_math_data.csv', 'w') as csvfile: # PassedTestRuntime, FailedTestRuntime, columnTitleRow = 'ProjectID, '\ 'BugId, '\ 'FaultNodeName, '\ 'Rank, '\ 'FaultNode_InDegree, '\ 'FaultNode_OutDegree, '\ 'FaultNodeDegreeCentrality, '\ 'CBO, '\ 'RFC, '\ 'NoOfTestCasesExecuteFaultMethod, '\ 'NoOfTestCasesPassesCoversFaultNode, '\ 'NoOfTestCasesFailsCoversFaultNode, '\ 'NoOfTestCasesExecutesDynamicCallGraph, '\ 'InDegreeMethodCallsList, '\ 'OutDegreeMethodCallsList\n' csvfile.write(columnTitleRow) for i in range(0, result_arr.shape[0]): row = (str(result_arr[i][0]) + ', ' + str(result_arr[i][1]) + ', ' + str(result_arr[i][2]) + ', ' + str(result_arr[i][3]) + ', ' + str(result_arr[i][4]) + ', ' + str(result_arr[i][5]) + ', ' + str(result_arr[i][6]) + ', ' + str(result_arr[i][7]) + ', ' + str(result_arr[i][8]) + ', ' + str(result_arr[i][9]) + ', ' + str(result_arr[i][10]) + ', ' + str(result_arr[i][11]) + ', ' + str(result_arr[i][12]) + ', ' + str(result_arr[i][13]) + ', ' + str(result_arr[i][14]) + '\n') csvfile.write(row) print('Math Dynamic Call Graph Metrics file is saved.') def process_dynamic_metrics_from_call_graph(self): for i in range(0, len(self.conf.CALL_GRAPH_PROJECTS_ID)): for subdir, dirs, files in sorted( os.walk(self.conf.DYNAMIC_CALL_GRAPH_PROJECTS_PATH[i])): for file in sorted(files): filepath = os.path.join(subdir, file) print(filepath) fn = file.replace(".dot", "") buggy_version = int(fn) #re.findall('\d+', filepath) graph = pydotplus.graphviz.graph_from_dot_file(filepath) edgeList = graph.get_edge_list() nodeList = graph.get_node_list() faultNodes = [] # load matrix file matrix_filepath = os.path.join( self.conf.ROOT_PATH, self.conf.CALL_GRAPH_PROJECTS_ID[i] + '/' + str(buggy_version) + '/matrix') mat = self.loader.load_coverage_file(matrix_filepath) mat_arr = np.asarray(mat) #print(mat_arr.shape) pass_fail_col_arr = mat_arr[:, mat_arr.shape[1] - 1] test_case_result_col_arr = np.asarray(pass_fail_col_arr) unique_value, unique_counts = np.unique( test_case_result_col_arr, return_counts=True) #print(unique_value) #print(unique_counts) total_test_case_coverage_by_dynamic_call_graph = 0 for n in nodeList: #nodeName = n.get_name() colorName = json.loads( n.obj_dict['attributes']['fillcolor']) nodeName = json.loads(n.get_name()) if colorName.strip() == "red": faultNodes.append(json.loads(n.get_name())) # total test cases that cover/execute for this dynamic call graph getSpectraLineNum = self.get_featurenode_linenum_from_spectra( self.conf.CALL_GRAPH_PROJECTS_ID[i], buggy_version, nodeName) #print(str(getSpectraLineNum)) if getSpectraLineNum != None: fault_node_feature_col_arr = mat_arr[:, getSpectraLineNum - 1] for t in range(0, len(fault_node_feature_col_arr)): if fault_node_feature_col_arr[t] == 1: total_test_case_coverage_by_dynamic_call_graph += 1 #print(nodeList) #print(faultNodes) cnt = 0 diffu_feature_modified_list = [] diffu_feature_added_list = [] for node in 
range(0, len(faultNodes)): faultNodeInDegList = [] faultNodeOutDegList = [] faultClass_CBO = 0 faultClass_RFC_List = [] faultClass_RFC = 0 for e in edgeList: cnt += 1 dottedEdge = False att = e.obj_dict['attributes'] node_split = faultNodes[node].split('#', 1) faultNodeClassName = node_split[0] sourceNode = json.loads(e.get_source()) destNode = json.loads(e.get_destination()) if len(att) > 0: if att['style'] == 'dotted': dottedEdge = True # for RFC calculation if faultNodeClassName in sourceNode: if sourceNode not in faultClass_RFC_List: faultClass_RFC_List.append( sourceNode) if faultNodeClassName in destNode: if destNode not in faultClass_RFC_List: faultClass_RFC_List.append( destNode) if dottedEdge != True: # calculate in/out degree if sourceNode.strip() == faultNodes[node]: faultNodeOutDegList.append(destNode) if destNode.strip() == faultNodes[node]: faultNodeInDegList.append(sourceNode) # calculate CBO if faultNodeClassName in sourceNode or faultNodeClassName in destNode: faultClass_CBO += 1 #print(e.to_string()) if faultNodeClassName in sourceNode and faultNodeClassName in destNode: faultClass_CBO -= 1 #calculate RFC if faultNodeClassName in sourceNode: if sourceNode not in faultClass_RFC_List: faultClass_RFC_List.append(sourceNode) if faultNodeClassName in destNode: if destNode not in faultClass_RFC_List: faultClass_RFC_List.append(destNode) outDegCount = len(faultNodeOutDegList) inDegCount = len(faultNodeInDegList) faultClass_RFC = len(faultClass_RFC_List) + outDegCount #print(faultClass_CBO) if outDegCount != 0 and inDegCount != 0: print('OutDeg> ' + str(outDegCount) + ' InDeg> ' + str(inDegCount) + ' CBO> ' + str(faultClass_CBO) + ' RFC> ' + str(faultClass_RFC)) # code for matrix file getSpectraLineNum = self.get_featurenode_linenum_from_spectra( self.conf.CALL_GRAPH_PROJECTS_ID[i], buggy_version, faultNodes[node]) #print(str(getSpectraLineNum)) if getSpectraLineNum != None: fault_node_feature_col_arr = mat_arr[:, getSpectraLineNum - 1] # fault node test coverage pass/fail info calculation no_of_test_cases_covers_fault_node = 0 no_of_test_cases_passes_for_fault_node = 0 no_of_test_cases_fails_for_fault_node = 0 for t in range( 0, len(fault_node_feature_col_arr)): if fault_node_feature_col_arr[t] == 1: no_of_test_cases_covers_fault_node += 1 if pass_fail_col_arr[t] == 1: no_of_test_cases_passes_for_fault_node += 1 else: no_of_test_cases_fails_for_fault_node += 1 #print('Total test cases for fn>', no_of_test_cases_covers_fault_node) #print('pass for fn>', no_of_test_cases_passes_for_fault_node) #print('fail for fn>', no_of_test_cases_fails_for_fault_node) output_row = [ self.conf.CALL_GRAPH_PROJECTS_ID[i], # + '_' + str(buggy_version), buggy_version, faultNodes[node], inDegCount, outDegCount, inDegCount + outDegCount, faultClass_CBO, faultClass_RFC, no_of_test_cases_covers_fault_node, no_of_test_cases_passes_for_fault_node, no_of_test_cases_fails_for_fault_node, total_test_case_coverage_by_dynamic_call_graph, "/".join(faultNodeInDegList), "/".join(faultNodeOutDegList) ] diffu_feature_modified_list.append(output_row) else: #print('OutDeg> ' + str(outDegCount) + ' InDeg> ' + str(inDegCount) + ' CBO> ' + str(faultClass_CBO) + ' RFC> ' + str(faultClass_RFC)) impl_action = 'Unknown' output_row = [ self.conf.CALL_GRAPH_PROJECTS_ID[i], buggy_version, impl_action, 0, 0, 0, 0, 0, 0, 0, 0, 0, '', '' ] diffu_feature_added_list.append(output_row) if len(diffu_feature_modified_list) > 0: #lowRank = self.get_ranking(diffu_feature_modified_list[0][0], diffu_feature_modified_list[0][1], 
diffu_feature_modified_list[0][2]) spectraLineNum = self.get_featurenode_linenum_from_spectra( self.conf.CALL_GRAPH_PROJECTS_ID[i], diffu_feature_modified_list[0][1], diffu_feature_modified_list[0][2]) lowRank = self.get_ranking_by_spectra( self.conf.CALL_GRAPH_PROJECTS_ID[i], diffu_feature_modified_list[0][1], spectraLineNum) #print('lowRank> ', lowRank) #low rank == highest suspicious value. index = 0 #print('length> ' + str(len(diffu_feature_modified_list))) if len(diffu_feature_modified_list) > 1: for x in range(1, len(diffu_feature_modified_list)): spectraLineNum = self.get_featurenode_linenum_from_spectra( self.conf.CALL_GRAPH_PROJECTS_ID[i], diffu_feature_modified_list[x][1], diffu_feature_modified_list[x][2]) curRank = self.get_ranking_by_spectra( self.conf.CALL_GRAPH_PROJECTS_ID[i], diffu_feature_modified_list[x][1], spectraLineNum) #print('curRank> ', curRank) if curRank < lowRank: lowRank = curRank index = x #print('survivedindex>', diffu_feature_modified_list[index][2]) #print(index) #print('length> ' + str(len(diffu_feature_modified_list))) resultarr = diffu_feature_modified_list[index] diffu_feature_modified_list = [] diffu_feature_modified_list.append(resultarr) #print('modified length>' + str(len(diffu_feature_modified_list))) #print('modified arr>', diffu_feature_modified_list) if self.conf.CALL_GRAPH_PROJECTS_ID[i] == 'Chart': self.dynamic_call_graph_metrics_list_chart.append( diffu_feature_modified_list[0]) elif self.conf.CALL_GRAPH_PROJECTS_ID[i] == 'Lang': self.dynamic_call_graph_metrics_list_lang.append( diffu_feature_modified_list[0]) elif self.conf.CALL_GRAPH_PROJECTS_ID[i] == 'Time': self.dynamic_call_graph_metrics_list_time.append( diffu_feature_modified_list[0]) else: self.dynamic_call_graph_metrics_list_closure.append( diffu_feature_modified_list[0]) else: if self.conf.CALL_GRAPH_PROJECTS_ID[i] == 'Chart': self.dynamic_call_graph_metrics_list_chart.append( diffu_feature_added_list[0]) elif self.conf.CALL_GRAPH_PROJECTS_ID[i] == 'Lang': self.dynamic_call_graph_metrics_list_lang.append( diffu_feature_added_list[0]) elif self.conf.CALL_GRAPH_PROJECTS_ID[i] == 'Time': self.dynamic_call_graph_metrics_list_time.append( diffu_feature_added_list[0]) else: self.dynamic_call_graph_metrics_list_closure.append( diffu_feature_added_list[0]) print( '============== Printing ' + self.conf.CALL_GRAPH_PROJECTS_ID[i] + ' Projects Dynamic Call Graph Metrics Features =================' ) if self.conf.CALL_GRAPH_PROJECTS_ID[i] == 'Chart': output_sorted_list = sorted( self.dynamic_call_graph_metrics_list_chart, key=itemgetter(1)) self.dynamic_call_graph_metrics_list.extend(output_sorted_list) #print(output_sorted_list) if self.conf.CALL_GRAPH_PROJECTS_ID[i] == 'Lang': output_sorted_list = sorted( self.dynamic_call_graph_metrics_list_lang, key=itemgetter(1)) #print(output_sorted_list) self.dynamic_call_graph_metrics_list.extend(output_sorted_list) if self.conf.CALL_GRAPH_PROJECTS_ID[i] == 'Time': output_sorted_list = sorted( self.dynamic_call_graph_metrics_list_time, key=itemgetter(1)) #print(output_sorted_list) self.dynamic_call_graph_metrics_list.extend(output_sorted_list) if self.conf.CALL_GRAPH_PROJECTS_ID[i] == 'Closure': output_sorted_list = sorted( self.dynamic_call_graph_metrics_list_closure, key=itemgetter(1)) #print(output_sorted_list) self.dynamic_call_graph_metrics_list.extend(output_sorted_list) print( '============== Finished Printing ' + self.conf.CALL_GRAPH_PROJECTS_ID[i] + ' Projects Dynamic Call Graph Metrics Version ================' ) def 
print_dynamic_call_graph_metrics(self): #print(len(self.dynamic_call_graph_metrics_list)) #print(self.dynamic_call_graph_metrics_list) result_arr = np.array(self.dynamic_call_graph_metrics_list) #print(result_arr.shape) with open(self.conf.ROOT_PATH + 'dynamic_call_graph_metrics.csv', 'w') as csvfile: # PassedTestRuntime, FailedTestRuntime, columnTitleRow = 'ProjectID, '\ 'BugId, '\ 'FaultNodeName, '\ 'FaultNode_InDegree, '\ 'FaultNode_OutDegree, '\ 'FaultNodeDegreeCentrality, '\ 'CBO, '\ 'RFC, '\ 'NoOfTestCasesExecuteFaultMethod, '\ 'NoOfTestCasesPassesCoversFaultNode, '\ 'NoOfTestCasesFailsCoversFaultNode, '\ 'NoOfTestCasesExecutesDynamicCallGraph, '\ 'InDegreeMethodCallsList, '\ 'OutDegreeMethodCallsList\n' csvfile.write(columnTitleRow) for i in range(0, result_arr.shape[0]): row = (str(result_arr[i][0]) + ', ' + str(result_arr[i][1]) + ', ' + str(result_arr[i][2]) + ', ' + str(result_arr[i][3]) + ', ' + str(result_arr[i][4]) + ', ' + str(result_arr[i][5]) + ', ' + str(result_arr[i][6]) + ', ' + str(result_arr[i][7]) + ', ' + str(result_arr[i][8]) + ', ' + str(result_arr[i][9]) + ', ' + str(result_arr[i][10]) + ', ' + str(result_arr[i][11]) + ', ' + str(result_arr[i][12]) + ', ' + str(result_arr[i][13]) + '\n') csvfile.write(row) print('Dynamic Call Graph Metrics file is saved.') def process_dynamic_call_graph(self): #self.calculate_oo_metrics() self.process_dynamic_metrics_from_call_graph() self.print_dynamic_call_graph_metrics() self.process_math_dynamic_metrics_from_call_graph() self.print_math_dynamic_call_graph_metrics()
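# The class above walks DOT-format call graphs and, for each fault node named
# "ClassName#methodName", accumulates in-degree, out-degree, a class-level CBO
# (edges touching the fault node's class, minus intra-class calls) and an RFC
# approximation (distinct methods of that class seen in the graph plus the fault
# node's out-degree). The helper below is a minimal, self-contained sketch of
# that per-edge bookkeeping, mirroring the substring-based class check used
# above; the edge list and node names are made up purely for illustration.

def fault_node_metrics(edges, fault_node):
    fault_class = fault_node.split('#', 1)[0]
    in_deg, out_deg = [], []
    cbo = 0
    rfc_methods = []
    for source, dest in edges:
        if source == fault_node:
            out_deg.append(dest)
        if dest == fault_node:
            in_deg.append(source)
        if fault_class in source or fault_class in dest:
            cbo += 1
        if fault_class in source and fault_class in dest:
            cbo -= 1  # intra-class calls are not counted as coupling
        for node in (source, dest):
            if fault_class in node and node not in rfc_methods:
                rfc_methods.append(node)
    rfc = len(rfc_methods) + len(out_deg)
    return len(in_deg), len(out_deg), cbo, rfc

# Hypothetical usage:
edges = [("Foo#a", "Foo#b"), ("Foo#b", "Bar#c"), ("Bar#c", "Foo#b")]
print(fault_node_metrics(edges, "Foo#b"))  # (2, 1, 2, 3)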
def test_load_file_loads_contents(self): loader = FileLoader() loader.load_file("logs_old/JOURNAL00.TXT") self.assertTrue(len(loader.lines) > 0)
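# The tests above only rely on FileLoader exposing load_file() and a lines
# attribute. A minimal sketch consistent with that usage might look like the
# following; the project's real FileLoader may do more (filtering, encoding
# handling), and the class name here is hypothetical.

class FileLoaderSketch(object):
    def __init__(self):
        self.lines = []

    def load_file(self, path):
        # Read the journal file and keep non-empty lines, stripped of newlines.
        with open(path) as handle:
            self.lines = [line.rstrip('\n') for line in handle if line.strip()]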
def __init__(self): # Settings pygame.mixer.init() pygame.mixer.music.load('latenight.ogg') pygame.mixer.music.play(0) self.WIDTH = 640 self.HEIGHT = 360 # Config self.tps_max = 100 # Initialization pygame.init() font = pygame.font.SysFont("Arial", 18) self.resolution = (self.screen_width, self.screen_height) = (self.WIDTH, self.HEIGHT) self.screen = pygame.display.set_mode(self.resolution, pygame.RESIZABLE) self.tps_clock = pygame.time.Clock() self.tps_delta = 0.0 self.scroll = Vector2(0, 0) self.map = Map(self) self.player = Player(self) # on initialization, everything is passed to the player self.enemy = Enemy(self) self.weapon = Weapon(self) self.fire = Fire(self) self.physics = Physics(self) self.platforms = Platforms(self) self.collision = Collision(self) self.sprite = Sprite(self) self.menu = Menu(self) self.file_loader = FileLoader(self) self.sprite.load_images() def create_fonts(font_sizes_list): "Creates different fonts with one list" fonts = [] for size in font_sizes_list: fonts.append(pygame.font.SysFont("Arial", size)) return fonts def render(fnt, what, color, where): "Renders the fonts as passed from display_fps" text_to_show = fnt.render(what, 0, pygame.Color(color)) self.screen.blit(text_to_show, where) def display_fps(): "Data that will be rendered and blitted in _display" render(fonts[0], what=str(int(self.tps_clock.get_fps())), color="white", where=(0, 0)) fonts = create_fonts([32, 16, 14, 8]) while True: # Events for event in pygame.event.get(): if event.type == pygame.QUIT: sys.exit(0) elif event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE: # quit once on key press sys.exit(0) # Ticking self.tps_delta += self.tps_clock.tick() / 1000.0 # convert ms to seconds while self.tps_delta > 1 / self.tps_max: self.tick() self.tps_delta -= 1 / self.tps_max # Rendering/Drawing self.screen.fill((0, 0, 0)) self.draw() display_fps() pygame.display.flip()
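# The loop above follows the fixed-timestep pattern: wall-clock time from
# Clock.tick() accumulates in tps_delta, and the simulation tick() runs once
# per 1/tps_max seconds regardless of how fast frames are drawn. Stripped of
# the game objects, the pattern looks like the sketch below (illustrative
# helper, not the game's actual main module; it assumes pygame.init() and
# pygame.display.set_mode() have already been called).

import pygame

def run_fixed_timestep(tick, draw, tps_max=100):
    clock = pygame.time.Clock()
    delta = 0.0
    running = True
    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
        delta += clock.tick() / 1000.0   # milliseconds -> seconds
        while delta > 1.0 / tps_max:
            tick()                       # simulation advances in fixed steps
            delta -= 1.0 / tps_max
        draw()                           # rendering happens once per frame
        pygame.display.flip()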
""" A simple undirected, weighted graph """ def __init__(self): self.nodes = set() self.edges = {} self.distances = {} def add_node(self, value): #print '\t>>UndirectedGraph::Adding node: ', value self.nodes.add(value) def add_edge(self, from_node, to_node, distance=1): #print '\t>>UndirectedGraph::Adding edge: ', from_node, ' - ', to_node self._add_edge(from_node, to_node, distance) self._add_edge(to_node, from_node, distance) def _add_edge(self, from_node, to_node, distance): self.edges.setdefault(from_node, []) self.edges[from_node].append(to_node) self.distances[(from_node, to_node)] = distance if __name__ == '__main__': from file_loader import FileLoader loader=FileLoader() loader.read_map("Mapas/With_Start/lab4.map") loader.generate_directed_graph() print loader.directed_graph.get_map()
class FaultLocalization(): def __init__(self, pathdir): self.conf = Configuration(pathdir) self.loader = FileLoader() self.num_of_failed_test_cases_cover_statement_Ncf = 0 self.num_of_failed_test_cases_not_cover_statement_Nuf = 0 self.num_of_successful_test_cases_cover_statement_Ncs = 0 self.num_of_successful_test_cases_Ns = 0 self.num_of_failed_test_cases_Nf = 0 self.suspicious_values = [] self.distinct_values = [] self.suspicious_path = os.path.join(self.conf.ROOT_PATH, 'suspiciousness_ranking/') #print(self.suspicious_path) if os.path.isdir(self.suspicious_path): print(self.suspicious_path + ' directory exists...') else: print(self.suspicious_path + ' directory created...') self.conf.handle_dir(self.suspicious_path) def update_values(self): self.num_of_failed_test_cases_cover_statement_Ncf = 0 self.num_of_failed_test_cases_not_cover_statement_Nuf = 0 self.num_of_successful_test_cases_cover_statement_Ncs = 0 self.num_of_successful_test_cases_Ns = 0 self.num_of_failed_test_cases_Nf = 0 def suspicious_matrix_by_dstar(self, mat_arr): mat = np.array(mat_arr) #print(mat.shape) #print(mat) col = mat.shape[1] - 1 #print(col) self.update_values() del self.suspicious_values[:] for i in range(0, col): self.update_values() for j in range(0, mat.shape[0]): if mat[j][col] == 0: # for fail test case if mat[j][i] == 1: self.num_of_failed_test_cases_cover_statement_Ncf += 1 if mat[j][i] == 0: self.num_of_failed_test_cases_not_cover_statement_Nuf += 1 if mat[j][col] == 1: # for pass test case if mat[j][i] == 1: self.num_of_successful_test_cases_cover_statement_Ncs += 1 #print(self.num_of_failed_test_cases_cover_statement_Ncf) #print(self.num_of_successful_test_cases_cover_statement_Ncs) sum = self.num_of_failed_test_cases_not_cover_statement_Nuf + self.num_of_successful_test_cases_cover_statement_Ncs #print(sum) if sum == 0: dstar_value = 0 else: dstar_value = pow( self.num_of_failed_test_cases_cover_statement_Ncf, 2) / sum #print('DStar value for ' + str(i) + ' th col value: ' + str(dstar_value)) self.suspicious_values.append(dstar_value) def generate_ranking(self): susp_arr = np.array(self.suspicious_values) print('---processing ranking---') self.distinct_values = [] dist_arr = np.unique(susp_arr) # dist_array needs to reverse for making descending order self.distinct_values = dist_arr[::-1] #print(self.distinct_values) #for x in range(distinct_values.shape[0]): def print_suspiciousness_ranking_table(self, filename): print('writing csv for :', filename) #print(self.suspicious_path) susp_arr = np.array(self.suspicious_values) filename = filename + '.csv' with open(self.suspicious_path + filename, 'w') as csvfile: columnTitleRow = "Method_Call_No,Suspiciousness,Ranking\n" csvfile.write(columnTitleRow) for i in range(0, susp_arr.shape[0]): for y in range(0, len(self.distinct_values)): if susp_arr[i] == self.distinct_values[y]: row = str(i + 1) + "," + str( susp_arr[i]) + "," + str(y + 1) + "\n" csvfile.write(row) #print('DStar value for ' + str(i+1) + ' th col value: ' + str(susp_arr[i]) + ' Ranking - ' + str(y+1)) def calculate_suspiciousness(self): for i in range(0, len(self.conf.PROJECTS_DATA_PATH)): for subdir, dirs, files in sorted( os.walk(self.conf.PROJECTS_DATA_PATH[i])): for file in sorted(files): filepath = os.path.join(subdir, file) if file == 'matrix': print(filepath) mat = self.loader.load_coverage_file(filepath) self.suspicious_matrix_by_dstar(mat) self.generate_ranking() buggy_version = re.findall('\d+', filepath) filename = self.conf.PROJECTS_ID[ i] + '_' + buggy_version[0] 
#print(filename) self.print_suspiciousness_ranking_table(filename) #mat = self.load_matrix() #mat_arr = np.asarray(mat) print('--- Suspiciousness Ranking of Matrix Done ---')
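# suspicious_matrix_by_dstar() above implements DStar with * = 2:
# susp(s) = Ncf^2 / (Nuf + Ncs), where Ncf / Nuf count failing tests that do /
# do not cover statement s and Ncs counts passing tests that cover it. A
# compact vectorised version for a toy coverage matrix (last column: 1 =
# passing test, 0 = failing test, matching the convention above; the toy data
# is made up for illustration):

import numpy as np

def dstar_scores(matrix):
    mat = np.asarray(matrix)
    coverage, outcome = mat[:, :-1], mat[:, -1]
    failing, passing = outcome == 0, outcome == 1
    ncf = coverage[failing].sum(axis=0)          # failing tests covering s
    nuf = (1 - coverage[failing]).sum(axis=0)    # failing tests missing s
    ncs = coverage[passing].sum(axis=0)          # passing tests covering s
    denom = nuf + ncs
    with np.errstate(divide='ignore', invalid='ignore'):
        scores = np.where(denom == 0, 0, ncf ** 2 / denom)
    return scores

toy = [[1, 0, 1, 0],   # failing test covering statements 1 and 3
       [1, 1, 0, 1],   # passing test
       [0, 1, 1, 1]]   # passing test
print(dstar_scores(toy))  # [1. 0. 1.]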