def modiPath(): host_collector_address = base_address + '\\CompleteModel\\HuaJiao_30_host_collector_modipath.pickle' whole_result_address = base_address + '\\Path_Modi\\result_huajiao_wvs_1113_modipath_modi' path_para_dic_address = base_address + '\\Path_Modi\\modi_path_dict.pickle' path_collector = [] with open(host_collector_address, 'rb') as read_host_collector: host_collector = pickle.load(read_host_collector) anomaly_detector = AnomalyDetector() anomaly_writer = AnomalyWriter(whole_result_address) try: # save the begin time begin_time = datetime.datetime.now() detect_time_flag = True global study_ready_time global detect_ready_time with open(url_address, 'rb') as infile: record_num = 0 for line in infile: line = line.strip(' \n') record = line.split('\t') if len(record) == 13 and record[_host] != '': a_record = UrlRecord(record) current_model = host_collector.get_host_model( a_record.get_host()) pattern_flag = current_model.getDetectFlag() if pattern_flag == 'Study ready': if detect_time_flag: detect_time_flag = False study_ready_time = datetime.datetime.now() study_interval = study_ready_time - begin_time print 'Study consuming: %s' % study_interval anomaly_status = anomaly_detector.detectpath( a_record, current_model, 'new') path_collector.append(anomaly_status['path_para']) if anomaly_status['Result'] is True: anomaly_writer.writePathResult( a_record, anomaly_status) record_num += 1 if record_num % 1000 == 0: print 'Record completed: ', record_num, print '\tUrl amount: %s\tDifferent url amount: %s\tDifferent sip: %s\t' % ( current_model.getUrlAmount(), current_model.getDifUrlAmount(), current_model.getSipAmount()) except IndexError: print 'Lancer says:\n' \ '\tIt has error through the system.' raise with open(path_para_dic_address, 'wb') as save: pickle.dump(path_collector, save) end_time = datetime.datetime.now() detect_ready_time = end_time - study_ready_time print "Detection consuming: %s" % detect_ready_time print 'Whole system test finished.'
def modiPath(): host_collector_address = base_address + '\\CompleteModel\\HuaJiao_30_host_collector_modipath.pickle' whole_result_address = base_address + '\\Path_Modi\\result_huajiao_wvs_1113_modipath_modi' path_para_dic_address = base_address + '\\Path_Modi\\modi_path_dict.pickle' path_collector = [] with open(host_collector_address, 'rb') as read_host_collector: host_collector = pickle.load(read_host_collector) anomaly_detector = AnomalyDetector() anomaly_writer = AnomalyWriter(whole_result_address) try: # save the begin time begin_time = datetime.datetime.now() detect_time_flag = True global study_ready_time global detect_ready_time with open(url_address, 'rb') as infile: record_num = 0 for line in infile: line = line.strip(' \n') record = line.split('\t') if len(record) == 13 and record[_host] != '': a_record = UrlRecord(record) current_model = host_collector.get_host_model(a_record.get_host()) pattern_flag = current_model.getDetectFlag() if pattern_flag == 'Study ready': if detect_time_flag: detect_time_flag = False study_ready_time = datetime.datetime.now() study_interval = study_ready_time - begin_time print 'Study consuming: %s' % study_interval anomaly_status = anomaly_detector.detectpath(a_record, current_model, 'new') path_collector.append(anomaly_status['path_para']) if anomaly_status['Result'] is True: anomaly_writer.writePathResult(a_record, anomaly_status) record_num += 1 if record_num % 1000 == 0: print 'Record completed: ', record_num, print '\tUrl amount: %s\tDifferent url amount: %s\tDifferent sip: %s\t' % (current_model.getUrlAmount(), current_model.getDifUrlAmount(), current_model.getSipAmount()) except IndexError: print 'Lancer says:\n' \ '\tIt has error through the system.' raise with open(path_para_dic_address, 'wb') as save: pickle.dump(path_collector, save) end_time = datetime.datetime.now() detect_ready_time = end_time - study_ready_time print "Detection consuming: %s" % detect_ready_time print 'Whole system test finished.'
def main(): with open(host_collector_address, 'rb') as read_host_collector: host_collector = pickle.load(read_host_collector) anomaly_detector = AnomalyDetector() anomaly_collector = AnomalyCollector() result_controller = ResultController() anomaly_writer = AnomalyWriter(whole_result_address) try: # save the begin time begin_time = datetime.datetime.now() detect_time_flag = True global study_ready_time global detect_ready_time with open(url_address, 'rb') as infile: record_num = 0 for line in infile: line = line.strip(' \n') record = line.split('\t') if len(record) == 13 and record[_host] != '': a_record = UrlRecord(record) current_model = host_collector.get_host_model(a_record.get_host()) pattern_flag = current_model.getDetectFlag() if pattern_flag == 'Study ready': if detect_time_flag: detect_time_flag = False study_ready_time = datetime.datetime.now() study_interval = study_ready_time - begin_time print 'Study consuming: %s' % study_interval anomaly_status = anomaly_detector.detect(a_record, current_model) if anomaly_status['Result'] is True: valid_flag = result_controller.isValid(a_record, anomaly_collector) if valid_flag is True: anomaly_writer.writeCompleteResult(a_record, anomaly_status) # anomaly_writer.writeTimeAttribute(a_record) else: current_model.reStudy() record_num += 1 if record_num % 1000 == 0: print 'Record completed: ', record_num, print '\tUrl amount: %s\tDifferent url amount: %s\tDifferent sip: %s\t' % (current_model.getUrlAmount(), current_model.getDifUrlAmount(), current_model.getSipAmount()) except IndexError: print 'Lancer says:\n' \ '\tIt has error through the system.' raise end_time = datetime.datetime.now() detect_ready_time = end_time - study_ready_time print "Detection consuming: %s" % detect_ready_time print 'Whole system test finished.'
def main(): cookie = get_cookie() url_record = UrlRecord() current_url_id = get_current_url_id() while current_url_id < 1000: person_url = url_record.get(current_url_id) if person_url is None: break url = person_url[0] name = person_url[1] file_name = build_file_name(current_url_id, url, name) fp = open(file_name, 'w') # 将用户的微博写入文件中 people = PeoplePage(url, cookie) total_num = people.get_blog_page_total_num() for i in range(1, total_num + 1): blog_list = people.get_blog_list(i) for blog in blog_list: fp.write(blog[0]) fp.write('\t') fp.write(blog[1]) fp.write('\t') fp.write(blog[2]) fp.write('\t') fp.write('\n') print("%s Complete %d" % (name, i * 100 / total_num)) fp.close() # 将当前用户的关注用户写入数据库 care_page_url = people.get_care_people_page_url() print care_page_url if care_page_url == '': break care_people_page = CarePeoplePage(care_page_url, cookie) total_num = care_people_page.get_page_total_num() for i in range(1, total_num + 1): people_list = care_people_page.get_people_list(i) for people in people_list: url_record.add(people[1].decode('utf-8'), people[0].decode('utf-8')) current_url_id += 1 set_current_url_id(current_url_id)
def main(): host_collector = HostCollector() anomaly_detector = AnomalyDetector() anomaly_collector = AnomalyCollector() result_controller = ResultController() anomaly_writer = AnomalyWriter(whole_result_address) try: # save the begin time begin_time = datetime.datetime.now() detect_time_flag = True global study_ready_time global detect_ready_time with open(url_address, 'rb') as infile: record_num = 0 study_record_num = 0 for line in infile: record = line.strip(' \n').split('\t') if len(record) == 13 and record[_host] != '': a_url = UrlRecord(record) current_model = host_collector.getHostModel( a_url.get_host()) pattern_flag = current_model.getDetectFlag() # Judge Host-model's pattern('Study ready' or 'Study...') if pattern_flag == 'Study ready': if detect_time_flag: detect_time_flag = False study_ready_time = datetime.datetime.now() study_interval = study_ready_time - begin_time study_record_num = record_num print 'Study consuming: %s' % study_interval print 'Study Record: %s' % study_record_num print '\tUrl amount: %s\tDifferent url amount: %s\tDifferent sip: %s\t' % \ (current_model.getUrlAmount(), current_model.getDifUrlAmount(), current_model.getSipAmount()) # Detect whether the record is anomaly anomaly_status = anomaly_detector.detect( a_url, current_model) # If the record is detected to be anomaly if anomaly_status['Result'] is True: valid_flag = result_controller.isValid( a_url, anomaly_collector) if valid_flag is True: anomaly_writer.writeResult( a_url, anomaly_status) else: current_model.reStudy() elif pattern_flag == 'Study...': current_model.add_record(a_url) else: raise ValueError( 'Lancer says: pattern selection error.') record_num += 1 if record_num % 10000 == 0: print 'Record completed: ', record_num, else: detect_record_num = record_num - study_record_num end_time = datetime.datetime.now() detect_ready_time = end_time - study_ready_time print 'Detect Record: %s' % detect_record_num print "Detection consuming: %s" % detect_ready_time print 'Whole 
system test finished.' # Store the completed Host Model. with open(host_collector_address, 'wb') as save_host_collector: pickle.dump(host_collector, save_host_collector) except IndexError: print 'Lancer says:\n\tIt has error through the system.' raise
def main(): host_collector = HostCollector() anomaly_detector = AnomalyDetector() anomaly_collector = AnomalyCollector() result_controller = ResultController() anomaly_writer = AnomalyWriter(whole_result_address) try: # save the begin time begin_time = datetime.datetime.now() detect_time_flag = True global study_ready_time global detect_ready_time with open(url_address, 'rb') as infile: record_num = 0 study_record_num = 0 for line in infile: record = line.strip(' \n').split('\t') if len(record) == 13 and record[_host] != '': a_url = UrlRecord(record) current_model = host_collector.getHostModel(a_url.get_host()) pattern_flag = current_model.getDetectFlag() # Judge Host-model's pattern('Study ready' or 'Study...') if pattern_flag == 'Study ready': if detect_time_flag: detect_time_flag = False study_ready_time = datetime.datetime.now() study_interval = study_ready_time - begin_time study_record_num = record_num print 'Study consuming: %s' % study_interval print 'Study Record: %s' % study_record_num print '\tUrl amount: %s\tDifferent url amount: %s\tDifferent sip: %s\t' % \ (current_model.getUrlAmount(), current_model.getDifUrlAmount(), current_model.getSipAmount()) # Detect whether the record is anomaly anomaly_status = anomaly_detector.detect(a_url, current_model) # If the record is detected to be anomaly if anomaly_status['Result'] is True: valid_flag = result_controller.isValid(a_url, anomaly_collector) if valid_flag is True: anomaly_writer.writeResult(a_url, anomaly_status) else: current_model.reStudy() elif pattern_flag == 'Study...': current_model.add_record(a_url) else: raise ValueError('Lancer says: pattern selection error.') record_num += 1 if record_num % 10000 == 0: print 'Record completed: ', record_num, else: detect_record_num = record_num - study_record_num end_time = datetime.datetime.now() detect_ready_time = end_time - study_ready_time print 'Detect Record: %s' % detect_record_num print "Detection consuming: %s" % detect_ready_time print 'Whole system 
test finished.' # Store the completed Host Model. with open(host_collector_address, 'wb') as save_host_collector: pickle.dump(host_collector, save_host_collector) except IndexError: print 'Lancer says:\n\tIt has error through the system.' raise
def main():
    """Crawl Weibo profiles, appending real names to male.txt / female.txt
    according to the profile's gender field, and enqueue each user's
    followees for later crawling.

    Relies on project helpers: get_cookie, UrlRecord, get_current_url_id,
    set_current_url_id, PeoplePage, PeopleInfoPage, CarePeoplePage.
    """
    cookie_index = 1
    url_record = UrlRecord()
    current_url_id = get_current_url_id()
    sleep_time = 4  # seconds between requests, to avoid rate limiting
    # BUG FIX: context managers close both output files even when the crawl
    # aborts with an exception (the original leaked both handles).
    with open('./male.txt', 'a', encoding='utf-8') as fp_male, \
            open('./female.txt', 'a', encoding='utf-8') as fp_female:
        while True:
            print('current cookie index is', cookie_index)
            cookie = get_cookie(cookie_index)
            print('current url id is', current_url_id)
            people_url = url_record.get(current_url_id)
            if people_url is None:
                print("people url is None")
                break
            url = people_url[0]
            people = PeoplePage(url, cookie)
            time.sleep(sleep_time)
            state = people.get_state()
            if state:
                # stop entirely when the cookie is rejected
                if state.find('HTTP Error 403: Forbidden') != -1:
                    break
            info_url = people.get_people_info_page_url()
            if info_url is None:
                print('info url is None')
                current_url_id += 1
                set_current_url_id(current_url_id)
                continue
            print(info_url)
            info_page = PeopleInfoPage(info_url, cookie)
            time.sleep(sleep_time)
            info = info_page.get_people_info()
            if 'name' in info and 'gender' in info:
                name = info['name']
                if info['gender'] == u'男':
                    fp_male.write(name)
                    fp_male.write('\n')
                    fp_male.flush()
                elif info['gender'] == u'女':
                    fp_female.write(name)
                    fp_female.write('\n')
                    fp_female.flush()
            # store the current user's followees in the database
            care_page_url = people.get_care_people_page_url()
            if care_page_url is None:
                print('care page url is None')
                current_url_id += 1
                set_current_url_id(current_url_id)
                continue
            care_people_page = CarePeoplePage(care_page_url, cookie)
            time.sleep(sleep_time)
            people_list = care_people_page.get_people_list(1)
            time.sleep(sleep_time)
            # renamed loop variable: the original shadowed 'people'
            for person in people_list:
                url_record.add(person[1], person[0])
            current_url_id += 1
            set_current_url_id(current_url_id)