def wonderland(winlength, holder):
    q = []
    cq = DataManager.completeTable(holder)
    cnt = 0
    while len(q) < winlength:
        x = holder[cnt]
        y = x.lower()
        q.append(y)
        cnt += 1
    data = DataManager.manager(q)
    print(q)
    print(data)
    print("\n")
    while cnt < len(holder):
        x = holder[cnt]
        y = x.lower()
        q.pop(0)
        q.append(y)
        cnt += 1
        data = DataManager.manager(q)
        print(cnt)
        print(q)
        print(data)
        print("\n")
def getFeatureData(self, strategyInfo):
    print '********* get feature file'
    dataManager = DataManager()
    data = dataManager.get('feature', strategyInfo['feature_version'])
    data = self.moveFromTmpToJobDir(data)
    print data
    return data
def __init__(self, ArduinoHoistingData, ArduinoRotationData, ArduinoCirculationData,
             Hoistingsystem, CirculationSystem, RotaitonSystem):
    Thread.__init__(self, daemon=True)
    self.ArduinoHoistingData = ArduinoHoistingData
    self.ArduinoRotationData = ArduinoRotationData
    self.ArduinoCirculationData = ArduinoCirculationData
    self.HoistingSystem = Hoistingsystem
    self.CirculationSystem = CirculationSystem
    self.RotaitonSystem = RotaitonSystem
    self.dataManager = DataManager()
    self.dataBaseBuffer = {
        "RPM": 0,
        "Top_Drive_Torque": 0,
        "Pressure": 0,
        "Loadcell_z1": 0,
        "Loadcell_z2": 0,
        "Loadcell_z3": 0,
        "ROP_15s_avg": 0,
        "ROP_3m_avg": 0,
        "Flow_Rate": 0,
        "MSE": 0,
        "UCS": 0,
        "TVD": 0,
        "Bit_Torque": 0,
        "WOB": 0,
        "d_exponent": 0,
        "act1": 0,
        "act2": 0,
        "act3": 0,
        "Velocity": 0,
        "Height_Sensor": 0
    }
def setUp(self):
    self.d1 = DataManager()
    self.wd1 = Word('software', '소프트웨어', (True, self.d1))
    self.wd2 = Word('project', '프로젝트', (True, self.d1))
    self.d1.words = [self.wd1, self.wd2]
def saveOrUpdateMotion(self, motion):
    dm = DataManager()
    # Check whether the motion already exists.
    # It exists if there is a motion file with a matching name.
    path = None
    if motion.getName() in self.getAllMotionFiles():
        path = self.TEMPLATES_PATH + self.getMotionFile(motion.getName())
        # print(path)
    else:
        i = 0
        plainPath = self.TEMPLATES_PATH + 'template'
        while os.path.exists(plainPath + "%s.txt" % i):
            i = i + 1
        else:
            plainPath = plainPath + "%s.txt" % i
        if not os.path.exists(os.path.dirname(self.TEMPLATES_PATH)):
            os.makedirs(os.path.dirname(self.TEMPLATES_PATH))
        path = plainPath
    dm.saveMotion(motion, path)
    if motion.getName() not in self.__motions:
        self.__motions[motion.getName()] = motion
        self.__motionFiles[motion.getName()] = path
    print('Motion Saved or Updated')
def getSampleData(self, strategyInfo):
    print '********* get sample file'
    dataManager = DataManager()
    data = dataManager.get('sample', strategyInfo['sample_version'])
    data = self.moveFromTmpToJobDir(data)
    print data
    return data
def getActionData(self, strategyInfo):
    print '********* get action file'
    dataManager = DataManager()
    data = dataManager.get('action', strategyInfo['action_version'])
    data = self.moveFromTmpToJobDir(data)
    print data
    return data
def __init__(self, root):
    Frame.__init__(self, root)
    self.__root = root
    self.__data_manager = DataManager()
    self.__check_button_type = namedtuple('CheckButtonType', 'widget var')
    self.__natures = [
        "Single Carriageway", "Traffic Island Link", "Dual Carriageway",
        "Roundabout", "Traffic Island Link At Junction", "Slip Road"
    ]
    self.__roads = [
        "M3", "M40", "M4", "A1(M)", "M11", "M23", "M20", "M25", "M1",
        "HIGH STREET", "LONDON ROAD", "HIGH ROAD", "UXBRIDGE ROAD",
        "STATION ROAD", "BRIGHTON ROAD", "GREEN LANES", "FINCHLEY ROAD",
        "HARROW ROAD", "NORTH CIRCULAR ROAD", "KINGSTON ROAD",
        "PORTSMOUTH ROAD", "HERTFORD ROAD", "STAINES ROAD", "CROYDON ROAD",
        "MAIN ROAD", "CHURCH ROAD", "PARK ROAD"
    ]
    self.__motorways = [
        "M3", "M40", "M4", "A1(M)", "M11", "M23", "M20", "M25", "M1"
    ]
    self.__init_grid()
    self.__draw_grid()
def beginScan(aPaths, aUsers, aDM=None, aMergeRootPath=True):
    if aDM is None:
        aDM = DataManager()
    users = makeUsers(aDM, aUsers)
    sd = ScanDisk(users, aPaths, aDM)
    if aMergeRootPath is not None:
        sd.mergeRootPaths = aMergeRootPath
    sd.startScan()
    aDM.save()
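# Hedged usage sketch (not part of the original source): one way beginScan might
# be invoked directly. The paths and user names below are hypothetical examples;
# when no DataManager is supplied, beginScan builds and saves its own.
if __name__ == '__main__':
    example_paths = ['/data/photos', '/data/documents']  # hypothetical paths
    example_users = ['alice', 'bob']                      # hypothetical users
    beginScan(example_paths, example_users, aMergeRootPath=True)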
def saveMotion(self, template):
    dm = DataManager()
    i = 0
    plainPath = self.TEMPLATES_PATH + 'template'
    while os.path.exists(plainPath + "%s.txt" % i):
        i = i + 1
    else:
        plainPath = plainPath + "%s.txt" % i
    if not os.path.exists(os.path.dirname(self.TEMPLATES_PATH)):
        os.makedirs(os.path.dirname(self.TEMPLATES_PATH))
    dm.saveMotion(template, plainPath)
    print('Motion Saved')
class UrlManager(object):
    def __init__(self):
        self.data_manager = DataManager()

    # Build the URLs for the company yellow-pages listings
    def tel_url(self):
        memberId_list, shop_url = self.data_manager.read_local()
        url_list = []
        shopurl1_list = []
        shopurl2_list = []
        for memberId in memberId_list:
            url = 'https://corp.1688.com/page/index.htm?memberId=' + str(memberId) + '&fromSite=company_site&tab=companyWeb_contact'
            url_list.append(url)
        for shopurl in shop_url:
            url1 = shopurl + '/page/merchants.htm'
            url2 = shopurl + '/page/contactinfo.htm??smToken=d6f92a6aadd34fa3aef88809a6d9f7d1&smSign=ADUiGA9MZ4pScu4JQD9FWg%3D%3D'
            shopurl1_list.append(url1)
            shopurl2_list.append(url2)
        return url_list, shopurl1_list, shopurl2_list

    # Load URLs that have already been crawled from the local file,
    # so they are not crawled a second time
    # (reads the data from the local csv file)
    def crawred_url(self):
        crawred_url = []
        try:
            with open('D:\\data\\1688factory_tel.csv') as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    crawred_url.append(row['url'])
        except Exception, e:
            print 'no saved phone-record file found'
        return crawred_url
def _getDbManager():
    """Get the database manager object on demand."""
    db = getattr(g, "__dataManager", None)
    if db is None:
        db = DataManager()
        g.__dataManager = db
    return db
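# Hedged usage sketch (assumption, not from the original module): inside any
# Flask request handler the helper can be called repeatedly and always returns
# the same instance, because the first call caches it on flask.g.
def example_handler():
    db1 = _getDbManager()
    db2 = _getDbManager()
    assert db1 is db2  # the second call reuses the cached DataManager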
def run():
    # data crawler
    factory_spider = Spiser()
    # data parser
    json_parser = dataParser.DataParser()
    data_manager = DataManager()
    # total number of pages: 250
    total_page = 251
    # fetch the data
    for i in range(total_page):
        print i
        pagedata = factory_spider.get_pageData(i)
        if pagedata is not None:
            factory_list = json_parser.json_parser(pagedata)
            data_manager.save_local(factory_list)
        time.sleep(1.5)
def run():
    # data crawler
    factory_spider = Spiser()
    # data parser
    json_parser = dataParser.DataParser()
    data_manager = DataManager()
    # # total number of pages: 250
    # total_page = 251
    # # fetch the data
    # for i in range(total_page):
    #     print i
    #     pagedata = factory_spider.get_pageData(i)
    #     if pagedata is not None:
    #         factory_list = json_parser.json_parser(pagedata)
    #         data_manager.save_local(factory_list)
    #     time.sleep(1.5)
    memberId_list = data_manager.read_local()
    for i in memberId_list:
        print i
def __init__(self, pushsendingstart):
    self.dataManager = DataManager()
    topic, intervalStart, intervalEnd = self.dataManager.readAllSettings()
    self.webCrawler = WebCrawler()
    self.pushSender = PushSender(self.dataManager, "default")
    if pushsendingstart:
        self.pushSender.pushSendingThreadStart()
    self.nowListedWords = self.getAllWords()
    self.mainWindow = MainWindow(self)
    self.memorizeWindow = MemorizeWindow(self)
    self.settingWindow = SettingWindow(self)
    self.settingWindow.settingLoad(topic, intervalStart, intervalEnd)
    self.mainWindow.showWindow()
    self.memorizeWindow.hideWindow()
    self.settingWindow.hide()
def __init__(self, pushsendingstart):
    self.dataManager = DataManager()
    sts = self.dataManager.realAllSettings()
    self.webCrawler = WebCrawler()
    self.pushSender = PushSender(self.dataManager, "wowawesome")
    if pushsendingstart:
        self.pushSender.pushSendingThreadStart()
    self.nowListedWords = self.getAllWords()
    self.mainWindow = MainWindow(self)
    self.memorizeWindow = MemorizeWindow(self)
    self.settingWindow = SettingWindow(self)
    self.settingWindow.settingLoad(sts)
    self.mainWindow.showWindow()
    self.memorizeWindow.hideWindow()
    self.settingWindow.hide()
def initMotions(self):
    dm = DataManager()
    # Initialise all devices here
    oldPath = os.getcwd()
    try:
        os.chdir(self.TEMPLATES_PATH)
    except FileNotFoundError:
        print("Could not load Motions")
        return
    # For each gesture
    for file in sorted(os.listdir()):
        filePath = os.getcwd() + "/" + file
        if os.path.isdir(filePath):
            continue
        motion = dm.getMotion(filePath)
        self.__motions[motion.getName()] = motion
        self.__motionFiles[motion.getName()] = file
    os.chdir(oldPath)
def __init__(self):
    self.tp = TextProcess()
    # Database manager; loads the political-figure data
    self.dataManager = DataManager()
    # self.political_person_dict=list()
    # Stored as an Aho-Corasick automaton instead, which makes multi-pattern matching easy.
    self.aho_policical_person = ahocorasick.Automaton()
    try:
        # load_file = open('./mod/political_person_dict.bin', 'rb')
        # self.political_person_dict = pickle.load(load_file)
        # logging.info('political_person_dict count %d' % (len(self.political_person_dict)))
        file = open('./mod/aho_policical_person.aho', 'rb')
        self.aho_policical_person = pickle.load(file)
        logging.info('aho_policical_person count %d' % (len(self.aho_policical_person)))
    except:
        pass
    self.detector = MultiSenDetect()
    # Load the place-name index, used to decide whether a word tagged 'hs' is a place name
    load_file = open('./mod/place_dict.bin', 'rb')
    self.place_dict = pickle.load(load_file)
    logging.info('place_dict count %d' % (len(self.aho_policical_person)))
    return
def run():
    # data crawler
    factory_spider = Spiser()
    # data parser
    data_parser = dataParser.DataParser()
    data_manager = DataManager()
    url_manager = UrlManager()
    # Get the three URL lists; all of them need to be crawled
    url_list, shopurl1_list, shopurl2_list = url_manager.tel_url()
    total_num = len(url_list)
    crawred_url = url_manager.crawred_url()
    company_dataList = []
    for i in range(total_num):
        url = url_list[i]
        shopurl1 = shopurl1_list[i]
        shopurl2 = shopurl2_list[i]
        if url not in crawred_url:
            page_data = factory_spider.get_urlpage(url)
            page_shop1 = factory_spider.get_urlpage(shopurl1)
            page_shop2 = factory_spider.get_urlpage(shopurl2)
            # Use the parser to parse the three pages
            companydata = data_parser.get_company_data(page_data, page_shop1, page_shop2, url)
            # Append the parsed data tuple to the list
            company_dataList.append(companydata)
            time.sleep(1.1)
        # elif url in crawred_url:
        #     print 'already crawled', url
        # Save the crawled results to a local csv file, flushing every 5 shops
        print '=========', i, '=========='
        if i % 10 == 0 and len(company_dataList) > 0:
            data_manager.save_local_tel(company_dataList)
            company_dataList = []
            time.sleep(10)
def test1():
    data = DataManager().query_e2019('%', '7,1')
    for d in data:
        print(d)
    data = [{'content': d[1]} for d in data]
    # data=[{'content':'在华盛顿期间,习近平还先后会见了前来参加本届核安全峰会的丹麦首相拉斯穆森、韩国总统朴槿惠和阿根廷总统马克里,并出席了伊核问题六国机制领导人会议。'}]
    data = [{
        'content': '谈及美国以所谓“安全威胁”为由打压中国高科技企业,马尔科表示,美国必须提供有说服力的证据,否则指控只是猜测而已。马尔科希望,中美两国能以建设性的方式解决贸易分歧,“贸易战没有赢家,应尽快找到和平解决方案”。\
摩尔多瓦共产党人党意识形态秘书康斯坦丁·斯塔里什说,近几十年,全球经济体系已被证实行之有效,并推动了各国经济和贸易发展。如今,美国借保护自身经济利益为由,破坏这一体系基础,这好比“大象闯进瓷器店”,破坏了现有模式,却又不提供替代方案。\
斯塔里什认为,美国盲目挑起经贸摩擦,是为保持自身“世界经济霸主地位”,此举严重违背市场规律,表明美国不愿对世界经济发展负责,同时也将影响美国自身经济发展。\
巴勒斯坦法塔赫革命委员会委员巴萨姆说,美国挑起对华贸易摩擦的行为是“霸凌逻辑”,贸易战对中美双方都会造成损失。\
巴勒斯坦人民党总书记萨利希指出,美国此举违背市场规律和国际贸易规则,不仅对美国和中国,乃至对世界经济都造成威胁。此外,美国对华为等中国企业进行打压,是因为相关企业具有强劲的全球竞争力。(执笔记者:马湛;参与记者:张修智、林惠芬、陈进、杨媛媛、赵悦、周天翮)'
    }]
    data = extract_all(data) + extra_info
    for d in data:
        print(d)
    db_manager = NeoManager()
    db_manager.clear()
    db_manager.write2db(data)
def scanByJsonFile(aJsonFileName):
    """Scan data according to a JSON config file."""
    try:
        with open(aJsonFileName) as f:
            config = json.load(f)
    except Exception as e:
        print(e)
        return
    if not isinstance(config, list):
        print("The file format is invalid")
        return
    dm = DataManager()
    for item in config:
        try:
            beginScan(item.get("paths"), item.get("users"), dm, item.get("merge"))
        except Exception as e:
            print(e)
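# Hedged sketch (assumption, not from the original source): the shape of the JSON
# file scanByJsonFile appears to expect, inferred from the keys it reads
# ("paths", "users", "merge"). The file name and values are hypothetical.
example_config = [
    {"paths": ["/data/share1"], "users": ["alice"], "merge": True},
    {"paths": ["/data/share2", "/data/share3"], "users": ["bob"], "merge": False},
]
with open("scan_config.json", "w", encoding="utf-8") as f:
    json.dump(example_config, f, ensure_ascii=False, indent=2)
scanByJsonFile("scan_config.json")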
def __init__(self, root):
    Frame.__init__(self, root)
    self.__root = root
    self.__data_manager = DataManager()
    self.__check_button_type = namedtuple('CheckButtonType', 'widget var')
    self.__natures = [
        "Single Carriageway", "Traffic Island Link", "Dual Carriageway",
        "Roundabout", "Traffic Island Link At Junction", "Slip Road"
    ]
    self.__roads = [
        "M3", "M40", "M4", "A1(M)", "M11", "M23", "M20", "M25", "M1",
        "HIGH STREET", "LONDON ROAD", "HIGH ROAD", "UXBRIDGE ROAD",
        "STATION ROAD", "BRIGHTON ROAD", "GREEN LANES", "FINCHLEY ROAD",
        "HARROW ROAD", "NORTH CIRCULAR ROAD", "KINGSTON ROAD",
        "PORTSMOUTH ROAD", "HERTFORD ROAD", "STAINES ROAD", "CROYDON ROAD",
        "MAIN ROAD", "CHURCH ROAD", "PARK ROAD"
    ]
    self.__motorways = ["M3", "M40", "M4", "A1(M)", "M11", "M23", "M20", "M25", "M1"]
    self.__init_grid()
    self.__draw_grid()
def getFeaturesFavorAgainst(self, mode, listOfFeats):
    # only tweets with favor or against
    X, y = self.getFeaturesMatrix(mode, listOfFeats, 'stance')
    # print X,y
    nonerows = np.where(y == self.labelenc.transform('NONE'))[0]
    # print y
    # print nonerows
    X = np.delete(X, nonerows, axis=0)
    y = np.delete(y, nonerows)
    return X, y

def getFeaturesStanceNone(self, mode, listOfFeats):
    X, y = self.getFeaturesMatrix(mode, listOfFeats, 'stance')
    y[y == self.labelenc.transform('FAVOR')] = 3
    y[y == self.labelenc.transform('AGAINST')] = 3
    return X, y

if __name__ == '__main__':
    dp = DataManager('../data/train.csv', '../data/test.csv')
    fe = FeatureExtractor(dp)
    # fe.getYStanceNone('train')
    # fe.getFeaturesFavorAgainst('train',['words2vec'])
    # fe.getFeaturesStanceNone('train',['words2vec'])
    # X,y = fe.getFeaturesFavorAgainst('train',['words2vec'])
    # print fe.getFeaturesMatrix('train',['words'],'topic','Hillary Clinton')[0].shape
    # print fe.getFeaturesTopicNontopic('train',['words'],'topic', 'Hillary Clinton')[0].shape
    # print fe.getX('train',fe.data.trainTweets, ['words2vec']).shape
def active_learning(network: str, dataset: str, pool_length: int, method: str,
                    k: str, num_trainings: int, batch_size: int, num_epochs: int,
                    learning_rate: float, use_cuda: bool):
    """
    Function that executes an active learning run, depending on several arguments.

    Args:
        network: the model that will be trained and tested
        dataset: the data used for the active learning
        method: method used to select the k samples to add to the training set
            at each active learning loop
        k: number of samples to add
        num_trainings: number of active learning loops
        batch_size: number of samples in a batch
        num_epochs: number of loops during one training
        learning_rate: learning rate of the optimizer
        use_cuda: boolean to use the gpu for training

    Returns:
        The list of accuracies of each test phase
    """
    print("Starting active learning with"
          "\n\tmodel: " + network +
          "\n\tdataset: " + dataset +
          "\n\tselection method: " + method +
          "\n\tk: " + k +
          "\n\tnum trainings: " + str(num_trainings) +
          "\n\tbatch size: " + str(batch_size) +
          "\n\tnum epochs: " + str(num_epochs) +
          "\n\tlearning rate: " + str(learning_rate) +
          "\n\tuse cuda: " + str(use_cuda))

    model = getModel(network)
    data = getData(dataset, pool_length)
    selection_method = getSelectionMethod(method)
    k = int(k)

    if len(data.get_pool_data()[0]) < k * num_trainings:
        raise ValueError(
            "'k' or 'num-trainings' is too big, "
            "the program will not be able to extract the training "
            "samples from the pool at some point")

    # Set the optimizer factory function
    optimizer = optimizer_setup(SGD, lr=learning_rate, momentum=0.9)

    # Create the network depending on the number of classes
    model = model(num_classes=len(data.get_label_names()))

    # First index samples to train
    train_idx = np.arange(k)

    # List that will contain the test accuracy of each training
    accuracies = []

    for num_training in range(num_trainings):
        print("\nActive learning loop " + str(num_training + 1) + "/" + str(num_trainings))

        # Set data loaders depending on training samples
        dataManager = DataManager(data=data,
                                  train_idx=train_idx,
                                  batch_size=batch_size)

        # Set the network trainer and launch the training
        netTrainer = NetTrainer(model=model,
                                data_manager=dataManager,
                                selection_method=selection_method,
                                loss_fn=nn.CrossEntropyLoss(),
                                optimizer_factory=optimizer,
                                use_cuda=use_cuda)
        netTrainer.train(num_epochs)

        # Select k samples depending on the selection method
        # and add them to the training samples
        add_to_train_idx = netTrainer.evaluate_on_validation_set(k)
        train_idx = np.concatenate((train_idx, add_to_train_idx))
        print("Selected next training set indexes")

        # Compute the accuracy on the test set and save it
        accuracy = netTrainer.evaluate_on_test_set()
        accuracies.append(accuracy)

    return accuracies
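# Hedged usage sketch (not from the original script): one possible invocation of
# active_learning. The network, dataset, and method names below are hypothetical
# and would have to match whatever getModel/getData/getSelectionMethod accept.
if __name__ == '__main__':
    accuracies = active_learning(network='CnnVanilla',   # hypothetical model name
                                 dataset='cifar10',      # hypothetical dataset name
                                 pool_length=10000,
                                 method='random',        # hypothetical selection method
                                 k='100',                # passed as str, cast to int inside
                                 num_trainings=5,
                                 batch_size=32,
                                 num_epochs=10,
                                 learning_rate=0.01,
                                 use_cuda=False)
    print(accuracies)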
class TelegramPostillonBot:
    def __init__(self):
        self.postillon_crawler = PostillonCrawler(verbosity=BE_VERBOSE)
        self.data_manager = DataManager()
        self.last_request_id = 0
        self.start_polling_loop()

    def cleanup(self):
        self.data_manager.cleanup()

    def start_polling_loop(self):
        if BE_VERBOSE:
            print('start polling loop...')
        while True:
            try:
                parameters = {'offset': self.last_request_id + 1, 'timeout': 1}
                requests = self.perform_api_call(API_CALL_GET_UPDATES, parameters)
                if (requests != False):
                    for request in requests:
                        chat_id, command = self.parse_message_object(request['message'])
                        if command in KNOWN_COMMANDS:
                            if BE_VERBOSE:
                                print('received "', command, '" from chat_id ', chat_id)
                            self.respond_to_request(chat_id, command)
                        if request['update_id'] > self.last_request_id:
                            self.last_request_id = request['update_id']
                else:
                    if BE_VERBOSE:
                        print('no requests, wait for ' + str(API_POLL_INTERVAL) + 's')
                    sleep(API_POLL_INTERVAL)
            except KeyboardInterrupt:
                if BE_VERBOSE:
                    print('Terminate bot because of keyboard interruption')
                self.cleanup()
                sys.exit(0)
            except:
                print('Uncaught error in run loop; terminating bot!')
                self.cleanup()
                sys.exit(0)

    def perform_api_call(self, function, parameters={}):
        parsed_parameters = urllib.parse.urlencode(parameters)
        encoded_parameters = parsed_parameters.encode('utf-8')
        try:
            # if BE_VERBOSE:
            #     print('requesting API with parameters: ' + parsed_parameters)
            request = urllib.request.Request(
                API_URL + '/' + function, encoded_parameters)
            response = urllib.request.urlopen(request).read()
            decoded_response = json.loads(response.decode('utf-8'))
            if decoded_response['ok'] == True:
                return decoded_response['result']
            else:
                if BE_VERBOSE:
                    print('API error, bad respond code')
        except:
            if BE_VERBOSE:
                print('Uncaught error while requesting the bot API')
        return False

    def parse_message_object(self, message):
        chat_id = -1
        command = ''
        # save the received message to the DB
        self.data_manager.new_message(message)
        if 'chat' in message:
            chat_id = message['chat']['id']
        elif BE_VERBOSE:
            print('no chat object in responded message. \
Unable to identify user or group to respond to.')
        if 'text' in message:
            command = message['text']
        return chat_id, command

    def respond_to_request(self, chat_id, command):
        data = {'chat_id': chat_id}
        if command == KNOWN_COMMANDS[0]:
            if BE_VERBOSE:
                print('responding with newsticker')
            self.postillon_crawler.check_for_updates()
            data['text'] = self.data_manager.get_newsticker_for_chat(chat_id)
            data['text'] += ORIGINATOR_REFERENCE
        elif command == KNOWN_COMMANDS[1]:
            if BE_VERBOSE:
                print('responding with statistics')
            chats, newsticker, requests = self.data_manager.get_stistic()
            data['text'] = 'I answered ' + str(requests) + ' requests from '
            data['text'] += str(chats) + ' people and I know '
            data['text'] += str(newsticker) + ' headlines!'
        else:
            if BE_VERBOSE:
                print('responding with help text')
            data['text'] = self.create_information_respnse()
        self.perform_api_call(API_CALL_SEND_MESSAGE, data)

    def create_information_respnse(self):
        return '++++ Unofficial Postillon bot: \
from __future__ import absolute_import, unicode_literals
from .celery import app
from configs import configs, locations
from dataManager import DataManager
import time
from datetime import datetime
import requests
import json
# import redis

dm = DataManager()


def get_utc_time(date_t: datetime):
    """Convert the given Beijing time to UTC."""
    t = date_t.timestamp()  # unix timestamp of the target time
    return datetime.utcfromtimestamp(t)


def send(url, method='get', data=None) -> dict:
    if method == 'get' or method == 'GET':
        headers = {'Authorization': configs['Authorization']}
        r = requests.get(url, headers=headers)  # request
    elif method == 'post' or method == 'POST':
        headers = {
            'Authorization': configs['Authorization'],
            'Content-Type': 'application/json;charset=UTF-8'
        }
        if data:
            r = requests.post(url, data=data, headers=headers)
def main(self):
    usage = "usage: %prog [options] <cmd> arg1 arg2\n" + \
            "\n<cmd> should be one of download/list/plot/select:" + \
            "\n download - download stock data from finance service" + \
            "\n list - list all stocks in local database" + \
            "\n plot - plot stock diagram" + \
            "\n select - use selectors to filter stocks"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", "--local", action="store_true", dest="local",
                      default=False, help="update local stock only")
    parser.add_option("-t", "--threads", type="int", dest="threads",
                      default=5, help="threads number to work")
    parser.add_option("-a", "--append", action="store_true", dest="append",
                      default=True, help="download data by append [default]")
    parser.add_option("--config", metavar="FILE", help="specific config file")
    parser.add_option("--dbfile", metavar="FILE", dest="dbfile",
                      default="data/stock.sqlite",
                      help="specific database file [default: data/stock.sqlite]")
    parser.add_option("-s", "--selector", default="all", dest="selector",
                      help="selectors: all, trend, macd, or hot [default: %default]")
    parser.add_option("--adjusted", action="store_true", dest="adjusted",
                      default=False, help="adjust price")
    (options, args) = parser.parse_args()

    if len(args) < 1:
        parser.error("incorrect number of arguments, missing cmd")
        return -1

    command = args[0]
    cmd_args = args[1:] if len(args) > 1 else ()

    if options.config:
        print("using config %s" % options.config)

    self.dataManager = DataManager(dbpath=options.dbfile)
    self.selectEngine = SelectEngine()

    if command == "download":
        print("download data ...")
        if len(cmd_args) == 0:
            self.dataManager.downloadAll(localOnly=options.local,
                                         append=options.append,
                                         threads=options.threads)
        else:
            self.dataManager.downloadStocks(cmd_args,
                                            append=options.append,
                                            threads=options.threads)
    elif command == "plot":
        if len(cmd_args) != 1:
            parser.error("missing argument stock symbol")
            return -1
        symbol = cmd_args[0]
        print("show diagram for stock %s ..." % symbol)
        stock = self.dataManager.loadStockAndHistory(symbol)
        if stock is None:
            print("stock %s is not found in database" % symbol)
            return -1
        plot = StockPlot(stock)
        plot.setAdjusted(options.adjusted)
        plot.plot()
    elif command == "list":
        stocks = self.dataManager.loadAllStocks()
        print("List of stocks (symbol, name, price):")
        for stock in stocks:
            print("%s - %s - %d" % (stock.symbol, stock.name, stock.price))
    elif command == "select":
        engine = SelectEngine()
        engine.globallMarketData = self.dataManager.loadGlobalMarketData()
        passed = []
        for stock in self.dataManager.loadAllStocks():
            _stock = self.dataManager.loadStockAndHistory(stock.symbol)
            if engine.select(_stock):
                passed.append(stock.symbol)
        print("Passed stocks:\n%s" % passed)
    else:
        parser.error("unrecognized command %s" % command)
        return -1
    return 0
# -*- coding: utf-8 -*-
"""
Created on Mon May 14 17:12:19 2018

@author: greg
"""
import requests
from bs4 import BeautifulSoup
import codecs
import time
import pandas as pd
import numpy as np
from competition import Competition
from competitor import Competitor
from dataManager import DataManager

competition_manager = Competition()
competitor_manager = Competitor()
data_manager = DataManager()
cur.execute(
    "select distinct classification from itn_link where description = 'Motorway';"
)
motorways = [("classification", m[0].replace("'", "''")) for m in cur.fetchall()]

cur.execute('select distinct street from itn_link;')
streets = [("street", s[0].replace("'", "''")) for s in cur.fetchall()]

natures = [
    "Single Carriageway", "Traffic Island Link", "Dual Carriageway",
    "Roundabout", "Traffic Island Link At Junction", "Slip Road"
]

dM = DataManager()


def days_to_binary(day):
    if day == 0 or day == 6:
        return 0
    return 1


def func0(params, speed, rainfall_depth, day, hour):
    p0, e0, p1, e1, p2, e2, c = params
    return p0 * rainfall_depth**e0 + p1 * day**e1 + p2 * hour**e2 + c - speed


def plot_func0(params, rainfall_depth, day, hour):
    p0, e0, p1, e1, p2, e2, c = params
def __init__(self):
    self.postillon_crawler = PostillonCrawler(verbosity=BE_VERBOSE)
    self.data_manager = DataManager()
    self.last_request_id = 0
    self.start_polling_loop()
class TestWord(unittest.TestCase):
    def setUp(self):
        self.d1 = DataManager()
        self.wd1 = Word('software', '소프트웨어', (True, self.d1))
        self.wd2 = Word('project', '프로젝트', (True, self.d1))
        self.d1.words = [self.wd1, self.wd2]

    def tearDown(self):
        pass

    def testWords(self):
        wd3 = Word('clear', '끝')
        self.d1.wordAdd(wd3)
        self.assertEqual(self.d1.getWords(), [self.wd1, self.wd2, wd3])
        wd4 = Word('bye', '안녕')
        self.d1.wordAdd(wd4)
        self.assertTrue(
            isListSame(self.d1.getWords(), [self.wd1, self.wd2, wd3, wd4]))
        self.d1.wordDelete(self.wd2)
        self.assertTrue(isListSame(self.d1.getWords(), [self.wd1, wd3, wd4]))
        self.d1.wordDelete(wd4)
        self.assertTrue(isListSame(self.d1.getWords(), [self.wd1, wd3]))
        self.assertTrue(isListSame(self.d1.getFocusedWords(), [self.wd1]))
        self.d1.saveAllWords()
        self.d1.readAllWords()
        self.assertTrue(
            isListSameOfIsWordSame(self.d1.getWords(), [self.wd1, wd3]))
        self.assertTrue(
            isListSameOfIsWordSame(self.d1.getFocusedWords(), [self.wd1]))
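# Minimal runner sketch (assumption: this test module is executed directly);
# unittest discovers and runs the TestWord case defined above.
if __name__ == '__main__':
    unittest.main()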
class WindowsManager:
    def __init__(self, pushsendingstart):
        self.dataManager = DataManager()
        topic, intervalStart, intervalEnd = self.dataManager.readAllSettings()
        self.webCrawler = WebCrawler()
        self.pushSender = PushSender(self.dataManager, "default")
        if pushsendingstart:
            self.pushSender.pushSendingThreadStart()
        self.nowListedWords = self.getAllWords()
        self.mainWindow = MainWindow(self)
        self.memorizeWindow = MemorizeWindow(self)
        self.settingWindow = SettingWindow(self)
        self.settingWindow.settingLoad(topic, intervalStart, intervalEnd)
        self.mainWindow.showWindow()
        self.memorizeWindow.hideWindow()
        self.settingWindow.hide()

    def getAllWords(self):
        return self.dataManager.getWords()

    def setNowListedWords(self, to_this):
        self.nowListedWords = to_this

    def getNowListedWords(self):
        return self.nowListedWords

    def memorizeModeStart(self):
        self.memorizeWindow.showWindow()

    def memorizeModeEnd(self):
        self.memorizeWindow.hideWindow()

    def qButtonMaker(self, name, callback, fixedSize=None, sizePolicy=None):
        bt = QToolButton()
        bt.setText(name)
        bt.clicked.connect(callback)
        if fixedSize is not None:
            wid, hei = fixedSize
            if wid == -1:
                bt.setFixedHeight(hei)
            elif hei == -1:
                bt.setFixedWidth(wid)
            else:
                bt.setFixedSize(fixedSize[0], fixedSize[1])
        if sizePolicy is not None:
            bt.setSizePolicy(sizePolicy[0], sizePolicy[1])
        return bt

    def qTextWidgetSetter(self, widget, startText, isReadOnly, alignment=None,
                          fixedSize=None, sizePolicy=None, fontSizeUp=0):
        widget.setText(startText)
        widget.setReadOnly(isReadOnly)
        if alignment is not None:
            widget.setAlignment(alignment)
        if fixedSize is not None:
            wid, hei = fixedSize
            if wid == -1:
                widget.setFixedHeight(hei)
            elif hei == -1:
                widget.setFixedWidth(wid)
            else:
                widget.setFixedSize(fixedSize[0], fixedSize[1])
        if sizePolicy is not None:
            widget.setSizePolicy(sizePolicy[0], sizePolicy[1])
        if fontSizeUp != 0:
            font = widget.font()
            font.setPointSize(font.pointSize() + fontSizeUp)
            widget.setFont(font)
        return widget
conn = psycopg2.connect(database="tfl", user="******", password="******",
                        host="127.0.0.1", port=9999)
cur = conn.cursor()

cur.execute("select distinct classification from itn_link where description = 'Motorway';")
motorways = [("classification", m[0].replace("'", "''")) for m in cur.fetchall()]

cur.execute('select distinct street from itn_link;')
streets = [("street", s[0].replace("'", "''")) for s in cur.fetchall()]

natures = [
    "Single Carriageway", "Traffic Island Link", "Dual Carriageway",
    "Roundabout", "Traffic Island Link At Junction", "Slip Road"
]

dM = DataManager()


def days_to_binary(day):
    if day == 0 or day == 6:
        return 0
    return 1


def func0(params, speed, rainfall_depth, day, hour):
    p0, e0, p1, e1, p2, e2, c = params
    return p0 * rainfall_depth ** e0 + p1 * day ** e1 + p2 * hour ** e2 + c - speed


def plot_func0(params, rainfall_depth, day, hour):
    p0, e0, p1, e1, p2, e2, c = params
    return p0 * rainfall_depth ** e0 + p1 * day ** e1 + p2 * hour ** e2 + c


def func1(params, speed, rainfall_depth, day, hour):
class PersonDisambiguation():
    def __init__(self):
        self.tp = TextProcess()
        # Database manager; loads the political-figure data
        self.dataManager = DataManager()
        # self.political_person_dict=list()
        # Stored as an Aho-Corasick automaton instead, which makes multi-pattern matching easy.
        self.aho_policical_person = ahocorasick.Automaton()
        try:
            # load_file = open('./mod/political_person_dict.bin', 'rb')
            # self.political_person_dict = pickle.load(load_file)
            # logging.info('political_person_dict count %d' % (len(self.political_person_dict)))
            file = open('./mod/aho_policical_person.aho', 'rb')
            self.aho_policical_person = pickle.load(file)
            logging.info('aho_policical_person count %d' % (len(self.aho_policical_person)))
        except:
            pass
        self.detector = MultiSenDetect()
        # Load the place-name index, used to decide whether a word tagged 'hs' is a place name
        load_file = open('./mod/place_dict.bin', 'rb')
        self.place_dict = pickle.load(load_file)
        logging.info('place_dict count %d' % (len(self.aho_policical_person)))
        return

    '''
    Identify political figures, save their basic data, and build the dict that
    maps political figures to their Baidu Baike data.
    '''
    def checkPersonBaike(self):
        rows = self.dataManager.query_sql("select * from psm_cityfather")
        persons = []
        for row in rows:
            person = dict()
            person['id'] = row[0]
            person['nationlity'] = row[1]
            person['region'] = row[2]
            person['cname'] = row[3]
            person['duty'] = row[7]
            persons.append(person)
        logging.info('persons count: %d' % len(persons))
        # Use the disambiguation tool
        detector = MultiSenDetect()
        count = 0
        persons_temp = self.political_person_dict
        bar = tqdm(persons)
        for person in bar:
            bar.set_description_str(person['cname'])
            # self.political_person_dict=list()
            for p in self.political_person_dict:
                if p['cname'] == person['cname'] and p['duty'] == person['duty']:
                    person['baikename'] = p['baikename']
                    person['baikeurl'] = p['baikeurl']
                    person['baikeconcept'] = p['baikeconcept']
                    person.update()
                    break
            if person.get('baikeconcept'):
                count = count + 1
                persons_temp.append(person)
                continue
            else:
                sent_embedding_res, wds_embedding_res = detector.detect_main(person['duty'], person['cname'])
                # print(sent_embedding_res)
                # print(wds_embedding_res)
                person['baikename'] = wds_embedding_res[0][0]
                person['baikeurl'] = detector.getConcept(person['baikename'])['link']
                person['baikeconcept'] = detector.getConcept(person['baikename'])
                person.update()
                # pprint.pprint(person)
                count = count + 1
                persons_temp.append(person)
                if count % 5 == 0:
                    fou = open('./mod/political_person_dict.bin', 'wb')
                    pickle.dump(persons_temp, fou)
                    fou.close()
                    detector.save_cache()
        detector.save_cache()
        fou = open('./mod/political_person_dict.bin', 'wb')
        pickle.dump(persons, fou)
        fou.close()

    # Server version: completes the object (VOB), but a postposed object in the
    # following sentence cannot be recognised
    def complete_VOB_server(self, arcs, word_index):
        word = arcs[word_index][1]
        prefix = ''
        postfix = ''
        for arc in arcs:
            if arc[5] == word_index and arc[2] < word_index:
                prefix += self.complete_VOB_server(arcs, arc[2])
            if arc[5] == word_index and arc[2] > word_index:
                postfix += self.complete_VOB_server(arcs, arc[2])
        return prefix + word + postfix

    def findPerson(self, content):
        # 1. Split into sentences first
        sents = self.tp.cut_sentences(content)
        nrs = dict()
        geos = set()
        for sent in sents:
            # nr=set(self.tp.posseg(sent,POS=['nr']))
            # nrs=nrs.union(nr)
            # return nrs
            arcs = self.parseContent(sent)
            for arc in arcs:
                # This may be a person name
                if arc[3] == 'nh':
                    # Collect the attributive (ATT) keywords from here and store them
                    # nrs.add(arc[1])
                    prefix = ''
                    for arc_ in arcs:
                        if arc_[5] == arc[2] and arc_[2] < arc[2]:
                            prefix += self.complete_VOB_server(arcs, arc_[2])
                    # if prefix=='' :
                    #     nrs[arc[1]] = [prefix]
                    #     continue
                    pattern = r',|\.|/|;|\'|`|\[|\]|<|>|\?|:|"|\{|\}|\~|!|@|#|\$|%|\^|&|\(|\)|-|=|\_|\+|,|。|、|;|‘|’|【|】|·|!| |…|(|)'
                    prefix_list = re.split(pattern, prefix)
                    for prefix_ in prefix_list:
                        if nrs.get(arc[1]):
                            if prefix_ not in nrs.get(arc[1]) and prefix_ != '':
                                nrs[arc[1]].append(prefix_)
                        else:
                            nrs[arc[1]] = [prefix_]
                if arc[3] == 'ns':
                    if (self.place_dict.get(arc[1])):
                        geos.add(arc[1])
        return nrs, geos

    '''Build arcs and child_dict_list with the LTP server'''
    '''This part can be replaced by other LTP tools'''
    def parser_main_ltpserver(self, sentence):
        url = 'http://192.168.1.101:8020/ltp'
        wb_data = requests.post(url, data={'s': sentence, 't': 'dp'}, json=True, allow_redirects=True)
        wb_data.encoding = 'utf-8'
        arcs_list = []
        try:
            content = wb_data.json()
            for c in content[0][0]:
                p = c.get('parent')
                pc = content[0][0][p]
                pname = pc.get('cont')
                ppos = pc.get('pos')
                arcs_list.append(
                    [c.get('relate'), c.get('cont'), c.get('id'), c.get('pos'), pname, c.get('parent'), ppos])
            child_dict_list = []
            for index in range(len(content[0][0])):
                child_dict = dict()
                for arc_index in range(len(arcs_list)):
                    # if arcs[arc_index].relation=='HED':
                    #     print('hed')
                    if arcs_list[arc_index][5] == index:
                        # arc indices start at 1 ----> HED has been dropped
                        if arcs_list[arc_index][0] in child_dict:
                            child_dict[arcs_list[arc_index][0]].append(arc_index)
                        else:
                            child_dict[arcs_list[arc_index][0]] = []
                            child_dict[arcs_list[arc_index][0]].append(arc_index)
                child_dict_list.append(child_dict)
        except:
            None
        return arcs_list, child_dict_list

    def parseContent(self, sent):
        arcs, child_dict_list = self.parser_main_ltpserver(sent)
        return arcs

    def test1(self):
        load_file = open('./mod/political_person_dict.bin', 'rb')
        political_person_dict = pickle.load(load_file)
        # pprint.pprint(political_person_dict)
        for i, person in enumerate(political_person_dict):
            if person['cname'] == '哈勒特马·巴特图勒嘎':
                pprint.pprint(person)
                pprint.pprint(i)
                break

    '''
    Update the data in political_person_dict instead of regenerating all of it.
    '''
    def update_political_person_dict(self, cname, duty):
        load_file = open('./mod/political_person_dict.bin', 'rb')
        political_person_dict = pickle.load(load_file)
        for i, person in enumerate(political_person_dict):
            if person['cname'] == cname and person['duty'] == duty:
                sent_embedding_res, wds_embedding_res = self.detector.detect_main(person['duty'], person['cname'], [person['duty']])
                # print(sent_embedding_res)
                # print(wds_embedding_res)
                person['baikename'] = wds_embedding_res[0][0]
                person['baikeurl'] = self.detector.getConcept(person['baikename'])['link']
                person['baikeconcept'] = self.detector.getConcept(person['baikename'])
                person.update()
                pprint.pprint(person)
        fou = open('./mod/political_person_dict.bin', 'wb')
        pickle.dump(political_person_dict, fou)
        fou.close()

    '''
    Use the Baifendian (百分点) service to get synonyms, used for name alignment.
    '''
    def get_sim(self, something):
        url = 'http://10.122.141.12:9006/similar'
        r = requests.post(url, json={"ck": "synonym", "synonym_word": something, "synonym_selectedMode": "auto",
                                     "homoionym_word": "", "homoionym_selectedMode": "auto", "homoionym_num": ""})
        json = r.json()
        result = json['detail']['res']['synonym']
        return result

    '''
    Build the pattern-matching index (a dict could be used instead).
    Person names are not actually obtained by pattern matching here but by LTP
    POS tagging, which gives better accuracy.
    '''
    def genAhocorasick(self):
        load_file = open('./mod/political_person_dict.bin', 'rb')
        self.political_person_dict = pickle.load(load_file)
        self.aho_policical_person = ahocorasick.Automaton()
        for i, person in enumerate(self.political_person_dict):
            word = person.get('cname')
            # Foreign names need an alignment feature here, e.g. 唐纳德·特朗普 ===> 特朗普 / 川普, 习近平 ---> 习主席,
            # but most Chinese names do not need alignment.
            aliasPerson = self.get_sim(word)
            baidualias = person.get('baikeconcept').get('别名')
            if word.find('·') > -1:
                aliasPerson.append(word[word.index('·') + 1:])
                aliasPerson.append(word[word.rindex('·') + 1:])
                # drop the middle name
                aliasPerson.append(word[word.index('·') + 1:] + word[word.rindex('·'):])
            baidualias_list = []
            if baidualias:
                pattern = r',|\.|/|;|\'|`|\[|\]|<|>|\?|:|"|\{|\}|\~|!|@|#|\$|%|\^|&|\(|\)|-|=|\_|\+|,|。|、|;|‘|’|【|】|·|!| |…|(|)'
                baidualias_list = re.split(pattern, baidualias)
            person_all = set([word]).union(set(aliasPerson)).union(set(baidualias_list))
            for word_ in person_all:
                persons = []
                if self.aho_policical_person.exists(word_):
                    persons = self.aho_policical_person.get(word_)
                persons.append(person)
                self.aho_policical_person.add_word(word_, persons)
        self.aho_policical_person.make_automaton()
        # s=self.aho_policical_person.get('习近平')
        # pprint.pprint(s)
        out = open('./mod/aho_policical_person.aho', 'wb')
        out.write(pickle.dumps(self.aho_policical_person))
        out.close()

    def testAho(self):
        sent = '本院受理的原告易纲诉被告吴勇、王国珍机动车交通事故责任纠纷一案,现已审理终结。判决如下:一、自本判决生效之日起三日内,王国珍赔偿杨旭维修费11703元;二、驳回杨旭的其他诉讼请求。因你下落不明,现依法向你公告送达本案的民事判决书。自本公告发出之日起,经过60日即视为送达。如不服本判决,可在判决书送达之日起十五日内,向本院递交上诉状,并按对方当事人的人数提出副本,上诉于广州市中级人民法院。特此公告。'
        file = open('./mod/aho_policical_person.aho', 'rb')
        aho_policical_person = pickle.load(file)
        for word in aho_policical_person.iter('刘惠'):
            pprint.pprint(word)

    '''
    Recognise political figures in a text.
    repeat: whether to recognise every occurrence of a name, even if it appears several times in the article
    att_weight: whether to weight by the person's title
    geo_weight: whether to weight by geographic location
    '''
    def recongnizePoliticalPerson(self, sent, repeat=False, att_weight=True, geo_weight=True):
        pperon_candidates = []
        pperson_sure = []
        npperson_sure = []
        # A sentence may contain several political names, possibly sharing characters; in this mode
        # names with repeated characters are also extracted. If a two-character political name happens
        # to be contained in someone else's three-character name it will be misjudged, so the most
        # reliable approach is still word segmentation plus dependency parsing of the ATT relation.
        # Lexical analysis has to run first; the LTP server is used here because jieba is not accurate
        # enough, so ltp_server must be running.
        nrs, geos = self.findPerson(sent)
        # for word in self.aho_policical_person.iter(sent):
        for nr in nrs:
            if not self.aho_policical_person.exists(nr):
                # Only political names (and names identical to them) are handled; other person names are skipped
                continue
            ppersons = self.aho_policical_person.get(nr)  # already includes same-named political figures, but not non-political people
            # If a name occurs several times in the sentence, take it only once, for efficiency
            flag = True
            if not repeat:
                for pperon_candidate in pperon_candidates:
                    if pperon_candidate.get('cname') == ppersons[0].get('cname'):
                        flag = False
                if not flag:
                    continue
            pperon_candidates = pperon_candidates + ppersons
            # Feed the ATT keywords as weights into the decision process
            att = []
            if att_weight:
                att = nrs.get(nr)
            # Geographic-location weighting
            geo = []
            if geo_weight:
                # geo=self.geoKG.parseDoc_global(sent)
                geo = geos
            # sent_embedding_res is unused for now and kept only for interface compatibility.
            # Could a category check be added here: use the title to decide whether someone is an
            # official, and then filter the officials?
            # Compared with the whole sentence, ATT is closer to the person; other keywords are background.
            # A knowledge-graph-style check should come first, followed by the combined disambiguation decision.
            sent_embedding_res, wds_embedding_res = self.detector.detect_main(sent, ppersons[0].get('cname'), att, geo)
            concept = self.detector.getConcept(wds_embedding_res[0][0])  # fetch the metadata
            for pperson in ppersons:
                # '政治人物' (political figure) is a tag Baidu assigns to people; to improve accuracy, check that the records match
                if concept.get('出生日期') == pperson.get('baikeconcept').get('出生日期'):  # and '政治人物' in concept.get('tags'):
                    # logging.info(pperson)
                    # pprint.pprint(pperson)
                    pperson_sure.append(pperson)
                    break
            if pperson not in pperson_sure:
                concept['是否政要'] = '否'
                # pprint.pprint(concept)
                npperson_sure.append(concept)
        # Save the Baidu access cache
        self.detector.save_cache()
        pprint.pprint(pperson_sure)
        pprint.pprint(npperson_sure)
        return pperson_sure, npperson_sure

    def recongnizePerson(self, sent, repeat=False, att_weight=True, geo_weight=True):
        pperon_candidates = []
        # Same caveats as in recongnizePoliticalPerson: names sharing characters can be misjudged,
        # and LTP-based lexical analysis (ltp_server) has to run first because jieba is not accurate enough.
        nrs, geos = self.findPerson(sent)
        # for word in self.aho_policical_person.iter(sent):
        for nr in set(nrs):
            att = []
            if att_weight:
                att = nrs.get(nr)
            # Geographic-location weighting
            geo = []
            if geo_weight:
                # geo=self.geoKG.parseDoc_global(sent)
                geo = geos
            # sent_embedding_res is unused for now and kept only for interface compatibility.
            sent_embedding_res, wds_embedding_res = self.detector.detect_main(sent, nr, att, geo)
            concept = self.detector.getConcept(wds_embedding_res[0][0])  # fetch the metadata
            pperon_candidates.append(concept)
        pprint.pprint(pperon_candidates)
import traceback
from flask import Flask, render_template, request, redirect, url_for
from dataManager import DataManager
import datetime
from dbManager import DashboardInfo

OPTIONS = [
    "ALL_TEST_CASES", "FAILED_IN_CURRENT_RUN", "FAILING_FROM_LAST_10_RUNS",
    "FAILING_FROM_LAST_3_RUNS", "UNSTABLE_TEST_CASES", "PASS_STATUS_SWITCHED"
]
VIEWS = ['TABLE_VIEW', 'GRAPH_VIEW']

dataManager = DataManager()

####################### HOME ######################
app = Flask(__name__)


@app.errorhandler(Exception)
def default_error_handler(error):
    '''Default error handler'''
    original = getattr(error, "original_exception", error)
    traceback.print_tb(error.__traceback__)
    print("ERROR occurred during handling message:", original)
    return render_template("view_error.html")


@app.route('/', methods=["GET", "POST"])
def hello():
    all_dashboards = dataManager.get_all_dashboards()
def updateMotion(self, motion):
    dm = DataManager()
    path = self.TEMPLATES_PATH + self.getMotionFile(motion.getName())
    dm.saveMotion(motion, path)
    print('Motion Updated')
class GraphAnalyzer(Frame):
    def __init__(self, root):
        Frame.__init__(self, root)
        self.__root = root
        self.__data_manager = DataManager()
        self.__check_button_type = namedtuple('CheckButtonType', 'widget var')
        self.__natures = [
            "Single Carriageway", "Traffic Island Link", "Dual Carriageway",
            "Roundabout", "Traffic Island Link At Junction", "Slip Road"
        ]
        self.__roads = [
            "M3", "M40", "M4", "A1(M)", "M11", "M23", "M20", "M25", "M1",
            "HIGH STREET", "LONDON ROAD", "HIGH ROAD", "UXBRIDGE ROAD",
            "STATION ROAD", "BRIGHTON ROAD", "GREEN LANES", "FINCHLEY ROAD",
            "HARROW ROAD", "NORTH CIRCULAR ROAD", "KINGSTON ROAD",
            "PORTSMOUTH ROAD", "HERTFORD ROAD", "STAINES ROAD", "CROYDON ROAD",
            "MAIN ROAD", "CHURCH ROAD", "PARK ROAD"
        ]
        self.__motorways = ["M3", "M40", "M4", "A1(M)", "M11", "M23", "M20", "M25", "M1"]
        self.__init_grid()
        self.__draw_grid()

    def __init_grid(self):
        # Road list
        self.__roads_list_box = Listbox(self.__root, selectmode=MULTIPLE, height=27, exportselection=0)
        for road in self.__roads:
            self.__roads_list_box.insert('end', road)

        # Nature list
        self.__natures_list_box = Listbox(self.__root, selectmode=MULTIPLE, height=6, width=22, exportselection=0)
        for nature in self.__natures:
            self.__natures_list_box.insert('end', nature)
        # Start with all natures selected
        self.__natures_list_box.select_set(0, END)

        # Days list
        self.__days_list_box = Listbox(self.__root, selectmode=MULTIPLE, height=8, width=22, exportselection=0)
        for day in ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']:
            self.__days_list_box.insert('end', day)

        # Hours list
        self.__hours_list_box = Listbox(self.__root, selectmode=MULTIPLE, height=24, width=7, exportselection=0)
        for hour in range(24):
            self.__hours_list_box.insert('end', hour)

        # Check button draw overall
        self.__draw_overall_var = IntVar()
        self.__draw_overall_check_box = \
            Checkbutton(self.__root, text="Draw Overall Curve?",
                        variable=self.__draw_overall_var, onvalue=1, offvalue=0,
                        height=2, width=20)

        # Check button draw nature
        self.__draw_nature_var = IntVar()
        self.__draw_nature_check_box = \
            Checkbutton(self.__root, text="Draw Curve Per Nature?",
                        variable=self.__draw_nature_var, onvalue=1, offvalue=0,
                        height=2, width=20)

        # Check button show data
        self.__show_data_var = IntVar()
        self.__show_data_var.set(1)
        self.__show_data_check_box = \
            Checkbutton(self.__root, text="Show data?",
                        variable=self.__show_data_var, onvalue=1, offvalue=0,
                        height=2, width=20)

        # Go button
        self.__go_button = Button(self.__root, text='GO', command=lambda: self.__generate_graph())

        # Errors text box
        self.__error_text_box = Text(self.__root, height=28, width=18, fg="red")
        self.__error_text_box.tag_config('justified', justify=CENTER)

    def __draw_grid(self):
        # Roads label and list box
        Label(self.__root, text="Roads", justify=CENTER).grid(row=0, column=0)
        self.__roads_list_box.grid(row=1, column=0, rowspan=27)

        # Natures label and list box
        Label(self.__root, text="Natures", justify=CENTER).grid(row=0, column=1)
        self.__natures_list_box.grid(row=1, column=1, rowspan=6)

        # Days label and list box
        Label(self.__root, text="Days", justify=CENTER).grid(row=7, column=1)
        self.__days_list_box.grid(row=8, column=1, rowspan=8)

        # Hours label and list box
        Label(self.__root, text="Hours", justify=CENTER).grid(row=0, column=3)
        self.__hours_list_box.grid(row=1, column=3, rowspan=24)

        # Check boxes
        Label(self.__root, text="Drawing Options", justify=CENTER).grid(row=0, column=4)
        self.__draw_overall_check_box.grid(row=1, column=4, rowspan=2)
        self.__draw_nature_check_box.grid(row=3, column=4, rowspan=2)
        self.__show_data_check_box.grid(row=5, column=4, rowspan=2)

        # Go button
        self.__go_button.grid(row=10, column=4)

        # Error Column
        Label(self.__root, text="Error Report", height=1, width=18, justify=CENTER).grid(row=0, column=5)
        self.__error_text_box.grid(row=1, column=5, rowspan=28)

    def __generate_graph(self):
        # Get parameters
        roads = tuple(self.__roads_list_box.get(road_index)
                      for road_index in self.__roads_list_box.curselection())
        roads = [("classification" if road in self.__motorways else "street", road)
                 for road in roads]
        natures = tuple(self.__natures_list_box.get(nature_index)
                        for nature_index in self.__natures_list_box.curselection())
        days = self.__days_list_box.curselection()
        hours = self.__hours_list_box.curselection()
        errors = self.__error_check(roads, natures, days, hours)
        if len(errors):
            self.__error_text_box.delete("1.0", END)
            for e in errors:
                self.__error_text_box.insert(END, e + '\n', 'justified')
        else:
            data = self.__data_manager.get_data("traffic", "rainfall", roads, natures, hours, days)
            self.__plot_data(data)

    def __error_check(self, roads, natures, hours, days):
        errors = []
        if not len(roads):
            errors.append("No roads selected")
        if not len(natures):
            errors.append("No natures selected")
        if not len(hours):
            errors.append("No hours selected")
        if not len(days):
            errors.append("No days selected")
        if not (self.__show_data_var.get() or self.__draw_nature_var.get() or self.__draw_overall_var.get()):
            errors.append("Nothing to draw")
        return errors

    def __plot_data(self, data):
        max_depth = data.depth.max()
        max_speed = data.speed.max()
        dfs_to_plot = []
        if self.__show_data_var.get():
            dfs_to_plot.append(data)
        if self.__draw_overall_var.get():
            dfs_to_plot.append(self.__get_best_fit_curve(data, max_depth, max_speed, "Best fit curve"))
        if self.__draw_nature_var.get():
            for nature, nature_df in data.groupby(['nature']):
                dfs_to_plot.append(self.__get_best_fit_curve(nature_df, max_depth, max_speed, nature))
        data = pd.concat(dfs_to_plot, ignore_index=True)
        fg = sns.FacetGrid(data=data, hue='nature', aspect=1.9, legend_out=False, size=8)
        fg.map(plt.scatter, 'depth', 'speed', s=20).add_legend(None, "Legend")
        axes = fg.axes
        ylim = 120 if max_speed > 200 else max_speed
        xlim = 1.0 if max_depth < 1.0 else 2.0
        axes[0, 0].set_ylim(0, ylim)
        axes[0, 0].set_xlim(0, xlim)
        sns.plt.show()

    def __get_best_fit_curve(self, data, max_depth, max_speed, nature_str):
        try:
            popt, pcov = curve_fit(self.curve_func, data.depth, data.speed)
        except RuntimeError:
            return pd.DataFrame({'depth': [], 'speed': [], 'nature': [], 'identifier': []})
        a = popt[0]
        b = popt[1]
        c = popt[2]
        depths = list(np.arange(0, max_depth, max_depth / 10000.0))
        speeds = map(lambda x: self.curve_func(x, a, b, c), depths)
        if max(speeds) > max_speed:
            speeds = [s for s in speeds if s <= max_speed]
            depths = depths[0:len(speeds)]
        natures = [nature_str] * len(depths)
        identifiers = [''] * len(depths)
        return pd.DataFrame({'depth': depths, 'speed': speeds, 'nature': natures, 'identifier': identifiers})

    def curve_func(self, x, a, b, c):
        return a * np.exp(-b * x) + c
import numpy as np
from event import Event
from fiducialCuts import FiducialCuts
import time
import matplotlib.pyplot as plt
from dataManager import DataManager
import math

# Read in data here
start = time.time()

# data_file = '/media/tylerviducic/Elements/aidapt/data/synthetic/clasfilter2_5M780.npy'  # change to your path, obviously
data_file = '/media/tylerviducic/Elements/aidapt/data/recon/twopi_ppip.10.zzz'

data_manager = DataManager(data_file)
input_array = data_manager.get_numpy_array()

output_list = []
phi_list = []
num_bins = 180

num_rows, num_columns = input_array.shape

for n in range(num_rows):
    row = input_array[n]
    event = Event(row)
    phi = math.degrees(abs(event.get_proton_phi()))
    phi_list.append(phi)
def __init__(self, fileName, ignoreList, shortcutList, response):
    global filename
    if shortcutList != None:
        ignoreList = ignoreList + list(shortcutList.values())
    with open(fileName, encoding="utf-8") as f:
        data = f.readlines()
        f.close()
    io = WriteNewFile.fileCreator()
    # I tried to make this flag system its own method,
    # but it needs to be in here.
    # It's so it can print out a sort of loading bar.
    a = False
    b = False
    c = False
    d = False
    e = False
    f = False
    remainingTime = len(data)
    tracker = 0
    flagCnt = 1
    for x in data:
        # print(x)
        # print("\n")
        # This is where it prints out the progress.
        flag = LoadingBar.bar(tracker, remainingTime, a, b, c, d, e, f)
        if flag == True:
            if flagCnt == 1:
                a = True
                flagCnt += 1
            elif flagCnt == 2:
                b = True
                flagCnt += 1
            elif flagCnt == 3:
                c = True
                flagCnt += 1
            elif flagCnt == 4:
                d = True
                flagCnt += 1
            elif flagCnt == 5:
                e = True
                flagCnt += 1
            elif flagCnt == 6:
                f = True
                flagCnt += 1
        if shortcutList != None:
            for vals in shortcutList:
                x = x.replace(vals, shortcutList[vals])
        loweredArray = x.lower()
        if response == 1:
            sentences = loweredArray.split('.')
            for y in sentences:
                splitArray = re.findall(r"[\w']+", y)
                freqTable = DataManager.manager(ignoreList, splitArray)
                # print(freqTable)
                # replaceEngine(freqTable) would replace this part of the code
                wordsToReplace = []
                for y in freqTable:
                    if freqTable.get(y) != 1:
                        wordsToReplace.append(y)
                # print(wordsToReplace)
                for y in wordsToReplace:
                    wordsToUse = syn.synonyms(y)
                    # print("\nSynonyms for '" + y + "' are: ", wordsToUse)
                    # print('\n')
                    x = repl.replace(y, x, wordsToUse)
                    # print("\nnew string: " + x + "\n")
            tracker += 1
            WriteNewFile.newFile(x, io)
        else:
            splitArray = re.findall(r"[\w']+", loweredArray)
            freqTable = DataManager.manager(ignoreList, splitArray)
            # print(freqTable)
            # replaceEngine(freqTable) would replace this part of the code
            toReplace = self.replaceEngine(freqTable)
            for j in toReplace:
                wordsToUse = syn.synonyms(j)
                # print("\nsynonyms for " + j + " are ", wordsToUse)
                # print("\n")
                x = repl.replace(j, x, wordsToUse)
                # print("\nnew string: " + x + "\n")
            tracker += 1
            WriteNewFile.newFile(x, io)
    LoadingBar.bar(tracker, remainingTime, a, b, c, d, e, f)
    io.close()