class Option:
    def __init__(self, strike, timeMaturity, numSim, riskFreeRate, company, source):
        self.k = strike
        self.t = timeMaturity
        self.numSim = numSim
        self.r = riskFreeRate
        self.company = company
        self.source = source
        self.dc = None

    def collectData(self):
        self.dc = DataCollector(self.source, self.company, self.t * 365)
        if self.source == "Yahoo" or self.source == "Alpha":
            self.dc.getDataFromGateway()
        else:
            self.dc.getDataFromCSV()

    def simulateCall(self):
        self.collectData()
        return simulate(self.dc.data, self.numSim, self.t * 365, self.t, self.r, self.k, "Call")

    def simulatePut(self):
        self.collectData()
        return simulate(self.dc.data, self.numSim, self.t * 365, self.t, self.r, self.k, "Put")
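# A minimal usage sketch (hypothetical values; assumes the project's
# DataCollector class and simulate() helper are importable in this scope):
option = Option(strike=100.0, timeMaturity=0.5, numSim=10000,
                riskFreeRate=0.02, company="AAPL", source="Yahoo")
call_price = option.simulateCall()
put_price = option.simulatePut()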
def __init__(self, post, client_secret, client_id, user_agent, username, password):
    self.username = username
    self.password = password
    DataCollector.__init__(self, client_secret, client_id, user_agent)
    self.post = self.r.subreddit(post)
    self.flair_id = tuple(self.post.flair.link_templates)[0]["id"]
    del self.survived
    del self.del_no_reply
def __init__(self, _Fs, _filter_r, _threshold, _min_length, _min_freq):
    DataCollector.__init__(self, _Fs, _filter_r)
    self.responses = [[]]
    self.found = False
    self.threshold = _threshold
    self.min_length = _min_length
    self.min_freq = _min_freq
def makeItemFile():
    my_dc = DataCollector('z', 'z', True)
    j = my_dc.getJSON()
    with open('items.txt', "w") as outfile:
        for k, v in j["data"].items():
            # print("KEY: {} -- ITEMS {}".format(k, v))
            outfile.write("{} : {}\n".format(v["name"], v["id"]))
def publish_data(sac_temp):  # sac_temp was undefined in the original snippet; taken as a parameter here
    dataCollector = DataCollector()
    temp = dataCollector.get_temp()
    humid = dataCollector.get_humidity()
    message = {
        'temperature': temp,
        'humidity': humid,
        'sac_temperature': sac_temp
    }
    # `.async()` is a syntax error on Python 3.7+ (async is a keyword);
    # the PubNub Python SDK renamed the call to .pn_async()
    pubnub.publish().channel('Temperature_Status_Channel').message(message).pn_async(publish_callback)
def data_loader(self):
    dataCollector = DataCollector()
    dataCollector.open_dialog_box()
    self.path = dataCollector.path
    self.file = dataCollector.file
    if self.file[0]:
        with open(self.file[0], "r") as f:
            ddata = f.read()
        self.textedit.setText(ddata)
def main():
    m_id_list = [1852548676, 1852558827, 1852559208, 1852560871, 1852561073]
    total_games = len(m_id_list)
    crystal_scepter_id = 3116
    liandry_id = 3151
    crystal_scepter_wins = 0
    liandry_wins = 0
    both_wins = 0
    for id in m_id_list:
        my_dc = DataCollector(id, 'na')
        # my_dc.printData()
        j = my_dc.getJSON()
        # teamX [id, winner]
        team1 = getTeamInfo(j, 0)
        team2 = getTeamInfo(j, 1)
        teams = [team1, team2]
        # updateTeamBools(j, teams, crystal_scepter_id, liandry_id)
        for x in range(10):
            my_team = j["participants"][x]["teamId"]
            for k, v in j["participants"][x]["stats"].items():
                if re.search("item", k):
                    if v == crystal_scepter_id:
                        for t in teams:
                            if my_team == t["teamId"]:
                                t["rylai"] = True
                    elif v == liandry_id:
                        for t in teams:
                            if my_team == t["teamId"]:
                                t["liandry"] = True
        for t in teams:
            if t["winner"] is True:
                if t["rylai"] is True:
                    crystal_scepter_wins += 1
                if t["liandry"] is True:
                    liandry_wins += 1
                if t["rylai"] is True and t["liandry"] is True:
                    both_wins += 1
    print("Liandry win %: {}%".format(liandry_wins / total_games * 100))
    print("Rylai win %: {}%".format(crystal_scepter_wins / total_games * 100))
    print("Total win %: {}%".format(both_wins / total_games * 100))
def get_data(sensor):
    sensor = sensor.replace("%20", " ")
    # Check it is a valid request
    logger.debug(f"API request STARTING for sensor {sensor}")
    if sensor not in DataCollector.get_sensors():
        raise NameError('Invalid sensor name')
    data_collector = DataCollector()
    unit = data_collector.sensors[sensor].get_unit()
    logger.debug(f"Unit fetched and is {unit}")
    df = data_collector.get_data(sensor)
    graph_json = build_graph(df, sensor, unit)
    logger.debug(f"API request SUCCESSFUL for sensor {sensor}")
    logger.debug(f"GRAPH JSON looks like {graph_json}")
    return graph_json
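# A minimal call sketch (hypothetical sensor names; assumes the logger,
# DataCollector, and build_graph used above are set up in this module):
graph_json = get_data("Temperature")     # plain sensor name
graph_json = get_data("Rain%20Rate")     # URL-encoded names are normalized first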
def publish_data(self, sac_temp, channel_name):
    dataCollector = DataCollector()
    temp = dataCollector.get_temp()
    humid = dataCollector.get_humidity()
    message = {
        'temperature': temp,
        'humidity': humid,
        'sac_temperature': sac_temp
    }
    if channel_name == 'Temperature_Real_Time_Channel':
        message = {
            'eon': {
                'temperature': temp,
                'humidity': humid,
                'sac_temperature': sac_temp
            }
        }
    # .pn_async() replaces the pre-3.7 .async() call (async is now a keyword)
    self.pubnub.publish().channel(channel_name).message(message).pn_async(self.publish_callback)
def __init__(self, _Fs, _filter_r):
    DataCollector.__init__(self, _Fs, _filter_r)
    self.clead = []
    self.filtered_clead = []
    self.emg_filter_r = int(0.5 * _Fs / 34)
    self.clead_filter_r = int(0.5 * _Fs / 24)
    self.initialized = False
    self.init_time = 5
    self.beats = []
    self.last = 0
    self.detect_interval = int(0.2 * _Fs)
    self.r = 0
    self.rr = 0
    self.RR = [0, 0, 0, 0, 0]
    self.r_i = -1
    self.f_interval = int(_Fs * 0.35)
    self.f_interval2 = int(_Fs * 0.05)
def process_data(self):
    while self.alive:
        try:
            for i in self.to_check:
                if i["fullname"][1] == "1":
                    comment = True
                    t = self.r.comment(i["fullname"][3:])
                    try:
                        t.refresh()
                    except Exception:
                        self.to_check.remove(i)
                        continue
                else:
                    comment = False
                    t = self.r.submission(i["fullname"][3:])
                deleted = False
                try:
                    if comment:
                        replies = t.replies
                    else:
                        replies = t.comments
                except Exception:
                    # If there was an exception, the object has no replies/comments
                    replies = None
                if replies:
                    replies.replace_more(limit=None)
                    for c in replies:
                        # If a moderator replies with the standard message when something has been removed
                        if self.mod_message in c.body and c.distinguished:
                            temp = i
                            temp["score"] = t.score
                            temp["permalink"] = t.permalink
                            temp["mod_reply"] = DataCollector.organize_data(c)
                            self.deleted.append(temp)
                            deleted = True
                            print("Appended " + i["fullname"] + " to deleted")
                            self.to_check.remove(temp)
                            break
                if deleted:
                    continue
                # If enough time has passed to be confident that the comment
                # won't be deleted. It's 6 hours by default.
                if time.time() - i["accessed"] >= self.wait:
                    self.to_check.remove(i)
        except Exception as e:
            print(str(e))
            self.r = self.new_reddit()
def runClient(stateSize, actionSize, camerashape, functions, getEnvironment,
              optimizer, loss, modelSrcWeights, dataCollectionPath,
              dataCollectorNumber=1, bellmannNumber=1, trainingsWorkerNumber=1,
              replayLog=True, loadWeights=False,
              replayBufferPath="localhost:5000", modelPath="localhost:5001"):
    # Inits
    main_lock = Lock()
    model_lock = Lock()
    loss_lock = Lock()
    _, getState, _, getReward, policyFunction, get_cem_action_size = functions()
    print("\n DataCollectors:", dataCollectorNumber,
          "\n Bellmans:", bellmannNumber,
          "\n Trainingsworkers:", trainingsWorkerNumber,
          "\n replayLog", replayLog)
    # Client helpers
    client = Client(replayBufferPath)
    modelClient = ModelClient(modelPath)
    agent = Md(modelClient, model_lock, getEnvironment(), optimizer, loss,
               policyFunction, get_cem_action_size(), modelSrcWeights,
               state_size=stateSize, action_size=actionSize, camerashape=camerashape)
    bellmannUpdater = BellmanUpdater(client, agent)
    trainingsworker = Trainingworkers(client, agent, loss_lock, dataCollectionPath)
    if replayLog:
        print("Run ReplayLog")
        # start_new_thread expects (callable, args); the original called the
        # ReplayLog constructor directly, so it is run in the thread here
        start_new_thread(ReplayLog, (dataCollectionPath + "_0/", client))
        print("Finish ReplayLog")
    for i in range(dataCollectorNumber):
        print("start datacollector", i)
        start_new_thread(DataCollector(i, client, agent, getEnvironment(),
                                       policyFunction, getState, getReward,
                                       dataCollectionPath).start,
                         (main_lock, True))
    for i in range(bellmannNumber):
        print("start bellmann updater", i)
        start_new_thread(bellmannUpdater.start, ())
    for i in range(trainingsWorkerNumber):
        print("start trainingworkers", i)
        start_new_thread(trainingsworker.start, ())
class Executor:
    draw_chart = False
    write_file = False
    exec_chart_sec = False
    exec_chart_anom = False

    def __init__(self, log):
        self.dc = DataCollector(log)

    def drawCharts(self):
        if self.exec_chart_sec:
            path_to_plt = '../outs/charts/main/'
            list_of_df = self.dc.getWithAnomaly()
            ts = 1
            for df in list_of_df:
                if df.shape[0] > 0:
                    plt.rcParams["font.family"] = "Times New Roman"
                    csfont = {'fontname': 'Times New Roman'}
                    df.plot(figsize=(12, 9), subplots=True)
                    plt.xlabel('Time', **csfont)
                    plt.ylabel('Value', **csfont)
                    plt.title('Signals[{}]'.format(ts), **csfont)
                    plt.legend(loc='upper right')
                    plt.savefig(path_to_plt + 'main-{}.png'.format(ts), format='png')
                    plt.close('all')
                    print('The Chart of File {} is Generated.'.format(ts))
                    ts += 1
        if self.exec_chart_anom:
            path_to_plt = '../outs/charts/anomalies/'
            list_of_df = self.dc.getWithAnomaly()
            ts = 1
            for df in list_of_df:
                if df.shape[0] > 0:
                    data = df.drop(['anomaly', 'changepoint'], axis=1)
                    pc = PCA(n_components=2).fit_transform(data)
                    df[['X', 'Y']] = pc
                    plt.figure()
                    sb.set(font='Times New Roman')
                    sns = sb.scatterplot(data=df, x='X', y='Y', hue='anomaly', palette='bright')
                    sns.set_title('The Anomaly Chart of File {}'.format(ts))
                    sns.figure.savefig(path_to_plt + 'anom-{}.png'.format(ts))
                    plt.close('all')
                    print('The Chart of File {} is Generated.'.format(ts))
                    ts += 1

    def IsolationForestExecutor(self):
        isofh = IsolationForestHandler(self.dc)
        isofh.findAnomalies(self.draw_chart, self.write_file)

    def KNNOExecutor(self):
        knno = KNNOHandler(self.dc)
        knno.findAnomalies(self.draw_chart, self.write_file)

    def LOFExecutor(self):
        lof = LOFHandler(self.dc)
        lof.findAnomalies(self.draw_chart, self.write_file)

    def RobustCovExecutor(self):
        rocov = RobustCovHandler(self.dc)
        rocov.findAnomalies(self.draw_chart, self.write_file)

    def OneClassSVMExecutor(self):
        ocsvm = OneClassSVMHandler(self.dc)
        ocsvm.findAnomalies(self.draw_chart, self.write_file)

    def SSDOExecutor(self):
        ssdo = SSDOHandler(self.dc)
        ssdo.findAnomalies(self.draw_chart, self.write_file)

    def SSkNNOExecutor(self):
        ssknno = SSkNNOHandler(self.dc)
        ssknno.findAnomalies(self.draw_chart, self.write_file)
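# A minimal usage sketch (hypothetical log path; assumes DataCollector accepts
# a log argument and the anomaly handler classes are importable):
executor = Executor('../logs/input.log')
executor.draw_chart = True
executor.IsolationForestExecutor()
executor.LOFExecutor()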
class Comparator:
    '''
    manages the collection of all log (.out) and .solu file data
    '''
    files = []
    testruns = []
    datacollector = None
    solufiledatacollector = None
    readers = []
    probnamelist = []
    datakeys = []

    datakey_err = 'ERROR: Datakey is not a suitable key'
    probname_err = 'ERROR: Probname is not in list of problem names'
    datakey_gap = 'SoluFileGap'

    EXCLUDE_REASON_TIMEOUT = 'timeout'
    EXCLUDE_REASON_INFEASIBLE = 'infeasible'
    EXCLUDE_REASON_ZEROSOLUTION = 'zerosolution'
    EXCLUDE_REASON_NOSOLUTIONKNOWN = 'nosolution'
    EXCLUDE_REASON_NOOPTIMALSOLUTIONKNOWN = 'nooptsolution'

    INFINITY = 1e09
    COMP_SIGN_LE = 1
    COMP_SIGN_GE = -1

    def __listelementsdiffer(self, listname):
        for listitem in listname:
            if listname.count(listitem) > 1:
                return False
        return True

    def __init__(self, files, listofreaders=[], testsetname='', solufilename=''):
        self.files = files[0:len(files)]
        assert self.__listelementsdiffer(self.files)
        self.testruns = []
        self.readers = []
        self.datakeys = []
        for filename in files:
            testrun = TestRun(filename, solufilename, testsetname)
            self.testruns.append(testrun)
            testrun.settings = filename.split('.')[-2]
        self.datacollector = DataCollector()
        self.datacollector.registerListOfReaders(listofreaders)
        if solufilename != '':
            # print('solufiledatacollector initialized for solufilename:', solufilename)
            self.solufiledatacollector = SoluFileDataCollector()
        self.readers = listofreaders
        for reader in self.readers:
            self.addDataKey(reader.datakey)

    def addDataKey(self, datakey):
        self.datakeys.append(datakey)

    def setTestRun(self, testrun):
        self.datacollector.setTestRun(testrun)
        if self.solufiledatacollector != None:
            self.solufiledatacollector.setTestRun(testrun)

    def __makeProbNameList__(self):
        self.probnamelist = []
        for testrun in self.testruns:
            if self.probnamelist == []:
                self.probnamelist = testrun.problist
            else:
                if testrun.datacollected and testrun.problist == []:
                    print(testrun.getIdentification())
                assert not testrun.datacollected or testrun.problist != []
                for probname in testrun.problist:
                    if not probname in self.probnamelist:
                        self.probnamelist.append(probname)

    def collectData(self):
        for testrun in self.testruns:
            self.setTestRun(testrun)
            self.datacollector.collectData()
            if self.solufiledatacollector != None:
                # print('Collecting Solu File Data')
                assert self.solufiledatacollector.testrun == self.datacollector.testrun
                self.solufiledatacollector.collectData()
        # for testrun in self.testruns:
        #     print(testrun.problist)
        self.__makeProbNameList__()
        # self.calculateData()

    def problemCompareData(self, probname, testrun1, testrun2, datakey, compsign):
        if not datakey in self.datakeys:
            raise KeyError(self.datakey_err)
        # if not testrun1.datacollected or not testrun2.datacollected:
        #     print('Collect all Data First')
        if not probname in self.probnamelist:
            raise ValueError(self.probname_err)
        data1 = float(testrun1.problemGetData(probname, datakey))
        data2 = float(testrun2.problemGetData(probname, datakey))
        return (data1 - data2) * compsign

    def getBestTestRunForProblem(self, probname, datakey, compsign):
        bestrun = None
        for testrun in self.testruns:
            if bestrun == None:
                bestrun = testrun
            elif self.problemCompareData(probname, bestrun, testrun, datakey, compsign) > 0:
                bestrun = testrun
        return bestrun

    def getBestRuns(self, datakey, compsign):
        bestruns = []
        for probname in self.probnamelist:
            bestruns.append(self.getBestTestRunForProblem(probname, datakey, compsign))
        return bestruns

    def excludeProb(self, probname, excludereasons=[]):
        for testrun in self.testruns:
            for reason in excludereasons:
                if Comparator.EXCLUDE_REASON_TIMEOUT == reason:
                    if testrun.problemGetData(probname, TimeLimitReader.datakey) == TimeLimitReader.timelimit_reached:
                        return True
                elif Comparator.EXCLUDE_REASON_NOOPTIMALSOLUTIONKNOWN == reason:
                    if testrun.problemGetSoluFileStatus(probname) != 'opt':
                        return True
                elif Comparator.EXCLUDE_REASON_NOSOLUTIONKNOWN == reason:
                    if testrun.problemGetSoluFileStatus(probname) == 'unkn':
                        return True
                elif Comparator.EXCLUDE_REASON_ZEROSOLUTION == reason:
                    if testrun.problemGetOptimalSolution(probname) == 0:
                        return True
                elif Comparator.EXCLUDE_REASON_INFEASIBLE == reason:
                    if testrun.problemGetSoluFileStatus(probname) == 'inf':
                        return True
        return False

    def testrunGetKeyGeomMean(self, testrun, datakey, exclude=False):
        listofnumbers = []
        for probname in self.probnamelist:
            if not self.excludeProb(probname) or not exclude:
                listofnumbers.append(float(testrun.problemGetData(probname, datakey)))
        return Misc.listGetGeomMean(listofnumbers)

    def keyGetBestValue(self, probname, datakey, compsign):
        bestrun = self.getBestTestRunForProblem(probname, datakey, compsign)
        assert bestrun != None
        return bestrun.problemGetData(probname, datakey)

    def keyGetBestValues(self, datakey, compsign):
        values = []
        for probname in self.probnamelist:
            values.append(self.keyGetBestValue(probname, datakey, compsign))
        return values

    def testrunGetProbGapToOpt(self, testrun, probname):
        assert probname in testrun.problist
        optsol = testrun.problemGetOptimalSolution(probname)
        status = testrun.problemGetSoluFileStatus(probname)
        pb = testrun.problemGetData(probname, PrimalBoundReader.datakey)
        if status == 'opt' or status == 'best':
            return Misc.getGap(float(pb), float(optsol))
        else:
            return Misc.FLOAT_INFINITY

    def calculateData(self):
        self.calculateGaps()

    def calculateGaps(self):
        for probname in self.probnamelist:
            for testrun in self.testruns:
                gap = self.testrunGetProbGapToOpt(testrun, probname)
                data = (probname, Comparator.datakey_gap, '%8.2g' % (gap))
                testrun.addData(data)
        self.addDataKey(Comparator.datakey_gap)
if args.clusters:  # assumed guard; the snippet begins mid-conditional
    log = args.clusters[0].split('.')[0] + '_log'
    if '/' in log:
        log = log.split('/')[-1]
else:
    log = args.points[0].split('.')[0] + '_log'
    if '/' in log:
        log = log.split('/')[-1]
export = log
if args.nolog:
    log = False
randomCount = args.count
gridSize = args.grid
# create DataCollector object
dc = DataCollector()
# upload data from datafile
dc.uploadFromTextFile(datafile, params=pointsParams)
# get data from dc object
X = dc.getData()
initM = InitMachine()
if init == 'random':
    bounds = initM.getBounds(X)
    initM.random(count=randomCount, bounds=bounds)
elif init == 'grid':
    bounds = initM.getBounds(X)
    initM.grid(grid=gridSize, bounds=bounds)
elif init == 'file':
    initM.file(filename=filename, params=clusterParams)
else:
def auto_water(save_data=True, calibrate_pump=False):
    logger.debug('Starting the auto_water script')
    data_collector = DataCollector(save_data)
    rain_sim = RainSimulator(data_collector=data_collector)
udpCon.send('HI I am udp motion message')
while len(udpCon.recv(1)) == 0:
    udpCon.send('HI I am udp motion message')
print('Received something...?')
curFor = 0.0
curAng = 0.0
preID = -1
count = 0
frequency = 0
endFreq = 65
startTime = 0.0
succCount = 0.0  # this is the total number of times sent succ message
recvCount = 0.0  # this is the total number of times recv vel messages
freqData = []
freqTS = DataCollector()
curTS = ''
interval = 15.0
hzRecv = True
lastID = -1
switchFreqID = 1
# signal.setitimer(signal.ITIMER_REAL, 0.5, 0.5)
# signal.signal(signal.SIGALRM, handler)
while not rospy.is_shutdown() and frequency <= endFreq:
    if count == 100:
        # udpCon.send('HI I am udp motion message')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import constants
from DataCollector import DataCollector

# REDDIT
reddit_collector = DataCollector(constants.REDDIT_NAME)
reddit_collector.collect_data_today()
reddit_collector.save_data()
class BountyHunter(object):
    def __init__(self):
        self.BONDSMANPORT = 14000
        self.taskSet = {}
        self.taskSet['DoNothing'] = {'handler': DoNothing(), 'name': 'DoNothing',
                                     'initBounty': 0.65, 'bountyRate': 0.0,
                                     'deadline': 30.0, 'currentBounty': 0.65}
        ## in the future the task handlers will be dynamically
        ## loaded and refreshed to make sure the latest and
        ## newest available...
        self.taskHandlers = {}
        self.taskHandlers['default'] = DoNothing
        self.taskHandlers['visualServoing'] = VisualServoing
        ## create the learner
        # alphaT, alphaP, oneUpdateGamma, hasOneUpdate, epsilonChooseRandomTask
        self.bountyLearner = BountyHunterLearner(0.1, 0.2, 0.001, True, 0.002)
        self.myIP = str([l for l in ([ip for ip in socket.gethostbyname_ex(socket.gethostname())[2]
                                      if not ip.startswith("127.")][:1],
                                     [[(s.connect(('8.8.8.8', 53)), s.getsockname()[0], s.close())
                                       for s in [socket.socket(socket.AF_INET, socket.SOCK_DGRAM)]][0][1]])
                         if l][0][0])
        self.bondSock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.bondSock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.bondSock.bind(('0.0.0.0', self.BONDSMANPORT))
        self.timesSent = 1.0
        self.datacollector = DataCollector()
        self.currentHZ = 5
        self.timesSucc = 0.0
        self.doneSim = False
        self.currentset = ''
        while self.doneSim == False:
            self.curtask = self.bountyLearner.getTask(self.taskSet)
            # print('current bounty rate for task %s is %f' % (self.curtask['name'], self.curtask['bountyRate']))
            for task in self.taskSet.values():
                task['currentBounty'] += task['bountyRate']
            self.timesSent += self.curtask['handler'].doTask()
            self.doneSim = self.bondsmanRecv()

    def bondsmanRecv(self):
        '''
        Task MSG
        0 string msgType (task)
        1 string taskName
        2 string[] bountyHunters
        3 float64 initialBounty
        4 float64 bountyRate
        5 float64 deadline
        6 uint32 inputPort
        7 uint32 outputPort

        Succ MSG
        0 string msgType (success)
        1 string task
        2 uint32 taskID
        3 string winnerIP
        4 float64 totalTime
        5 uint32 succCount
        6 uint32 recvCount
        '''
        ready = select.select([self.bondSock], [], [], 0.03)
        if not ready[0]:
            return False
        data, addr = self.bondSock.recvfrom(32768)
        ## check if success or task
        # print(data)
        listData = json.loads(data)
        if listData[0] == 'task':
            print(listData[1])
            ## if task then add it to the learning function?
            ## and add it to the task list
            if listData[1] + '-' + addr[0] not in self.taskSet:
                # so if I can actually do the task then add the task.
                if listData[1] in self.taskHandlers:
                    self.taskSet[listData[1] + '-' + addr[0]] = {
                        'handler': self.taskHandlers[listData[1]](addr[0], listData[6], addr[0], listData[7], listData[1]),
                        'name': listData[1],
                        'initBounty': listData[3],
                        'bountyRate': listData[4],
                        'deadline': listData[5],
                        'hunters': listData[2],
                        'currentBounty': listData[3]}
            else:
                # we already have that task
                self.taskSet[listData[1] + '-' + addr[0]]['deadline'] = listData[5]
            self.currentDataset = str(listData[5]) + 'hzData_' + self.myIP
        elif listData[0] == 'success':
            if listData[2] == -1:
                # THEN WE ARE FINISHED
                self.datacollector.writeData()
                return True
            # print('Recv a success message for task %s total time = %s SuccCount = %d' % (listData[1], listData[4], listData[5]))
            totalTime = float(listData[4]) * 1000.0  # convert to milliseconds
            self.curtask['currentBounty'] = self.curtask['initBounty']
            if listData[5] == 1:
                print('list data was 1!')
                self.currentHZ += 5
                self.timesSent = 1.0  # reset.
                self.timesSucc = 0.0
                self.currentset = 'bountyhunter-' + self.myIP + '-' + str(self.currentHZ)
            if listData[3] == self.myIP:
                ## Then I won!!
                self.bountyLearner.learn(listData[1] + '-' + addr[0], totalTime, 0, 1)
                self.timesSucc += 1.0
            else:
                self.bountyLearner.learn(listData[1] + '-' + addr[0], totalTime, 0, 0)
            if self.curtask['name'] == 'DoNothing':
                self.datacollector.addPoint(self.currentset, (int(listData[5]), -1.0))
            else:
                self.datacollector.addPoint(self.currentset, (int(listData[5]), self.timesSucc / self.timesSent))
        else:
            print('ERROR unexpected message: %s' % (data))
        return False
def collect_data(sensor):
    data_collector = DataCollector()
    data_collector.collect_data(0, 0)
    data_collector.save_current_data()
    graph_json = get_data(sensor)
    return graph_json
def get_sensors():
    sensors = DataCollector.get_sensors()
    return build_response(200, sensors)
bellmannUpdater = BellmanUpdater(client, agent)
trainingsworker = Trainingworkers(client, agent)
dataCollectorNumber = 0
bellmannNumber = 0
trainingsWorkerNumber = 1
# ReplayLog(dataCollectionPath+"_0/", client)
for i in range(dataCollectorNumber):
    print("start datacollector", i)
    start_new_thread(DataCollector(i, client, agent, createEnvironemnt(),
                                   policyFunction, getState, dataCollectionPath).start,
                     (main_lock, True))
for i in range(bellmannNumber):
    print("start bellmann updater", i)
    start_new_thread(bellmannUpdater.start, ())
for i in range(trainingsWorkerNumber):
    print("start trainingworkers", i)
    start_new_thread(trainingsworker.start, ())
# start_new_thread(DataCollector(replayBuffer, agent, createEnvironemnt(), policyFunction, getState).start, (False,))
input("Test the model")
("LINKRESOLVE", "outbox"): ("self", "outbox"), ("LINKRESOLVE", "urlrequests"): ("LINKREQUESTER", "inbox"), ("LINKREQUESTER", "outbox"): ("LINKRESOLVE", "responses") }).activate() system = Graphline( CURRENTPROG=WhatsOn(proxy), REQUESTER=Requester( "all", dbuser, dbpass ), # Can set this for specific channels to limit Twitter requests whilst doing dev FIREHOSE=TwitterStream( username, password, proxy, True, 40 ), # Twitter API sends blank lines every 30 secs so timeout of 40 should be fine SEARCH=PeopleSearch(consumerkeypair, keypair, proxy), COLLECTOR=DataCollector(dbuser, dbpass), RAWCOLLECTOR=RawDataCollector(dbuser, dbpass), HTTPGETTER=HTTPGetter(proxy, "BBC R&D Grabber", 10), HTTPGETTERRDF=HTTPGetter(proxy, "BBC R&D Grabber", 10), TWOWAY=TwoWaySplitter(), ANALYSIS=LiveAnalysis(dbuser, dbpass), NLTKANALYSIS=LiveAnalysisNLTK(dbuser, dbpass), TWEETCLEANER=Pipeline( LINKER, RetweetFixer(), RetweetCorrector(dbuser, dbpass), TweetCleaner(['user_mentions', 'urls', 'hashtags'])), NLTKANALYSISFINAL=FinalAnalysisNLTK(dbuser, dbpass), TWEETCLEANERFINAL=Pipeline( LINKERFINAL, RetweetFixer(), RetweetCorrector(dbuser, dbpass), TweetCleaner(['user_mentions', 'urls', 'hashtags'])), linkages={ ("REQUESTER", "whatson"):
import sys

from DataCollector import DataCollector
from GUI import *
from Map import Map

if __name__ == '__main__':
    dataCollector = DataCollector()
    m = Map(dataCollector.GetPollutionDataCO(),
            dataCollector.GetPollutionDataSO(),
            dataCollector.GetCities())
    app = QApplication(sys.argv)
    app.setApplicationName("Pollution Map")
    window = MainWindow(m.GetMapUrl())
    app.exec_()
def collect(self, dir):
    DataCollector.collect(self, dir)

    main_branch = 'master'
    lines = getpipeoutput(['git branch -a']).split('\n')
    for line in lines:
        if len(line) < 2:
            continue
        line = line[2:]
        branch_name = line.split(' ')[0].replace('remotes/origin/', '')
        if branch_name == 'HEAD':
            main_branch = line.split(' ')[2]
            continue
        self.branches.append(branch_name)

    self.total_authors += int(getpipeoutput(['git shortlog -s %s %s' % (getcommitrange(), get_commit_time()), 'wc -l']))
    # self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))

    # tags
    lines = getpipeoutput(['git show-ref --tags']).split('\n')
    for line in lines:
        if len(line) == 0:
            continue
        (hash, tag) = line.split(' ')
        tag = tag.replace('refs/tags/', '')
        output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % hash])
        if len(output) > 0:
            parts = output.split(' ')
            stamp = 0
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            self.tags[tag] = {'stamp': stamp,
                              'hash': hash,
                              'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
                              'commits': 0,
                              'authors': {}}

    # collect info on tags, starting from latest
    tags_sorted_by_date_desc = [el[1] for el in reversed(sorted((info['date'], tagname) for tagname, info in self.tags.items()))]
    prev = None
    for tag in reversed(tags_sorted_by_date_desc):
        cmd = 'git shortlog -s "%s"' % tag
        if prev != None:
            cmd += ' "^%s"' % prev
        output = getpipeoutput([cmd])
        if len(output) == 0:
            continue
        prev = tag
        for line in output.split('\n'):
            parts = re.split('\s+', line, 2)
            commits = int(parts[1])
            author = parts[2]
            if author in conf['merge_authors']:
                author = conf['merge_authors'][author]
            self.tags[tag]['commits'] += commits
            self.tags[tag]['authors'][author] = commits

    # Collect revision statistics
    # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
    rev_list_output = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s %s' % (getcommitrange('HEAD'), get_commit_time()), 'grep -v ^commit'])
    if rev_list_output:
        lines = rev_list_output.split('\n')
    else:
        lines = []
    for line in lines:
        parts = line.split(' ', 4)
        author = ''
        try:
            stamp = int(parts[0])
        except ValueError:
            stamp = 0
        timezone = parts[3]
        author, mail = parts[4].split('<', 1)
        author = author.rstrip()
        if author in conf['merge_authors']:
            author = conf['merge_authors'][author]
        mail = mail.rstrip('>')
        domain = '?'
        if mail.find('@') != -1:
            domain = mail.rsplit('@', 1)[1]
        date = datetime.datetime.fromtimestamp(float(stamp))

        # First and last commit stamp (may be in any order because of cherry-picking and patches)
        if stamp > self.last_commit_stamp:
            self.last_commit_stamp = stamp
        if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
            self.first_commit_stamp = stamp

        # activity
        # hour
        hour = date.hour
        self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
        # most active hour?
        if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
            self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]

        # day of week
        day = date.weekday()
        self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

        # domain stats
        if domain not in self.domains:
            self.domains[domain] = {}
        # commits
        self.domains[domain]['commits'] = self.domains[domain].get('commits', 0) + 1

        # hour of week
        if day not in self.activity_by_hour_of_week:
            self.activity_by_hour_of_week[day] = {}
        self.activity_by_hour_of_week[day][hour] = self.activity_by_hour_of_week[day].get(hour, 0) + 1
        # most active hour?
        if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
            self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]

        # month of year
        month = date.month
        self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1

        # yearly/weekly activity
        yyw = date.strftime('%Y-%W')
        self.activity_by_year_week[yyw] = self.activity_by_year_week.get(yyw, 0) + 1
        if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
            self.activity_by_year_week_peak = self.activity_by_year_week[yyw]

        # author stats
        if author not in self.authors:
            self.authors[author] = {}
        # commits, note again that commits may be in any date order because of cherry-picking and patches
        if 'last_commit_stamp' not in self.authors[author]:
            self.authors[author]['last_commit_stamp'] = stamp
        if stamp > self.authors[author]['last_commit_stamp']:
            self.authors[author]['last_commit_stamp'] = stamp
        if 'first_commit_stamp' not in self.authors[author]:
            self.authors[author]['first_commit_stamp'] = stamp
        if stamp < self.authors[author]['first_commit_stamp']:
            self.authors[author]['first_commit_stamp'] = stamp

        # author of the month/year
        yymm = date.strftime('%Y-%m')
        if yymm in self.author_of_month:
            self.author_of_month[yymm][author] = self.author_of_month[yymm].get(author, 0) + 1
        else:
            self.author_of_month[yymm] = {}
            self.author_of_month[yymm][author] = 1
        self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

        yy = date.year
        if yy in self.author_of_year:
            self.author_of_year[yy][author] = self.author_of_year[yy].get(author, 0) + 1
        else:
            self.author_of_year[yy] = {}
            self.author_of_year[yy][author] = 1
        self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

        # authors: active days
        yymmdd = date.strftime('%Y-%m-%d')
        if 'last_active_day' not in self.authors[author]:
            self.authors[author]['last_active_day'] = yymmdd
            self.authors[author]['active_days'] = set([yymmdd])
        elif yymmdd != self.authors[author]['last_active_day']:
            self.authors[author]['last_active_day'] = yymmdd
            self.authors[author]['active_days'].add(yymmdd)

        # project: active days
        if yymmdd != self.last_active_day:
            self.last_active_day = yymmdd
            self.active_days.add(yymmdd)

        # timezone
        self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1

    # outputs "<stamp> <files>" for each revision
    revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s %s' % (getcommitrange('HEAD'), get_commit_time()), 'grep -v ^commit']).strip().split('\n')
    lines = []
    revs_to_read = []
    time_rev_count = []
    # Look up rev in cache and take info from cache if found
    # If not append rev to list of rev to read from repo
    for revline in revlines:
        if not (' ' in revline):
            continue
        time, rev = revline.split(' ')
        # if cache empty then add time and rev to list of new rev's
        # otherwise try to read needed info from cache
        if 'files_in_tree' not in self.cache.keys():
            revs_to_read.append((time, rev))
            continue
        if rev in self.cache['files_in_tree'].keys():
            lines.append('%d %d' % (int(time), self.cache['files_in_tree'][rev]))
        else:
            revs_to_read.append((time, rev))

    # Read revisions from repo
    pool = Pool(processes=conf['processes'])
    time_rev_count = pool.map(getnumoffilesfromrev, revs_to_read)
    pool.close()

    # Update cache with new revisions and append them to the general list
    for (time, rev, count) in time_rev_count:
        if 'files_in_tree' not in self.cache:
            self.cache['files_in_tree'] = {}
        self.cache['files_in_tree'][rev] = count
        lines.append('%d %d' % (int(time), count))

    self.total_commits += len(lines)
    for line in lines:
        parts = line.split(' ')
        if len(parts) != 2:
            continue
        (stamp, files) = parts[0:2]
        try:
            self.files_by_stamp[int(stamp)] = int(files)
        except ValueError:
            print('Warning: failed to parse line "%s"' % line)

    # extensions and size of files
    lines = getpipeoutput(['git ls-tree -r -l -z %s' % (getcommitrange('HEAD', end_only=True))]).split('\000')
    blobs_to_read = []
    for line in lines:
        if len(line) == 0:
            continue
        parts = re.split('\s+', line, 5)
        if parts[0] == '160000' and parts[3] == '-':
            # skip submodules
            continue
        blob_id = parts[2]
        size = int(parts[3])
        fullpath = parts[4]

        self.total_size += size
        self.total_files += 1

        filename = fullpath.split('/')[-1]  # strip directories
        if filename.find('.') == -1 or filename.rfind('.') == 0:
            ext = ''
        else:
            ext = filename[(filename.rfind('.') + 1):]
        if len(ext) > conf['max_ext_length']:
            ext = ''
        if ext not in self.extensions:
            self.extensions[ext] = {'files': 0, 'lines': 0}
        self.extensions[ext]['files'] += 1
        # if cache empty then add ext and blob id to list of new blob's
        # otherwise try to read needed info from cache
        if 'lines_in_blob' not in self.cache.keys():
            blobs_to_read.append((ext, blob_id))
            continue
        if blob_id in self.cache['lines_in_blob'].keys():
            self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
        else:
            blobs_to_read.append((ext, blob_id))

    # Get info about line count for new blob's that wasn't found in cache
    pool = Pool(processes=24)
    ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
    pool.close()

    # Update cache and write down info about number of lines
    for (ext, blob_id, linecount) in ext_blob_linecount:
        if 'lines_in_blob' not in self.cache:
            self.cache['lines_in_blob'] = {}
        self.cache['lines_in_blob'][blob_id] = linecount
        self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]

    # line statistics
    # outputs:
    #  N files changed, N insertions (+), N deletions(-)
    # <stamp> <author>
    self.changes_by_date = {}  # stamp -> { files, ins, del }
    # computation of lines of code by date is better done
    # on a linear history.
    extra = ''
    if conf['linear_linestats']:
        extra = '--first-parent -m'
    lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s %s' % (extra, getcommitrange('HEAD'), get_commit_time())]).split('\n')
    lines.reverse()
    files = 0
    inserted = 0
    deleted = 0
    total_lines = 0
    author = None
    for line in lines:
        if len(line) == 0:
            continue
        # <stamp> <author>
        if re.search('files? changed', line) is None:
            pos = line.find(' ')
            if pos != -1:
                try:
                    (stamp, author) = (int(line[:pos]), line[pos + 1:])
                    if author in conf['merge_authors']:
                        author = conf['merge_authors'][author]
                    self.changes_by_date[stamp] = {'files': files, 'ins': inserted,
                                                   'del': deleted, 'lines': total_lines}
                    date = datetime.datetime.fromtimestamp(stamp)
                    yymm = date.strftime('%Y-%m')
                    self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
                    self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted
                    yy = date.year
                    self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy, 0) + inserted
                    self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted
                    files, inserted, deleted = 0, 0, 0
                except ValueError:
                    print('Warning: unexpected line "%s"' % line)
            else:
                print('Warning: unexpected line "%s"' % line)
        else:
            numbers = getstatsummarycounts(line)
            if len(numbers) == 3:
                (files, inserted, deleted) = map(lambda el: int(el), numbers)
                total_lines += inserted
                total_lines -= deleted
                self.total_lines_added += inserted
                self.total_lines_removed += deleted
            else:
                print('Warning: failed to handle line "%s"' % line)
                (files, inserted, deleted) = (0, 0, 0)
            # self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
    self.total_lines += total_lines

    # Per-author statistics

    # defined for stamp, author only if author commited at this timestamp.
    self.changes_by_date_by_author = {}  # stamp -> author -> lines_added

    # Similar to the above, but never use --first-parent
    # (we need to walk through every commit to know who
    # committed what, not just through mainline)
    lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s %s' % (getcommitrange('HEAD'), get_commit_time())]).split('\n')
    lines.reverse()
    files = 0
    inserted = 0
    deleted = 0
    author = None
    stamp = 0
    for line in lines:
        if len(line) == 0:
            continue
        # <stamp> <author>
        if re.search('files? changed', line) is None:
            pos = line.find(' ')
            if pos != -1:
                try:
                    oldstamp = stamp
                    (stamp, author) = (int(line[:pos]), line[pos + 1:])
                    if author in conf['merge_authors']:
                        author = conf['merge_authors'][author]
                    if oldstamp > stamp:
                        # clock skew, keep old timestamp to avoid having ugly graph
                        stamp = oldstamp
                    if author not in self.authors:
                        self.authors[author] = {'lines_added': 0, 'lines_removed': 0, 'commits': 0}
                    self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
                    self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
                    self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
                    if stamp not in self.changes_by_date_by_author:
                        self.changes_by_date_by_author[stamp] = {}
                    if author not in self.changes_by_date_by_author[stamp]:
                        self.changes_by_date_by_author[stamp][author] = {}
                    self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
                    self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
                    files, inserted, deleted = 0, 0, 0
                except ValueError:
                    print('Warning: unexpected line "%s"' % line)
            else:
                print('Warning: unexpected line "%s"' % line)
        else:
            numbers = getstatsummarycounts(line)
            if len(numbers) == 3:
                (files, inserted, deleted) = map(lambda el: int(el), numbers)
            else:
                print('Warning: failed to handle line "%s"' % line)
                (files, inserted, deleted) = (0, 0, 0)
class TestCollectData(unittest.TestCase):
    test_bucket = 'example-bucket-whitew1994'

    def setUp(self) -> None:
        self.s3 = boto3.resource('s3')
        self.bucket = self.s3.Bucket(self.test_bucket)
        self.data_collector = DataCollector()

    # Delete contents of test bucket prior to testing
    def empty_bucket(self):
        bucket_versioning = self.s3.BucketVersioning(self.test_bucket)
        if bucket_versioning.status == 'Enabled':
            self.bucket.object_versions.delete()
        else:
            self.bucket.objects.all().delete()

    def test_collect_data(self):
        self.empty_bucket()
        rain_rate = random.randint(1, 100)
        self.data_collector.collect_data(rain_rate, 100)
        last_added = self.data_collector.df.iloc[-1:]
        self.assertGreater(len(self.data_collector.df.index), 0)
        print(last_added)
        self.assertEqual(last_added['Rain Rate'].iloc[0], rain_rate)
        self.assertIn('Shower Volume', list(last_added.columns))
        self.assertIsNotNone(last_added['Shower Volume'].iloc[0])

    def test_save_current_data(self):
        self.empty_bucket()
        sensors_rand_walk = {field: 50 for field in self.data_collector.get_sensors()}
        for day in range(-5, 0):
            test_data = {field: [] for field in self.data_collector.get_sensors()}
            test_data['Timestamp'] = []
            for minutes in range((24 * 60) // 10):
                test_data['Timestamp'].append(pd.Timestamp.now() + pd.Timedelta(day, unit='day')
                                              + pd.Timedelta(minutes * 10, unit='minutes'))
                for sensor in sensors_rand_walk.keys():
                    sensors_rand_walk[sensor] = sensors_rand_walk[sensor] + random.randint(-5, 5)
                    sensors_rand_walk[sensor] = max(sensors_rand_walk[sensor], 0)
                    test_data[sensor].append(sensors_rand_walk[sensor])
            self.data_collector.df = pd.DataFrame(test_data)
            previous_day = pd.Timestamp.now() + pd.Timedelta(days=day)
            self.data_collector.save_current_data(previous_day=previous_day)
            bucket_file_name = 's3://' + self.test_bucket + '/year=' + str(previous_day.year) + \
                               '/month=' + str(previous_day.month) + '/' + str(previous_day.day) + '.csv'
            test_df = pd.read_csv(bucket_file_name)
            self.data_collector.df = pd.DataFrame(test_data)
            test_df_dict = test_df.to_dict()
            dc_dict = self.data_collector.df.to_dict()
            del test_df_dict['Timestamp'], dc_dict['Timestamp']
            self.assertEqual(test_df_dict, dc_dict)

    def test_get_data(self):
        sensor = "Temperature"
        df = self.data_collector.get_data(sensor)
        self.assertIsNotNone(df['Timestamp'].values)
        self.assertIsNotNone(df['Sensor'].values)
        self.assertGreater(len(df['Timestamp']), 1)
        self.assertGreater(len(df['Sensor']), 1)
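# A minimal runner sketch for the suite above (assumes AWS credentials for the
# example bucket are configured in the environment):
if __name__ == '__main__':
    unittest.main()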
# import matplotlib as mpl
# mpl.use('Agg')
import sys

import matplotlib.pyplot as plt
import pandas as pd

from LinearRegressionObject import LinearRegressionObject
from DataCollector import DataCollector

if len(sys.argv) != 2 or sys.argv[1] not in [
        'all', 'Bank account or service', 'Debt collection', 'Mortgage',
        'Consumer Loan', 'Credit reporting', 'Credit card', 'Student loan',
        'Payday loan', 'Prepaid card', 'Money transfers', 'Other financial service']:
    print('Usage: python ComplaintsvsPopulation.py <product>')
    print("where <product> is 'Bank account or service', 'Debt collection', 'Mortgage', "
          "'Consumer Loan', 'Credit reporting', 'Credit card', 'Student loan', "
          "'Payday loan', 'Prepaid card', 'Money transfers', 'Other financial service'")
else:
    states = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
              'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
              'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
              'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
              'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'DC']
    collector = DataCollector()
    collector.readStateAnnualPop()
    # options for products:
    # 'Bank account or service', 'Debt collection', 'Mortgage', 'Consumer Loan',
    # 'Credit reporting', 'Credit card', 'Student loan', 'Payday loan', 'Prepaid card',
    # 'Money transfers', 'Other financial service'
    collector.readStateAnnualComplaintsByProduct(sys.argv[1])
    # run the linear regression on each state
    comp_dic = collector.annualComplaintsByProduct
    pop_array = collector.annualPop
    i = 0
    # get the average correlation between the data across all the states
    avg_correl = 0
actionSize = 1
agent = Md(enviroment, optimizer, loss, policyFunction, modelSrcWeights,
           state_size=stateSize, action_size=actionSize,
           camerashape=enviroment.render(mode="rgb_array").shape)
agent.loadWeights()
replayBuffer = ReplayBuffer(state_size=stateSize, action_size=actionSize,
                            camerashape=enviroment.render(mode="rgb_array").shape)
bellmannUpdater = BellmanUpdater(replayBuffer, agent)
dataCollector = DataCollector(replayBuffer, agent, enviroment, policyFunction, getState)
trainingsworker = Trainingworkers(replayBuffer, agent)

start_new_thread(dataCollector.start, ("Thread-1", 1))
start_new_thread(bellmannUpdater.start, ("Thread-2", 1))
start_new_thread(trainingsworker.start, ())

batch_size = 32
num_of_episodes = 500
agent.q_network.summary()
print("Train")
# Training.train(enviroment, agent, policyFunction, observationsize=stateSize, num_of_episodes=num_of_episodes, train=True, maxStepSize=100, loadModell=True, saveModell=True)
input("Test the model")
# Training.train(enviroment, agent, policyFunction, observationsize=stateSize, num_of_episodes=100, train=False, maxStepSize=100, loadModell=True)
def __init__(self, log=False):
    self.dc = dc = DataCollector(log)
    self.train_x = dc.getTrainData()
    self.train_y = dc.getTrainLabel()
    self.test_x = dc.getTestData()
    self.test_y = dc.getTestLabel()