def __AlternateCollectionMethodMultiThread(self, fnKillSignalled, startIndex):
    """Collect using one thread per collector group.

    The groupings are built by ``__CreateInitialCollectorThreadGroupings``.
    The first group found in ``__ProcessThreadGroupings`` is serviced on the
    calling thread; every other group gets its own ``__SlicedThreadProc``
    thread.

    fnKillSignalled -- callable polled to decide when to stop collecting
    startIndex      -- accepted for signature compatibility; not used here
    """
    ProcessThreadCount = self.__CreateInitialCollectorThreadGroupings()
    AddActiveProcessingThreads(ProcessThreadCount)

    if len(self._Collectors) < 1:
        Log.getLogger().error("No Collectors to process")
        return

    firstGroupID = None
    for processThreadID in self.__ProcessThreadGroupings:
        if firstGroupID is None:
            # the first group is handled by this thread, below
            firstGroupID = processThreadID
            continue

        # every other group gets a worker thread keyed by its group ID
        ID = str(self) + processThreadID
        ThreadManager.GetThreadManager().CreateThread(ID, self.__SlicedThreadProc, processThreadID)
        ThreadManager.GetThreadManager().StartThread(ID)

    # now go process the 1st group in this thread
    while not fnKillSignalled():
        processed = self.__CollectSingleRange(fnKillSignalled, firstGroupID)
        if processed == 0:
            Sleep.SleepMs(Namespace.SleepIntervalIfNoDataCollected)
def Shunt(self, namespace, ID, dataTuple, Value):
    """Queue one data point for the shunt worker, starting the worker on first use.

    namespace, ID, Value -- stored verbatim in the queued tuple
    dataTuple            -- element [2] is the key the entry is filed under,
                            element [4] is copied into the queued tuple

    The "has the worker been created" flag is tested and set while holding
    ``__ShuntLock`` so that concurrent callers cannot both start a worker.
    """
    Statistics.GetStatistics().OnPacketShunted()

    newTuple = (namespace, ID, dataTuple[4], Value)
    shuntFile = dataTuple[2]
    startWorker = False

    self.__ShuntLock.acquire()
    try:
        # claim the worker-creation duty under the lock
        if not self.__ShuntThreadCreated:
            self.__ShuntThreadCreated = True
            startWorker = True

        if shuntFile not in self.__ShuntedDataByFile:
            self.__ShuntedDataByFile[shuntFile] = []
        self.__ShuntedDataByFile[shuntFile].append(newTuple)

    except Exception as Ex:
        Log.getLogger().info("Unknown in Shunt function: " + str(Ex))

    finally:
        self.__ShuntLock.release()

    # start the worker only after the lock has been released
    if startWorker:
        threadName = "ShuntProc:" + str(self)
        ThreadManager.GetThreadManager().CreateThread(threadName, self.ShuntWorkerProc)
        ThreadManager.GetThreadManager().StartThread(threadName)
def Start(self):
    """Bind the UDP socket and launch the receive thread.

    Returns None when already started (``m_Name`` is set), False when the
    socket cannot be bound or configured, and True once the worker thread has
    been started.
    """
    if self.m_Name is not None:
        return

    self.m_Name = str(self)
    self.m_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        self.m_socket.bind((self.getIP(), self.getPort()))
        self.m_socket.setblocking(True)
        self.m_socket.settimeout(0.01)

    except Exception as ex:
        Log.getLogger().error("Invalid Socket IP or Port " + str(self) + " - " + str(ex))
        self.m_socket.close()  # do not leak the descriptor of an unusable socket
        return False

    if 0 == self.getPort():  # let the OS choose the port
        self.__Port = self.m_socket.getsockname()[1]  # can use this to pass to Marvin
        # NOTE(review): inside a class body this name is mangled with THIS
        # class's name, so it may not reach the namespace's own private
        # __ListenPort attribute - confirm against the Namespace class.
        self._objNamespace.__ListenPort = self.__Port

    Log.getLogger().debug("Namespace[" + str(self._objNamespace) + "] listening on -->" + str(self))

    ThreadManager.GetThreadManager().CreateThread(self.m_Name, self.workerProc)
    ThreadManager.GetThreadManager().StartThread(self.m_Name)
    return True
def Start(self):
    """Bind the UDP socket and start the worker thread for this server.

    Returns None if the server already has a name (already started), False if
    binding/configuring the socket raises, True otherwise.
    """
    if self.m_Name is not None:
        return

    self.m_Name = "ServerUDP:" + str(self)
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    self.m_socket = sock

    try:
        endpoint = (self.getIP(), self.getPort())
        sock.bind(endpoint)
        sock.setblocking(True)
        sock.settimeout(0.001)
    except Exception as ex:
        failureText = "Invalid Socket IP or Port " + str(self) + " - " + str(ex)
        Log.getLogger().error(failureText)
        return False

    if self.getPort() == 0:
        # port 0 means the OS picked one; read it back and mirror it into the
        # connection point (can use this to pass to Marvin)
        self.Port = sock.getsockname()[1]
        self.m_ConnPoint.Port = self.Port
        self.m_ConnPoint.IP = self.IP

    Log.getLogger().info(self.getTypeStr() + " listening on -->" + str(self))

    threadMgr = ThreadManager.GetThreadManager()
    threadMgr.CreateThread(self.m_Name, self.workerProc)
    threadMgr.StartThread(self.m_Name)
    return True
def Start(self):
    """Log the connection target, then create and start the client receive thread.

    Always returns True.
    """
    announcement = self.getTypeStr() + " Connecting to -->" + str(self)
    Log.getLogger().info(announcement)

    threadMgr = ThreadManager.GetThreadManager()
    threadMgr.CreateThread(self.m_Name, self.recvTCP_WorkerProc_Client, None)
    threadMgr.StartThread(self.m_Name)
    return True
def CollectionProc(self):
    """Run the user plugin, either inline or on a thread of its own.

    When ``SpawnThread`` equals True the plugin call is handed to a thread
    (named once, on first use) and the marker string "HelenKeller" is
    returned. Otherwise the plugin is called directly and nothing is returned.
    """
    if self.SpawnThread != True:
        self.__CallUserPlugin()
        return None

    if self.ThreadName is None:
        # build a unique name from the framework-wide instance counter
        instanceTag = str(UserPluginFramework.Instance)
        self.ThreadName = "UserPlugin." + instanceTag + "." + self.ScriptName + self.FunctionName
        UserPluginFramework.Instance += 1

    threadMgr = ThreadManager.GetThreadManager()
    threadMgr.CreateThread(self.ThreadName, self.__CallUserPlugin)
    threadMgr.StartThread(self.ThreadName)
    return "HelenKeller"
def acceptThread(self, fnKillSignalled, userData):
    """Accept TCP clients until signalled to stop, one receive thread per client.

    fnKillSignalled -- callable polled before each accept attempt
    userData        -- not used by this method
    """
    self.m_socket.listen(5)

    # run until signalled to end - call passed function to check for the signal
    while not fnKillSignalled():
        try:
            connection = self.m_socket.accept()
            clientSock, clientAddr = connection
            workerName = self.m_Name + str(clientAddr)

            threadMgr = ThreadManager.GetThreadManager()
            threadMgr.CreateThread(workerName, self.recvTCP_WorkerProc, (clientSock, clientAddr))
            threadMgr.StartThread(workerName)

            self.__clients.append(clientSock)

        except socket.timeout:
            # nothing connected within the socket timeout; back off briefly
            time.sleep(.5)
def Begin(self, runOnce=False):
    """Start collection for this namespace and, unless runOnce, its UDP server.

    runOnce -- when truthy, a single collector thread is started (it is
               handed ``runOnce``) and neither the UDP server nor the
               connection-info thread is created.

    Returns the number of collectors in this namespace.
    """
    threadMgr = ThreadManager.GetThreadManager()

    # The former "one thread per collector" mode was permanently disabled
    # (guarded by ``False and ...``) and has been removed as dead code.
    if Namespace._UseSingleCollectorThreadPerNamespace == True or runOnce:
        # one thread to do all collecting
        threadMgr.CreateThread(self._ID, self.__AlternateCollectionMethod, runOnce)
    else:
        # many threads, with multiple collectors per thread
        threadMgr.CreateThread(self._ID, self.__AlternateCollectionMethodMultiThread)
    threadMgr.StartThread(self._ID)

    # same truthiness test as the branch above, so the two cannot disagree
    if runOnce:
        return len(self._Collectors)

    # start udp server
    self._Server = ServerUDP.ServerUDP(self.__ListenIP, self.__ListenPort, self)
    self._Server.Start()

    threadName = "ConnUpdateThread:" + str(self) + ":" + str(self.__ListenPort)
    threadMgr.CreateThread(threadName, self.__sendConnectionInfoProc)
    threadMgr.StartThread(threadName)

    return len(self._Collectors)
def __init__(self):
    """Initialise the single Playback instance and start its worker thread.

    If ``Playback._instance`` is already set, the new object is left
    uninitialised and no thread is created.
    """
    if Playback._instance is not None:
        return
    Playback._instance = self

    # recorded data and the window of it being played
    self.PlaybackData = []
    self.CurrentIndex = 0
    self.startIndex = 0
    self.endIndex = None
    self.IndexExternallySet = False

    # playback settings
    self.LoopCount = 1
    self.PlaybackSpeed = 1
    self.LoopMode = RepeatMode.NONE

    # run state
    self.Stopped = True
    self.Paused = False
    self.StartTime = None

    self.threadName = "PlaybackObject" + ":" + str(self)
    threadMgr = ThreadManager.GetThreadManager()
    threadMgr.CreateThread(self.threadName, self.__workerProc)
    threadMgr.StartThread(self.threadName)
def main():
    """Program entry point: read the configuration, start both UDP servers and the UI.

    Returns early (None) when the command line or config file is rejected, or
    when either connection is missing from the configuration.
    """
    if not HandleCommandlineArguments():
        return

    if not Configuration.get().ReadConfigFile():
        return

    PrintVersion()

    downstreamConnInfo = Configuration.get().GetDownstreamConnection()
    upstreamConnInfo = Configuration.get().GetUpstreamConnection()

    # Validate before any server or thread exists. Previously this was checked
    # only after the upstream server had been started, and the early return
    # skipped StopAllThreads().
    if downstreamConnInfo is None or upstreamConnInfo is None:
        return

    downstreamServer = ServerUDP.ServerUDP(downstreamConnInfo, ConnectionType.DownstreamServer)
    upstreamServer = ServerUDP.ServerUDP(upstreamConnInfo, ConnectionType.UpstreamServer)

    threadMgr = ThreadManager.GetThreadManager()

    if upstreamServer.Start():
        threadMgr.CreateThread("StartupStuff", StartupWorkerProc, (downstreamServer, upstreamServer))

        if not Configuration.get().GetUseGUI():
            GuiMgr.Initialize(GuiMgr.UI.NONE, downstreamServer, upstreamServer)
        else:
            try:
                GuiMgr.Initialize(GuiMgr.UI.TKINTR, downstreamServer, upstreamServer)
            except Exception as Ex:
                # GUI could not be initialised; fall back to no UI
                print(str(Ex))
                GuiMgr.Initialize(GuiMgr.UI.NONE, downstreamServer, upstreamServer)

        threadMgr.StartThread("StartupStuff")
        GuiMgr.Start()

    threadMgr.StopAllThreads()
def __init__(self, ip=None, Port=None, ConnType=ConnectionType.Unknown, canTimeout=True):
    """Create a target, open its UDP socket and start its worker thread.

    ip         -- address as given in the configuration (could be a DNS name)
    Port       -- target port
    ConnType   -- connection type passed through to the base class
    canTimeout -- stored as ``m_CanTimeout``
    """
    super(Target, self).__init__(ip, Port, ConnType)
    self.ConfigurationDefinedTarget = ip
    self.m_IP_InUse = None  # m_ip could be DNS name
    self.m_socket = None
    self.m_lastHeartbeat = Time.GetCurrMS()
    self.m_PacketsSent = 0
    self.m_BytestSent = 0
    self.m_InitialRefreshSent = False
    self.m_objLockDict = threading.Lock()
    self.m_SendList = []
    self.m_hasTimedOut = False
    self.m_LastDNSResolution = Time.GetCurrMS()
    self.m_DNSResolutionPeriod = 30000  # 30 seconds
    self.m_CanTimeout = canTimeout
    self.threadName = None
    self.lastRefreshRequestID = 0
    self.MarkedForRemoval = False

    try:
        self.m_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
        self.m_socket.setblocking(True)
        self.m_socket.settimeout(0.001)
    except Exception as ex:
        # The old handler evaluated ``super.m_Connection``, which raises
        # AttributeError on the builtin ``super`` type and hid the real error.
        Log.getLogger().error("Error setting up Target Socket -->" + str(ip) + ":" + str(Port) + " - " + str(ex))

    # str() keeps the concatenation valid even when no IP was supplied
    self.threadName = "Target:" + str(self.getIP()) + "[" + str(self.getPort()) + "]"
    ThreadManager.GetThreadManager().CreateThread(self.threadName, self.WorkerProc)
    ThreadManager.GetThreadManager().StartThread(self.threadName)
def __SimpleWorker(self):
    """Worker loop: pull received data off the synch queue and process it.

    Runs until the thread manager signals all-stop. When the queue is empty
    and more than two workers exist, this worker decrements the worker count
    and exits; otherwise it sleeps 10 ms and polls again.
    """
    threadMgr = ThreadManager.GetThreadManager
    while not threadMgr().AllStopSignalled():
        pending = self.GetItemFromSynchQueue()  # get data to process

        if pending is not None:
            payload, sender = pending
            self.__HandleLiveData(payload, sender)  # go process the data
            continue

        # no data to process, maybe reduce worker count
        if self._GetWorkerThreadCount() > 2:
            self._DecrementWorkerThreadCount()
            return

        Sleep.SleepMs(10)
def Start(self):
    """Bind the already-created socket and start the accept thread.

    Returns False if the bind raises, True otherwise.
    """
    try:
        endpoint = (self.getIP(), self.getPort())
        self.m_socket.bind(endpoint)
    except Exception as ex:
        failureText = "Invalid Socket IP or Port " + str(self) + " - " + str(ex)
        Log.getLogger().error(failureText)
        return False

    # this should not be OK for TCP, remove
    if self.getPort() == 0:
        # let the OS choose the port; can use this to pass to Marvin
        self.Port = self.m_socket.getsockname()[1]
        self.m_ConnPoint.Port = self.Port
        self.m_ConnPoint.IP = self.IP

    Log.getLogger().info(self.getTypeStr() + " listening on -->" + str(self))

    threadMgr = ThreadManager.GetThreadManager()
    threadMgr.CreateThread(self.m_Name, self.acceptThread)
    threadMgr.StartThread(self.m_Name)
    return True
def __SimpleWorker(self):
    """Worker loop: take downstream packets off the queue and broadcast them.

    Runs until the thread manager signals all-stop. When no packet is waiting
    and more than two workers exist, this worker decrements the worker count
    and exits; otherwise it sleeps 10 ms and polls again.
    """
    threadMgr = ThreadManager.GetThreadManager
    while not threadMgr().AllStopSignalled():
        queued = self.GetDownstreamPacket()  # get data to process

        if queued is not None:
            sendBuffer, ignoreTimeout, domNode, isGroup = queued
            self._BroadcastDownstream(sendBuffer, ignoreTimeout, domNode, isGroup)
            continue

        # no data to process, maybe reduce worker count
        if self.GetWorkerThreadCount() > 2:
            self.DecrementWorkerThreadCount()
            return

        Sleep.SleepMs(10)
def BeginCollecting(self, runOnce):
    """Create and start this collector's own collection thread.

    runOnce -- accepted but not used by this method
    """
    workerName = self._Name
    threadMgr = ThreadManager.GetThreadManager()
    threadMgr.CreateThread(workerName, self.__collectionProc)
    threadMgr.StartThread(workerName)
def main():
    """Minion entry point: parse arguments, load the config and run collectors.

    Command line: -i/--input FILE, -v/--verbose 0-3, -r/--runonce,
    -a/--aliasfile FILE. Returns None in all cases; returns early when
    argument parsing exits, the Python version check fails, the alias file
    cannot be loaded or the config file does not exist.
    """
    parser = argparse.ArgumentParser(description='Minion Data Collector.')
    parser.add_argument("-i", "--input", dest='argFilename',
                        help='specifies input file', type=extant_file, metavar="FILE")
    parser.add_argument("-v", "--verbose", help="prints information, values 0-3", type=int)
    parser.add_argument("-r", "--runonce", help="calls all collectors once and exits",
                        action="store_true")
    parser.add_argument("-a", "--aliasfile",
                        help="specify an external file that has alias defintions", type=str)

    try:
        args = parser.parse_args()
    except SystemExit:
        # argparse reports bad arguments and --help by raising SystemExit;
        # keep the existing "just return" behaviour without a bare except.
        return

    _VerboseLevel = 0 if args.verbose is None else args.verbose
    _RunOnce = args.runonce

    ShowVersion()
    if not VersionCheck.CheckVersion():
        Log.getLogger().error("Invalid version of Python")
        return

    if 3 <= _VerboseLevel:
        Log.setLevel(logging.DEBUG)
    elif 2 == _VerboseLevel:
        Log.setLevel(logging.WARNING)
    elif 1 == _VerboseLevel:
        Log.setLevel(logging.INFO)
    else:
        Log.setLevel(logging.ERROR)

    curr_dir_path = os.path.dirname(os.path.realpath(__file__))
    Alias.AliasMgr.AddAlias("WORKING_DIR", curr_dir_path)
    Alias.AliasMgr.AddEnvironmentVariables()
    if args.aliasfile is not None:
        if not Alias.AliasMgr.LoadExternalAliasFile(args.aliasfile):
            return

    # turn off Ctrl+C signal handler (will get inherited by sub processes)
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    if not os.path.exists(_ConfigFilename):
        Log.getLogger().error("Config file [" + _ConfigFilename + "] not found!")
        return

    config = Configuration.Configuration(_ConfigFilename, True)

    if config is not None and config.IsValid():
        print("Starting Collectors...")

        totalCollectors = 0
        for namespace in config.GetNamespaces():
            totalCollectors += namespace.Begin(_RunOnce)

        signal.signal(signal.SIGINT, signal_handler)  # make my own Ctrl+C handler now

        print(str(totalCollectors) + " Collectors started.")

        if False == _RunOnce:
            print("Press CTRL+C to Exit")
        else:
            print("Running Once")

        if False == _RunOnce:
            while _ThreadActive:
                if 0 == _VerboseLevel:
                    for c in spinning_cursor():
                        # Re-check on every frame: without this an endless
                        # cursor generator would keep the loop alive after
                        # the Ctrl+C handler has cleared the flag.
                        if not _ThreadActive:
                            break
                        countStr = '[' + str(config.GetCollectorCount()) + '] '
                        sys.stdout.write(countStr)
                        sys.stdout.write(c)
                        sys.stdout.flush()  # show the frame before sleeping on it
                        Sleep.SleepMs(100)
                        # erase the cursor character and the count prefix
                        sys.stdout.write('\b' * (len(countStr) + 1))
                else:
                    Sleep.SleepMs(100)

    # NOTE(review): the original nesting of the shutdown steps was lost in the
    # collapsed source; they are run unconditionally here - confirm.
    print("Shutting down...")
    try:
        ThreadManager.GetThreadManager().StopAllThreads()
    except Exception as ex:
        # shutdown is best-effort, but record why it failed
        Log.getLogger().error("Error stopping threads: " + str(ex))
class BoWIDFIntelligence:
    """Bag-of-words / IDF pipeline over news items, backed by a SQL database.

    Work items have the shape ``(label, (service, ticker, title, pubDate,
    link, newsID, fullSource))`` and are handed to three ThreadManager pools
    (predict, learn, parse). Items still queued at close() are written to the
    three queue tables and re-loaded by the next instance.

    NOTE(review): statement nesting in this class was reconstructed from a
    whitespace-collapsed source; see the notes in the worker methods.
    """

    masterWord = "__MASTER_WORD__"
    wordTable = "Words"
    articleTable = "Articles"
    predictQueueTable = "ToPredictQueue"
    learnQueueTable = "ToLearnQueue"
    parseQueueTable = "ToParseQueue"
    predictLabel = "PREDICT"
    learnLabel = "LEARN"
    parseLabel = "PARSE"

    def __init__(self, aCompanyInformationList, aBoWIDFDatabaseName, aIDFinanceClientIntelligent, LearningThreads=3, PredictingThreads=2, ParsingThreads=4):
        """Open the database, create missing tables, start the pools and reload queued work.

        aCompanyInformationList     -- stored as ``companyinformationlist``
        aBoWIDFDatabaseName         -- passed to TSSQLConnection
        aIDFinanceClientIntelligent -- stored as ``intradayconnection``
        LearningThreads, PredictingThreads, ParsingThreads -- pool sizes
        """
        self.status = "OPEN"
        self.companyinformationlist = aCompanyInformationList
        self.bowidfdbconnection = TSSQLConnection(aBoWIDFDatabaseName)
        UpdateWord(self.bowidfdbconnection)
        UpdateArticle(self.bowidfdbconnection)
        UpdateWords(self.bowidfdbconnection)
        self.intradayconnection = aIDFinanceClientIntelligent

        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}( Word text UNIQUE, DocumentCount int, TotalCount int, Prediction text)""".format(self.wordTable), block=True)
        self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}( Service text, Ticker text, Title text, PubDate int, Link text, NewsID text UNIQUE, Content text, Prediction text, Actual text, PCount int)""".format(self.articleTable), block=True)
        # the three persisted work queues share one schema
        for queueTable in (self.predictQueueTable, self.learnQueueTable, self.parseQueueTable):
            self.bowidfdbconnection.execute("""CREATE TABLE IF NOT EXISTS {0}( Service text, Ticker text, Title text, PubDate text, Link text, NewsID text UNIQUE, FullContent text, Caller text)""".format(queueTable), block=True)

        self.longlearnqueue = queue.Queue()
        self.longlearntimer = IntervalTimer(60 * 60, self.__moveLongLearn2Regular)
        self.predicterManager = ThreadManager(self.predictLabel, PredictingThreads, self.__predicterWorker)
        self.learnerManager = ThreadManager(self.learnLabel, LearningThreads, self.__learnerWorker)
        self.parserManager = ThreadManager(self.parseLabel, ParsingThreads, self.__parserWorker)
        self.__tableQueues2Managers()
        self.longlearntimer.start()

    def predict(self, aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource, doBlock=False, doLearn=True):
        """Queue a news item for prediction. ``doBlock`` and ``doLearn`` are not used."""
        if self.__canPredictInput(aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource):
            self.predicterManager.put((self.predictLabel, (aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource)))

    def learn(self, aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource):
        """Queue a news item for learning."""
        if self.__canLearnInput(aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource):
            self.learnerManager.put((self.learnLabel, (aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource)))

    def __canPredictInput(self, aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource):
        """Input filter for predict(); currently accepts everything."""
        return True

    def __canLearnInput(self, aService, aTicker, aTitle, aPubDate, aLink, aNewsID, aFullSource):
        """Input filter for learn(); currently accepts everything."""
        return True

    def join(self):
        """Join the three pools four times over, since workers re-queue items into each other."""
        for _ in range(4):
            self.learnerManager.join()
            self.predicterManager.join()
            self.parserManager.join()

    def close(self, block=False):
        """Stop the pools, persist any still-queued work and close the database.

        block -- when true, join() the pools first. Calling close() on an
                 already closed instance does nothing.
        """
        if self.status == "CLOSED":
            return
        self.status = "CLOSED"
        if block:
            self.join()
        self.learnerManager.close()
        self.predicterManager.close()
        self.parserManager.close()
        self.longlearntimer.cancel()
        self.__moveLongLearn2Regular()
        self.__managers2TableQueues()
        self.bowidfdbconnection.close()

    def __del__(self):
        # An instance whose __init__ did not finish lacks the attributes
        # close() uses; parserManager is the last one __init__ assigns.
        if hasattr(self, "parserManager"):
            self.close()

    def __tableQueues2Managers(self):
        """Reload all three persisted queue tables into their pools."""
        self.__tableQueue2Manager(self.predictQueueTable, self.predicterManager)
        self.__tableQueue2Manager(self.learnQueueTable, self.learnerManager)
        self.__tableQueue2Manager(self.parseQueueTable, self.parserManager)

    def __tableQueue2Manager(self, aTable, aManager):
        """Move every row of aTable into aManager's queue, then empty the table."""
        rows = self.bowidfdbconnection.execute("SELECT * FROM {0};".format(aTable), block=True)
        for row in rows:
            # column 7 is the caller label, columns 0-6 are the item fields
            toput = (row[7], tuple(row[i] for i in range(7)))
            aManager.put(toput)
        self.bowidfdbconnection.execute("DELETE FROM {0};".format(aTable), block=True)
        self.bowidfdbconnection.execute("VACUUM;", block=True)

    def __managers2TableQueues(self):
        """Persist whatever is still queued in the three pools."""
        self.__manager2TableQueue(self.predictQueueTable, self.predicterManager)
        self.__manager2TableQueue(self.learnQueueTable, self.learnerManager)
        self.__manager2TableQueue(self.parseQueueTable, self.parserManager)

    def __manager2TableQueue(self, aTable, aManager):
        """Drain aManager's queue into aTable without blocking."""
        try:
            while True:
                item = aManager.queue.get(block=False)[1]
                toput = tuple(item[1][i] for i in range(7)) + (item[0],)
                self.bowidfdbconnection.execute("INSERT OR IGNORE INTO {0} VALUES (?,?,?,?,?,?,?,?)".format(aTable), toput)
                aManager.queue.task_done()
        except queue.Empty:
            pass

    def __withoutSource(self, aEntry):
        """Return a copy of a ``(label, fields)`` entry with the full-source field set to None."""
        return (aEntry[0], tuple(aEntry[1][i] for i in range(6)) + (None,))

    def __parsePubDate(self, aPubDate):
        """Convert a publication date string to seconds since the epoch, or None.

        Two layouts are tried: "%a, %d %b %Y %H:%M:%S %Z" on the raw text and
        "%Y-%m-%dT%H%M%S%z" on the text with every ':' removed.
        """
        if not isinstance(aPubDate, str):
            return None
        attempts = (
            (aPubDate, "%a, %d %b %Y %H:%M:%S %Z"),
            (aPubDate.replace(":", ""), "%Y-%m-%dT%H%M%S%z"),
        )
        for text, layout in attempts:
            try:
                return time.mktime(time.strptime(text, layout))
            except (ValueError, OverflowError):
                continue
        return None

    def __predicterWorker(self, aQueue, aStopToken):
        """Pool worker: compute and store a prediction for each queued article."""
        while True:
            item = aQueue.get()
            if item[1] == aStopToken:
                break
            entry = item[1]
            newsID = entry[1][5]
            newsitem = self.__getNewsItemByID(newsID)
            if newsitem is None:
                # not parsed yet; the parser will re-queue it
                self.parserManager.put(entry)
            elif newsitem[7] is None:
                textinfo = self.__getTextInformation(newsitem[6])
                masterinfo = None
                wordcount = 0
                for word in textinfo:
                    wordcount += textinfo[word][0]
                    if word == self.masterWord:
                        masterinfo = textinfo[word]

                weights = {}
                weighttotal = 0.0
                for word in textinfo:
                    if word != self.masterWord:
                        tf = float(textinfo[word][0]) / float(wordcount)
                        idf = log(1.0 + (float(masterinfo[1][1]) / (1 + float(textinfo[word][1][1]))))
                        tfidf = tf * idf
                        weights[word] = tfidf
                        weighttotal += tfidf

                mastervaltime = self.__getEmptyPrediction()
                if weighttotal != 0:
                    for word in weights:
                        valtime = ValueTimes.loads(textinfo[word][1][3])
                        if weights[word] > 0 and not valtime.isZero():
                            mastervaltime = mastervaltime + valtime * ((128.0 * weights[word]) / weighttotal)
                    mastervaltime = mastervaltime * (1 / 128.0)
                self.bowidfdbconnection.execute("UPDATEARTICLE {0} SET Prediction=? WHERE NewsID=?".format(self.articleTable), (mastervaltime, newsID,), block=True)
                # NOTE(review): assumed to sit inside this branch - confirm
                if entry[0] == self.learnLabel:
                    self.learnerManager.put(entry)
            aQueue.task_done()
        aQueue.task_done()  # accounts for the stop token

    def __learnerWorker(self, aQueue, aStopToken):
        """Pool worker: record the actual outcome for each queued article."""
        while True:
            item = aQueue.get()
            if item[1] == aStopToken:
                break
            entry = item[1]
            newsID = entry[1][5]
            newsitem = self.__getNewsItemByID(newsID)
            if newsitem is None:
                self.parserManager.put(entry)
            elif newsitem[7] is None or len(newsitem[7]) < 5:
                # no usable prediction yet; the predicter sends it back here
                self.predicterManager.put(self.__withoutSource(entry))
            elif newsitem[8] is None:
                data = self.intradayconnection.getData(entry[1][1], newsitem[3])
                if data is None:
                    # no data available now; the hourly timer retries it
                    self.longlearnqueue.put(self.__withoutSource(entry))
                else:
                    tokens = self.__tokenize(newsitem[6])
                    self.bowidfdbconnection.execute("UPDATEWORDS", (tokens, data), block=True)
                    self.bowidfdbconnection.execute("UPDATE {0} SET Actual=? WHERE NewsID=?".format(self.articleTable), (data.dumps(), newsID,))
            aQueue.task_done()
        aQueue.task_done()  # accounts for the stop token

    def __forward(self, aEntry):
        """Hand an entry to the predict or learn pool according to its label."""
        if aEntry[0] == self.predictLabel:
            self.predicterManager.put(aEntry)
        elif aEntry[0] == self.learnLabel:
            self.learnerManager.put(aEntry)

    def __parserWorker(self, aQueue, aStopToken):
        """Pool worker: extract article text, store it, then forward the item."""
        config = MyConfig()
        while True:
            item = aQueue.get()
            if item[1] == aStopToken:
                break
            entry = item[1]
            fields = entry[1]
            newsitem = self.__getNewsItemByID(fields[5])
            if newsitem is None:
                print("Parsing " + fields[5])
                tse = self.__parsePubDate(fields[3])
                # check tse first so None is never handed to hasProperStart
                if tse is not None and self.intradayconnection.hasProperStart(fields[1], tse):
                    for _ in range(3):
                        try:
                            nparticle = newspaper.article.Article(fields[4], config=config)
                            nparticle.download(html=fields[6])
                            nparticle.parse()
                            text = nparticle.text
                            if text is not None and len(text) > 40 and fields[1] in text:
                                rowvalues = (fields[0], fields[1], fields[2], tse, fields[4], fields[5], text, None, None, None)
                                self.bowidfdbconnection.execute("INSERT INTO {0} VALUES (?,?,?,?,?,?,?,?,?,?)".format(self.articleTable), rowvalues, block=True)
                                self.__forward(self.__withoutSource(entry))
                            # NOTE(review): assumed to end the retries after
                            # any attempt that did not raise - confirm
                            break
                        except Exception:
                            # best-effort: any failure just consumes one attempt
                            pass
            else:
                # already parsed; pass it on
                self.__forward(self.__withoutSource(entry))
            aQueue.task_done()
        aQueue.task_done()  # accounts for the stop token

    def __getNewsItemByID(self, aNewsID):
        """Return the Articles row for aNewsID, or None when there is none."""
        newsitemlist = self.bowidfdbconnection.execute("SELECT * FROM {0} WHERE NewsID=? LIMIT 1".format(self.articleTable), (aNewsID,))
        if not newsitemlist:
            return None
        return newsitemlist[0]

    def __getTextInformation(self, text):
        """Map each token of text (plus the master word) to ``(count in text, Words row)``.

        Words with no row get a synthetic row with zero counts and an empty
        prediction.
        """
        textinformation = {}
        wordcounts = self.__tokenize(text)
        wordlist = list(wordcounts.keys())
        wordlist.append(self.masterWord)
        blocksize = 500  # words looked up per SELECT
        for i in range(math.ceil(len(wordlist) / blocksize)):
            curparam = tuple(wordlist[i * blocksize:(i + 1) * blocksize])
            sqlstatement = "SELECT * FROM {0} WHERE".format(self.wordTable) + " Word=? OR " * (len(curparam) - 1) + " Word=? "
            for wordrow in self.bowidfdbconnection.execute(sqlstatement, curparam):
                if wordrow[0] == self.masterWord:
                    textinformation[wordrow[0]] = (0, wordrow)
                else:
                    textinformation[wordrow[0]] = (wordcounts[wordrow[0]], wordrow)
        for word in wordcounts:
            if word not in textinformation:
                textinformation[word] = (wordcounts[word], (word, 0, 0, self.__getEmptyPrediction().dumps()))
        if self.masterWord not in textinformation:
            textinformation[self.masterWord] = (0, (self.masterWord, 0, 0, self.__getEmptyPrediction().dumps()))
        return textinformation

    def tokenize(self, text):
        """Public wrapper around the private tokenizer."""
        return self.__tokenize(text)

    def __tokenize(self, text):
        """Return ``{stemmed lowercase word: count}`` for the alphabetic non-stopwords in text."""
        ret = {}
        # Raw string: "\(" and "\)" are invalid escapes in a plain literal.
        # NOTE(review): "[.]" matches only a literal dot; ".+" may have been
        # intended - pattern left unchanged.
        textwot = re.sub(r"\([.]+:[.]+\)", " ", text, flags=re.DOTALL)
        wordlist = nltk.tokenize.word_tokenize(textwot)
        stemmer = nltk.stem.snowball.EnglishStemmer()
        stopwords = set(nltk.corpus.stopwords.words('english'))
        for word in wordlist:
            try:
                word = stemmer.stem(word).lower()
            except Exception:
                # best-effort: skip a token the stemmer cannot handle
                continue
            if word and word not in stopwords and word.isalpha():
                ret[word] = ret.get(word, 0) + 1
        return ret

    def __moveLongLearn2Regular(self):
        """Move every deferred item from the long-learn queue back to the learn pool."""
        try:
            while True:
                newsitem = self.longlearnqueue.get(False)
                self.learnerManager.put(newsitem)
        except queue.Empty:
            pass

    def __getEmptyPrediction(self):
        """Return a ValueTimes with 39 zero entries keyed 0, 1800, ... (30-minute steps in seconds)."""
        valtime = {}
        for i in range(39):
            valtime[i * 30 * 60] = 0
        return ValueTimes(valtime)
def __init__(self, aCompanyInformationList, aBoWIDFDatabaseName, aIDFinanceClientIntelligent, LearningThreads=3, PredictingThreads=2, ParsingThreads=4):
    """Open the database, create missing tables, start the pools and reload queued work.

    aCompanyInformationList     -- stored as ``companyinformationlist``
    aBoWIDFDatabaseName         -- passed to TSSQLConnection
    aIDFinanceClientIntelligent -- stored as ``intradayconnection``
    LearningThreads, PredictingThreads, ParsingThreads -- pool sizes
    """
    self.status = "OPEN"
    self.companyinformationlist = aCompanyInformationList

    db = TSSQLConnection(aBoWIDFDatabaseName)
    self.bowidfdbconnection = db
    for installer in (UpdateWord, UpdateArticle, UpdateWords):
        installer(db)
    self.intradayconnection = aIDFinanceClientIntelligent

    wordsDDL = """CREATE TABLE IF NOT EXISTS {0}( Word text UNIQUE, DocumentCount int, TotalCount int, Prediction text)"""
    articlesDDL = """CREATE TABLE IF NOT EXISTS {0}( Service text, Ticker text, Title text, PubDate int, Link text, NewsID text UNIQUE, Content text, Prediction text, Actual text, PCount int)"""
    # the three persisted work queues share one schema
    queueDDL = """CREATE TABLE IF NOT EXISTS {0}( Service text, Ticker text, Title text, PubDate text, Link text, NewsID text UNIQUE, FullContent text, Caller text)"""

    db.execute(wordsDDL.format(self.wordTable), block=True)
    db.execute(articlesDDL.format(self.articleTable), block=True)
    for queueTable in (self.predictQueueTable, self.learnQueueTable, self.parseQueueTable):
        db.execute(queueDDL.format(queueTable), block=True)

    self.longlearnqueue = queue.Queue()
    self.longlearntimer = IntervalTimer(60 * 60, self.__moveLongLearn2Regular)

    self.predicterManager = ThreadManager(self.predictLabel, PredictingThreads, self.__predicterWorker)
    self.learnerManager = ThreadManager(self.learnLabel, LearningThreads, self.__learnerWorker)
    self.parserManager = ThreadManager(self.parseLabel, ParsingThreads, self.__parserWorker)

    # reload work persisted by a previous run, then arm the hourly retry timer
    self.__tableQueues2Managers()
    self.longlearntimer.start()
def __init__(self):
    """Create and start the connection-update worker thread.

    Whatever ``CreateThread`` returns is kept in ``__WorkerThread``.
    """
    workerName = "Connection Update Timer Thread"
    threadMgr = ThreadManager.GetThreadManager()
    self.__WorkerThread = threadMgr.CreateThread(workerName, self.WorkerProc)
    threadMgr.StartThread(workerName)
def main():
    """Program entry point: start the UDP servers, the optional TCP proxy and the UI.

    Returns early (None) when the command line or config file is rejected, or
    when either UDP connection is missing from the configuration. The startup
    thread and the UI are started only if the proxy endpoint (when one is
    configured) started successfully.
    """
    if not HandleCommandlineArguments():
        return

    if not Configuration.get().ReadConfigFile():
        return

    PrintVersion()

    downstreamConnInfo = Configuration.get().GetDownstreamConnection()
    upstreamConnInfo = Configuration.get().GetUpstreamConnection()

    # Validate before any server or thread exists. Previously this was checked
    # only after the upstream server had been started, and the early return
    # skipped StopAllThreads().
    if downstreamConnInfo is None or upstreamConnInfo is None:
        return

    downstreamServer = Server.ServerUDP(downstreamConnInfo, ConnectionType.DownstreamServer)
    upstreamServer = Server.ServerUDP(upstreamConnInfo, ConnectionType.UpstreamServer)
    proxyServer = None
    proxyClient = None
    goodToGo = True

    threadMgr = ThreadManager.GetThreadManager()

    if upstreamServer.Start():
        threadMgr.CreateThread("StartupStuff", StartupWorkerProc, (downstreamServer, upstreamServer))

        if not Configuration.get().GetUseGUI():
            GuiMgr.Initialize(GuiMgr.UI.NONE, downstreamServer, upstreamServer)
        else:
            try:
                GuiMgr.Initialize(GuiMgr.UI.TKINTR, downstreamServer, upstreamServer)
            except Exception as Ex:
                # GUI could not be initialised; fall back to no UI
                print(str(Ex))
                GuiMgr.Initialize(GuiMgr.UI.NONE, downstreamServer, upstreamServer)

        # a proxy server connection takes precedence over a proxy client one
        if Configuration.get().GetProxyConnection() is not None:
            proxyConnInfo_server = Configuration.get().GetProxyConnection()
            proxyServer = Server.ServerTCP(proxyConnInfo_server, ConnectionType.ProxyConnection_Server,
                                           downstreamServer, upstreamServer)
            if False == proxyServer.Start():
                goodToGo = False

        elif Configuration.get().GetProxyServerConnection() is not None:
            proxyConnInfo_Client = Configuration.get().GetProxyServerConnection()
            proxyClient = Server.ClientTCP(proxyConnInfo_Client, ConnectionType.ProxyConnection_Client,
                                           downstreamServer, upstreamServer)
            if False == proxyClient.Start():
                goodToGo = False

        if True == goodToGo:
            threadMgr.StartThread("StartupStuff")
            GuiMgr.Start()

    threadMgr.StopAllThreads()
def StopProcessing(self):
    """Stop this object's thread, then remove it from the thread manager."""
    threadMgr = ThreadManager.GetThreadManager()
    workerName = self.threadName
    threadMgr.StopThread(workerName)
    threadMgr.RemoveThread(workerName)
def Quit(self):
    """Stop every managed thread, then notify the GUI object via ``OnQuit``."""
    threadMgr = ThreadManager.GetThreadManager()
    threadMgr.StopAllThreads()

    gui = self.pGui
    gui.OnQuit()
class BoWIDFIntelligence:
    """Bag-of-words TF-IDF predictor/learner for news items, backed by SQL.

    Work travels between three ``ThreadManager`` pools (parse, predict,
    learn) as *payloads* of the form::

        (label, (service, ticker, title, pubDate, link, newsID, fullSource))

    where ``label`` is ``predictLabel`` or ``learnLabel``.  Worker callbacks
    receive queue entries whose element ``[1]`` is the payload (or the pool's
    stop token).

    Row layouts used by index throughout, taken from the CREATE TABLE
    statements in ``__init__``:

    * article row: 0 Service, 1 Ticker, 2 Title, 3 PubDate, 4 Link,
      5 NewsID, 6 Content, 7 Prediction, 8 Actual, 9 PCount
    * word row: 0 Word, 1 DocumentCount, 2 TotalCount, 3 Prediction

    NOTE(review): this class was reconstructed from whitespace-collapsed
    source.  The nesting of a few statements (the ``else`` branch and the
    ``break`` in ``__parserWorker``, the learn hand-off in
    ``__predicterWorker``) is a best guess -- confirm against the original.
    """

    # Pseudo-word whose row in the word table carries corpus-wide counts.
    masterWord = "__MASTER_WORD__"
    # Table names.
    wordTable = "Words"
    articleTable = "Articles"
    predictQueueTable = "ToPredictQueue"
    learnQueueTable = "ToLearnQueue"
    parseQueueTable = "ToParseQueue"
    # Payload labels / pool names.
    predictLabel = "PREDICT"
    learnLabel = "LEARN"
    parseLabel = "PARSE"

    def __init__(self,
                 aCompanyInformationList,
                 aBoWIDFDatabaseName,
                 aIDFinanceClientIntelligent,
                 LearningThreads=3,
                 PredictingThreads=2,
                 ParsingThreads=4):
        """Open the database, create missing tables and start the worker pools.

        Args:
            aCompanyInformationList: stored as ``companyinformationlist``.
            aBoWIDFDatabaseName: passed to ``TSSQLConnection``.
            aIDFinanceClientIntelligent: finance client; the workers call its
                ``getData`` and ``hasProperStart`` methods.
            LearningThreads: size of the learner pool.
            PredictingThreads: size of the predicter pool.
            ParsingThreads: size of the parser pool.
        """
        self.status = "OPEN"
        self.companyinformationlist = aCompanyInformationList
        self.bowidfdbconnection = TSSQLConnection(aBoWIDFDatabaseName)
        # presumably these register the custom UPDATEWORD / UPDATEARTICLE /
        # UPDATEWORDS commands used by the workers -- TODO confirm
        UpdateWord(self.bowidfdbconnection)
        UpdateArticle(self.bowidfdbconnection)
        UpdateWords(self.bowidfdbconnection)
        self.intradayconnection = aIDFinanceClientIntelligent
        self.bowidfdbconnection.execute(
            """CREATE TABLE IF NOT EXISTS {0}( Word text UNIQUE, DocumentCount int, TotalCount int, Prediction text)"""
            .format(self.wordTable),
            block=True)
        self.bowidfdbconnection.execute(
            """CREATE TABLE IF NOT EXISTS {0}( Service text, Ticker text, Title text, PubDate int, Link text, NewsID text UNIQUE, Content text, Prediction text, Actual text, PCount int)"""
            .format(self.articleTable),
            block=True)
        # The three persisted work queues share one schema.
        queueTableDDL = """CREATE TABLE IF NOT EXISTS {0}( Service text, Ticker text, Title text, PubDate text, Link text, NewsID text UNIQUE, FullContent text, Caller text)"""
        for queueTable in (self.predictQueueTable, self.learnQueueTable,
                           self.parseQueueTable):
            self.bowidfdbconnection.execute(queueTableDDL.format(queueTable),
                                            block=True)
        # Learn requests whose market data is not available yet wait here and
        # are fed back to the learner pool once per hour.
        self.longlearnqueue = queue.Queue()
        self.longlearntimer = IntervalTimer(60 * 60,
                                            self.__moveLongLearn2Regular)
        self.predicterManager = ThreadManager(self.predictLabel,
                                              PredictingThreads,
                                              self.__predicterWorker)
        self.learnerManager = ThreadManager(self.learnLabel, LearningThreads,
                                            self.__learnerWorker)
        self.parserManager = ThreadManager(self.parseLabel, ParsingThreads,
                                           self.__parserWorker)
        # Reload work that was persisted by a previous close().
        self.__tableQueues2Managers()
        self.longlearntimer.start()

    def predict(self,
                aService,
                aTicker,
                aTitle,
                aPubDate,
                aLink,
                aNewsID,
                aFullSource,
                doBlock=False,
                doLearn=True):
        """Queue a news item for prediction.

        ``doBlock`` and ``doLearn`` are accepted but currently unused.
        """
        if self.__canPredictInput(aService, aTicker, aTitle, aPubDate, aLink,
                                  aNewsID, aFullSource):
            self.predicterManager.put(
                (self.predictLabel, (aService, aTicker, aTitle, aPubDate,
                                     aLink, aNewsID, aFullSource)))

    def learn(self, aService, aTicker, aTitle, aPubDate, aLink, aNewsID,
              aFullSource):
        """Queue a news item for learning."""
        if self.__canLearnInput(aService, aTicker, aTitle, aPubDate, aLink,
                                aNewsID, aFullSource):
            self.learnerManager.put(
                (self.learnLabel, (aService, aTicker, aTitle, aPubDate, aLink,
                                   aNewsID, aFullSource)))

    def __canPredictInput(self, aService, aTicker, aTitle, aPubDate, aLink,
                          aNewsID, aFullSource):
        """Input filter for ``predict``; currently accepts everything."""
        return True

    def __canLearnInput(self, aService, aTicker, aTitle, aPubDate, aLink,
                        aNewsID, aFullSource):
        """Input filter for ``learn``; currently accepts everything."""
        return True

    def join(self):
        """Join the three pools, four rounds in a row.

        Workers re-queue payloads on the other pools, so a single round of
        joins does not guarantee that all queues are drained.
        """
        for _ in range(4):
            self.learnerManager.join()
            self.predicterManager.join()
            self.parserManager.join()

    def close(self, block=False):
        """Shut down the pools, persist pending work and close the database.

        Does nothing when the instance is already closed.

        Args:
            block: when true, ``join()`` is called before shutting down.
        """
        if self.status != "CLOSED":
            self.status = "CLOSED"
            if block:
                self.join()
            self.learnerManager.close()
            self.predicterManager.close()
            self.parserManager.close()
            self.longlearntimer.cancel()
            # Fold the delayed learn requests back in so they get persisted.
            self.__moveLongLearn2Regular()
            self.__managers2TableQueues()
            self.bowidfdbconnection.close()

    def __del__(self):
        self.close()

    def __tableQueues2Managers(self):
        """Load all three persisted queue tables into their pools."""
        self.__tableQueue2Manager(self.predictQueueTable,
                                  self.predicterManager)
        self.__tableQueue2Manager(self.learnQueueTable, self.learnerManager)
        self.__tableQueue2Manager(self.parseQueueTable, self.parserManager)

    def __tableQueue2Manager(self, aTable, aManager):
        """Move every row of *aTable* onto *aManager*, then empty the table."""
        rows = self.bowidfdbconnection.execute(
            "SELECT * FROM {0};".format(aTable), block=True)
        for row in rows:
            # Column 7 (Caller) is the payload label; 0-6 are the fields.
            toput = (row[7], (row[0], row[1], row[2], row[3], row[4], row[5],
                              row[6]))
            aManager.put(toput)
        self.bowidfdbconnection.execute("DELETE FROM {0};".format(aTable),
                                        block=True)
        self.bowidfdbconnection.execute("VACUUM;", block=True)

    def __managers2TableQueues(self):
        """Persist the pending work of all three pools."""
        self.__manager2TableQueue(self.predictQueueTable,
                                  self.predicterManager)
        self.__manager2TableQueue(self.learnQueueTable, self.learnerManager)
        self.__manager2TableQueue(self.parseQueueTable, self.parserManager)

    def __manager2TableQueue(self, aTable, aManager):
        """Drain *aManager*'s queue into *aTable* until the queue is empty."""
        try:
            while True:
                payload = aManager.queue.get(block=False)[1]
                fields = payload[1]
                # Seven payload fields followed by the label (Caller column).
                toput = (fields[0], fields[1], fields[2], fields[3],
                         fields[4], fields[5], fields[6], payload[0])
                self.bowidfdbconnection.execute(
                    "INSERT OR IGNORE INTO {0} VALUES (?,?,?,?,?,?,?,?)".
                    format(aTable), toput)
                aManager.queue.task_done()
        except queue.Empty:
            pass

    @staticmethod
    def __stripSource(payload):
        """Return a copy of *payload* with the full-source field set to None."""
        fields = payload[1]
        return (payload[0], (fields[0], fields[1], fields[2], fields[3],
                             fields[4], fields[5], None))

    def __routeByLabel(self, payload):
        """Queue *payload* on the predicter or learner pool by its label.

        Payloads carrying any other label are dropped.
        """
        if payload[0] == self.predictLabel:
            self.predicterManager.put(payload)
        elif payload[0] == self.learnLabel:
            self.learnerManager.put(payload)

    @staticmethod
    def __parsePubDate(aPubDate):
        """Convert a publication date string to ``time.mktime`` seconds.

        Two formats are tried in turn; a later successful parse overrides an
        earlier one.  Returns None when neither format matches.
        """
        tse = None
        try:
            tse = time.mktime(
                time.strptime(aPubDate, "%a, %d %b %Y %H:%M:%S %Z"))
        except (ValueError, TypeError, OverflowError):
            pass
        try:
            # Colons are removed so that an offset like "+00:00" fits %z.
            tse = time.mktime(
                time.strptime(aPubDate.replace(":", ""),
                              "%Y-%m-%dT%H%M%S%z"))
        except (ValueError, TypeError, AttributeError, OverflowError):
            pass
        return tse

    def __predicterWorker(self, aQueue, aStopToken):
        """Pool callback: compute and store the prediction for queued items.

        An unknown article is handed to the parser pool.  An article without
        a stored prediction gets a TF-IDF-weighted blend of its words'
        predictions; a learn-labelled payload is then passed to the learner.
        """
        while True:
            item = aQueue.get()
            payload = item[1]
            if payload == aStopToken:
                break
            newsid = payload[1][5]
            newsitem = self.__getNewsItemByID(newsid)
            if newsitem is None:
                self.parserManager.put(payload)
            elif newsitem[7] is None:
                textinfo = self.__getTextInformation(newsitem[6])
                # __getTextInformation always provides the master word entry.
                masterinfo = textinfo[self.masterWord]
                wordcount = 0
                for word in textinfo:
                    wordcount += textinfo[word][0]
                weights = {}
                weighttotal = 0.0
                for word in textinfo:
                    if word != self.masterWord:
                        tf = float(textinfo[word][0]) / float(wordcount)
                        # master DocumentCount over this word's DocumentCount
                        idf = log(1.0 + (float(masterinfo[1][1]) /
                                         (1 + float(textinfo[word][1][1]))))
                        tfidf = tf * idf
                        weights[word] = tfidf
                        weighttotal += tfidf
                mastervaltime = self.__getEmptyPrediction()
                if weighttotal != 0:
                    for word in weights:
                        valtime = ValueTimes.loads(textinfo[word][1][3])
                        if weights[word] > 0 and not valtime.isZero():
                            # Scaled by 128 while summing, undone afterwards.
                            mastervaltime = mastervaltime + valtime * (
                                (128.0 * weights[word]) / weighttotal)
                    mastervaltime = mastervaltime * (1 / 128.0)
                self.bowidfdbconnection.execute(
                    "UPDATEARTICLE {0} SET Prediction=? WHERE NewsID=?".format(
                        self.articleTable), (
                            mastervaltime,
                            newsid,
                        ),
                    block=True)
                if payload[0] == self.learnLabel:
                    self.learnerManager.put(payload)
            aQueue.task_done()
        # Account for the stop token itself.
        aQueue.task_done()

    def __learnerWorker(self, aQueue, aStopToken):
        """Pool callback: update word statistics from realised market data.

        An unknown article goes to the parser pool; an article without a
        usable prediction goes to the predicter pool; when the finance client
        has no data yet the request is parked on ``longlearnqueue``.
        """
        while True:
            item = aQueue.get()
            payload = item[1]
            if payload == aStopToken:
                break
            fields = payload[1]
            newsitem = self.__getNewsItemByID(fields[5])
            if newsitem is None:
                self.parserManager.put(payload)
            elif newsitem[7] is None or len(newsitem[7]) < 5:
                self.predicterManager.put(self.__stripSource(payload))
            elif newsitem[8] is None:
                data = self.intradayconnection.getData(fields[1], newsitem[3])
                if data is None:
                    self.longlearnqueue.put(self.__stripSource(payload))
                else:
                    tokens = self.__tokenize(newsitem[6])
                    self.bowidfdbconnection.execute("UPDATEWORDS",
                                                    (tokens, data),
                                                    block=True)
                    self.bowidfdbconnection.execute(
                        "UPDATE {0} SET Actual=? WHERE NewsID=?".format(
                            self.articleTable), (
                                data.dumps(),
                                fields[5],
                            ))
            aQueue.task_done()
        # Account for the stop token itself.
        aQueue.task_done()

    def __parserWorker(self, aQueue, aStopToken):
        """Pool callback: extract article text and store it as an article row.

        Articles already in the database are forwarded straight to the pool
        named by the payload label.
        """
        config = MyConfig()
        while True:
            item = aQueue.get()
            payload = item[1]
            if payload == aStopToken:
                break
            fields = payload[1]
            newsitem = self.__getNewsItemByID(fields[5])
            if newsitem is None:
                print("Parsing " + fields[5])
                tse = self.__parsePubDate(fields[3])
                # Check the timestamp first so the finance client is never
                # asked about an unparseable (None) publication date.
                if tse is not None and self.intradayconnection.hasProperStart(
                        fields[1], tse):
                    for _ in range(3):
                        try:
                            nparticle = newspaper.article.Article(
                                fields[4], config=config)
                            nparticle.download(html=fields[6])
                            nparticle.parse()
                            text = nparticle.text
                            # Keep only texts that mention the ticker.
                            if (text is not None and len(text) > 40
                                    and fields[1] in text):
                                rowvalues = (fields[0], fields[1], fields[2],
                                             tse, fields[4], fields[5], text,
                                             None, None, None)
                                self.bowidfdbconnection.execute(
                                    "INSERT INTO {0} VALUES (?,?,?,?,?,?,?,?,?,?)"
                                    .format(self.articleTable),
                                    rowvalues,
                                    block=True)
                                self.__routeByLabel(
                                    self.__stripSource(payload))
                            # NOTE(review): assumed to sit at try level, i.e.
                            # no retry after a clean parse -- confirm.
                            break
                        except Exception:
                            # Best effort: any failure just triggers the next
                            # of the three attempts.
                            pass
            else:
                self.__routeByLabel(self.__stripSource(payload))
            aQueue.task_done()
        # Account for the stop token itself.
        aQueue.task_done()

    def __getNewsItemByID(self, aNewsID):
        """Return the article row for *aNewsID*, or None when there is none."""
        newsitemlist = self.bowidfdbconnection.execute(
            "SELECT * FROM {0} WHERE NewsID=? LIMIT 1".format(
                self.articleTable), (aNewsID, ))
        if not newsitemlist:
            return None
        return newsitemlist[0]

    def __getTextInformation(self, text):
        """Map each token of *text* to ``(count in text, word row)``.

        Words missing from the word table get a zeroed row with an empty
        prediction.  The master word is always present, with count 0.
        """
        textinformation = {}
        wordcounts = self.__tokenize(text)
        wordlist = list(wordcounts.keys())
        wordlist.append(self.masterWord)
        # Query in blocks to bound the number of SQL parameters per statement.
        blocksize = 500
        for i in range(math.ceil(len(wordlist) / blocksize)):
            curparam = tuple(wordlist[i * blocksize:(i + 1) * blocksize])
            sqlstatement = "SELECT * FROM {0} WHERE".format(
                self.wordTable) + " Word=? OR " * (len(curparam) -
                                                   1) + " Word=? "
            for wordrow in self.bowidfdbconnection.execute(
                    sqlstatement, curparam):
                if wordrow[0] == self.masterWord:
                    textinformation[wordrow[0]] = (0, wordrow)
                else:
                    textinformation[wordrow[0]] = (wordcounts[wordrow[0]],
                                                   wordrow)
        for word in wordcounts:
            if word not in textinformation:
                textinformation[word] = (wordcounts[word],
                                         (word, 0, 0,
                                          self.__getEmptyPrediction().dumps()))
        if self.masterWord not in textinformation:
            textinformation[self.masterWord] = (0, (
                self.masterWord, 0, 0, self.__getEmptyPrediction().dumps()))
        return textinformation

    def tokenize(self, text):
        """Public wrapper around the internal tokenizer."""
        return self.__tokenize(text)

    def __tokenize(self, text):
        """Return ``{stemmed lowercase word: count}`` for *text*.

        Stop words and tokens that are not purely alphabetic are skipped.
        """
        ret = {}
        # NOTE(review): ``[.]`` matches a literal dot only, so this removes
        # spans like "(..:..)"; ``.`` may have been intended.  The pattern is
        # kept as is to avoid changing tokenisation of already-learned data.
        textwot = re.sub(r"\([.]+:[.]+\)", " ", text, flags=re.DOTALL)
        wordlist = nltk.tokenize.word_tokenize(textwot)
        stemmer = nltk.stem.snowball.EnglishStemmer()
        stopwords = set(nltk.corpus.stopwords.words('english'))
        for word in wordlist:
            try:
                word = stemmer.stem(word).lower()
                if word and word not in stopwords and word.isalpha():
                    ret[word] = ret.get(word, 0) + 1
            except Exception:
                # Best effort: a token the stemmer cannot handle is skipped.
                pass
        return ret

    def __moveLongLearn2Regular(self):
        """Move every parked learn request back onto the learner pool."""
        try:
            while True:
                newsitem = self.longlearnqueue.get(False)
                self.learnerManager.put(newsitem)
        except queue.Empty:
            pass

    def __getEmptyPrediction(self):
        """Return a ``ValueTimes`` with 39 zero entries keyed 0, 1800, ..."""
        return ValueTimes({i * 30 * 60: 0 for i in range(39)})
def __init__(self,
             aCompanyInformationList,
             aBoWIDFDatabaseName,
             aIDFinanceClientIntelligent,
             LearningThreads=3,
             PredictingThreads=2,
             ParsingThreads=4):
    """Open the database, create missing tables and start the worker pools.

    Args:
        aCompanyInformationList: stored as ``companyinformationlist``.
        aBoWIDFDatabaseName: passed to ``TSSQLConnection``.
        aIDFinanceClientIntelligent: stored as ``intradayconnection``.
        LearningThreads: thread count handed to the learner pool.
        PredictingThreads: thread count handed to the predicter pool.
        ParsingThreads: thread count handed to the parser pool.
    """
    self.status = "OPEN"
    self.companyinformationlist = aCompanyInformationList
    db = TSSQLConnection(aBoWIDFDatabaseName)
    self.bowidfdbconnection = db
    for attach in (UpdateWord, UpdateArticle, UpdateWords):
        attach(db)
    self.intradayconnection = aIDFinanceClientIntelligent

    # Word statistics table.
    words_sql = """CREATE TABLE IF NOT EXISTS {0}( Word text UNIQUE, DocumentCount int, TotalCount int, Prediction text)"""
    db.execute(words_sql.format(self.wordTable), block=True)

    # Parsed articles table.
    articles_sql = """CREATE TABLE IF NOT EXISTS {0}( Service text, Ticker text, Title text, PubDate int, Link text, NewsID text UNIQUE, Content text, Prediction text, Actual text, PCount int)"""
    db.execute(articles_sql.format(self.articleTable), block=True)

    # The three persisted work queues share one schema; they are created in
    # the order predict, learn, parse.
    queue_sql = """CREATE TABLE IF NOT EXISTS {0}( Service text, Ticker text, Title text, PubDate text, Link text, NewsID text UNIQUE, FullContent text, Caller text)"""
    for queue_table in (self.predictQueueTable, self.learnQueueTable,
                        self.parseQueueTable):
        db.execute(queue_sql.format(queue_table), block=True)

    self.longlearnqueue = queue.Queue()
    # Hourly timer that invokes __moveLongLearn2Regular.
    self.longlearntimer = IntervalTimer(60 * 60,
                                        self.__moveLongLearn2Regular)

    self.predicterManager = ThreadManager(self.predictLabel,
                                          PredictingThreads,
                                          self.__predicterWorker)
    self.learnerManager = ThreadManager(self.learnLabel, LearningThreads,
                                        self.__learnerWorker)
    self.parserManager = ThreadManager(self.parseLabel, ParsingThreads,
                                       self.__parserWorker)

    # Load the persisted queue tables into the pools, then start the timer.
    self.__tableQueues2Managers()
    self.longlearntimer.start()
def Stop(self):
    """Stop the thread named ``self.m_Name`` and close the socket, if any."""
    ThreadManager.GetThreadManager().StopThread(self.m_Name)
    sock = self.m_socket
    if sock is None:
        # No socket to close.
        return
    sock.close()
    self.m_socket = None
def __init__(self):
    """Create and start the worker thread that runs ``self.WatchdogProc``.

    The thread is registered with the thread manager under a fixed name and
    the value returned by ``CreateThread`` is kept in ``self.__WorkerThread``.
    """
    thread_name = "Watchdog Timer Thread"
    # Register first, then start: the manager looks the thread up by name.
    created = ThreadManager.GetThreadManager().CreateThread(
        thread_name, self.WatchdogProc)
    self.__WorkerThread = created
    ThreadManager.GetThreadManager().StartThread(thread_name)