def __init__(self, cId, socket, address):
    """Store the identity and connection of a newly connected working node.

    address is indexed at 0 and 1; both parts are rendered as "a:b" and
    wrapped by logger.formatBrackets for use in log messages.
    """
    self.id = cId
    self.socket = socket
    self.address = address
    self.isActive = True

    endpoint = ":".join([str(address[0]), str(address[1])])
    self.formattedAddr = logger.formatBrackets(endpoint)

    logger.log(logging.INFO, "Working node connected " + self.formattedAddr)
def main():
    """Client entry point: read 'config', connect to the server and run until inactive."""
    # config
    settings = ConfigParser.RawConfigParser(allow_no_value=True)
    settings.read('config')
    host = settings.get('client', 'hostAddr')
    port = settings.getint('client', 'hostPort')
    logPath = settings.get('common', 'logPath')
    # only the exact strings "True" / "true" switch verbose mode on
    verbose = settings.get('common', 'verbose') in ("True", "true")

    # setup
    logger.init(logPath, "client-" + str(datetime.datetime.now()))
    logger.debugFlag = verbose

    node = WorkingNode()
    node.connect(host, port)
    node.readConfig()
    node.run()

    # park the main thread for as long as the node reports itself active
    while node.isActive:
        time.sleep(0.5)

    node.disconnect()
    logger.log(logging.INFO, "Exiting. ByeBye")
def connectionHandler(self, socket, address):
    """Wrap an accepted connection in an SSClient and serve it until it goes inactive.

    The client is registered in self.clientDict under a fresh UUID and is
    always disconnected and unregistered on the way out.
    """
    global serverRunning

    clientID = uuid.uuid4()
    client = SSClient(clientID, socket, address)
    self.clientDict[clientID] = client

    # temp testing, could take a parameter from config
    if self.clientDict and not serverRunning:
        self.run()
        serverRunning = True

    try:
        client.sendConfig(self.configurationPayload)
        client.run()
        while client.isActive:
            time.sleep(0.3)
    except EOFError:
        # deliberately ignored; cleanup happens in the finally clause
        pass
    except:
        client.isActive = False
        logger.log(logging.ERROR, "\n" + traceback.format_exc())
    finally:
        client.disconnect()
        del self.clientDict[clientID]
def readSocket(self, timeOut=None):
    """Return the next unpickled message from the working node.

    Messages are separated by a fixed marker. Bytes received after the first
    marker are kept in self.data for the next call. Returns None when the
    client is (or becomes) inactive before a full message is available.
    """
    marker = "\n\n12345ZEEK6789\n"
    self.socket.settimeout(timeOut)

    pending = self.data
    # a complete message may already be buffered from a previous read
    if marker in pending:
        message, _, self.data = pending.partition(marker)
        return pickle.loads(message)

    message = None
    while self.isActive:
        chunk = self.socket.recv(buffSize)
        pending = pending + chunk
        if not chunk:
            # an empty read means the peer closed the connection
            logger.log(
                logging.INFO,
                logger.RED + self.formattedAddr + "Lost connection" + logger.NOCOLOR)
            self.isActive = False
        if marker in pending:
            message, _, self.data = pending.partition(marker)
            break

    if not self.isActive:
        return

    logger.log(
        logging.DEBUG,
        self.formattedAddr + "Receiving " + str(len(message)) + " bytes")
    return pickle.loads(message)
def readSocket(self, timeOut=None):
    """Return the next unpickled message from the server.

    Messages are separated by a fixed marker. Bytes received after the first
    marker are kept in self.data for the next call. Returns None when the
    node is (or becomes) inactive before a full message is available.
    """
    marker = "\n\n12345ZEEK6789\n"
    self.s.settimeout(timeOut)

    pending = self.data
    # a complete message may already be buffered from a previous read
    if marker in pending:
        message, _, self.data = pending.partition(marker)
        return pickle.loads(message)

    message = None
    while self.isActive:
        chunk = self.s.recv(buffSize)
        pending = pending + chunk
        if not chunk:
            # an empty read means the server closed the connection
            logger.log(logging.INFO, "\nLost connection to server " + self.masterNodeFormattedAddr)
            self.isActive = False
        if marker in pending:
            message, _, self.data = pending.partition(marker)
            break

    if not self.isActive:
        return

    logger.log(logging.DEBUG, "Receiving " + str(len(message)) + " bytes from server")
    return pickle.loads(message)
def connectionHandler(self, socket, address):
    """Wrap an accepted connection in an SSClient and serve it until it goes inactive.

    The client is registered in self.clientDict under a fresh UUID and is
    always disconnected and unregistered on the way out.
    """
    global serverRunning

    clientID = uuid.uuid4()
    client = SSClient(clientID, socket, address)
    self.clientDict[clientID] = client

    # temp testing, could take a parameter from config
    if self.clientDict and not serverRunning:
        self.run()
        serverRunning = True

    try:
        client.sendConfig(self.configurationPayload)
        client.run()
        while client.isActive:
            time.sleep(0.3)
    except EOFError:
        # deliberately ignored; cleanup happens in the finally clause
        pass
    except:
        client.isActive = False
        logger.log(logging.ERROR, "\n" + traceback.format_exc())
    finally:
        client.disconnect()
        del self.clientDict[clientID]
def main():
    """Server entry point: read 'config', start listening and wait until the server stops."""
    signal.signal(signal.SIGINT, handler)
    logger.printAsciiLogo()

    settings = ConfigParser.RawConfigParser(allow_no_value=True)
    settings.read('config')
    host = settings.get('server', 'listeningAddr')
    port = settings.getint('server', 'listeningPort')
    logPath = settings.get('common', 'logPath')
    # only the exact strings "True" / "true" switch verbose mode on
    verbose = settings.get('common', 'verbose') in ("True", "true")

    # logging
    logger.init(logPath, "server-" + str(datetime.datetime.now()))
    logger.debugFlag = verbose

    # server
    server = Server(host, port)
    server.setup()
    # accept connections in the background; the main thread only waits
    thread.start_new_thread(server.listen, ())

    while server.isActive:
        time.sleep(0.5)

    logger.log(logging.INFO, "Exiting. ByeBye")
def writeSocket(self, obj):
    """Pickle obj, append the message delimiter and send it to the working node.

    Raises:
        Exception: if serializing or sending fails.
    """
    try:
        # build the framed message once instead of concatenating it twice
        framed = pickle.dumps(obj) + delimiter
        logger.log(logging.DEBUG, self.formattedAddr + "Sending " + str(len(framed)) + " bytes")
        self.socket.sendall(framed)
    except Exception:
        # 'except Exception' (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a socket failure
        raise Exception("Unable to write to socket (client disconnected)")
def connectionHandler(self, socket, address):
    """Register a new working node, send it the configuration and serve it until inactive."""
    clientID = uuid.uuid4()
    client = SSClient(clientID, socket, address)
    self.clientDict[clientID] = client

    # temp testing
    if self.clientDict:
        self.run()

    for knownId in self.clientDict:
        logger.log(logging.DEBUG, "Working node connected : " + str(self.clientDict[knownId].id))

    try:
        client.sendConfig()
        client.run()
        while client.isActive:
            time.sleep(1)
    except EOFError:
        # deliberately ignored; cleanup happens in the finally clause
        pass
    except:
        logger.log(logging.ERROR, "\n" + traceback.format_exc())
    finally:
        client.disconnect()
        del self.clientDict[clientID]
def readSocket(self, timeOut=None):
    """Return the next unpickled message from the working node.

    Messages are separated by a fixed marker. Bytes received after the first
    marker are kept in self.data for the next call. Returns None when the
    client is (or becomes) inactive before a full message is available.
    """
    marker = "\n\n12345ZEEK6789\n"
    self.socket.settimeout(timeOut)

    pending = self.data
    # a complete message may already be buffered from a previous read
    if marker in pending:
        message, _, self.data = pending.partition(marker)
        return pickle.loads(message)

    message = None
    while self.isActive:
        chunk = self.socket.recv(buffSize)
        pending = pending + chunk
        if not chunk:
            # an empty read means the peer closed the connection
            logger.log(logging.INFO, logger.RED + self.formattedAddr + "Lost connection" + logger.NOCOLOR)
            self.isActive = False
        if marker in pending:
            message, _, self.data = pending.partition(marker)
            break

    if not self.isActive:
        return

    logger.log(logging.DEBUG, self.formattedAddr + "Receiving " + str(len(message)) + " bytes")
    return pickle.loads(message)
def writeSocket(self, obj):
    """Pickle obj and send it to the working node.

    Raises:
        Exception: if serializing or sending fails.
    """
    try:
        logger.log(logging.DEBUG, "Write " + self.formattedAddr)
        serializedObj = pickle.dumps(obj)
        # sendall keeps writing until the whole pickle is out; plain send()
        # may transmit only a prefix and leave the peer with a truncated object
        self.socket.sendall(serializedObj)
    except Exception:
        raise Exception("Error writting")
def run(self):
    """Launches the main threads (output, input, interpreting, crawling) if the node is active"""
    logger.log(logging.INFO, "\n\nStarting Crawling/Scrapping sequence...")
    if not self.isActive:
        return

    # started in this order, one thread per worker method
    for workerName in ("outputThread", "inputThread", "interpretingThread", "crawlingThread"):
        thread.start_new_thread(getattr(self, workerName), ())
def disconnect(self):
    """Marks the client inactive and closes its socket; does nothing if already closed"""
    if self.socket is None:
        return

    logger.log(logging.INFO, logger.RED + self.formattedAddr + "Disconnecting" + logger.NOCOLOR)
    self.isActive = False
    self.socket.close()
    # cleared so that a second call is a no-op
    self.socket = None
def run(self):
    """Starts the output, input, interpreting and crawling threads when the node is active"""
    logger.log(logging.INFO, "\n\nStarting Crawling/Scrapping sequence...")
    if not self.isActive:
        return

    # started in this order, one thread per worker method
    for workerName in ("outputThread", "inputThread", "interpretingThread", "crawlingThread"):
        thread.start_new_thread(getattr(self, workerName), ())
def writeSocket(self, obj):
    """Pickle obj, append the message delimiter and send it to the working node.

    Raises:
        Exception: if serializing or sending fails.
    """
    try:
        # build the framed message once instead of concatenating it twice
        framed = pickle.dumps(obj) + delimiter
        logger.log(
            logging.DEBUG,
            self.formattedAddr + "Sending " + str(len(framed)) + " bytes")
        self.socket.sendall(framed)
    except Exception:
        # 'except Exception' (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a socket failure
        raise Exception("Unable to write to socket (client disconnected)")
def urlDispatcher(self):
    """Moves every object taken from urlPool onto urlToVisit while the server is active"""
    logger.log(logging.INFO, "Starting urlDispatcher")
    while self.isActive:
        # blocking get: waits until something is available in the pool
        pooled = urlPool.get(True)
        # if not visited
        # verification
        urlToVisit.put(pooled)
def writeSocket(self, obj):
    """Pickle obj, append the message delimiter and send it to the server.

    Any failure is logged with its traceback and re-raised as a generic
    Exception.
    """
    try:
        framed = pickle.dumps(obj) + delimiter
        logger.log(logging.DEBUG, "Sending " + str(len(framed)) + " bytes to server")
        self.s.sendall(framed)
    except:
        logger.log(logging.CRITICAL, traceback.format_exc())
        raise Exception("Unable to write to socket (lost connection to server)")
def readSocket(self, timeOut=None):
    """Read one chunk (up to buffSize) from the working node and unpickle it.

    An empty read marks the client inactive; the empty chunk is still handed
    to pickle.loads, exactly as before.
    """
    self.socket.settimeout(timeOut)
    received = self.socket.recv(buffSize)

    connectionBroken = not received
    if connectionBroken:
        logger.log(logging.INFO, "Lost connection - Working node " + self.formattedAddr)
        self.isActive = False

    # NOTE(review): unpickling an empty chunk is expected to raise EOFError,
    # which callers appear to rely on - confirm before changing
    return pickle.loads(received)
def dispatcher(self, packet):
    """Route a packet to the queue matching its type.

    INFO packets go to self.infoQueue, URL packets to self.urlToVisit.
    Packets of any other type are logged as CRITICAL and dropped.
    """
    # '==' instead of 'is': packet types are values, and identity comparison
    # only works by accident (e.g. small-int / string interning)
    if packet.type == protocol.INFO:
        self.infoQueue.put(packet)
    elif packet.type == protocol.URL:
        self.urlToVisit.put(packet)
    else:
        logger.log(logging.CRITICAL, "Unrecognized packet type : " + str(packet.type) + ". This packet was dropped")
        return

    logger.log(logging.DEBUG, "Dispatched packet of type: " + str(packet.type))
def disconnect(self):
    """Marks the client inactive and closes its socket; does nothing if already closed"""
    if self.socket is None:
        return

    logger.log(
        logging.INFO,
        logger.RED + self.formattedAddr + "Disconnecting" + logger.NOCOLOR)
    self.isActive = False
    self.socket.close()
    # cleared so that a second call is a no-op
    self.socket = None
def readSocket(self, timeOut=None):
    """Read one chunk (up to buffSize) from the server and unpickle it.

    An empty read marks the node inactive; the empty chunk is still handed
    to pickle.loads, exactly as before.
    """
    self.s.settimeout(timeOut)
    received = self.s.recv(buffSize)

    connectionBroken = not received
    if connectionBroken:
        logger.log(logging.INFO, "Lost connection to server " + self.masterNodeFormattedAddr)
        self.isActive = False

    # NOTE(review): unpickling an empty chunk is expected to raise EOFError,
    # which callers appear to rely on - confirm before changing
    return pickle.loads(received)
def __init__(self, cId, socket, address):
    """Set up the server-side state for a newly connected working node.

    address is indexed at 0 and 1; both parts are rendered as "a:b", wrapped
    by logger.formatBrackets and followed by a space for log prefixes.
    """
    self.id = cId
    self.socket = socket
    self.address = address
    self.isActive = True

    endpoint = ":".join([str(address[0]), str(address[1])])
    self.formattedAddr = logger.formatBrackets(endpoint) + " "

    self.sentCount = 0
    self.data = ""
    self.configuration = None

    logger.log(logging.INFO, logger.GREEN + self.formattedAddr + "Working node connected" + logger.NOCOLOR)
def setup(self, configuration):
    """Creates the listening socket on (self.host, self.port) and stores the configuration"""
    logger.log(logging.DEBUG, "Socket initialization")

    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.s = listener
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind((self.host, self.port))
    listener.listen(5)

    endpoint = "[" + str(self.host) + ":" + str(self.port) + "]"
    logger.log(logging.INFO, "Listening on " + endpoint)

    self.configurationPayload = configuration
    self.requestLimit = configuration.config.requestLimit
def shoot(cid):
    """Take a picture with fswebcam and upload it.

    The image is written to <script dir>/output/<cid>-<unix time>.jpg and
    then POSTed as the 'file' field to UPL_URL with the group id as query
    parameter.

    Raises:
        Exception: any error from the capture or the upload is logged and
            re-raised unchanged.
    """
    filename = "%s/output/%s-%d.jpg" % (os.path.dirname(os.path.realpath(__file__)), cid, int(time.time()))
    logger.log("filename: %s" % filename)
    try:
        subprocess.call(["fswebcam", "-r", RESOLUTIONS[RES_USE], "--no-banner", filename])
        # 'with' closes the image handle even when the upload fails
        with open(filename, 'rb') as image:
            requests.post(UPL_URL + '?id=' + GROUPID, files={'file': image})
    except Exception as e:
        logger.log(e)
        # bare raise keeps the original traceback
        raise
def listen(self):
    """Accepts connections and hands each one to connectionHandler in a new thread"""
    print("- - - - - - - - - - - - - - -")
    logger.log(logging.INFO, "Waiting for working nodes to connect...")

    while self.isActive:
        try:
            nodeSocket, nodeAddress = self.s.accept()
            thread.start_new_thread(self.connectionHandler, (nodeSocket, nodeAddress))
        except:
            # any failure while accepting stops the server
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def inputThread(self):
    """Reads packets from the client and dispatches them until the client goes inactive"""
    logger.log(logging.DEBUG, self.formattedAddr + "Listening for packets")
    while self.isActive:
        try:
            self.dispatcher(self.readSocket())
        except EOFError:
            # Fixes the pickle error if clients disconnects
            self.isActive = False
def outputThread(self):
    """Sends every site taken from urlToVisit to the working node as a URL packet"""
    while self.isActive:
        try:
            site = urlToVisit.get(True)
            packet = protocol.Packet(protocol.URL, protocol.URLPayload(site))
            self.writeSocket(packet)
            logger.log(logging.DEBUG, "Sending obj of type " + str(packet.type) + " to " + self.formattedAddr)
        except:
            # any failure ends this client's output loop
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def setup(self, configuration):
    """Creates the listening socket on (self.host, self.port) and stores the configuration"""
    logger.log(logging.DEBUG, "Socket initialization")

    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.s = listener
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind((self.host, self.port))
    listener.listen(5)

    endpoint = "[" + str(self.host) + ":" + str(self.port) + "]"
    logger.log(logging.INFO, "Listening on " + endpoint)

    self.configurationPayload = configuration
    self.requestLimit = configuration.config.requestLimit
def __init__(self, cId, socket, address):
    """Set up the server-side state for a newly connected working node.

    address is indexed at 0 and 1; both parts are rendered as "a:b", wrapped
    by logger.formatBrackets and followed by a space for log prefixes.
    """
    self.id = cId
    self.socket = socket
    self.address = address
    self.isActive = True

    endpoint = ":".join([str(address[0]), str(address[1])])
    self.formattedAddr = logger.formatBrackets(endpoint) + " "

    self.sentCount = 0
    self.data = ""
    self.configuration = None

    logger.log(
        logging.INFO,
        logger.GREEN + self.formattedAddr + "Working node connected" + logger.NOCOLOR)
def inputThread(self):
    """Reads packets from the socket and dispatches them until the node goes inactive"""
    logger.log(logging.DEBUG, "InputThread started")
    while self.isActive:
        try:
            self.dispatcher(self.readSocket())
        except EOFError:
            self.isActive = False
        except:
            # anything unexpected is logged and also stops the node
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def outputThread(self):
    """Sends queued packets to the client, pausing while more than 5 are unanswered"""
    while self.isActive:
        # throttle: wait until sentCount drops back to 5 or fewer
        if self.sentCount > 5:
            time.sleep(0.03)
            continue

        for packet in protocol.deQueue([outputQueue]) or ():
            self.writeSocket(packet)
            self.sentCount += 1
            logger.log(logging.DEBUG, self.formattedAddr + "Sending URL " + str(packet.payload.urlList[0]))
def inputThread(self):
    """Reads packets from the server and dispatches them until the node goes inactive"""
    logger.log(logging.DEBUG, "InputThread started")
    while self.isActive:
        try:
            self.dispatcher(self.readSocket())
        except EOFError:
            self.isActive = False
        except:
            # anything unexpected is logged and also stops the node
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def urlDispatcher(self):
    """Takes URLs from urlPool and forwards each not-yet-visited one to urlToVisit"""
    logger.log(logging.INFO, "Starting server urlDispatcher")
    while self.isActive:
        try:
            url = urlPool.get(True)
            if url in urlVisited:
                continue

            # mark before queueing so the same URL is never queued twice
            urlVisited[url] = True
            #logic if static crawling will come here
            urlToVisit.put(url)
            scrappedURLlist.append(url)
        except:
            logger.log(logging.ERROR, "\n" + traceback.format_exc())
def listen(self):
    """Accepts new clients and launches a connectionHandler thread for each of them"""
    print("- - - - - - - - - - - - - - -")
    logger.log(logging.INFO, "Waiting for working nodes to connect...")

    while self.isActive:
        try:
            nodeSocket, nodeAddress = self.s.accept()
            thread.start_new_thread(self.connectionHandler, (nodeSocket, nodeAddress))
        except:
            # any failure while accepting stops the server
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def outputThread(self):
    """Sends queued packets to the client, pausing while more than 5 are unanswered"""
    while self.isActive:
        # throttle: wait until sentCount drops back to 5 or fewer
        if self.sentCount > 5:
            time.sleep(0.03)
            continue

        for packet in protocol.deQueue([outputQueue]) or ():
            self.writeSocket(packet)
            self.sentCount += 1
            logger.log(
                logging.DEBUG,
                self.formattedAddr + "Sending URL " + str(packet.payload.urlList[0]))
def storageRoutine(self):
    """Writes every session taken from sessionStorageQueue to file while the server is active"""
    logger.log(logging.INFO, "Starting server storageRoutine")
    while self.isActive:
        try:
            for session in protocol.deQueue([sessionStorageQueue]) or ():
                storage.writeToFile(session, session.dataContainer)
        except:
            # storage errors are logged; the routine keeps running
            logger.log(logging.ERROR, "\n" + traceback.format_exc())
def urlDispatcher(self):
    """Takes URLs from urlPool and forwards each not-yet-visited one to urlToVisit"""
    logger.log(logging.INFO, "Starting server urlDispatcher")
    while self.isActive:
        try:
            url = urlPool.get(True)
            if url in urlVisited:
                continue

            # mark before queueing so the same URL is never queued twice
            urlVisited[url] = True
            #logic if static crawling will come here
            urlToVisit.put(url)
            scrappedURLlist.append(url)
        except:
            logger.log(logging.ERROR, "\n" + traceback.format_exc())
def __init__(self):
    """
    Initiates variables for account balances and stocks data.

    Loads the configuration and logger, requests tokens 1 and 2, then stores
    the (balance, data) pair returned for the RRSP, TFSA and TFSA2 accounts.
    """
    self.config = config.load()
    self.logger = logger.log()

    for tokenIndex in (1, 2):
        self.get_token(tokenIndex)

    rrspBalance, rrspData = self.questradeRRSP()
    self.RRSPbalance, self.RRSPdata = rrspBalance, rrspData

    tfsaBalance, tfsaData = self.questradeTFSA()
    self.TFSAbalance, self.TFSAdata = tfsaBalance, tfsaData

    tfsa2Balance, tfsa2Data = self.questradeTFSA2()
    self.TFSA2balance, self.TFSA2data = tfsa2Balance, tfsa2Data
def crawlingThread(self):
    """Visits the URLs queued in urlToVisit and queues the result packets for the server"""
    logger.log(logging.DEBUG, "CrawlingThread started")

    settings = self.config
    self.scrapper = scrapping.Scrapper(settings.userAgent,
                                       settings.robotParserEnabled,
                                       settings.domainRestricted,
                                       settings.crawling)

    while self.isActive:
        try:
            urlList = protocol.deQueue([self.urlToVisit])
            if not urlList:
                time.sleep(0.2)  #temp - For testing
                continue

            for url in urlList:
                session = self.scrapper.visit(url)
                summary = "".join([
                    "Session \n", str(session.url),
                    "\nCode : ", str(session.returnCode),
                    "\nRequest time : ", str(session.requestTime),
                    "\nBs time : ", str(session.bsParsingTime)])
                logger.log(logging.DEBUG, summary)

                if session.failed:
                    # failed visit: report the URL as skipped
                    logger.log(logging.INFO, "Skipping URL : " + url)
                    payload = protocol.URLPayload(
                        [url], protocol.URLPayload.SKIPPED, session)
                    self.outputQueue.put(protocol.Packet(protocol.URL, payload))
                    continue

                # dynamic crawling also reports the URLs found on the page
                if self.crawlingType == protocol.ConfigurationPayload.DYNAMIC_CRAWLING:
                    payload = protocol.URLPayload(
                        session.scrappedURLs, protocol.URLPayload.SCRAPPED_URL)
                    self.outputQueue.put(protocol.Packet(protocol.URL, payload))

                payload = protocol.URLPayload(
                    [url], protocol.URLPayload.VISITED, session=session)
                self.outputQueue.put(protocol.Packet(protocol.URL, payload))
        except:
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def storageRoutine(self):
    """Writes every session taken from sessionStorageQueue to file while the server is active"""
    logger.log(logging.INFO, "Starting server storageRoutine")
    while self.isActive:
        try:
            for session in protocol.deQueue([sessionStorageQueue]) or ():
                storage.writeToFile(session, session.dataContainer)
        except:
            # storage errors are logged; the routine keeps running
            logger.log(logging.ERROR, "\n" + traceback.format_exc())
def inputThread(self):
    """Read packets from the working node until it goes inactive.

    URL packets have their urlList put on urlPool. An INFO packet raises an
    Exception, which (like any other unexpected error) is logged and
    deactivates the client.
    """
    logger.log(logging.DEBUG, "Listening for packets " + self.formattedAddr)
    while self.isActive:
        try:
            obj = self.readSocket()
            # '==' instead of 'is': packet types are values, and identity
            # comparison only works by accident (interning)
            if obj.type == protocol.INFO:
                print("PACKET INFO")
                # ie : Treat end of crawl
                raise Exception("INFO PACKET RECEIVED")
            elif obj.type == protocol.URL:
                urlPool.put(obj.payload.urlList)
            time.sleep(1)
        except EOFError:
            self.isActive = False
        except:
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def dispatcher(self, packet):
    """Dispatches packets to the right packet queue

    INFO packets are put on self.infoQueue; every URL of a URL packet is put
    on self.urlToVisit. A None packet is ignored and packets of an unknown
    type are logged as CRITICAL and dropped.
    """
    if packet is None:
        return

    if packet.type == protocol.INFO:
        logger.log(logging.DEBUG, "Dispatching INFO packet")
        self.infoQueue.put(packet)
    elif packet.type == protocol.URL:
        urlList = packet.payload.urlList
        # an empty list used to raise IndexError on urlList[0]
        firstUrl = str(urlList[0]) if urlList else ""
        logger.log(logging.DEBUG, "Dispatching url packet : " + firstUrl)
        for site in urlList:
            self.urlToVisit.put(site)
    else:
        logger.log(logging.CRITICAL, "Unrecognized packet type : " + str(packet.type) + ". This packet was dropped")
        return

    logger.log(logging.DEBUG, "Dispatched packet of type: " + str(packet.type))
def readConfig(self):
    """Wait for the CONFIG packet from the server and acknowledge it.

    Stores the crawling type from the payload and answers with a CLIENT_ACK
    INFO packet. Any failure (including a packet of another type) is logged
    and deactivates the node.
    """
    logger.log(logging.DEBUG, "Waiting for configuration from the server.")
    if not self.isActive:
        return

    try:
        deserializedPacket = self.readSocket()
        # '==' instead of 'is': packet types are values, not singletons
        if deserializedPacket.type == protocol.CONFIG:
            self.crawlingType = deserializedPacket.payload.crawlingType

            payload = protocol.InfoPayload(protocol.InfoPayload.CLIENT_ACK)
            packet = protocol.Packet(protocol.INFO, payload)
            self.writeSocket(packet)

            logger.log(logging.DEBUG, "Configuration received.")
            logger.log(logging.DEBUG, "Sending ACK for configuration.")
        else:
            raise Exception("Unable to parse configuration.")
    except:
        logger.log(logging.CRITICAL, traceback.format_exc())
        self.isActive = False
def crawlingThread(self):
    """Visits the URLs queued in urlToVisit and queues the result packets for the server"""
    logger.log(logging.DEBUG, "CrawlingThread started")

    settings = self.config
    self.scrapper = scrapping.Scrapper(settings.userAgent,
                                       settings.robotParserEnabled,
                                       settings.domainRestricted,
                                       settings.crawling)

    while self.isActive:
        try:
            urlList = protocol.deQueue([self.urlToVisit])
            if not urlList:
                time.sleep(0.2)  #temp - For testing
                continue

            for url in urlList:
                session = self.scrapper.visit(url)
                summary = "".join([
                    "Session \n", str(session.url),
                    "\nCode : ", str(session.returnCode),
                    "\nRequest time : ", str(session.requestTime),
                    "\nBs time : ", str(session.bsParsingTime)])
                logger.log(logging.DEBUG, summary)

                if session.failed:
                    # failed visit: report the URL as skipped
                    logger.log(logging.INFO, "Skipping URL : " + url)
                    payload = protocol.URLPayload([url], protocol.URLPayload.SKIPPED, session)
                    self.outputQueue.put(protocol.Packet(protocol.URL, payload))
                    continue

                # dynamic crawling also reports the URLs found on the page
                if self.crawlingType == protocol.ConfigurationPayload.DYNAMIC_CRAWLING:
                    payload = protocol.URLPayload(session.scrappedURLs, protocol.URLPayload.SCRAPPED_URL)
                    self.outputQueue.put(protocol.Packet(protocol.URL, payload))

                payload = protocol.URLPayload([url], protocol.URLPayload.VISITED, session=session)
                self.outputQueue.put(protocol.Packet(protocol.URL, payload))
        except:
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def outputThread(self):
    """Sends every object taken from self.outputQueue over the socket"""
    logger.log(logging.DEBUG, "OutputThread started")
    while self.isActive:
        try:
            # blocking get: waits until something is queued
            outgoing = self.outputQueue.get(True)
            self.writeSocket(outgoing)
            logger.log(logging.DEBUG, "Sending obj of type " + str(outgoing.type))
        except:
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def outputThread(self):
    """Takes messages from self.outputQueue and sends them to the server"""
    logger.log(logging.DEBUG, "OutputThread started")
    while self.isActive:
        try:
            # blocking get: waits until something is queued
            outgoing = self.outputQueue.get(True)  #fix with helper method to prevent block
            self.writeSocket(outgoing)
            logger.log(logging.DEBUG, "Sending obj of type " + str(outgoing.type))
        except:
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def mainRoutine(self):
    """Queues the root URLs, then drives the crawl until it is complete.

    Dynamic crawling: forwards URLs from urlToVisit to outputQueue until the
    request limit (if non-zero) is exceeded. Static crawling: waits until
    every root URL has been reported as visited or skipped. Afterwards all
    clients are disconnected and the server is marked inactive.
    """
    logger.log(logging.INFO, "Starting server mainRoutine")

    staticMode = protocol.ConfigurationPayload.STATIC_CRAWLING
    dynamicMode = protocol.ConfigurationPayload.DYNAMIC_CRAWLING

    # seed the crawl with the configured root URLs
    for url in self.configurationPayload.config.rootUrls:
        seed = protocol.Packet(
            protocol.URL,
            protocol.URLPayload([str(url)], protocol.URLPayload.TOVISIT))
        urlVisited[url] = True
        outputQueue.put(seed)

        crawlDelay = self.configurationPayload.config.crawlDelay
        if self.configurationPayload.crawlingType == staticMode and crawlDelay != 0:
            time.sleep(crawlDelay)

    while self.isActive:
        try:
            crawlingType = self.configurationPayload.crawlingType
            if crawlingType == dynamicMode:
                url = urlToVisit.get(True)
                packet = protocol.Packet(
                    protocol.URL,
                    protocol.URLPayload([str(url)], protocol.URLPayload.TOVISIT))
                outputQueue.put(packet)
                self.requestCount += 1

                crawlDelay = self.configurationPayload.config.crawlDelay
                if crawlDelay != 0:
                    time.sleep(crawlDelay)

                # a request limit of 0 means "no limit"
                if self.requestLimit != 0 and len(visitedURLlist) + 1 > self.requestLimit:
                    break
            elif crawlingType == staticMode:
                handled = len(skippedURLlist) + len(visitedURLlist)
                if handled == len(self.configurationPayload.config.rootUrls):
                    break
                time.sleep(0.3)
        except:
            logger.log(logging.ERROR, "\n" + traceback.format_exc())

    logger.log(logging.INFO, "Scrapping complete. Terminating...")
    self.disconnectAllClient()
    self.isActive = False
def interpretingThread(self):
    """Logs the INFO packets found in self.infoQueue (packets other than type URL)"""
    logger.log(logging.DEBUG, "InterpretingThread started")
    while self.isActive:
        try:
            time.sleep(0.01)  #temp - For testing
            for packet in protocol.deQueue([self.infoQueue]) or ():
                if packet.type != protocol.INFO:
                    continue
                logger.log(logging.INFO, "Interpreting INFO packet : " + str(packet.payload.urlList))
        except:
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False
def sendConfig(self, configuration):
    """Sends the configuration to the client and waits (5 s timeout) for its ACK

    Raises:
        Exception: if the reply is missing, is not an INFO packet or does not
            carry CLIENT_ACK; the client is marked inactive first.
    """
    logger.log(logging.DEBUG, self.formattedAddr + "Sending configuration")
    self.configuration = configuration
    self.writeSocket(protocol.Packet(protocol.CONFIG, self.configuration))
    logger.log(logging.DEBUG, self.formattedAddr + "Configuration sent waiting for ACK")

    packet = self.readSocket(5)
    # the reply may be None (no packet could be read); treat that, and any
    # reply other than INFO/CLIENT_ACK, as a failed hand-shake
    acknowledged = (
        packet is not None
        and packet.type == protocol.INFO
        and packet.payload.info == protocol.InfoPayload.CLIENT_ACK)

    if acknowledged:
        logger.log(
            logging.DEBUG,
            self.formattedAddr + "Working node ACK received (configuration)")
        return

    self.isActive = False
    raise Exception("Unable to transmit configuration")
def run(self):
    """Starts the urlDispatcher, mainRoutine and storageRoutine threads"""
    logger.log(logging.DEBUG, "Starting beginCrawlingProcedure")
    # started in this order, one thread per routine
    for routineName in ("urlDispatcher", "mainRoutine", "storageRoutine"):
        thread.start_new_thread(getattr(self, routineName), ())
# reset the countdownThread
def reset():
    """Clear the module-level countdownThread handle by setting it to False."""
    global countdownThread
    countdownThread = False


if __name__ == "__main__":
    import time  # local import: only needed to idle the main loop below

    logger.createlogClient("testing", os.path.dirname(os.path.realpath(__file__)) + "/")

    # thread used to show the countdown and call back for the actual picture taking process
    countdownThread = False

    # button listener thread managing callbacks for different events (photo, shuffle, random)
    buttonThread = ButtonThread(cheese, BUTTONS, shuffle, 'SH', refresh, 'RF')
    buttonThread.daemon = True
    buttonThread.start()

    try:
        # sleep instead of spinning: 'while True: pass' keeps a CPU core busy
        while True:
            time.sleep(0.1)
    except Exception as e:
        logger.log(e)
        # bare raise keeps the original traceback
        raise
    finally:
        print("stopping")
        # countdownThread is False while no countdown is running; calling
        # .stop() on it would raise AttributeError and skip the cleanup below
        if countdownThread:
            countdownThread.stop()
        buttonThread.stop()
        GPIO.cleanup()
def connect(self, host, port):
    """Sets up the connection to the server, retrying on failure

    Up to 5 connection attempts are made, 3 seconds apart. When all of them
    fail, a CRITICAL message is logged and the process exits via sys.exit().
    """
    self.host = host
    self.port = port
    self.masterNodeFormattedAddr = "[" + str(self.host) + ":" + str(self.port) + "]"

    logger.log(logging.DEBUG, "Socket initialization")
    self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    for connectionAttempt in range(6, 0, -1):
        # the countdown reaching 1 means every attempt has failed
        if connectionAttempt == 1:
            logger.log(logging.CRITICAL, "Unable to connect to host " + self.masterNodeFormattedAddr)
            self.s.close()
            sys.exit()

        try:
            logger.log(logging.DEBUG, "Connecting to host... " + self.masterNodeFormattedAddr)
            self.s.connect((self.host, self.port))
            logger.log(logging.INFO, "Connected to " + self.masterNodeFormattedAddr)
            break
        except socket.error:
            logger.log(logging.INFO, "Connection failed to " + self.masterNodeFormattedAddr)
            logger.log(logging.INFO, "Retrying in 3 seconds.")
            # after a failed connect() the socket state is unspecified, so
            # retry on a fresh socket instead of reusing the old one
            self.s.close()
            self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            time.sleep(3)
def readConfig(self):
    """Reads the configuration from the server

    On a CONFIG packet: stores the crawling type and config, overwrites
    modules/rule.py and modules/scrapping.py (relative to the directory of
    sys.argv[0]) with the source text carried in the config, checks that both
    files compile, reloads modules.scrapping and answers with a CLIENT_ACK
    INFO packet. Any failure is logged as CRITICAL and deactivates the node.
    """
    logger.log(logging.DEBUG, "Waiting for configuration from the server.")
    if self.isActive:
        try:
            deserializedPacket = self.readSocket()
            logger.log(logging.DEBUG, "Configuration received.")
            if deserializedPacket.type == protocol.CONFIG:
                self.crawlingType = deserializedPacket.payload.crawlingType
                self.config = deserializedPacket.payload.config

                # dynamic module reload
                # NOTE(review): the code written below comes straight from the
                # network peer and is later reloaded - the server must be trusted
                basePath = os.path.dirname(sys.argv[0])
                if basePath:
                    basePath = basePath + "/"

                # path building
                rulePath = basePath + "modules/rule.py"
                scrappingPath = basePath + "modules/scrapping.py"

                # re-writing source .py
                logger.log(logging.INFO, "Importing rule.py from server")
                ruleFd = open(rulePath, 'w')
                ruleFd.write(self.config.rule_py)
                ruleFd.close()

                logger.log(logging.INFO, "Importing scrapping.py from server")
                scrappingFd = open(scrappingPath, 'w')
                scrappingFd.write(self.config.scrapping_py)
                scrappingFd.close()

                # compilation test
                # NOTE: sys.exit(0) below raises SystemExit inside the outer
                # try, whose bare except catches it - so the process does not
                # exit here; the node is marked inactive instead
                try:
                    code=open(rulePath, 'rU').read()
                    compile(code, "rule_test", "exec")
                except:
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    message = ''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))
                    logger.log(logging.CRITICAL, message)
                    logger.log(logging.ERROR, "Unable to compile rule.py (is the syntax right?)")
                    sys.exit(0)

                try:
                    code=open(scrappingPath, 'rb').read(os.path.getsize(scrappingPath))
                    compile(code, "scrapping_test", "exec")
                except:
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    message = ''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))
                    logger.log(logging.CRITICAL, message)
                    logger.log(logging.ERROR, "Unable to compile scrapping.py (is the syntax right?)")
                    sys.exit(0)

                # dynamic reload of modules
                # TODO reloading of rule.py should eventually come here
                logger.log(logging.INFO, "Reloading modules imported for server")
                reload(sys.modules["modules.scrapping"])

                # acknowledge the configuration to the server
                payload = protocol.InfoPayload(protocol.InfoPayload.CLIENT_ACK)
                packet = protocol.Packet(protocol.INFO, payload)
                self.writeSocket(packet)
                logger.log(logging.DEBUG, "Sending ACK for configuration.")
            else:
                raise Exception("Unable to parse configuration.")
        except:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            message = ''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))
            logger.log(logging.CRITICAL, message)
            self.isActive = False
def storageRoutine(self):
    """Stores session and data

    Connects to PostgreSQL once, then inserts the URL of every non-failed
    session taken from sessionStorageQueue into the 'url' table, committing
    after each insert. The cursor and connection are closed when the routine
    ends. If the connection could not be opened, sessions are still drained
    from the queue and the resulting error is printed for each of them.
    """
    logger.log(logging.INFO, "Starting server storageRoutine")

    # pre-set so the cleanup below is safe even if connecting fails
    connection = None
    cursor = None
    try:
        connection = psycopg2.connect(user="******",
                                      password="******",
                                      host="localhost",
                                      port="5432",
                                      database="crawler2")
        cursor = connection.cursor()
        # Print PostgreSQL Connection properties
        print(connection.get_dsn_parameters(), "\n")

        # Print PostgreSQL version
        cursor.execute("SELECT version();")
        record = cursor.fetchone()
        print("You are connected to - ", record, "\n")
    except (Exception, psycopg2.Error) as error:
        print("Error while connecting to PostgreSQL", error)

    try:
        while self.isActive:
            try:
                sessions = protocol.deQueue([sessionStorageQueue])
                if not sessions:
                    continue

                for session in sessions:
                    try:
                        if not session.failed:
                            try:
                                # query parameters must be a sequence: a bare
                                # string is not accepted as a single value
                                cursor.execute("INSERT INTO url VALUES (%s)", (session.url,))
                                # without a commit the insert is never persisted
                                connection.commit()
                            except (Exception, psycopg2.DatabaseError) as error:
                                print(error)
                                # a failed statement aborts the transaction;
                                # roll back so later inserts can succeed
                                if connection is not None:
                                    connection.rollback()
                            print("ez")
                        else:
                            print("hola")
                    except:
                        logger.log(logging.ERROR, "Unhandled exception in storage.py")
            except:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                message = "\n" + ''.join(
                    traceback.format_exception(exc_type, exc_value, exc_traceback))
                logger.log(logging.ERROR, message)
    finally:
        # closing database resources (only those that were actually opened)
        if cursor is not None:
            cursor.close()
        if connection is not None:
            connection.close()
def dispatcher(self, packet):
    """Dispatches packets to the right packet queue or takes action if needed (ie: infoPacket)

    URL packets are handled according to their payload type:
      SCRAPPED_URL - every URL is put on urlPool
      VISITED      - sentCount is decremented, URLs are appended to
                     visitedURLlist and the session (if any) is queued for storage
      SKIPPED      - sentCount is decremented, URLs are appended to
                     skippedURLlist, the session (if any) is queued for storage
                     and the reason for skipping is logged
    A None packet is ignored; unknown packet types are logged and dropped.
    """
    if packet is None:
        return

    logger.log(logging.DEBUG, "Dispatching packet of type: " + str(packet.type))

    if packet.type == protocol.INFO:
        logger.log(logging.DEBUG, self.formattedAddr + "Received INFO packet")
    elif packet.type == protocol.URL:
        payload = packet.payload
        # one lookup covers both "payload has no session attribute" and
        # "session is None"
        session = getattr(payload, 'session', None)

        if payload.type == protocol.URLPayload.SCRAPPED_URL:
            logger.log(
                logging.INFO,
                self.formattedAddr + "Receiving scrapped URLs : " +
                str(len(payload.urlList)).center(5) + " / " +
                str(len(scrappedURLlist)).center(7) + " - " +
                str(len(skippedURLlist)).center(5))
            for url in payload.urlList:
                urlPool.put(url)

        if payload.type == protocol.URLPayload.VISITED:
            self.sentCount = self.sentCount - 1
            for url in payload.urlList:
                logger.log(logging.INFO, self.formattedAddr + "Receiving scrapped data")
                logger.log(
                    logging.DEBUG,
                    self.formattedAddr + "Receiving scrapped data" + url)
                visitedURLlist.append(url)
            if session is not None:
                sessionStorageQueue.put(session)

        if payload.type == protocol.URLPayload.SKIPPED:
            self.sentCount = self.sentCount - 1
            # pre-set so the log lines below work even for an empty urlList
            url = ""
            for url in payload.urlList:
                skippedURLlist.append(url)

            if session is None:
                logger.log(
                    logging.INFO,
                    logger.RED + self.formattedAddr + "No session returned" +
                    url + logger.NOCOLOR)
            else:
                sessionStorageQueue.put(session)
                returnCode = session.returnCode
                if returnCode == -1:
                    logger.log(
                        logging.INFO,
                        logger.PINK + self.formattedAddr +
                        "Skipped (timeout) : " + url + logger.NOCOLOR)
                elif returnCode == -2:
                    logger.log(
                        logging.INFO,
                        logger.PINK + self.formattedAddr +
                        "Skipped (request not allowed - robot parser) : " +
                        url + logger.NOCOLOR)
                elif returnCode == -100:
                    logger.log(
                        logging.INFO,
                        logger.YELLOW + self.formattedAddr +
                        "Skipped (unknown error) : " + url + logger.NOCOLOR)
                else:
                    logger.log(
                        logging.INFO,
                        logger.BLUE + self.formattedAddr +
                        "Skipped (html error " + str(returnCode) + ") : " +
                        url + logger.NOCOLOR)
    else:
        logger.log(
            logging.CRITICAL, "Unrecognized packet type : " +
            str(packet.type) + ". This packet was dropped")
        return