def run(self): while True: # gets the url from the queue url = self.queue.get() #URLs not being processing if not self._check_urlBlackList(url): try: objfile = ObjFile(url) #Zeitmessung Start self.task.update({"started_on":datetime.datetime.now()}) #Download the file objfile = self._process_url(url) if objfile.is_permittedType(): #Run processing-plugins results = self._run_processing(objfile) if results: log.debug(results) # Run report-plugins self._run_reporting(results, objfile) else: log.warn("url %s does not provide any allowed file type (%s)" % (url, objfile.get_type())) except Exception, e: import traceback log.warn(traceback.print_exc()) log.warn("Thread("+self.threadName+") - %s - Error parsing %s" % (e, url)) finally: if objfile:
def run(self, url): # URLs not being processing if not self.__check_urlBlackList(url): try: objfile = ObjFile(url) # Zeitmessung Start self.task.update({"started_on": datetime.datetime.now()}) # Download the file objfile = self.__process_url(url) # Permittet Type and file must be processed if objfile.file.is_permittedType() and self.isFileToProcess( objfile): # PreProcessing e.g. unpacking objfile = self.__run_preProcessing(objfile) # Run processing-plugins results = self.__run_processing(objfile) if results: log.debug(results) # Run Yara on results yaraHits = self.__runYara(results) if yaraHits: results.update(yaraHits) # Run report-plugins self.__run_reporting(results, objfile) elif not objfile.file.is_permittedType(): log.warn( "url %s does not provide any allowed file type (%s)" % (url, objfile.file.get_type())) except urllib2.HTTPError as e: log.warn("Unable to perform HTTP request (http code=%s)" % e) except urllib2.URLError as e: log.warn("Unable to establish connection: %s" % e) except IOError as e: log.warn("Unable to establish connection: %s" % e) except Exception, e: import traceback log.warn(traceback.print_exc()) log.warn("Thread(" + self.processName + ") - %s - Error parsing %s" % (e, url)) finally:
data = None headers = { 'User-Agent': cfgCrawler.get("browser_user_agent", "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"), 'Accept-Language': cfgCrawler.get("browser_accept_language", "en-US"), } request = urllib2.Request(url, data, headers) try: url_dl = urllib2.urlopen(request, timeout=30).read() except Exception, e: raise IOError("Thread("+self.threadName+") - %s - Error parsing %s" % (e, url)) try: objfile = ObjFile(url) objfile.setFileData(url_dl) except Exception, e: raise Exception("Thread("+self.threadName+") - %s - Error create ObjFile %s" % (e, url)) return objfile def _setScore(self, score): self.totalScore = self.totalScore + score self.numberScores = self.numberScores + 1 def _getScoring(self): if self.numberScores != 0: # division by zero would be a run-time error average = float(self.totalScore) / self.numberScores average = round(average, 1) else:
url_dl = urllib2.urlopen(request, timeout=30).read() except urllib2.HTTPError as e: raise e except urllib2.URLError as e: raise e except Exception, e: raise IOError("Thread(" + self.processName + ") - %s - Error parsing %s" % (e, url)) finally: # Removing SOCKS Tor Proxy socket.socket = originalSocket log.info("Download from URL %s" % url) try: objfile = ObjFile(url) objfile.set_file_from_stream(url_dl) except Exception, e: raise Exception("Thread(" + self.processName + ") - %s - Error create ObjFile %s" % (e, url)) return objfile def _setScore(self, score): self.totalScore = self.totalScore + score self.numberScores = self.numberScores + 1 def _getScoring(self): if self.numberScores != 0: # division by zero would be a run-time error average = float(self.totalScore) / self.numberScores average = round(average, 1)
try: url_dl = urllib2.urlopen(request, timeout=30).read() except urllib2.HTTPError as e: raise e except urllib2.URLError as e: raise e except Exception, e: raise IOError("Thread(" + self.processName + ") - %s - Error parsing %s" % (e, url)) finally: # Removing SOCKS Tor Proxy socket.socket = originalSocket log.info("Download from URL %s" % url) try: objfile = ObjFile(url) objfile.set_file_from_stream(url_dl) except Exception, e: raise Exception("Thread(" + self.processName + ") - %s - Error create ObjFile %s" % (e, url)) return objfile def _setScore(self, score): self.totalScore = self.totalScore + score self.numberScores = self.numberScores + 1 def _getScoring(self): if self.numberScores != 0: # division by zero would be a run-time error average = float(self.totalScore) / self.numberScores average = round(average, 1) else: