示例#1
0
    def get(self, crawlID):
        """Run one crawl task: re-fetch the dataset's VoID file, detect
        changes against the stored hash, and record status/timing on the
        Crawl entity.

        Args:
            crawlID: string id of the Crawl entity to process
                     (converted with long(); this is Python 2 / GAE-era code).
        """
        # Lazy %-args instead of string concatenation — stdlib logging idiom.
        logging.info("Starting execution of task %s", crawlID)
        crawl = Crawl.get_by_id(long(crawlID))

        startTime = time.time()
        crawl.status = "RUNNING"
        crawl.save()

        # Download the VoID description for this crawl's dataset.
        voidURI = crawl.dataset.voidURI
        logging.info("Downloading VoID from %s", voidURI)
        result = urlfetch.fetch(voidURI)
        logging.info("Completed download from %s with HTTP %s",
                     voidURI, result.status_code)

        if result.status_code != 200:
            crawl.changeDetected = False
            crawl.status = "ERROR"
            crawl.message = "VoID Download Failed"
        else:
            # Compare a content hash against the stored one to detect change.
            voidHash = self.computeHash(result.content)
            if crawl.dataset.voidHash != voidHash:
                # VoID changed - persist the new hash before processing.
                logging.info("VoID file at %s changed", voidURI)
                crawl.dataset.voidHash = voidHash
                crawl.dataset.save()

                self.processVoID(result.content, crawl)

                # No matter what the task status for the data dumps
                # is - just overwrite with info about the VoID change.
                crawl.changeDetected = True
                crawl.message = "VoID file changed"
            else:
                # No change in the VoID file itself; still process it
                # (presumably so the referenced data dumps get checked —
                # depends on processVoID, which is outside this view).
                logging.info("No changes in VoID file %s", voidURI)

                self.processVoID(result.content, crawl)

        # Complete task log with timing info.
        # NOTE(review): on the success paths crawl.status is left as
        # "RUNNING" — confirm whether processVoID sets a terminal status,
        # otherwise this looks like a latent bug.
        crawl.finishedAt = datetime.datetime.utcnow()
        crawl.duration = int(time.time() - startTime)
        crawl.save()
示例#2
0
 def view_crawl(self, mid, cid):
     """Render the detail page for one crawl belonging to one robot."""
     owner_key = self.current_user.key
     robot = Robot.get_by_id(int(mid), parent=owner_key)
     crawl = Crawl.get_by_id(int(cid), parent=robot.key)
     self.render_response('robot/crawl.html',
                          robot=robot, crawl=crawl, jobs=crawl.jobs)