Пример #1
0
    def get(self, jobid):
        """Return the image URLs recorded so far for *jobid* as JSON.

        Writes a 200 response of the form ``{ "images": [ "..." ] }``.
        Raises tornado.web.HTTPError(400) when the job does not exist.
        """
        # ORM bootstrap: grab a pooled connection and reflect the two
        # tables we need from the live database schema.
        dbhdl = tools.getdbhandle()
        metadata = sqlalchemy.MetaData(dbhdl)
        session = sqlalchemy.orm.sessionmaker(bind=dbhdl)()

        class Job(object):
            pass

        class Result(object):
            pass

        jobs = sqlalchemy.Table('jobs', metadata, autoload=True)
        results = sqlalchemy.Table('results', metadata, autoload=True)

        sqlalchemy.orm.mapper(Job, jobs)
        sqlalchemy.orm.mapper(Result, results)

        try:
            # If the job requested does not exist then we bail out.
            job = session.query(Job).get(jobid)
            if job is None:
                raise tornado.web.HTTPError(400)

            # Collect the distinct image URLs for this job (GROUP BY
            # de-duplicates; ordering is whatever the database returns).
            ret = ['"%s"' % result.image
                   for result in session.query(Result.image)
                                        .filter_by(job_id=jobid)
                                        .group_by(Result.image)
                                        .all()]
        finally:
            # Return the pooled handle even on the HTTPError(400) path;
            # the original leaked it when the job was missing.
            tools.putdbhandle(dbhdl)

        self.set_status(200)
        self.write('{ "images": [ %s ] }' % ',\n'.join(ret))
Пример #2
0
    def _createjob(self, urls, depth):
        """Create a job row for *urls* and enqueue one crawl message per URL.

        Message format (JSON): {"jobid": "<id>", "url": "<url>",
        "depth": "<depth>"}. Responds 201 with a Location header
        pointing at the new job resource.
        """
        import json  # local import: keep the module's import block untouched

        # NOTE: `print >>` is Python 2 syntax, consistent with the rest of
        # this file.
        print >> sys.stderr, "-- received request for %s with max depth %s" % (",".join(urls),
                                                                               depth)

        # ORM bootstrap: grab a pooled connection and reflect the jobs
        # table from the live database schema.
        dbhdl = tools.getdbhandle()
        metadata = sqlalchemy.MetaData(dbhdl)
        session = sqlalchemy.orm.sessionmaker(bind=dbhdl)()

        class Job(object):
            pass

        jobs = sqlalchemy.Table('jobs', metadata, autoload=True)
        sqlalchemy.orm.mapper(Job, jobs)

        try:
            # Actually create the job in the table; commit assigns job.id.
            job = Job()
            job.nburls = len(urls)
            job.posted = datetime.datetime.now()
            session.add(job)
            session.commit()
        finally:
            # Hand the pooled handle back even if the commit blows up.
            tools.putdbhandle(dbhdl)

        # Push one message per URL. json.dumps escapes quotes/backslashes
        # in the URL — the original hand-built string interpolation
        # produced invalid JSON for URLs containing '"'.
        mqhdl, mqchannel = tools.getmqhandle()
        try:
            for url in urls:
                tools.pushtomq(mqchannel,
                               json.dumps({"jobid": "%d" % job.id,
                                           "url": url,
                                           "depth": "%s" % depth}))
        finally:
            tools.putmqhandle(mqhdl, mqchannel)

        self.set_header('Content-Type', 'text/plain')
        self.set_header('Location', '/{0}'.format(job.id))
        self.set_status(201)
Пример #3
0
    def get(self, jobid):
        """Report crawl progress for *jobid* as a JSON document.

        Writes a 200 response with the number of completed vs requested
        URLs and the job creation timestamp. Raises
        tornado.web.HTTPError(400) when the job does not exist.
        """
        # ORM bootstrap: grab a pooled connection and reflect the two
        # tables we need from the live database schema.
        dbhdl = tools.getdbhandle()
        metadata = sqlalchemy.MetaData(dbhdl)
        session = sqlalchemy.orm.sessionmaker(bind=dbhdl)()

        class Job(object):
            pass

        class Task(object):
            pass

        jobs = sqlalchemy.Table('jobs', metadata, autoload=True)
        tasks = sqlalchemy.Table('tasks', metadata, autoload=True)

        sqlalchemy.orm.mapper(Job, jobs)
        sqlalchemy.orm.mapper(Task, tasks)

        try:
            # If the job requested does not exist then we bail out.
            job = session.query(Job).get(jobid)
            if job is None:
                raise tornado.web.HTTPError(400)

            # COUNT(*) in SQL rather than fetching every completed task
            # row just to len() the resulting list.
            nbcompleted = session.query(Task).filter_by(completed=True,
                                                        job_id=jobid).count()
        finally:
            # Return the pooled handle even on the HTTPError(400) path;
            # the original leaked it when the job was missing.
            tools.putdbhandle(dbhdl)

        self.set_status(200)
        self.write('{ "result": {\n'
                   '     "urls_completed": "%d",\n'
                   '     "urls_requested": "%d",\n'
                   '     "creation": "%s" } }\n' % (nbcompleted,
                                                    job.nburls,
                                                    job.posted))