示例#1
0
 def resource_count_visited(self, crawl_id):
     """Count the rows of ``resources`` that belong to one crawl.

     Args:
         crawl_id: Value matched against ``resources.crawl_id``.

     Returns:
         The number of matching rows.
     """
     # The previous query never used ``crawl_id`` and therefore counted
     # every resource of every crawl.
     cmd = select([func.count(resources.c.id)]).where(
         resources.c.crawl_id == crawl_id)
     # The context manager closes the connection even if execute() raises.
     with engine.connect() as conn:
         # scalar() reads the single COUNT value without relying on the
         # auto-generated ``count_1`` column label.
         return conn.execute(cmd).scalar()
示例#2
0
 def user_get_by_email_and_password(self, email, password):
     """Load the user whose email and password both match.

     Args:
         email: Value compared with ``users.email``.
         password: Value compared with ``users.password``.

     Returns:
         A ``User`` populated via ``load_from_rs`` with the first matching
         row.
     """
     cmd = select([users]).where(users.c.email == email,
                                 users.c.password == password)
     user = User()
     # The context manager closes the connection even if the query or the
     # row loading raises.
     with engine.connect() as conn:
         # NOTE(review): when nothing matches, first() yields None and that
         # None is handed to load_from_rs unchanged -- confirm the callers
         # can tell such a User apart from a real one.
         user.load_from_rs(conn.execute(cmd).first())
     return user
示例#3
0
 def _get_by_id(self, table, class_name, id):
     """Load one row of ``table`` by its ``id`` column into a new entity.

     Args:
         table: Table to query; must expose an ``id`` column.
         class_name: Callable invoked without arguments to build the
             entity; the result must provide ``load_from_rs``.
         id: Value compared with ``table.c.id``.

     Returns:
         The entity after ``load_from_rs`` was called with the first
         matching row.
     """
     query = select([table]).where(table.c.id == id)
     entity = class_name()
     # The context manager closes the connection even if the query or the
     # row loading raises.
     with engine.connect() as conn:
         # NOTE(review): first() yields None when no row matches and that
         # None is passed to load_from_rs as-is -- confirm this is handled.
         entity.load_from_rs(conn.execute(query).first())
     return entity
示例#4
0
 def resource_is_present(self, absolute_address, crawlId):
     """Check whether a resource with this URL exists for the given crawl.

     Args:
         absolute_address: Value compared with ``resources.absolute_url``.
         crawlId: Value compared with ``resources.crawl_id``.

     Returns:
         True when at least one matching row exists, otherwise False.
     """
     cmd = select([func.count(resources.c.id)]).where(
         resources.c.absolute_url == absolute_address,
         resources.c.crawl_id == crawlId)
     # The context manager closes the connection even if execute() raises.
     with engine.connect() as conn:
         # scalar() reads the single COUNT value without relying on the
         # auto-generated ``count_1`` column label.
         return conn.execute(cmd).scalar() > 0
示例#5
0
 def url_count_internal_full(self, crawl_id):
     """Count internal urls that have both a source and a destination resource.

     Args:
         crawl_id: Value compared with ``urls.crawl_id``.

     Returns:
         The number of rows in ``urls`` of type ``Url.TYPE_INTERNAL`` whose
         ``src_resource_id`` and ``dst_resource_id`` are both non-NULL.
     """
     # isnot(None) renders IS NOT NULL; a Python ``is not None`` test would
     # be evaluated immediately instead of becoming part of the SQL.
     cmd = select([func.count(urls.c.id)]).where(
         urls.c.src_resource_id.isnot(None),
         urls.c.dst_resource_id.isnot(None),
         urls.c.type == Url.TYPE_INTERNAL,
         urls.c.crawl_id == crawl_id)
     # The context manager closes the connection even if execute() raises.
     with engine.connect() as conn:
         # scalar() reads the single COUNT value without relying on the
         # auto-generated ``count_1`` column label.
         return conn.execute(cmd).scalar()
示例#6
0
 def crawl_get_all_for_site(self, site_id):
     """Load every crawl that belongs to a site.

     Args:
         site_id: Value compared with ``crawls.site_id``.

     Returns:
         A list of ``Crawl`` objects, one per matching row, each populated
         via ``load_from_rs``. Empty when nothing matches.
     """
     cmd = select([crawls]).where(crawls.c.site_id == site_id)
     entities = []
     # The context manager closes the connection even if the query or the
     # row loading raises.
     with engine.connect() as conn:
         for record in conn.execute(cmd):
             crawl = Crawl()
             crawl.load_from_rs(record)
             entities.append(crawl)
     return entities
示例#7
0
 def crawl_get_last_for_site(self, site_id):
     """Load the crawl with the latest ``date`` for a site.

     Args:
         site_id: Value compared with ``crawls.site_id``.

     Returns:
         A ``Crawl`` populated from the first row when ordering by
         ``crawls.date`` descending, or None when the site has no crawl.
     """
     cmd = select([crawls]).where(crawls.c.site_id == site_id).order_by(
         desc(crawls.c.date))
     # The context manager closes the connection even if execute() raises.
     with engine.connect() as conn:
         record = conn.execute(cmd).first()
     # Identity test: ``== None`` would go through the row's own equality.
     if record is None:
         return None
     crawl = Crawl()
     crawl.load_from_rs(record)
     return crawl
示例#8
0
 def _get_all(self, table, class_name):
     """Load every row of ``table`` into freshly built entities.

     Args:
         table: Table to select from.
         class_name: Callable invoked without arguments once per row; the
             result must provide ``load_from_rs``.

     Returns:
         A list with one entity per row; empty when the table has no rows.
     """
     cmd = select([table])
     entities = []
     # The context manager closes the connection even if the query or the
     # row loading raises.
     with engine.connect() as conn:
         for record in conn.execute(cmd):
             entity = class_name()
             entity.load_from_rs(record)
             entities.append(entity)
     return entities
示例#9
0
 def url_count_pending(self, crawl_id):
     """Count internal urls of a crawl that are not visited or in progress.

     Args:
         crawl_id: Value compared with ``urls.crawl_id``.

     Returns:
         The number of rows in ``urls`` of type ``Url.TYPE_INTERNAL`` whose
         ``job_status`` is ``Url.JOB_STATUS_NOT_VISITED`` or
         ``Url.JOB_STATUS_IN_PROGRESS``.
     """
     still_pending = or_(
         urls.c.job_status == Url.JOB_STATUS_NOT_VISITED,
         urls.c.job_status == Url.JOB_STATUS_IN_PROGRESS)
     cmd = select([func.count(urls.c.id)]).where(
         still_pending,
         urls.c.type == Url.TYPE_INTERNAL,
         urls.c.crawl_id == crawl_id)
     # The context manager closes the connection even if execute() raises.
     with engine.connect() as conn:
         # scalar() reads the single COUNT value without relying on the
         # auto-generated ``count_1`` column label.
         return conn.execute(cmd).scalar()
示例#10
0
 def url_count_incoming_for_resource(self, resource_id):
     """Count internal urls that point to a resource from a known source.

     Args:
         resource_id: Value compared with ``urls.dst_resource_id``.

     Returns:
         The number of rows in ``urls`` of type ``Url.TYPE_INTERNAL`` with
         this destination and a non-NULL ``src_resource_id``.
     """
     # isnot(None) renders IS NOT NULL; a Python ``is not None`` test would
     # be evaluated immediately instead of becoming part of the SQL.
     cmd = select([func.count(urls.c.id)]).where(
         urls.c.src_resource_id.isnot(None),
         urls.c.dst_resource_id == resource_id,
         urls.c.type == Url.TYPE_INTERNAL)
     # The context manager closes the connection even if execute() raises.
     with engine.connect() as conn:
         # scalar() reads the single COUNT value without relying on the
         # auto-generated ``count_1`` column label.
         return conn.execute(cmd).scalar()
示例#11
0
 def url_get_all_by_crawl_id(self, crawl_id):
     """Load every url that belongs to a crawl.

     Args:
         crawl_id: Value compared with ``urls.crawl_id``.

     Returns:
         A list of ``Url`` objects, one per matching row, each populated
         via ``load_from_rs``. No ordering is requested from the database.
     """
     cmd = select([urls]).where(urls.c.crawl_id == crawl_id)
     entities = []
     # The context manager closes the connection even if the query or the
     # row loading raises.
     with engine.connect() as conn:
         for record in conn.execute(cmd):
             url = Url()
             url.load_from_rs(record)
             entities.append(url)
     return entities
示例#12
0
 def resource_get_by_absolute_url_and_crawl_id(self, absolute_url, crawlId):
     """Load the resource with a given absolute URL inside a crawl.

     Args:
         absolute_url: Value compared with ``resources.absolute_url``.
         crawlId: Value compared with ``resources.crawl_id``.

     Returns:
         A ``Resource`` populated from the first matching row, or None when
         no row matches.
     """
     cmd = select([resources]).where(
         resources.c.crawl_id == crawlId,
         resources.c.absolute_url == absolute_url)
     # The context manager closes the connection even if execute() raises.
     with engine.connect() as conn:
         record = conn.execute(cmd).first()
     if record is None:
         return None
     resource = Resource()
     resource.load_from_rs(record)
     return resource
示例#13
0
    def url_count_external(self, crawl_id):
        """Count the external urls recorded for a crawl.

        Args:
            crawl_id: Value compared with ``urls.crawl_id``.

        Returns:
            The number of rows in ``urls`` of type ``Url.TYPE_EXTERNAL``
            that belong to the crawl.
        """
        cmd = select([func.count(urls.c.id)]).where(
            urls.c.type == Url.TYPE_EXTERNAL,
            urls.c.crawl_id == crawl_id,
        )
        # The context manager closes the connection even if execute() raises.
        with engine.connect() as conn:
            # scalar() reads the single COUNT value without relying on the
            # auto-generated ``count_1`` column label.
            return conn.execute(cmd).scalar()
示例#14
0
 def url_get_all_unvisited(self, crawl_id):
     """Load every internal url of a crawl that has not been visited.

     Args:
         crawl_id: Value compared with ``urls.crawl_id``.

     Returns:
         A list of ``Url`` objects for rows of type ``Url.TYPE_INTERNAL``
         whose ``job_status`` is ``Url.JOB_STATUS_NOT_VISITED``. No ordering
         is requested from the database.
     """
     cmd = select([urls]).where(
         urls.c.job_status == Url.JOB_STATUS_NOT_VISITED,
         urls.c.type == Url.TYPE_INTERNAL,
         urls.c.crawl_id == crawl_id,
     )
     entities = []
     # The context manager closes the connection even if the query or the
     # row loading raises.
     with engine.connect() as conn:
         for record in conn.execute(cmd):
             url = Url()
             url.load_from_rs(record)
             entities.append(url)
     return entities
示例#15
0
    def url_get_first_unvisited(self, crawl_id):
        """Load one internal url of a crawl that has not been visited.

        The query has no ORDER BY, so which matching row comes back first is
        left to the database.

        Args:
            crawl_id: Value compared with ``urls.crawl_id``.

        Returns:
            A ``Url`` populated from the first returned row, or None when no
            row of type ``Url.TYPE_INTERNAL`` with status
            ``Url.JOB_STATUS_NOT_VISITED`` exists for the crawl.
        """
        cmd = select([urls]).where(
            urls.c.job_status == Url.JOB_STATUS_NOT_VISITED,
            urls.c.type == Url.TYPE_INTERNAL,
            urls.c.crawl_id == crawl_id,
        )
        # The context manager closes the connection even if execute() raises.
        with engine.connect() as conn:
            record = conn.execute(cmd).first()
        if record is None:
            return None
        url = Url()
        url.load_from_rs(record)
        return url
示例#16
0
 def resource_get_all_incoming_for_resource(self, resource_id):
     """Load all resources that point, through urls, to this resource.

     Simply put: find all pages that link to this page.

     Args:
         resource_id: Value compared with ``urls.dst_resource_id``.

     Returns:
         A list of ``Resource`` objects, one per joined row, so a source
         that links here through several urls appears once per url.
     """
     # Join each resource to the urls it is the source of, then keep the
     # urls whose destination is the requested resource.
     linked = resources.join(urls, resources.c.id == urls.c.src_resource_id)
     cmd = select([resources]).select_from(linked).where(
         urls.c.dst_resource_id == resource_id)
     entities = []
     # The context manager closes the connection even if the query or the
     # row loading raises.
     with engine.connect() as conn:
         for record in conn.execute(cmd):
             resource = Resource()
             resource.load_from_rs(record)
             entities.append(resource)
     return entities
示例#17
0
    def _update(self, table, object):
        """Write the attributes of ``object`` to its row in ``table``.

        The row is selected by the first primary-key column of ``table``;
        every other entry of ``object.__dict__`` is written as a column
        value.

        Args:
            table: Table to update; its first primary-key column identifies
                the row.
            object: Entity whose ``__dict__`` holds the column values,
                including the primary key under the key column's name.
        """
        primarykey_column = table.primary_key.columns[0]
        primarykey_name = primarykey_column.name
        attributes = object.__dict__

        # Leave the key out of the SET clause. The original built this dict
        # but then passed the full __dict__ to values(), so the key was
        # rewritten on every update.
        values = {k: v for k, v in attributes.items() if k != primarykey_name}
        if not values:
            # Nothing except the key itself: there is no column to change.
            return

        up = update(table).values(values).where(
            primarykey_column == attributes[primarykey_name])
        # The context manager closes the connection even if execute() raises.
        with engine.connect() as conn:
            conn.execute(up)
示例#18
0
    def _create(self, table, object):
        """Insert ``object`` into ``table`` and return the new primary key.

        Args:
            table: Table to insert into.
            object: Entity whose ``__dict__`` supplies the column values;
                its ``id`` attribute is set to the inserted key.

        Returns:
            The first element of the insert's ``inserted_primary_key``.
        """
        values = object.__dict__
        if SQLALCHEMY_DATABASE == 'postgresql':
            # PostgreSQL complains when 'id' is supplied, so leave it out.
            values = {k: v for k, v in values.items() if k != 'id'}

        ins = insert(table).values(values)
        # The context manager closes the connection even if execute() raises;
        # the key is read while the connection is still open.
        with engine.connect() as conn:
            new_id = conn.execute(ins).inserted_primary_key[0]
        object.id = new_id
        return new_id
示例#19
0
 def _delete_by_id(self, table, id):
     """Delete the rows of ``table`` whose ``id`` column equals ``id``.

     Args:
         table: Table to delete from; must expose an ``id`` column.
         id: Value compared with ``table.c.id``.

     Returns:
         True when the statement reports at least one affected row,
         otherwise False.
     """
     del_cmd = delete(table).where(table.c.id == id)
     # The context manager closes the connection even if execute() raises;
     # rowcount is read while the connection is still open.
     with engine.connect() as conn:
         return conn.execute(del_cmd).rowcount >= 1
示例#20
0
 def _delete_all(self, table):
     """Delete every row of ``table``.

     Args:
         table: Table to empty.

     Returns:
         True when the statement reports at least one affected row,
         otherwise False (for example when the table was already empty).
     """
     del_cmd = delete(table)
     # The context manager closes the connection even if execute() raises;
     # rowcount is read while the connection is still open.
     with engine.connect() as conn:
         return conn.execute(del_cmd).rowcount >= 1