def resource_count_visited(self, crawl_id):
    """Count the resources stored for one crawl.

    Args:
        crawl_id: Id of the crawl whose resources are counted.

    Returns:
        Number of rows in ``resources`` whose ``crawl_id`` matches.
    """
    # Filter on the crawl so that resources belonging to other crawls are
    # not included in the total.
    # NOTE(review): presumably a resource row only exists once its page was
    # visited, hence the method name -- confirm against the crawler.
    cmd = select([func.count(resources.c.id)]).where(
        resources.c.crawl_id == crawl_id)
    conn = engine.connect()
    try:
        # scalar() reads the single COUNT value without relying on the
        # auto-generated "count_1" column label.
        return conn.execute(cmd).scalar()
    finally:
        # Release the connection even when the query raises.
        conn.close()
def user_get_by_email_and_password(self, email, password):
    """Load the user whose email and password both match.

    Args:
        email: Value compared against ``users.email``.
        password: Value compared against ``users.password`` exactly as given.

    Returns:
        A ``User`` filled through ``load_from_rs`` with the first matching
        row.
    """
    # NOTE(review): the password is matched verbatim against the stored
    # column -- confirm callers pass a hash rather than clear text.
    cmd = select([users]).where(users.c.email == email,
                                users.c.password == password)
    conn = engine.connect()
    try:
        record = conn.execute(cmd).first()
        user = User()
        # NOTE(review): ``record`` is None when nothing matches and is still
        # handed to load_from_rs; verify how User handles that case.
        user.load_from_rs(record)
        return user
    finally:
        # Release the connection even when the query or the load raises.
        conn.close()
def _get_by_id(self, table, class_name, id):
    """Load one entity from ``table`` by its ``id`` column.

    Args:
        table: Table object to query; it must expose an ``id`` column.
        class_name: Callable invoked without arguments to build the entity.
        id: Value compared against ``table.c.id``.

    Returns:
        A ``class_name()`` instance filled through ``load_from_rs`` with the
        first matching row.
    """
    cmd = select([table]).where(table.c.id == id)
    conn = engine.connect()
    try:
        record = conn.execute(cmd).first()
        entity = class_name()
        # NOTE(review): ``record`` is None when the id does not exist and is
        # still handed to load_from_rs; verify how entities handle that.
        entity.load_from_rs(record)
        return entity
    finally:
        # Release the connection even when the query or the load raises.
        conn.close()
def resource_is_present(self, absolute_address, crawlId):
    """Check whether a resource with the given url exists for a crawl.

    Args:
        absolute_address: Value compared against ``resources.absolute_url``.
        crawlId: Value compared against ``resources.crawl_id``.

    Returns:
        True when at least one matching row exists, otherwise False.
    """
    cmd = select([func.count(resources.c.id)]).where(
        resources.c.absolute_url == absolute_address,
        resources.c.crawl_id == crawlId)
    conn = engine.connect()
    try:
        # scalar() reads the single COUNT value without relying on the
        # auto-generated "count_1" column label.
        return conn.execute(cmd).scalar() > 0
    finally:
        # Release the connection even when the query raises.
        conn.close()
def url_count_internal_full(self, crawl_id):
    """Count internal urls having both a source and a destination resource.

    Args:
        crawl_id: Id of the crawl whose urls are counted.

    Returns:
        Number of ``urls`` rows of type ``Url.TYPE_INTERNAL`` for the crawl
        whose ``src_resource_id`` and ``dst_resource_id`` are both set.
    """
    cmd = select([func.count(urls.c.id)]).where(
        # isnot(None) renders IS NOT NULL, same as the ``!= None`` spelling.
        urls.c.src_resource_id.isnot(None),
        urls.c.dst_resource_id.isnot(None),
        urls.c.type == Url.TYPE_INTERNAL,
        urls.c.crawl_id == crawl_id)
    conn = engine.connect()
    try:
        # scalar() reads the single COUNT value without relying on the
        # auto-generated "count_1" column label.
        return conn.execute(cmd).scalar()
    finally:
        # Release the connection even when the query raises.
        conn.close()
def crawl_get_all_for_site(self, site_id):
    """Load every crawl that belongs to a site.

    Args:
        site_id: Value compared against ``crawls.site_id``.

    Returns:
        List of ``Crawl`` objects, one per matching row; empty when the
        site has no crawls.
    """
    cmd = select([crawls]).where(crawls.c.site_id == site_id)
    entities = []
    conn = engine.connect()
    try:
        for record in conn.execute(cmd):
            crawl = Crawl()
            crawl.load_from_rs(record)
            entities.append(crawl)
    finally:
        # Release the connection even when the query or a load raises.
        conn.close()
    return entities
def crawl_get_last_for_site(self, site_id):
    """Load the crawl with the latest ``date`` for a site.

    Args:
        site_id: Value compared against ``crawls.site_id``.

    Returns:
        The ``Crawl`` with the greatest ``date`` for the site, or None when
        the site has no crawls.
    """
    # Only the newest row is consumed, so let the database stop after one.
    cmd = select([crawls]).where(crawls.c.site_id == site_id).order_by(
        desc(crawls.c.date)).limit(1)
    conn = engine.connect()
    try:
        record = conn.execute(cmd).first()
        if record is None:
            return None
        crawl = Crawl()
        crawl.load_from_rs(record)
        return crawl
    finally:
        # Release the connection even when the query or the load raises.
        conn.close()
def _get_all(self, table, class_name):
    """Load every row of ``table`` as an entity.

    Args:
        table: Table object to select from.
        class_name: Callable invoked without arguments to build each entity.

    Returns:
        List of ``class_name()`` instances, each filled through
        ``load_from_rs``; empty when the table has no rows.
    """
    cmd = select([table])
    entities = []
    conn = engine.connect()
    try:
        for record in conn.execute(cmd):
            entity = class_name()
            entity.load_from_rs(record)
            entities.append(entity)
    finally:
        # Release the connection even when the query or a load raises.
        conn.close()
    return entities
def url_count_pending(self, crawl_id):
    """Count internal urls of a crawl that are unvisited or in progress.

    Args:
        crawl_id: Id of the crawl whose urls are counted.

    Returns:
        Number of ``urls`` rows of type ``Url.TYPE_INTERNAL`` for the crawl
        whose ``job_status`` is ``Url.JOB_STATUS_NOT_VISITED`` or
        ``Url.JOB_STATUS_IN_PROGRESS``.
    """
    still_pending = or_(
        urls.c.job_status == Url.JOB_STATUS_NOT_VISITED,
        urls.c.job_status == Url.JOB_STATUS_IN_PROGRESS)
    cmd = select([func.count(urls.c.id)]).where(
        still_pending,
        urls.c.type == Url.TYPE_INTERNAL,
        urls.c.crawl_id == crawl_id)
    conn = engine.connect()
    try:
        # scalar() reads the single COUNT value without relying on the
        # auto-generated "count_1" column label.
        return conn.execute(cmd).scalar()
    finally:
        # Release the connection even when the query raises.
        conn.close()
def url_count_incoming_for_resource(self, resource_id):
    """Count internal urls that lead to a resource from a known source.

    Args:
        resource_id: Value compared against ``urls.dst_resource_id``.

    Returns:
        Number of ``urls`` rows of type ``Url.TYPE_INTERNAL`` whose
        destination is the resource and whose ``src_resource_id`` is set.
    """
    # NOTE(review): the query only requires a source to be present; links
    # from the resource to itself are not excluded.
    cmd = select([func.count(urls.c.id)]).where(
        # isnot(None) renders IS NOT NULL, same as the ``!= None`` spelling.
        urls.c.src_resource_id.isnot(None),
        urls.c.dst_resource_id == resource_id,
        urls.c.type == Url.TYPE_INTERNAL)
    conn = engine.connect()
    try:
        # scalar() reads the single COUNT value without relying on the
        # auto-generated "count_1" column label.
        return conn.execute(cmd).scalar()
    finally:
        # Release the connection even when the query raises.
        conn.close()
def url_get_all_by_crawl_id(self, crawl_id):
    """Load every url recorded for a crawl.

    Args:
        crawl_id: Value compared against ``urls.crawl_id``.

    Returns:
        List of ``Url`` objects, one per matching row, in whatever order
        the database returns them; empty when nothing matches.
    """
    cmd = select([urls]).where(urls.c.crawl_id == crawl_id)
    entities = []
    conn = engine.connect()
    try:
        for record in conn.execute(cmd):
            url = Url()
            url.load_from_rs(record)
            entities.append(url)
    finally:
        # Release the connection even when the query or a load raises.
        conn.close()
    return entities
def resource_get_by_absolute_url_and_crawl_id(self, absolute_url, crawlId):
    """Load the resource with a given absolute url inside a crawl.

    Args:
        absolute_url: Value compared against ``resources.absolute_url``.
        crawlId: Value compared against ``resources.crawl_id``.

    Returns:
        A ``Resource`` built from the first matching row, or None when no
        row matches.
    """
    cmd = select([resources]).where(
        resources.c.crawl_id == crawlId,
        resources.c.absolute_url == absolute_url)
    conn = engine.connect()
    try:
        record = conn.execute(cmd).first()
        if record is None:
            return None
        resource = Resource()
        resource.load_from_rs(record)
        return resource
    finally:
        # Release the connection even when the query or the load raises.
        conn.close()
def url_count_external(self, crawl_id):
    """Count the external urls recorded for a crawl.

    Args:
        crawl_id: Id of the crawl whose urls are counted.

    Returns:
        Number of ``urls`` rows of type ``Url.TYPE_EXTERNAL`` for the crawl.
    """
    cmd = select([func.count(urls.c.id)]).where(
        urls.c.type == Url.TYPE_EXTERNAL,
        urls.c.crawl_id == crawl_id,
    )
    conn = engine.connect()
    try:
        # scalar() reads the single COUNT value without relying on the
        # auto-generated "count_1" column label.
        return conn.execute(cmd).scalar()
    finally:
        # Release the connection even when the query raises.
        conn.close()
def url_get_all_unvisited(self, crawl_id):
    """Load every internal url of a crawl that has not been visited yet.

    Args:
        crawl_id: Value compared against ``urls.crawl_id``.

    Returns:
        List of ``Url`` objects of type ``Url.TYPE_INTERNAL`` whose
        ``job_status`` is ``Url.JOB_STATUS_NOT_VISITED``; empty when there
        are none.
    """
    cmd = select([urls]).where(
        urls.c.job_status == Url.JOB_STATUS_NOT_VISITED,
        urls.c.type == Url.TYPE_INTERNAL,
        urls.c.crawl_id == crawl_id,
    )
    entities = []
    conn = engine.connect()
    try:
        for record in conn.execute(cmd):
            url = Url()
            url.load_from_rs(record)
            entities.append(url)
    finally:
        # Release the connection even when the query or a load raises.
        conn.close()
    return entities
def url_get_first_unvisited(self, crawl_id):
    """Load one internal url of a crawl that has not been visited yet.

    Args:
        crawl_id: Value compared against ``urls.crawl_id``.

    Returns:
        A ``Url`` built from the first row the database returns for the
        filter, or None when no unvisited internal url is left.
    """
    # The statement carries no ORDER BY, so which matching row comes first
    # is decided by the database.
    cmd = select([urls]).where(
        urls.c.job_status == Url.JOB_STATUS_NOT_VISITED,
        urls.c.type == Url.TYPE_INTERNAL,
        urls.c.crawl_id == crawl_id,
    )
    conn = engine.connect()
    try:
        record = conn.execute(cmd).first()
        if record is None:
            return None
        url = Url()
        url.load_from_rs(record)
        return url
    finally:
        # Release the connection even when the query or the load raises.
        conn.close()
def resource_get_all_incoming_for_resource(self, resource_id):
    """Get all resources that point, through urls, to this resource.

    Simply put: find all pages that give a link to this page.

    Args:
        resource_id: Value compared against ``urls.dst_resource_id``.

    Returns:
        List of ``Resource`` objects, one per joined row. The query has no
        DISTINCT, so a page linking several times yields several entries.
    """
    # Pair each resource with the urls that originate from it.
    linking = resources.join(urls, resources.c.id == urls.c.src_resource_id)
    cmd = select([resources]).select_from(linking).where(
        urls.c.dst_resource_id == resource_id)
    entities = []
    conn = engine.connect()
    try:
        for record in conn.execute(cmd):
            resource = Resource()
            resource.load_from_rs(record)
            entities.append(resource)
    finally:
        # Release the connection even when the query or a load raises.
        conn.close()
    return entities
def _update(self, table, object):
    """Write an object's attributes back to its row in ``table``.

    The row is located through the first primary-key column of ``table``;
    every other entry of ``object.__dict__`` is sent as a column value.

    Args:
        table: Table object to update.
        object: Entity whose ``__dict__`` keys are used as column names and
            which carries the primary-key value under the key column's name.

    Raises:
        KeyError: If the object has no attribute named like the primary key.
    """
    pk_column = table.primary_key.columns[0]
    pk_name = pk_column.name
    attrs = object.__dict__

    # Leave the key itself out of the SET clause: it identifies the row and
    # must not be rewritten.
    new_values = {k: v for k, v in attrs.items() if k != pk_name}
    pk_value = attrs[pk_name]
    if not new_values:
        # Only the key is present, so there is nothing to write.
        return

    stmt = update(table).values(new_values).where(pk_column == pk_value)
    conn = engine.connect()
    try:
        conn.execute(stmt)
    finally:
        # Release the connection even when the statement raises.
        conn.close()
def _create(self, table, object):
    """Insert an object as a new row and record the generated key on it.

    Args:
        table: Table object to insert into.
        object: Entity whose ``__dict__`` supplies the column values; its
            ``id`` attribute is overwritten with the new primary key.

    Returns:
        The first element of the insert's ``inserted_primary_key``.
    """
    row_values = object.__dict__
    if SQLALCHEMY_DATABASE == 'postgresql':
        # Just skip ID for PostgreSQL (as it complains)
        row_values = {k: v for k, v in row_values.items() if k != 'id'}

    stmt = insert(table).values(row_values)
    conn = engine.connect()
    try:
        result = conn.execute(stmt)
        new_id = result.inserted_primary_key[0]
    finally:
        # Release the connection even when the insert raises.
        conn.close()
    object.id = new_id
    return new_id
def _delete_by_id(self, table, id):
    """Delete the row(s) of ``table`` whose ``id`` column matches.

    Args:
        table: Table object to delete from; it must expose an ``id`` column.
        id: Value compared against ``table.c.id``.

    Returns:
        True when the statement reports at least one affected row,
        otherwise False.
    """
    stmt = delete(table).where(table.c.id == id)
    conn = engine.connect()
    try:
        # Read rowcount while the connection is still open.
        return conn.execute(stmt).rowcount >= 1
    finally:
        # Release the connection even when the statement raises.
        conn.close()
def _delete_all(self, table):
    """Delete every row of ``table``.

    Args:
        table: Table object to empty.

    Returns:
        True when the statement reports at least one affected row, False
        otherwise (for example when the table was already empty).
    """
    stmt = delete(table)
    conn = engine.connect()
    try:
        # Read rowcount while the connection is still open.
        return conn.execute(stmt).rowcount >= 1
    finally:
        # Release the connection even when the statement raises.
        conn.close()