def db_finalize(self):
    """Write the end-of-crawl bookkeeping for this resource to the database.

    Steps, in order:

    1. stamp ``date_crawl_end`` on the resource row;
    2. delete the file rows currently stored under the resource id;
    3. re-point the rows stored under the ``'-<id>'`` placeholder id to the
       real resource id;
    4. store ``self.resource.db_files_inserts`` as the resource meta's
       ``file_count``;
    5. when the ``'recursive_foldersizes'`` option is set, compute the
       recursive folder sizes;
    6. trigger the file distribution calculation and toggle the
       busy-crawling state.

    Generator yielding the results of ``tx_pool.runOperation`` --
    presumably Deferreds driven by Twisted's ``inlineCallbacks``;
    NOTE(review): confirm the decorator on this method.
    """
    resource_id = self.data['resource_id']
    run_operation = tx_pool.runOperation

    # 1. The crawl is over: record when it ended.
    stamp_crawl_end = (
        Resources.update()
        .where(Resources.c.id == resource_id)
        .values(date_crawl_end=datetime.now())
    )
    yield run_operation(stamp_crawl_end)

    # 2. Drop whatever file rows are stored under the real id ...
    drop_current_rows = Files.delete().where(
        Files.c.resource_id == resource_id
    )
    yield run_operation(drop_current_rows)

    # 3. ... and move the rows filed under the '-<id>' placeholder onto it.
    placeholder_id = '-%s' % str(resource_id)
    adopt_placeholder_rows = (
        Files.update()
        .where(Files.c.resource_id == placeholder_id)
        .values(resource_id=resource_id)
    )
    yield run_operation(adopt_placeholder_rows)

    # 4. Persist the number of inserted files on the resource meta row.
    store_file_count = (
        ResourceMeta.update()
        .where(ResourceMeta.c.id == self.data['resource_meta_id'])
        .values(file_count=self.resource.db_files_inserts)
    )
    yield run_operation(store_file_count)

    # 5. Optional and potentially slow: per-directory recursive sizes.
    if 'recursive_foldersizes' in self.data['options']:
        yield self.db_calculate_foldersizes()

    # 6. NOTE(review): neither call below is yielded, so this generator does
    # not wait on whatever they return -- confirm that is intended.
    self.db_calculate_filedistribution(resource_id=resource_id)
    self.db_busy_crawling_toggle()
def db_calculate_foldersizes(self):
    """Store on every directory row the summed size of everything below it.

    Walks the resource's tree breadth-first, starting at ``'/'``. For each
    directory entry found, sums ``file_size`` over all rows of this resource
    whose ``file_path`` lies under that directory and writes the total to
    the directory's own row.

    Generator yielding the results of ``tx_pool.runQuery`` /
    ``tx_pool.runOperation`` -- presumably Deferreds driven by Twisted's
    ``inlineCallbacks``; NOTE(review): confirm the decorator on this method.
    """
    # Function-scope import: the file's import block is not visible here.
    from collections import deque

    resource_id = self.data['resource_id']
    log.msg("[%s] Recursively calculating folder sizes" % resource_id)

    # FIFO queue of directory paths still to visit (deque: O(1) popleft).
    pending = deque(['/'])
    while pending:
        file_path = pending.popleft()

        # Direct children of the directory being visited.
        listing_query = (
            select([Files.c.file_name, Files.c.file_path,
                    Files.c.file_size, Files.c.file_isdir])
            .where(Files.c.resource_id == resource_id)
            .where(Files.c.file_path == file_path))
        entries = yield tx_pool.runQuery(listing_query)

        for file_name in [row.file_name for row in entries if row.file_isdir]:
            # Rows inside this directory carry this exact path prefix,
            # including the trailing slash.
            dir_path = '%s%s/' % (file_path, file_name)

            # Match on the slash-terminated prefix. Without the slash,
            # '/foo%' would also count rows under siblings like '/foobar/'.
            # NOTE(review): '%' and '_' occurring in names are still treated
            # as LIKE wildcards; escape them if such names can occur.
            total_query = (
                select([func.sum(Files.c.file_size)])
                .where(Files.c.resource_id == resource_id)
                .where(Files.c.file_path.like(dir_path + '%')))
            result = yield tx_pool.runQuery(total_query)

            # SUM() over no rows yields NULL; treat that (and 0) as size 0.
            # 'sum_1' is the label the result row exposes for the aggregate.
            total = result[0].sum_1
            size = int(total) if total else 0

            update_query = (
                Files.update()
                .where(Files.c.resource_id == resource_id)
                .where(Files.c.file_path == file_path)
                .where(Files.c.file_name == file_name)
                # '== True' is deliberate: it builds a SQL expression.
                .where(Files.c.file_isdir == True)  # noqa: E712
                .values(file_size=size))
            yield tx_pool.runOperation(update_query)

            # Descend into this directory on a later iteration.
            pending.append(dir_path)

    log.msg("[%s] Recursively calculating folder sizes DONE" % resource_id)