def harvest_job_create(context, data_dict):
    '''
    Creates a new harvest job for a harvest source

    A job is only created when the source exists, is active and
    ``_check_for_existing_jobs`` reports no existing job for it.

    :param source_id: identifier of the harvest source, passed to
        ``HarvestSource.get`` (required; a missing key raises ``KeyError``)

    :returns: the saved job as returned by ``harvest_job_dictize``

    :raises NotFound: if no harvest source matches ``source_id``
    :raises Exception: if the harvest source is not active
    :raises HarvestJobExists: if a job already exists for the source
    '''
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        # Logger.warn is a deprecated alias; warning() logs at the same level
        log.warning('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warning(
            'Harvest job cannot be created for inactive source %s',
            source_id)
        raise Exception('Can not create jobs on inactive sources')

    # Check if there already is an unrun or currently running job for this
    # source
    exists = _check_for_existing_jobs(context, source_id)
    if exists:
        log.warning(
            'There is already an unrun job %r for this source %s',
            exists, source_id)
        raise HarvestJobExists('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source
    job.save()

    log.info('Harvest job saved %s', job.id)
    return harvest_job_dictize(job, context)
def harvest_source_show_status(context, data_dict):
    '''
    Returns a status report for a harvest source

    Given a particular source, returns a dictionary containing information
    about the source jobs, datasets created, errors, etc.
    Note that this information is already included on the output of
    harvest_source_show, under the 'status' field.

    :param id: the id or name of the harvest source
    :type id: string

    :returns: a dictionary with the keys ``job_count``, ``last_job`` and
        ``total_datasets``
    :rtype: dictionary

    :raises ObjectNotFound: if the harvest source does not exist
    '''
    p.toolkit.check_access('harvest_source_show_status', context, data_dict)

    model = context.get('model')

    source = harvest_model.HarvestSource.get(data_dict['id'])
    if not source:
        raise p.toolkit.ObjectNotFound(
            'Harvest source {0} does not exist'.format(data_dict['id']))

    out = {
        'job_count': 0,
        'last_job': None,
        'total_datasets': 0,
    }

    # Let the database count the jobs instead of loading every job object
    # just to measure the length of the resulting list
    job_count = harvest_model.HarvestJob.filter(source=source).count()
    if not job_count:
        return out

    out['job_count'] = job_count

    # Get the most recent job
    last_job = harvest_model.HarvestJob.filter(source=source) \
        .order_by(harvest_model.HarvestJob.created.desc()).first()

    # Guard kept from the original: the lookup can still come back empty
    if not last_job:
        return out

    out['last_job'] = harvest_job_dictize(last_job, context)

    # Overall statistics: active, non-private packages linked to a current
    # harvest object of this source
    packages = model.Session.query(model.Package) \
        .join(harvest_model.HarvestObject) \
        .filter(harvest_model.HarvestObject.harvest_source_id == source.id) \
        .filter(harvest_model.HarvestObject.current) \
        .filter(model.Package.state == u'active') \
        .filter(model.Package.private == False)  # noqa: E712 (column comparison)

    out['total_datasets'] = packages.count()

    return out
def harvest_job_show(context, data_dict):
    '''
    Returns a single harvest job in dictized form

    :param id: value passed to ``HarvestJob.get`` to look the job up
    :param attr: optional, forwarded to ``HarvestJob.get`` as ``attr``
        (defaults to None)

    :returns: the output of ``harvest_job_dictize`` for the job found

    :raises NotFound: if ``HarvestJob.get`` does not return a job
    '''
    check_access('harvest_job_show', context, data_dict)

    job_id = data_dict.get('id')
    lookup_attr = data_dict.get('attr')

    job = HarvestJob.get(job_id, attr=lookup_attr)
    if job:
        return harvest_job_dictize(job, context)

    raise NotFound
def harvest_job_list(context, data_dict):
    '''
    Returns the harvest jobs, most recently created first

    Both filters are optional; a missing or falsy value disables the filter.

    :param source_id: only return jobs whose ``source_id`` equals this value
    :param status: only return jobs whose ``status`` equals this value

    :returns: one ``harvest_job_dictize`` result per matching job
    :rtype: list
    '''
    check_access('harvest_job_list', context, data_dict)

    session = context['session']

    source_id = data_dict.get('source_id', False)
    status = data_dict.get('status', False)

    query = session.query(HarvestJob)

    if source_id:
        query = query.filter(HarvestJob.source_id == source_id)

    if status:
        query = query.filter(HarvestJob.status == status)

    query = query.order_by(HarvestJob.created.desc())

    jobs = query.all()

    # Set on the shared context before dictizing; presumably read by
    # harvest_job_dictize to skip the error summary -- TODO confirm
    context['return_error_summary'] = False
    return [harvest_job_dictize(job, context) for job in jobs]