def work_in(self, job):
    """Run a single job against the master cache and yield its summary.

    job -- dict of JobDescription keyword arguments; 'data' is an
        iterable of filenames and 'static_file' a single filename,
        both resolved relative to the configured cache directory.

    Yields one dict with the final timestep of the run's time,
    cloud-index and global-radiation series as plain Python lists.
    """
    t_job = deepcopy(job)
    cache_root = self.config['master']['cache']
    # Materialize the cached paths as a list: in Python 3 a bare
    # ``map`` object is a one-shot iterator, which would be exhausted
    # after a single pass inside JobDescription.
    t_job['data'] = [cache_root + f for f in job['data']]
    t_job['static_file'] = cache_root + job['static_file']
    t_job['product'] = None
    descriptor = JobDescription(**t_job)
    elapsed, output = descriptor.run()
    result = {
        "time": output.time[-1].tolist(),
        "cloudindex": output.cloudindex[-1, :].tolist(),
        "globalindex": output.globalradiation[-1, :].tolist(),
    }
    yield result
def save_job_descriptions(self, job_descriptions):
    """
    saves multiple job descriptions at a time

    job_descriptions - type -> list of dicts -
        {'jd_id', 'jd_title', 'jd_html', 'jd_sentences'}
    """
    if not job_descriptions:
        raise ServiceException('job descriptions are required')
    # Build one unsaved document per input dict, copying only the
    # known fields; missing keys default to None via dict.get.
    fields = ('jd_id', 'jd_title', 'jd_html', 'jd_sentences')
    jd_insts = [
        JobDescription(**{field: jd.get(field) for field in fields})
        for jd in job_descriptions
    ]
    # Keep only the documents that pass validation.
    validated_jds = [
        validated
        for validated in (self._validate_jd(inst) for inst in jd_insts)
        if validated is not None
    ]
    if validated_jds:
        JobDescription.objects.insert(validated_jds)
    return {'inserted_docs': len(validated_jds)}
def test_main(self):
    # End-to-end run of the heliosat algorithm over the mock GOES-13
    # netCDF band-01 files, timing the run and extrapolating to a
    # full-size job from the output shape.
    config = {
        'algorithm': 'heliosat',
        'data': 'mock_data/goes13.2015.*.BAND_01.nc',
        'temporal_cache': 'temporal_cache',
        'product': 'products/estimated',
        'tile_cut': self.tile_cut
    }
    job = JobDescription(**config)
    begin = datetime.now()
    job.run()
    end = datetime.now()
    # verify_output checks the produced files and returns the result
    # shape (images, rows, cols).
    shape = self.verify_output()
    elapsed = (end - begin).total_seconds()
    # Scale the measured wall-clock time by image-count and spatial
    # ratios. The constants 2245/3515 are presumably the full-grid
    # dimensions and 30*14*2 the full image count — TODO confirm.
    image_ratio = (30. * 14 * 2 / shape[0])
    scale_shapes = (2245. / shape[1]) * (3515. / shape[2]) * (image_ratio)
    estimated = elapsed * scale_shapes / 3600.
    # NOTE(review): Python 2 print statements — this block predates py3.
    print "Scaling total time to %.2f hours." % estimated
    print "Efficiency achieved: %.2f%%" % (3.5 / estimated * 100.)
def test_with_loaded_files(self):
    """Run heliosat over pre-loaded cache objects and report how the
    measured time scales to a full-sized multi-core run."""
    files = JobDescription.filter_data(self.files)
    config = {
        'algorithm': 'heliosat',
        'static_file': StaticCache('static.nc', files, self.tile_cut),
        'data': Cache(files, tile_cut=self.tile_cut),
        'product': None,
        'tile_cut': self.tile_cut,
        'hard': 'gpu',
    }
    intern_elapsed, output = JobDescription(**config).run()
    # verify_output checks the result and returns its shape
    # (images, rows, cols).
    shape = self.verify_output(files, output, config)
    # Extrapolate: ratio of full image count to processed images,
    # times the spatial up-scaling, divided across the core budget.
    image_ratio = (15. * 12. * 2. / shape[0])
    spatial_ratio = (2260. / shape[1]) * (4360. / shape[2])
    scale_shapes = spatial_ratio * (image_ratio)
    cores = 24. * 7.
    intern_estimated = intern_elapsed * (scale_shapes / cores) / 3600.
    print("Scaling intern time to {:.2f} hours.".format(intern_estimated))
    print("Needed efficiency achieved: {:.2f}%".format(
        0.5 / intern_estimated * 100.))
def test_main(self):
    # Full run of heliosat over the mock GOES-13 netCDF files; then
    # scales the measured wall-clock time by spatial and temporal
    # ratios to estimate full-dataset processing cost.
    config = {
        'algorithm': 'heliosat',
        'data': 'mock_data/goes13.2015.*.BAND_01.nc',
        'temporal_cache': 'temporal_cache',
        'product': 'products/estimated'
    }
    job = JobDescription(**config)
    begin = datetime.now()
    job.run()
    # heliosat.workwith(**config)
    end = datetime.now()
    self.verify_output()
    elapsed = (end - begin).total_seconds()
    # Time span actually covered by the mock files, derived from the
    # earliest and latest filenames.
    first, last = min(self.files), max(self.files)
    to_dt = helpers.to_datetime
    processed = (to_dt(last) - to_dt(first)).total_seconds()
    processed_days = processed / 3600. / 24
    # Scale by spatial ratios (2245x3515 is presumably the full grid
    # vs the 86x180 mock tile — TODO confirm) and by a 30-day target
    # period over the days actually processed.
    scale_shapes = (2245. / 86) * (3515. / 180) * (30. / processed_days)
    estimated = elapsed * scale_shapes / 3600.
    # NOTE(review): Python 2 print statements — this block predates py3.
    print "Scaling total time to %.2f hours." % estimated
    print "Efficiency achieved: %.2f%%" % (3.5 / estimated * 100.)
def get_job_descriptions_for_search(self, title, page_number, tagged=False):
    """
    get job descriptions for search

    title -- user-supplied text matched case-insensitively against
        jd_title (treated as a literal substring, not a regex)
    page_number -- 1-based page index; RESULTS_PER_PAGE rows per page
    tagged -- restrict to jd_ids with the given tagging status

    Returns ([{'jd_title', 'jd_id'}, ...] for the requested page,
    total match count).
    """
    import re

    skip = (page_number - 1) * RESULTS_PER_PAGE
    limit = RESULTS_PER_PAGE
    jd_ids = self.get_jd_ids_based_on_status(tagged=tagged)
    # Escape the user-supplied title so regex metacharacters (e.g. a
    # search for "C++") match literally instead of raising a pattern
    # error or matching unintended documents.
    jds_search = JobDescription.objects(
        jd_id__in=jd_ids,
        jd_title={'$regex': f'(?i){re.escape(title)}'}
    )
    jds_search_paginate = jds_search.skip(skip).limit(limit)
    return [{'jd_title': jd.jd_title, 'jd_id': jd.jd_id}
            for jd in jds_search_paginate], \
        jds_search.count()