def main(args): """ Main entry point """ query_set = DataFile.objects.filter(institute__short_name='MPI-M', experiment__short_name='spinup-1950', version='v20171003') logger.debug('{} files found'.format(query_set.count())) directories_found = [] for df in query_set: if df.online: try: os.remove(os.path.join(df.directory, df.name)) except OSError as exc: logger.error(str(exc)) sys.exit(1) else: if df.directory not in directories_found: directories_found.append(df.directory) df.online = False df.directory = None df.save() for directory in directories_found: if not os.listdir(directory): delete_drs_dir(directory) logger.debug('{} directories removed'.format(len(directories_found))) replace_files(query_set)
def main(): """ Main entry point """ dreqs = DataRequest.objects.filter( institute__short_name='CMCC', variable_request__cmor_name__in=['tasmax', 'tasmin'], datafile__isnull=False).distinct() num_dreqs = dreqs.count() expected_dreqs = 40 if num_dreqs != expected_dreqs: logger.error(f'Found {num_dreqs} but was expecting {expected_dreqs}.') sys.exit(1) daniele = User.objects.get(username='******') long_txt = ( "CMCC tasmax and tasmin contain errors and must be withdrawn. Please" "see https://errata.es-doc.org/static/view.html?uid=9b40a054-21a7-5ae7" "-a3eb-8c373c5adddc.") tas_issue, _created = DataIssue.objects.get_or_create(issue=long_txt, reporter=daniele) for dreq in dreqs: logger.info(dreq) tas_issue.data_file.add(*dreq.datafile_set.all()) delete_files(dreq.datafile_set.all(), BASE_OUTPUT_DIR, skip_badc=True) replace_files(dreq.datafile_set.all())
def main(args): """ Main entry point """ tas_files = DataFile.objects.filter( climate_model__short_name__startswith='HadGEM3-GC31', variable_request__table_name='Amon', variable_request__cmor_name__in=['tasmax', 'tasmin']) epfluxdiv_files = DataFile.objects.filter( climate_model__short_name__startswith='HadGEM3-GC31', variable_request__cmor_name='epfluxdiv') files = tas_files | epfluxdiv_files logger.debug('{} affected files found'.format(files.count())) delete_files(files) # some files have already been replaced and must have their # incoming_directory updated to maintain uniqueness. for df in files: rfs = ReplacedFile.objects.filter( name=df.name, incoming_directory=df.incoming_directory) if rfs.count() == 0: continue for rf in rfs: rf.incoming_directory = rf.incoming_directory + '_01' rf.save() replace_files(files)
def test_duplicate_files(self):
    copy_file = DataFile.objects.get(name='file_one.nc')
    orig_id = copy_file.id
    copy_file.id = None
    copy_file.save()

    orig_file = DataFile.objects.filter(id=orig_id)
    replace_files(orig_file)

    copy_file = DataFile.objects.filter(name='file_one.nc')
    replace_files(copy_file)

    num_files = ReplacedFile.objects.filter(name='file_one.nc').count()
    self.assertEqual(num_files, 2)

    num_files = ReplacedFile.objects.filter(
        name='file_one.nc',
        incoming_directory='/gws/MOHC/MY-MODEL/incoming/v12345678'
    ).count()
    self.assertEqual(num_files, 1)

    num_files = ReplacedFile.objects.filter(
        name='file_one.nc',
        incoming_directory='/gws/MOHC/MY-MODEL/incoming/v12345678_1'
    ).count()
    self.assertEqual(num_files, 1)

def main(): """ Main entry point """ fixable = DataRequest.objects.filter( climate_model__short_name__contains='EC-Earth3P', experiment__short_name='highresSST-present', variable_request__cmor_name__regex='r[ls]u[ts]*', rip_code='r1i1p1f1', datafile__isnull=False ).distinct() broken = DataRequest.objects.filter( climate_model__short_name__contains='EC-Earth3P', experiment__short_name='highresSST-present', variable_request__cmor_name__in=['rsdscs', 'rsuscs'], rip_code='r1i1p1f1', datafile__isnull=False ).distinct() dreqs = broken | fixable num_dreqs = dreqs.distinct().count() if num_dreqs != 47: logger.error(f'{num_dreqs} affected data requests found') sys.exit(1) for dreq in dreqs: delete_files(dreq.datafile_set.all(), BASE_OUTPUT_DIR, skip_badc=True) replace_files(dreq.datafile_set.all())
def main(args): """ Main entry point """ js = User.objects.get(username='******') hist_txt = ( 'The calculation of evspsblsoi in these datasets is incorrect; it ' 'does not include the contribution from sublimation, but does include ' 'transpiration. These datasets will be withdrawn shortly and will be ' 'replaced with corrected versions in due course. More information is ' 'available at https://errata.es-doc.org/static/view.html?uid=8113fdfb-' '1a62-38d0-932b-2bce41319eea') prim1hr_issue, _created = DataIssue.objects.get_or_create(issue=hist_txt, reporter=js) affected_files = DataFile.objects.filter( institute__short_name__in=['MOHC', 'NERC'], variable_request__table_name='Lmon', variable_request__cmor_name='evspsblsoi') logger.debug('{} affected files found'.format(affected_files.count())) prim1hr_issue.data_file.add(*affected_files) delete_files(affected_files) replace_files(affected_files)
def main(args): """ Main entry point """ jon = User.objects.get(username='******') long_txt = ( "A bug has been discovered in HadGEM3's implementation of COSP. All " "cltcalipso and parasolRefl variables from all versions of HadGEM3 " "are affected and the data should not be used. These variables have " "been removed from PRIMAVERA and from ESGF.") cosp_issue, _created = DataIssue.objects.get_or_create(issue=long_txt, reporter=jon) affected_files = DataFile.objects.filter( climate_model__short_name__startswith='HadGEM3', variable_request__cmor_name__in=['cltcalipso', 'parasolRefl']) num_files = affected_files.count() logger.debug(f'{num_files} affected files found') cosp_issue.data_file.add(*affected_files) delete_files(affected_files, '/gws/nopw/j04/primavera5/stream1', skip_badc=True) replace_files(affected_files)
def main(args): """ Main entry point """ replaced_files = DataFile.objects.filter( climate_model__short_name='AWI-CM-1-0-LR', experiment__short_name__in=[ 'spinup-1950', 'control-1950', 'hist-1950' ], version='v20171119', data_submission__incoming_directory='/group_workspaces/jasmin2/' 'primavera2/upload/AWI/' 'regridded360x180/core') logger.debug('{} low res files will be replaced'.format( replaced_files.count())) replace_files(replaced_files) remove_issue_files = DataFile.objects.filter( climate_model__short_name='AWI-CM-1-0-LR', experiment__short_name__in=[ 'spinup-1950', 'control-1950', 'hist-1950' ], version='v20171119', data_submission__incoming_directory='/group_workspaces/jasmin2/' 'primavera2/upload/AWI/' 'regridded360x180/core_nogrid') logger.debug('Issue 4 will be removed from {} files'.format( remove_issue_files.count())) awi_issue = DataIssue.objects.get(id=4) awi_issue.data_file.remove(*remove_issue_files)
def main(args): """ Main entry point """ dfs = DataFile.objects.filter( climate_model__short_name='EC-Earth3P', experiment__short_name='control-1950', rip_code='r1i1p2f1', variable_request__table_name='3hr', variable_request__cmor_name='ps' ) num_files = dfs.count() if num_files != 1200: logger.error(f'{num_files} found but was expecting 1200') sys.exit(1) delete_files(dfs, '/gws/nopw/j04/primavera5/stream1') replace_files(dfs) dfs = DataFile.objects.filter( climate_model__short_name='EC-Earth3P-HR', experiment__short_name='highres-future', rip_code='r1i1p1f1', variable_request__table_name='3hr', variable_request__cmor_name='ps' ) num_files = dfs.count() if num_files != 432: logger.error(f'{num_files} found but was expecting 432') sys.exit(1) delete_files(dfs, '/gws/nopw/j04/primavera5/stream1') replace_files(dfs)
def test_all_files(self):
    self.assertEqual(3, DataFile.objects.count())
    one_file = DataFile.objects.all()
    replace_files(one_file)
    self.assertEqual(0, DataFile.objects.count())
    self.assertEqual(5, ReplacedFile.objects.count())

def test_one_file(self):
    self.assertEqual(3, DataFile.objects.count())
    one_file = DataFile.objects.filter(name='file_one.nc')
    replace_files(one_file)
    self.assertEqual(2, DataFile.objects.count())
    self.assertEqual(3, ReplacedFile.objects.count())

def test_checksum_copied(self):
    first_file = DataFile.objects.get(name='file_one.nc')
    Checksum.objects.create(checksum_value='1234',
                            checksum_type='ADLER32',
                            data_file=first_file)
    one_file = DataFile.objects.filter(name='file_one.nc')
    replace_files(one_file)
    old_file = ReplacedFile.objects.get(name='file_one.nc')
    self.assertEqual('1234', old_file.checksum_value)

def main(args): """ Main entry point """ affected_files = DataFile.objects.filter( institute__short_name='CMCC', experiment__short_name__in=['control-1950', 'hist-1950'], variable_request__table_name='Lmon', variable_request__cmor_name='mrfso') logger.debug('{} files will be replaced'.format(affected_files.count())) replace_files(affected_files)
def main(args): """ Main entry point """ orig_amip = DataFile.objects.filter( institute__short_name='CNRM-CERFACS', experiment__short_name='highresSST-present', version__in=['v20170614', 'v20170622'], rip_code='r1i1p1f1' ) logger.debug('{} affected files found'.format(orig_amip.count())) delete_files(orig_amip) replace_files(orig_amip)
def main(args): """ Main entry point """ di = DataIssue.objects.get(id=23) for ds_num in di.data_file.values_list('data_submission', flat=True).distinct(): ds = DataSubmission.objects.get(id=ds_num) affected_files = ds.datafile_set.all() delete_files(affected_files, '/gws/nopw/j04/primavera5/stream1') replace_files(affected_files)
def main(args): """ Main entry point """ var_tables = [ 'evspsbl_Amon', 'evspsbl_Primday', 'evspsbl_Prim3hr', 'hfls_Amon', 'hfls_day', 'hfls_3hr', 'hfss_Amon', 'hfss_day', 'hfss_3hr', 'tso_3hr', ] models = ['EC-Earth3-HR', 'EC-Earth3'] experiment = 'highresSST-present' for var_table in var_tables: var, __, table = var_table.partition('_') for model in models: query_set = DataFile.objects.filter( data_request__climate_model__short_name=model, data_request__experiment__short_name=experiment, variable_request__table_name=table, variable_request__cmor_name=var ) logger.debug('{} {} {} {}'.format(model, table, var, query_set.count())) directories_found = [] for df in query_set: if df.online: try: os.remove(os.path.join(df.directory, df.name)) except OSError as exc: logger.error(str(exc)) sys.exit(1) else: if df.directory not in directories_found: directories_found.append(df.directory) df.online = False df.directory = None df.save() for directory in directories_found: if not os.listdir(directory): delete_drs_dir(directory) replace_files(query_set)
def main(args): """ Main entry point """ new_files = list_files(NEW_SUBMISSION) logger.debug(f'{len(new_files)} files found in the submission') dfs = DataFile.objects.filter(name__in=map(os.path.basename, new_files)) logger.debug(f'{dfs.count()} files found in the DMT') delete_files(dfs, '/gws/nopw/j04/primavera5/stream1') replace_files(dfs)
def main(args): """ Main entry point """ ocean_files = DataSubmission.objects.get( incoming_directory= '/group_workspaces/jasmin2/primavera5/upload/CMCC/CMCC-VHR4/20181002') if ocean_files.datafile_set.count() != 4646: logger.error('{} files found, expecting 4646'.format( ocean_files.datafile_set.count())) sys.exit(1) delete_files(ocean_files.datafile_set.all()) replace_files(ocean_files.datafile_set.all())
def main(args): """ Main entry point """ affected_files = DataFile.objects.filter( climate_model__short_name='CMCC-CM2-VHR4', experiment__short_name='highresSST-present', version='v20170927', variable_request__cmor_name__in=[ 'hur', 'hus', 'ta', 'ua', 'va', 'wap', 'zg' ], name__contains='20030201-20030228') logger.debug('{} files will be replaced'.format(affected_files.count())) replace_files(affected_files)
def main(args): """ Main entry point """ affected_files = DataFile.objects.filter( data_request__institute__short_name='ECMWF', data_request__climate_model__short_name='ECMWF-IFS-LR', experiment__short_name__in=[ 'control-1950', 'hist-1950', 'spinup-1950' ], version='v20170915') logger.debug('{} affected files found'.format(affected_files.count())) if not args.test: replace_files(affected_files)
def main(args): """ Main entry point """ amip_2012 = DataFile.objects.filter( climate_model__short_name='HadGEM3-GC31-HM', rip_code='r1i3p1f1', institute__short_name='MOHC', experiment__short_name='highresSST-present', name__contains='2012' ) logger.debug('HM r1i3p1f1 2012 {} affected files found'. format(amip_2012.count())) delete_files(amip_2012) replace_files(amip_2012)
def main(args): """ Main entry point """ dreqs = DataRequest.objects.filter( climate_model__short_name__startswith='HadGEM3', variable_request__cmor_name='epfz', datafile__isnull=False).distinct() logger.debug(f'{dreqs.count()} data requests found') for dreq in dreqs.order_by('climate_model__short_name', 'experiment__short_name', 'rip_code'): logger.debug(str(dreq)) delete_files(dreq.datafile_set.all(), BASE_OUTPUT_DIR, skip_badc=True) replace_files(dreq.datafile_set.all())
def main(args): """ Main entry point """ dfs = DataFile.objects.filter(climate_model__short_name='EC-Earth3P', experiment__short_name='highresSST-present', rip_code='r2i1p1f1', name__contains='_195') num_files = dfs.count() if num_files != 1920: logger.error(f'{num_files} found but was expecting 1920') sys.exit(1) delete_files(dfs, '/gws/nopw/j04/primavera5/stream1') replace_files(dfs)
def main(args): """ Main entry point """ dreqs = DataRequest.objects.filter( climate_model__short_name='EC-Earth3P', experiment__short_name__in=['primWP5-amv-pos', 'primWP5-amv-neg'], variable_request__cmor_name__in=['rsus', 'rlus']).distinct() num_dreqs = dreqs.count() if num_dreqs != 100: logger.error(f'{num_dreqs} affected data requests found') sys.exit(1) for dreq in dreqs: replace_files(dreq.datafile_set.all())
def main(args): """ Main entry point """ dss = DataSubmission.objects.filter(incoming_directory__in=[ '/gws/nopw/j04/primavera4/upload/EC-Earth-Consortium/EC-Earth3P-HR/' 'control-1950/r3i1p2f1/1950-1952/SeaIce', '/gws/nopw/j04/primavera4/upload/EC-Earth-Consortium/EC-Earth3P-HR/' 'control-1950/r3i1p2f1/1950-1952/Rest' ]) base_output_dir = Settings.get_solo().base_output_dir for ds in dss: delete_files(ds.datafile_set.all(), base_output_dir) replace_files(ds.datafile_set.all())
def main(args): """ Main entry point """ tables_to_delete = ['SIday', 'PrimSIday', 'Omon', 'PrimOmon'] deletion_files = DataFile.objects.filter( data_request__climate_model__short_name='HadGEM3-GC31-MM', data_request__experiment__short_name='spinup-1950', variable_request__table_name__in=tables_to_delete, name__contains='1979' ).exclude(name__contains='siconc_SIday') logger.debug('{} files'.format(deletion_files.count())) for df in deletion_files: logger.debug(df.name) replace_files(deletion_files)
def test_limit_on_inc_dir(self):
    copy_file = DataFile.objects.get(name='file_one.nc')
    orig_id = copy_file.id
    copy_file.id = None
    copy_file.save()

    orig_file = DataFile.objects.filter(id=orig_id)
    replace_files(orig_file)

    rep_file = ReplacedFile.objects.get(name='file_one.nc')
    inc_dir = rep_file.incoming_directory
    for n in range(1, 5):
        rep_file.id = None
        rep_file.incoming_directory = f'{inc_dir}_{n}'
        rep_file.save()

    copy_file = DataFile.objects.filter(name='file_one.nc')
    self.assertRaises(ValueError, replace_files, copy_file)

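# test_duplicate_files and test_limit_on_inc_dir exercise how replace_files()
# is expected to keep (name, incoming_directory) unique in ReplacedFile: the
# first duplicate receives a '_1' suffix, and once a handful of suffixes are
# taken a ValueError is raised. A minimal sketch of that suffixing logic is
# below; `_unique_incoming_directory` and the `max_suffixes=4` limit are
# assumptions made for illustration, not the DMT's actual implementation.
def _unique_incoming_directory(name, incoming_directory, max_suffixes=4):
    """
    Return an incoming_directory that is unique for `name`, appending
    '_1', '_2', ... as needed. Hypothetical helper illustrating the
    behaviour the tests above expect from replace_files().
    """
    if not ReplacedFile.objects.filter(
            name=name, incoming_directory=incoming_directory).exists():
        return incoming_directory
    for n in range(1, max_suffixes + 1):
        candidate = f'{incoming_directory}_{n}'
        if not ReplacedFile.objects.filter(
                name=name, incoming_directory=candidate).exists():
            return candidate
    raise ValueError(f'Cannot find a unique incoming_directory for {name}')
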
def main(args): """ Main entry point """ jon = User.objects.get(username='******') issue_txt = ( 'The units in this data are actually m s-1 and need to be multiplied ' 'by 1000 to convert to kg m-2 s-1. These files will be replaced with ' 'this correction applied as soon as possible.') cmcc_issue = DataIssue.objects.create(issue=issue_txt, reporter=jon) affected_files = DataFile.objects.filter( climate_model__short_name__in=['CMCC-CM2-HR4', 'CMCC-CM2-VHR4'], experiment__short_name='highresSST-present', version__in=['v20170706', 'v20170927'], variable_request__cmor_name__in=['pr', 'prc', 'prsn']) logger.debug('{} files will be replaced'.format(affected_files.count())) replace_files(affected_files)
def main(args): """ Main entry point """ query_set = DataFile.objects.filter( institute__short_name='MPI-M', climate_model__short_name='MPIESM-1-2-HR', experiment__short_name__in=['control-1950', 'hist-1950'], version='v20171003' ) logger.debug('{} files found'.format(query_set.count())) directories_found = [] for df in query_set: if df.online: try: os.remove(os.path.join(df.directory, df.name)) except OSError as exc: logger.error(str(exc)) sys.exit(1) else: if df.directory not in directories_found: directories_found.append(df.directory) df.online = False df.directory = None df.save() for directory in directories_found: if not os.listdir(directory): delete_drs_dir(directory) logger.debug('{} directories removed'.format(len(directories_found))) replace_files(query_set) num_deleted = DataSubmission.objects.filter( incoming_directory__in=['/group_workspaces/jasmin2/primavera4/upload/' 'MPI-M/MPIESM-1-2-XR/incoming/20171027', '/group_workspaces/jasmin2/primavera4/upload/' 'MPI-M/MPIESM-1-2-XR/incoming/20171019', '/group_workspaces/jasmin2/primavera4/upload/' 'MPI-M/MPIESM-1-2-XR/incoming/20171010'] ).delete() logger.debug('{} DataSubmissions deleted.'.format(num_deleted))
def main(args): """ Main entry point """ o3 = DataFile.objects.filter(institute__short_name='CNRM-CERFACS', variable_request__table_name='Amon', variable_request__cmor_name='o3') cf = DataFile.objects.filter( institute__short_name='CNRM-CERFACS', variable_request__table_name__in=['CFmon', 'CFday'], variable_request__cmor_name__in=['albisccp', 'pctisccp']) affected_files = o3 | cf logger.debug('{} affected files found'.format(affected_files.count())) delete_files(affected_files) replace_files(affected_files)