def main(args):
    """
    Main entry point
    """
    query_set = DataFile.objects.filter(institute__short_name='MPI-M',
                                        experiment__short_name='spinup-1950',
                                        version='v20171003')
    logger.debug('{} files found'.format(query_set.count()))

    directories_found = []
    for df in query_set:
        if df.online:
            try:
                os.remove(os.path.join(df.directory, df.name))
            except OSError as exc:
                logger.error(str(exc))
                sys.exit(1)
            else:
                if df.directory not in directories_found:
                    directories_found.append(df.directory)
            df.online = False
            df.directory = None
            df.save()

    dirs_removed = 0
    for directory in directories_found:
        if not os.listdir(directory):
            delete_drs_dir(directory)
            dirs_removed += 1
    logger.debug('{} directories removed'.format(dirs_removed))

    replace_files(query_set)
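
All of the examples on this page assume the usual imports (os, sys, a module-level logger and the DMT's Django models) and a delete_drs_dir helper that is never shown. A minimal sketch of such a helper, assuming it removes an empty leaf directory and then prunes any parents that are left empty; the real DMT function may differ:

import logging
import os

logger = logging.getLogger(__name__)


def delete_drs_dir(directory):
    """
    Remove `directory` and then walk up the tree, removing each parent
    directory that has been left empty. A sketch only.
    """
    try:
        os.rmdir(directory)
    except OSError as exc:
        logger.error(str(exc))
        return
    parent = os.path.dirname(directory)
    while parent not in ('', os.sep) and not os.listdir(parent):
        os.rmdir(parent)
        parent = os.path.dirname(parent)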
Example #2
def main(args):
    """
    Main entry point
    """
    var_tables = [
        'evspsbl_Amon',
        'evspsbl_Primday',
        'evspsbl_Prim3hr',
        'hfls_Amon',
        'hfls_day',
        'hfls_3hr',
        'hfss_Amon',
        'hfss_day',
        'hfss_3hr',
        'tso_3hr',
    ]
    models = ['EC-Earth3-HR', 'EC-Earth3']
    experiment = 'highresSST-present'

    for var_table in var_tables:
        var, __, table = var_table.partition('_')
        for model in models:
            query_set = DataFile.objects.filter(
                data_request__climate_model__short_name=model,
                data_request__experiment__short_name=experiment,
                variable_request__table_name=table,
                variable_request__cmor_name=var
            )
            logger.debug('{} {} {} {}'.format(model, table, var,
                                              query_set.count()))

            directories_found = []
            for df in query_set:
                if df.online:
                    try:
                        os.remove(os.path.join(df.directory, df.name))
                    except OSError as exc:
                        logger.error(str(exc))
                        sys.exit(1)
                    else:
                        if df.directory not in directories_found:
                            directories_found.append(df.directory)
                    df.online = False
                    df.directory = None
                    df.save()

            for directory in directories_found:
                if not os.listdir(directory):
                    delete_drs_dir(directory)

            replace_files(query_set)
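
replace_files is another DMT helper that is not shown on this page. Judging from its use after the files have been removed from disk, it retires the DataFile records so that corrected data can be resubmitted later. A sketch only; the ReplacedFile model and its fields are hypothetical:

from django.db import transaction


def replace_files(query_set):
    """
    Retire every DataFile in `query_set` by copying each record into a
    hypothetical ReplacedFile archive table and then deleting it.
    """
    with transaction.atomic():
        for data_file in query_set:
            # ReplacedFile and these fields are assumptions
            ReplacedFile.objects.create(
                name=data_file.name,
                incoming_directory=data_file.incoming_directory,
                size=data_file.size,
            )
            data_file.delete()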
Example #3
def main(args):
    """
    Main entry point
    """
    dfs = DataFile.objects.filter(climate_model__short_name='MPI-ESM1-2-XR',
                                  experiment__short_name='highres-future',
                                  version='v20190617')

    prim_gws = '/gws/nopw/j04/primavera5/stream1'

    old_dirs = []

    for df in dfs:
        old_drs_path = construct_drs_path(df)
        df.version = 'v20190517'
        df.save()
        if df.online:
            # file itself
            gws = get_gws(df.directory)
            old_dir = df.directory
            new_dir = os.path.join(gws, construct_drs_path(df))
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            os.rename(os.path.join(df.directory, df.name),
                      os.path.join(new_dir, df.name))
            df.directory = new_dir
            df.save()
            if old_dir not in old_dirs:
                old_dirs.append(old_dir)

            # sym link
            if not is_same_gws(df.directory, prim_gws):
                old_sym_dir = os.path.join(prim_gws, old_drs_path)
                old_sym = os.path.join(old_sym_dir, df.name)
                # lexists() is needed here: after the rename the link
                # points at the file's old location and is broken, so
                # exists() would follow it and report it as absent
                if os.path.lexists(old_sym):
                    if os.path.islink(old_sym):
                        os.remove(old_sym)
                    else:
                        logger.warning(f'Not symlink as expected: {old_sym}')
                new_sym_dir = os.path.join(prim_gws, construct_drs_path(df))
                if not os.path.exists(new_sym_dir):
                    os.makedirs(new_sym_dir)
                os.symlink(os.path.join(new_dir, df.name),
                           os.path.join(new_sym_dir, df.name))
                if old_sym_dir not in old_dirs:
                    old_dirs.append(old_sym_dir)

    logger.debug(f'Removing {len(old_dirs)} old dirs')
    for old_dir in old_dirs:
        delete_drs_dir(old_dir)
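
The lexists() call above matters because, once the file has been renamed, the old link points at a path that no longer exists. os.path.exists() follows a symbolic link and so reports False for a broken one, whereas os.path.lexists() checks the link itself:

import os
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    target = os.path.join(tmp, 'target.nc')
    link = os.path.join(tmp, 'link.nc')
    open(target, 'w').close()
    os.symlink(target, link)
    os.remove(target)             # the link is now broken
    print(os.path.exists(link))   # False: follows the broken link
    print(os.path.lexists(link))  # True: checks the link itself
    print(os.path.islink(link))   # True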
Example #4
def main(args):
    """
    Main entry point
    """
    dreqs = DataRequest.objects.filter(
        institute__short_name='MPI-M',
        experiment__short_name__in=['control-1950', 'hist-1950'],
        variable_request__cmor_name='tos',
        datafile__isnull=False).distinct()

    logger.debug(f'Found {dreqs.count()} datasets')

    for dreq in dreqs:
        if dreq.esgfdataset_set.all():
            # ESGF dataset's been created...
            esgf = dreq.esgfdataset_set.first()
            if esgf.status == 'PUBLISHED':
                # ... and published so the data's in the CEDA archive
                # and symlinked from the PRIMAVERA data structure
                # All sym links will be in one directory
                set_dir = os.path.join(
                    BASE_OUTPUT_DIR,
                    construct_drs_path(dreq.datafile_set.first()))
                for df in dreq.datafile_set.all():
                    file_path = os.path.join(set_dir, df.name)
                    if not os.path.islink(file_path):
                        logger.warning(f'Expected a sym link {file_path}')
                        continue
                    try:
                        os.remove(file_path)
                    except OSError as exc:
                        logger.error(str(exc))
                    df.online = False
                    df.directory = None
                    df.save()
                delete_drs_dir(set_dir)
                logger.debug(f'Removed files for ESGFDataset {esgf}')
                esgf.status = 'CREATED'
                esgf.save()
                continue
        # The data's not been published so delete the files and their sym links
        delete_files(dreq.datafile_set.all(), BASE_OUTPUT_DIR)
        logger.debug(f'Removed files for DataRequest {dreq}')
        dreq.datafile_set.update(directory=None, online=False)

    for dreq in dreqs:
        dreq.datafile_set.update(version='v20191129')
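
construct_drs_path(df) is used throughout these examples to build a file's directory path from its database attributes, but it is never shown. A sketch of the idea, assuming a CMIP6-style DRS in which the version is the final path component; several attribute names below (project, activity_id, rip_code, grid) are assumptions:

import os


def construct_drs_path(data_file):
    """
    Build the DRS directory path for a DataFile, e.g.
    PRIMAVERA/HighResMIP/MPI-M/MPI-ESM1-2-XR/highres-future/r1i1p1f1/
    Amon/tas/gn/v20190617. A sketch only; the attribute names are
    assumed.
    """
    return os.path.join(
        data_file.project.short_name,          # assumed, e.g. PRIMAVERA
        data_file.activity_id.short_name,      # assumed, e.g. HighResMIP
        data_file.institute.short_name,
        data_file.climate_model.short_name,
        data_file.experiment.short_name,
        data_file.rip_code,                    # assumed, e.g. r1i1p1f1
        data_file.variable_request.table_name,
        data_file.variable_request.cmor_name,
        data_file.grid,                        # assumed, e.g. gn
        data_file.version
    )

Because the version is the last path component, bumping df.version and calling construct_drs_path(df) again changes only the leaf directory. That is why these examples can rename each file into the new path and then delete the old, now-empty version directory.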
Example #5
def main(args):
    """
    Main entry point
    """
    query_set = DataFile.objects.filter(
        institute__short_name='MPI-M',
        climate_model__short_name='MPIESM-1-2-HR',
        experiment__short_name__in=['control-1950', 'hist-1950'],
        version='v20171003'
    )
    logger.debug('{} files found'.format(query_set.count()))

    directories_found = []
    for df in query_set:
        if df.online:
            try:
                os.remove(os.path.join(df.directory, df.name))
            except OSError as exc:
                logger.error(str(exc))
                sys.exit(1)
            else:
                if df.directory not in directories_found:
                    directories_found.append(df.directory)
            df.online = False
            df.directory = None
            df.save()

    dirs_removed = 0
    for directory in directories_found:
        if not os.listdir(directory):
            delete_drs_dir(directory)
            dirs_removed += 1
    logger.debug('{} directories removed'.format(dirs_removed))

    replace_files(query_set)

    num_deleted, _ = DataSubmission.objects.filter(
        incoming_directory__in=['/group_workspaces/jasmin2/primavera4/upload/'
                                'MPI-M/MPIESM-1-2-XR/incoming/20171027',
                                '/group_workspaces/jasmin2/primavera4/upload/'
                                'MPI-M/MPIESM-1-2-XR/incoming/20171019',
                                '/group_workspaces/jasmin2/primavera4/upload/'
                                'MPI-M/MPIESM-1-2-XR/incoming/20171010']
    ).delete()

    logger.debug('{} DataSubmissions deleted.'.format(num_deleted))
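
The unpacking above is needed because QuerySet.delete() returns a 2-tuple: the total number of objects deleted (including any cascaded deletions) and a dictionary of per-model counts. Illustrative output only:

>>> DataSubmission.objects.filter(
...     incoming_directory__contains='MPIESM-1-2-XR').delete()
(3, {'pdata_app.DataSubmission': 3})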
Example #6
def main(args):
    """
    Main entry point
    """
    for path in ilist_files(args.top_path, ignore_symlinks=True):
        data_file = Path(path)
        try:
            django_file = DataFile.objects.get(name=data_file.name)
        except django.core.exceptions.ObjectDoesNotExist:
            logger.debug(f'Not in DMT: {path}')
            continue

        if django_file.directory.startswith('/badc'):
            # the authoritative copy is in the CEDA archive under /badc,
            # so the copy under top_path can be deleted
            if not args.dryrun:
                action = 'Deleting'
                data_file.unlink()
                delete_drs_dir(str(data_file.parent))
            else:
                action = 'Deletable'
            logger.debug(f'{action}: {path}')
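
ilist_files is assumed to be a generator yielding every file below a directory, optionally skipping symbolic links. A minimal sketch:

import os


def ilist_files(top_path, ignore_symlinks=False):
    """
    Yield the full path of every file below `top_path`. A sketch of the
    helper assumed by the example above.
    """
    for dirpath, dirnames, filenames in os.walk(top_path):
        for filename in filenames:
            full_path = os.path.join(dirpath, filename)
            if ignore_symlinks and os.path.islink(full_path):
                continue
            yield full_path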
Example #7
def delete_files(query_set):
    """
    Delete any files online from the specified queryset
    """
    directories_found = []
    for df in query_set.filter(online=True):
        try:
            os.remove(os.path.join(df.directory, df.name))
        except OSError as exc:
            logger.error(str(exc))
        else:
            if df.directory not in directories_found:
                directories_found.append(df.directory)
        df.online = False
        df.directory = None
        df.save()

    dirs_removed = 0
    for directory in directories_found:
        if not os.listdir(directory):
            delete_drs_dir(directory)
            dirs_removed += 1
    logger.debug('{} directories removed'.format(dirs_removed))
Example #8
    def _rename_file(self):
        """
        Rename the file on disk and move to its new directory. Update the link
        from the primary directory.
        """
        if not os.path.exists(self.new_directory):
            os.makedirs(self.new_directory)

        os.rename(os.path.join(self.old_directory, self.old_filename),
                  os.path.join(self.new_directory, self.new_filename))

        # check for empty directory
        if not os.listdir(self.old_directory):
            delete_drs_dir(self.old_directory)

        # Update the symbolic link if required
        if not is_same_gws(self.old_directory, BASE_OUTPUT_DIR):
            old_link_path = os.path.join(self.old_sym_link_dir,
                                         self.old_filename)
            if os.path.lexists(old_link_path):
                if not os.path.islink(old_link_path):
                    logger.error("{} exists and isn't a symbolic link.".format(
                        old_link_path))
                    raise SymLinkIsFileError(old_link_path)
                else:
                    # it is a link so remove it
                    os.remove(old_link_path)
                    # check for empty directory
                    if not os.listdir(self.old_sym_link_dir):
                        delete_drs_dir(self.old_sym_link_dir)

            new_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                        construct_drs_path(self.datafile))
            if not os.path.exists(new_link_dir):
                os.makedirs(new_link_dir)
            os.symlink(os.path.join(self.new_directory, self.new_filename),
                       os.path.join(new_link_dir, self.new_filename))
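
SymLinkIsFileError is raised above but never defined in the snippet. A minimal definition, assuming it is a plain custom exception:

class SymLinkIsFileError(Exception):
    """
    Raised when a path that should be a symbolic link turns out to be a
    regular file.
    """
    def __init__(self, path):
        super().__init__(f'{path} exists and is not a symbolic link')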
Example #9
def main(args):
    """
    Main entry point
    """
    dreqs = DataRequest.objects.filter(
        climate_model__short_name='CMCC-CM2-VHR4',
        experiment__short_name='control-1950',
        datafile__isnull=False
    ).distinct().order_by(
        'variable_request__table_name', 'variable_request__cmor_name'
    )

    num_dreqs = dreqs.count()
    logger.info(f'{num_dreqs} data requests found')

    for dreq in dreqs:
        dreq.datafile_set.update(version='v20200917')
        for df in dreq.datafile_set.filter(online=True).order_by('name'):
            old_dir = df.directory
            old_path = os.path.join(old_dir, df.name)
            if not os.path.exists(old_path):
                logger.error(f'{old_path} not found')
                continue
            new_dir = os.path.join(get_gws(df.directory),
                                   construct_drs_path(df))
            if df.directory != new_dir:
                if not os.path.exists(new_dir):
                    os.makedirs(new_dir)

                os.rename(old_path,
                          os.path.join(new_dir, df.name))
                df.directory = new_dir
                df.save()

            # Delete original dir if it's now empty
            if not os.listdir(old_dir):
                delete_drs_dir(old_dir)
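
The update() call near the top of the loop issues a single SQL UPDATE for the whole queryset; it does not call each instance's save() method or fire signals, which is why the files are then re-read one by one before being moved. The per-instance equivalent would be:

# One SQL statement, no save() calls or signals:
dreq.datafile_set.update(version='v20200917')

# Same result, but one UPDATE (and one save()) per row:
for df in dreq.datafile_set.all():
    df.version = 'v20200917'
    df.save()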
Example #10
def move_dirs(data_req, new_gws):
    """
    Move the files

    :param pdata_app.models.DataRequest data_req: the data request to move
    :param int new_gws: the number of the gws to move to
    """
    single_dir = '{}{}'.format(COMMON_GWS_NAME, new_gws)
    existing_dirs = data_req.directories()
    # ignore data that is offline
    if None in existing_dirs:
        existing_dirs.remove(None)
    use_single_dir = False
    for exist_dir in existing_dirs:
        if exist_dir.startswith(single_dir):
            use_single_dir = True
            break
    if not use_single_dir:
        # As a quick sanity check, generate an error if there is no
        # data already in the requested output directory
        logger.error('The new output directory is {} but no data from '
                     'this variable is currently in this directory.'.
                     format(single_dir))
        sys.exit(1)

    for exist_dir in existing_dirs:
        if exist_dir.startswith(single_dir):
            continue
        files_to_move = data_req.datafile_set.filter(directory=exist_dir)
        logger.debug('Moving {} files from {}'.format(
            files_to_move.count(), exist_dir))
        for file_to_move in files_to_move:
            # Move the file
            src = os.path.join(exist_dir, file_to_move.name)
            dest_path = os.path.join(single_dir, 'stream1',
                                     construct_drs_path(file_to_move))
            if not os.path.exists(dest_path):
                os.makedirs(dest_path)
            dest = os.path.join(dest_path, file_to_move.name)
            # remove existing link if about to write over it
            if dest.startswith(BASE_OUTPUT_DIR):
                if os.path.exists(dest):
                    if os.path.islink(dest):
                        os.remove(dest)
            # Move the file
            shutil.move(src, dest)
            # Update the file's location in the DB
            file_to_move.directory = dest_path
            file_to_move.save()
            # Check that it was safely copied
            actual_checksum = adler32(dest)
            db_checksum = file_to_move.checksum_set.first().checksum_value
            if actual_checksum != db_checksum:
                logger.error('For {}\ndatabase checksum: {}\n'
                             'actual checksum: {}'.
                             format(dest, db_checksum, actual_checksum))
                sys.exit(1)
            # Update the symlink
            if not is_same_gws(dest_path, BASE_OUTPUT_DIR):
                primary_path_dir = os.path.join(
                    BASE_OUTPUT_DIR,
                    construct_drs_path(file_to_move))
                primary_path = os.path.join(primary_path_dir,
                                            file_to_move.name)
                if os.path.lexists(primary_path):
                    if not os.path.islink(primary_path):
                        logger.error("{} exists and isn't a symbolic "
                                     "link.".format(primary_path))
                        sys.exit(1)
                    else:
                        # it is a link so remove it
                        os.remove(primary_path)
                if not os.path.exists(primary_path_dir):
                    os.makedirs(primary_path_dir)
                os.symlink(dest, primary_path)

        delete_drs_dir(exist_dir)
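
adler32 here is a file-level checksum helper rather than zlib.adler32 itself, since it is called with a path. A sketch, assuming it returns the checksum as a string to match the stored checksum_value:

import zlib


def adler32(file_path):
    """
    Calculate the Adler-32 checksum of a file, reading it in 1 MiB
    chunks. A sketch; the real helper may format the value differently.
    """
    checksum = 1  # Adler-32 initial value
    with open(file_path, 'rb') as handle:
        for chunk in iter(lambda: handle.read(1024 * 1024), b''):
            checksum = zlib.adler32(chunk, checksum)
    return str(checksum)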
Example #11
def main(args):
    """
    Main entry point
    """
    logger.debug('Starting delete_request.py for retrieval {}'.format(
        args.retrieval_id))

    deletion_retrieval = match_one(RetrievalRequest, id=args.retrieval_id)
    if not deletion_retrieval:
        logger.error('Unable to find retrieval id {}'.format(
            args.retrieval_id))
        sys.exit(1)

    if deletion_retrieval.date_deleted:
        logger.error('Retrieval {} was already deleted, at {}.'.format(
            deletion_retrieval.id,
            deletion_retrieval.date_deleted.strftime('%Y-%m-%d %H:%M')))
        sys.exit(1)

    if not deletion_retrieval.data_finished:
        logger.error('Retrieval {} is not marked as finished.'.format(
            deletion_retrieval.id))
        sys.exit(1)

    problems_encountered = False
    directories_found = []
    base_output_dir = Settings.get_solo().base_output_dir

    # loop through all of the data requests in this retrieval
    for data_req in deletion_retrieval.data_request.all():
        # exclude anything that's in the CEDA archive: archived files
        # must never be deleted from disk
        online_req_files = data_req.datafile_set.filter(
            online=True, directory__isnull=False
        ).exclude(directory__startswith=CEDA_ARCHIVE)
        files_to_delete = date_filter_files(online_req_files,
                                            deletion_retrieval.start_year,
                                            deletion_retrieval.end_year)

        if files_to_delete is None:
            continue

        if not args.force:
            # find any other retrieval requests that still need this data
            other_retrievals = RetrievalRequest.objects.filter(
                data_request=data_req, data_finished=False)
            # loop through the retrieval requests that still need this data
            # request
            for ret_req in other_retrievals:
                ret_online_files = data_req.datafile_set.filter(
                    online=True, directory__isnull=False)
                ret_filtered_files = date_filter_files(ret_online_files,
                                                       ret_req.start_year,
                                                       ret_req.end_year)
                if ret_filtered_files is None:
                    continue
                # remove from the list of files to delete the ones that we have
                # just found are still needed
                files_to_delete = files_to_delete.difference(
                    ret_filtered_files)
                # list the parts of the data request that are still required
                logger.debug("{} {} to {} won't be deleted".format(
                    data_req, ret_req.start_year, ret_req.end_year))

        # do the deleting
        if args.dryrun:
            logger.debug('{} {} files can be deleted.'.format(
                data_req,
                files_to_delete.distinct().count()))
        else:
            logger.debug('{} {} files will be deleted.'.format(
                data_req,
                files_to_delete.distinct().count()))
            for data_file in files_to_delete:
                old_file_dir = data_file.directory
                try:
                    os.remove(os.path.join(data_file.directory,
                                           data_file.name))
                except OSError as exc:
                    logger.error(str(exc))
                    problems_encountered = True
                else:
                    if data_file.directory not in directories_found:
                        directories_found.append(data_file.directory)
                    data_file.online = False
                    data_file.directory = None
                    data_file.save()

                # if a symbolic link exists from the base output directory
                # then delete this too
                if not old_file_dir.startswith(base_output_dir):
                    sym_link_dir = os.path.join(base_output_dir,
                                                construct_drs_path(data_file))
                    sym_link = os.path.join(sym_link_dir, data_file.name)
                    if not os.path.islink(sym_link):
                        logger.error(
                            "Expected {} to be a link but it isn't. "
                            "Leaving this file in place.".format(sym_link))
                        problems_encountered = True
                    else:
                        try:
                            os.remove(sym_link)
                        except OSError as exc:
                            logger.error(str(exc))
                            problems_encountered = True
                        else:
                            if sym_link_dir not in directories_found:
                                directories_found.append(sym_link_dir)

    if not args.dryrun:
        # delete any empty directories
        for directory in directories_found:
            if not os.listdir(directory):
                delete_drs_dir(directory)

        # set date_deleted in the db
        if not problems_encountered:
            deletion_retrieval.date_deleted = timezone.now()
            deletion_retrieval.save()
        else:
            logger.error(
                'Errors were encountered and so retrieval {} has not '
                'been marked as deleted. All possible files have been '
                'deleted.'.format(args.retrieval_id))

    logger.debug('Completed delete_request.py for retrieval {}'.format(
        args.retrieval_id))
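
Two more helpers used above are not defined on this page: match_one and date_filter_files. Sketches under stated assumptions (match_one returns the single match or None; date_filter_files returns None when nothing overlaps the year range, mirroring the None checks in the example):

def match_one(model, **kwargs):
    """Return the single instance of `model` matching `kwargs`, else None."""
    query_set = model.objects.filter(**kwargs)
    return query_set.first() if query_set.count() == 1 else None


def date_filter_files(query_set, start_year, end_year):
    """
    Restrict `query_set` to files overlapping [start_year, end_year].
    Assumes integer year fields on DataFile; the real DMT stores file
    times differently.
    """
    if start_year is None or end_year is None:
        return query_set
    filtered = query_set.filter(start_year__lte=end_year,
                                end_year__gte=start_year)
    return filtered if filtered.exists() else None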
Example #12
def main(args):
    """
    Main entry point
    """
    dreqs1 = DataRequest.objects.filter(
        climate_model__short_name='MPI-ESM1-2-XR',
        experiment__short_name='highresSST-present',
        variable_request__cmor_name__in=['hus7h', 'ta7h', 'ua7h']
    )

    dreqs2 = DataRequest.objects.filter(
        climate_model__short_name__in=['MPI-ESM1-2-HR', 'MPI-ESM1-2-XR'],
        experiment__short_name='highresSST-present',
        variable_request__table_name='Amon',
        variable_request__cmor_name='tas'
    )

    dreqs = dreqs1 | dreqs2

    logger.debug(f'Found {dreqs.count()} data requests')

    for dreq in dreqs:
        logger.debug(f'Processing {dreq}')
        old_directories = []
        for df in dreq.datafile_set.order_by('name'):
            if not df.online:
                logger.error(f'Not online {df.name}')
                continue
            if df.version == NEW_VERSION:
                logger.warning(f'Already at {NEW_VERSION} {df.name}')
                continue
            # save the sym link directory before we make any changes
            old_sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                            construct_drs_path(df))
            # now get back to updating the version
            df.version = NEW_VERSION
            gws = get_gws(df.directory)
            new_dir = os.path.join(gws, construct_drs_path(df))
            old_directory = df.directory
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            os.rename(os.path.join(df.directory, df.name),
                      os.path.join(new_dir, df.name))
            df.directory = new_dir
            df.save()
            if old_directory not in old_directories:
                old_directories.append(old_directory)
            # Update any sym links too, but only if the file isn't
            # already in the base gws (a link there would point the file
            # at itself)
            if not is_same_gws(BASE_OUTPUT_DIR, df.directory):
                sym_link_path = os.path.join(old_sym_link_dir, df.name)
                if os.path.lexists(sym_link_path):
                    if os.path.islink(sym_link_path):
                        os.remove(sym_link_path)
                        if old_sym_link_dir not in old_directories:
                            old_directories.append(old_sym_link_dir)
                sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                            construct_drs_path(df))
                if not os.path.exists(sym_link_dir):
                    os.makedirs(sym_link_dir)
                sym_link_path = os.path.join(sym_link_dir, df.name)
                os.symlink(os.path.join(df.directory, df.name),
                           sym_link_path)

        for directory in old_directories:
            if not os.listdir(directory):
                delete_drs_dir(directory)
            else:
                logger.error(f'Not empty {directory}')
Example #13
def main(args):
    """
    Main entry point
    """
    dreqs_hr = DataRequest.objects.filter(
        climate_model__short_name='CMCC-CM2-HR4',
        experiment__short_name__in=['hist-1950', 'control-1950'],
        variable_request__table_name__startswith='SI',
        datafile__isnull=False).distinct()

    dreqs_vhr = DataRequest.objects.filter(
        climate_model__short_name='CMCC-CM2-VHR4',
        experiment__short_name='hist-1950',
        variable_request__table_name__startswith='SI',
        datafile__isnull=False).distinct()

    dreqs = dreqs_hr | dreqs_vhr

    logger.debug(f'Found {dreqs.count()} data requests')

    for dreq in dreqs:
        logger.debug(f'Processing {dreq}')
        old_directories = []
        for df in dreq.datafile_set.order_by('name'):
            if not df.online:
                logger.error(f'Not online {df.name}')
                continue
            if df.version == NEW_VERSION:
                logger.warning(f'Already at {NEW_VERSION} {df.name}')
                continue
            # save the sym link directory before we make any changes
            if not is_same_gws(BASE_OUTPUT_DIR, df.directory):
                old_sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                                construct_drs_path(df))
            # now get back to updating the version
            df.version = NEW_VERSION
            gws = get_gws(df.directory)
            new_dir = os.path.join(gws, construct_drs_path(df))
            old_directory = df.directory
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            os.rename(os.path.join(df.directory, df.name),
                      os.path.join(new_dir, df.name))
            df.directory = new_dir
            df.save()
            if old_directory not in old_directories:
                old_directories.append(old_directory)

            # Update any sym links too
            if not is_same_gws(BASE_OUTPUT_DIR, df.directory):
                sym_link_path = os.path.join(old_sym_link_dir, df.name)
                if os.path.lexists(sym_link_path):
                    if os.path.islink(sym_link_path):
                        os.remove(sym_link_path)
                        if old_sym_link_dir not in old_directories:
                            old_directories.append(old_sym_link_dir)
                sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                            construct_drs_path(df))
                if not os.path.exists(sym_link_dir):
                    os.makedirs(sym_link_dir)
                sym_link_path = os.path.join(sym_link_dir, df.name)
                os.symlink(os.path.join(df.directory, df.name), sym_link_path)

        for directory in old_directories:
            if not os.listdir(directory):
                delete_drs_dir(directory)
            else:
                logger.error(f'Not empty {directory}')
Example #14
def main(args):
    """
    Main entry point
    """
    var_tables = [
        'rlus_3hr',
        'rsus_3hr',
        'rsuscs_3hr',
        'rlut_E3hr',
        'rlutcs_E3hr',
        'rsut_E3hr',
        'rlus_day',
        'rlut_day',
        'rsus_day',
        'rlutcs_CFday',
        'rsuscs_CFday',
        'rsut_CFday',
        'rsutcs_CFday',
        'rlus_Amon',
        'rlut_Amon',
        'rlutcs_Amon',
        'rsus_Amon',
        'rsuscs_Amon',
        'rsut_Amon',
        'rsutcs_Amon'
    ]

    submissions = [
        '/group_workspaces/jasmin2/primavera4/upload/EC-Earth-Consortium/EC-Earth-3-HR/incoming/v20171110',
        '/group_workspaces/jasmin2/primavera4/upload/EC-Earth-Consortium/EC-Earth-3-HR/incoming/v20171111',
        '/group_workspaces/jasmin2/primavera4/upload/EC-Earth-Consortium/EC-Earth-3-HR/incoming/v20171112',
        '/group_workspaces/jasmin2/primavera4/upload/EC-Earth-Consortium/EC-Earth-3-HR/incoming/v20171116',
        '/group_workspaces/jasmin2/primavera4/upload/EC-Earth-Consortium/EC-Earth-3-HR/incoming/v20171024',
        '/group_workspaces/jasmin2/primavera4/upload/EC-Earth-Consortium/EC-Earth-3-HR/incoming/v20171027',
        '/group_workspaces/jasmin2/primavera4/upload/EC-Earth-Consortium/EC-Earth-3-HR/incoming/v20171101',
        '/group_workspaces/jasmin2/primavera4/upload/EC-Earth-Consortium/EC-Earth-3-HR/incoming/v20171114',
        '/group_workspaces/jasmin2/primavera4/upload/EC-Earth-Consortium/EC-Earth-3-HR/incoming/v20171115',
        '/group_workspaces/jasmin2/primavera4/upload/EC-Earth-Consortium/EC-Earth-3-HR/incoming/v20171113'
    ]

    models = ['EC-Earth3-HR']
    experiment = 'spinup-1950'

    for var_table in var_tables:
        var, __, table = var_table.partition('_')
        for model in models:
            query_set = DataFile.objects.filter(
                data_request__climate_model__short_name=model,
                data_request__experiment__short_name=experiment,
                variable_request__table_name=table,
                variable_request__cmor_name=var,
                data_submission__incoming_directory__in=submissions
            )
            logger.debug('{} {} {} {}'.format(model, table, var,
                                              query_set.count()))

            directories_found = []
            for df in query_set:
                if df.online:
                    try:
                        os.remove(os.path.join(df.directory, df.name))
                    except OSError as exc:
                        logger.error(str(exc))
                        sys.exit(1)
                    else:
                        if df.directory not in directories_found:
                            directories_found.append(df.directory)
                    df.online = False
                    df.directory = None
                    df.save()

            for directory in directories_found:
                if not os.listdir(directory):
                    delete_drs_dir(directory)

            replace_files(query_set)
Example #15
def main():
    """
    Main entry point
    """
    dreqs = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-HH',
        # experiment__short_name__in=['control-1950', 'hist-1950',
        #                             'highres-future'],
        # variable_request__table_name__in=['SImon', 'SIday', 'PrimSIday'],
        experiment__short_name='hist-1950',
        variable_request__table_name='SImon',
        variable_request__cmor_name='sisnhc',
        datafile__isnull=False).distinct().order_by(
            'experiment__short_name', 'variable_request__table_name',
            'variable_request__cmor_name')

    num_dreqs = dreqs.count()
    logger.info(f'{num_dreqs} data requests found')

    for dreq in dreqs:
        logger.info(str(dreq))
        old_drs_path = construct_drs_path(dreq.datafile_set.first())
        dreq.datafile_set.update(version=NEW_VERSION_STRING)
        for df in dreq.datafile_set.order_by('name'):
            if not df.online:
                logger.error(f'File not online {df.name}')
                continue
            old_dir = df.directory
            old_path = os.path.join(old_dir, df.name)
            if not os.path.exists(old_path):
                logger.error(f'File not found {old_path}')
                continue
            new_dir = os.path.join(get_gws(df.directory),
                                   construct_drs_path(df))
            if df.directory != new_dir:
                if not os.path.exists(new_dir):
                    os.makedirs(new_dir)

                os.rename(old_path, os.path.join(new_dir, df.name))
                df.directory = new_dir
                df.save()

            # Delete original dir if it's now empty
            if not os.listdir(old_dir):
                delete_drs_dir(old_dir)

            # Update symbolic links on primavera5
            if get_gws(df.directory) != BASE_OUTPUT_DIR:
                old_link_dir = os.path.join(BASE_OUTPUT_DIR, old_drs_path)
                old_link_path = os.path.join(old_link_dir, df.name)
                # lexists() rather than exists(): the old link now points
                # at the file's previous location, so it is broken and
                # exists() would report it as missing
                if not os.path.lexists(old_link_path):
                    logger.error(f'Link not found {old_link_path}')
                    continue
                if not os.path.islink(old_link_path):
                    logger.error(f'Not sym link {old_link_path}')
                    continue
                os.remove(old_link_path)

                new_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                            construct_drs_path(df))
                new_link_path = os.path.join(new_link_dir, df.name)
                if not os.path.exists(new_link_dir):
                    os.makedirs(new_link_dir)
                os.symlink(os.path.join(new_dir, df.name), new_link_path)

                if not os.listdir(old_link_dir):
                    delete_drs_dir(old_link_dir)