def main(args):
    """
    Main entry point
    """
    logger.debug('Starting file structure scan.')

    for nc_file in ilist_files(args.directory):
        nc_file_name = os.path.basename(nc_file)
        db_files = DataFile.objects.filter(name=nc_file_name)

        if db_files.count() == 0:
            logger.error('File not found in database: {}'.format(nc_file))
            continue
        elif db_files.count() > 1:
            logger.error('{} entries found in database for file: {}'.
                         format(db_files.count(), nc_file))
            continue
        else:
            db_file = db_files.first()

        act_size = os.path.getsize(nc_file)
        if act_size != db_file.size:
            logger.info('File %s has size %d', db_file.name, act_size)
            db_file.online = False
            db_file.directory = None
            db_file.save()

            os.remove(nc_file)
            if not is_same_gws(nc_file, BASE_OUTPUT_DIR):
                sym_link_path = os.path.join(BASE_OUTPUT_DIR,
                                             construct_drs_path(db_file),
                                             db_file.name)
                try:
                    if os.path.exists(sym_link_path):
                        os.remove(sym_link_path)
                except OSError:
                    logger.error('Unable to delete sym link %s', sym_link_path)
def scan_database():
    """
    Start the scan of the database.
    """
    logger.debug('Starting database scan.')

    for data_file in DataFile.objects.filter(online=True).iterator():
        full_path = os.path.join(data_file.directory, data_file.name)
        if not os.path.exists(full_path):
            logger.warning('File cannot be found on disk, status changed to '
                           'offline: {}'.format(full_path))
            data_file.online = False
            data_file.directory = None
            data_file.save()
            continue

        if not is_same_gws(data_file.directory, BASE_OUTPUT_DIR):
            sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                        construct_drs_path(data_file))
            sym_link_path = os.path.join(sym_link_dir,
                                         data_file.name)
            if not os.path.exists(sym_link_path):
                if not os.path.exists(sym_link_dir):
                    os.makedirs(sym_link_dir)
                os.symlink(full_path, sym_link_path)
                logger.warning('Created symlink for file {} at {}'.
                               format(data_file.name, sym_link_path))

    logger.debug('Completed database scan.')
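
# `ilist_files` is used above and in later examples but is not defined in these
# excerpts. Based on how it is called (lazily iterating over the files beneath a
# directory), a minimal sketch might look like the following; the optional
# suffix filter is an assumption, not the project's actual signature.
def ilist_files(directory, suffix='.nc'):
    """Yield the full path of every file under `directory` ending in `suffix`."""
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            if filename.endswith(suffix):
                yield os.path.join(root, filename)
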
Example #3
def main(args):
    """
    Main entry point
    """
    base_output_dir = Settings.get_solo().base_output_dir

    for data_file in DataFile.objects.filter(online=True):
        gws_pattern = r'^/group_workspaces/jasmin2/primavera(\d)/(\S*)'
        gws = re.match(gws_pattern, data_file.directory)
        if not gws:
            logger.error('No GWS match for {}'.format(data_file.name))
            continue
        new_gws = '/gws/nopw/j04/primavera' + gws.group(1)
        new_dir = os.path.join(new_gws, gws.group(2))
        new_path = os.path.join(new_dir, data_file.name)
        if not os.path.exists(new_path):
            logger.error('Cannot find {}'.format(new_path))
            continue
        data_file.directory = new_dir
        data_file.save()

        if not is_same_gws(data_file.directory, base_output_dir):
            link_path = os.path.join(base_output_dir,
                                     construct_drs_path(data_file),
                                     data_file.name)
            # it ought to be a link already, but check anyway
            if os.path.islink(link_path):
                os.remove(link_path)
                os.symlink(os.path.join(data_file.directory, data_file.name),
                           link_path)
            elif os.path.exists(link_path):
                logger.error('Expected a link but found a file at {}'.
                             format(link_path))
            else:
                logger.error('Expected a link but found nothing at {}'.
                             format(link_path))
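
# A quick illustration of the path rewrite performed above, using a made-up
# directory: the old /group_workspaces/jasmin2/primaveraN prefix is replaced by
# the newer /gws/nopw/j04/primaveraN mount point and the rest of the path is
# kept unchanged.
def _demo_gws_rewrite():
    old_dir = '/group_workspaces/jasmin2/primavera5/stream1/CMIP6/some/dir'
    match = re.match(r'^/group_workspaces/jasmin2/primavera(\d)/(\S*)', old_dir)
    new_dir = os.path.join('/gws/nopw/j04/primavera' + match.group(1),
                           match.group(2))
    assert new_dir == '/gws/nopw/j04/primavera5/stream1/CMIP6/some/dir'
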
Example #4
def main(args):
    """
    Main entry point
    """
    dfs = DataFile.objects.filter(climate_model__short_name='MPI-ESM1-2-XR',
                                  experiment__short_name='highres-future',
                                  version='v20190617')

    prim_gws = '/gws/nopw/j04/primavera5/stream1'

    old_dirs = []

    for df in dfs:
        old_drs_path = construct_drs_path(df)
        df.version = 'v20190517'
        df.save()
        if df.online:
            # file itself
            gws = get_gws(df.directory)
            old_dir = df.directory
            new_dir = os.path.join(gws, construct_drs_path(df))
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            os.rename(os.path.join(df.directory, df.name),
                      os.path.join(new_dir, df.name))
            df.directory = new_dir
            df.save()
            if old_dir not in old_dirs:
                old_dirs.append(old_dir)

            # sym link
            if not is_same_gws(df.directory, prim_gws):
                old_sym_dir = os.path.join(prim_gws, old_drs_path)
                old_sym = os.path.join(old_sym_dir, df.name)
                # use lexists() because the file has already been moved, so the
                # old symlink is broken and os.path.exists() would return False
                if os.path.lexists(old_sym):
                    if os.path.islink(old_sym):
                        os.remove(old_sym)
                    else:
                        logger.warning(f'Not symlink as expected: {old_sym}')
                new_sym_dir = os.path.join(prim_gws, construct_drs_path(df))
                if not os.path.exists(new_sym_dir):
                    os.makedirs(new_sym_dir)
                os.symlink(os.path.join(new_dir, df.name),
                           os.path.join(new_sym_dir, df.name))
                if old_sym_dir not in old_dirs:
                    old_dirs.append(old_sym_dir)

    logger.debug(f'Removing {len(old_dirs)} old dirs')
    for old_dir in old_dirs:
        delete_drs_dir(old_dir)
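
# `delete_drs_dir` is called above to tidy up directories left empty after files
# are moved, but its definition is not part of these excerpts. A plausible
# sketch is shown below, assuming it removes the empty leaf directory and then
# prunes any parents that become empty; the real helper presumably stops at the
# group-workspace root rather than walking all the way up.
def delete_drs_dir(directory):
    """Remove an empty DRS directory and any parents left empty by its removal."""
    os.rmdir(directory)
    parent = os.path.dirname(directory)
    while parent and not os.listdir(parent):
        os.rmdir(parent)
        parent = os.path.dirname(parent)
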
Example #5
def main(args):
    """Main entry point"""
    base_dir = Settings.get_solo().base_output_dir

    for extracted_file in ilist_files(args.top_dir):
        found_name = os.path.basename(extracted_file)

        try:
            data_file = DataFile.objects.get(name=found_name)
        except django.core.exceptions.ObjectDoesNotExist:
            logger.warning('Cannot find DMT entry. Skipping {}'.
                           format(extracted_file))
            continue

        found_checksum = adler32(extracted_file)
        if found_checksum != data_file.checksum_set.first().checksum_value:
            logger.warning("Checksum doesn't match. Skipping {}".
                           format(found_name))
            continue

        dest_dir = os.path.join(get_gws_any_dir(extracted_file), 'stream1',
                                construct_drs_path(data_file))
        dest_path = os.path.join(dest_dir, found_name)
        if os.path.exists(dest_path):
            logger.warning('Skipping {} as it already exists at {}'.
                           format(found_name, dest_path))
            continue
        # create the directory if it doesn't exist
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        os.rename(extracted_file, dest_path)

        # create a link from the base dir
        if not is_same_gws(dest_path, base_dir):
            link_dir = os.path.join(base_dir, construct_drs_path(data_file))
            link_path = os.path.join(link_dir, data_file.name)
            if not os.path.exists(link_dir):
                os.makedirs(link_dir)
            os.symlink(dest_path, link_path)

        data_file.online = True
        data_file.directory = dest_dir
        data_file.save()
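
# `adler32` in these examples is a project helper rather than zlib.adler32
# itself: it is called with a file path and its result is compared with the
# checksum stored in the database. A minimal sketch that wraps zlib.adler32 over
# the file in chunks is shown below; the return type (a string here) is an
# assumption.
import zlib


def adler32(file_path, chunk_size=1024 * 1024):
    """Return the Adler-32 checksum of the file at `file_path` as a string."""
    checksum = 1
    with open(file_path, 'rb') as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            checksum = zlib.adler32(chunk, checksum)
    return str(checksum)
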
Example #6
    def _rename_file(self):
        """
        Rename the file on disk and move to its new directory. Update the link
        from the primary directory.
        """
        if not os.path.exists(self.new_directory):
            os.makedirs(self.new_directory)

        os.rename(os.path.join(self.old_directory, self.old_filename),
                  os.path.join(self.new_directory, self.new_filename))

        # check for empty directory
        if not os.listdir(self.old_directory):
            delete_drs_dir(self.old_directory)

        # Update the symbolic link if required
        if not is_same_gws(self.old_directory, BASE_OUTPUT_DIR):
            old_link_path = os.path.join(self.old_sym_link_dir,
                                         self.old_filename)
            if os.path.lexists(old_link_path):
                if not os.path.islink(old_link_path):
                    logger.error("{} exists and isn't a symbolic link.".format(
                        old_link_path))
                    raise SymLinkIsFileError(old_link_path)
                else:
                    # it is a link so remove it
                    os.remove(old_link_path)
                    # check for empty directory
                    if not os.listdir(self.old_sym_link_dir):
                        delete_drs_dir(self.old_sym_link_dir)

            new_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                        construct_drs_path(self.datafile))
            if not os.path.exists(new_link_dir):
                os.makedirs(new_link_dir)
            os.symlink(os.path.join(self.new_directory, self.new_filename),
                       os.path.join(new_link_dir, self.new_filename))
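
# `SymLinkIsFileError`, raised above when a regular file is found where a
# symbolic link was expected, is defined elsewhere in the project. A minimal
# stand-in would be:
class SymLinkIsFileError(Exception):
    """Raised when a path expected to be a symbolic link is a regular file."""
    pass
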
Example #7
def move_dirs(data_req, new_gws):
    """
    Move the files

    :param pdata_app.models.DataRequest data_req: the data request to move
    :param int new_gws: the number of the gws to move to
    """
    single_dir = '{}{}'.format(COMMON_GWS_NAME, new_gws)
    existing_dirs = data_req.directories()
    # ignore data that is offline
    if None in existing_dirs:
        existing_dirs.remove(None)
    use_single_dir = False
    for exist_dir in existing_dirs:
        if exist_dir.startswith(single_dir):
            use_single_dir = True
            break
    if not use_single_dir:
        # As a quick sanity check, generate an error if there is no
        # data already in the requested output directory
        logger.error('The new output directory is {} but no data from '
                     'this variable is currently in this directory.'.
                     format(single_dir))
        sys.exit(1)

    for exist_dir in existing_dirs:
        if exist_dir.startswith(single_dir):
            continue
        files_to_move = data_req.datafile_set.filter(directory=exist_dir)
        logger.debug('Moving {} files from {}'.format(
            files_to_move.count(), exist_dir))
        for file_to_move in files_to_move:
            # Move the file
            src = os.path.join(exist_dir, file_to_move.name)
            dest_path = os.path.join(single_dir, 'stream1',
                                     construct_drs_path(file_to_move))
            if not os.path.exists(dest_path):
                os.makedirs(dest_path)
            dest = os.path.join(dest_path, file_to_move.name)
            # remove existing link if about to write over it
            if dest.startswith(BASE_OUTPUT_DIR):
                if os.path.exists(dest):
                    if os.path.islink(dest):
                        os.remove(dest)
            # Move the file
            shutil.move(src, dest)
            # Update the file's location in the DB
            file_to_move.directory = dest_path
            file_to_move.save()
            # Check that it was safely copied
            actual_checksum = adler32(dest)
            db_checksum = file_to_move.checksum_set.first().checksum_value
            if actual_checksum != db_checksum:
                logger.error('For {}\ndatabase checksum: {}\n'
                             'actual checksum: {}'.
                             format(dest, db_checksum, actual_checksum))
                sys.exit(1)
            # Update the symlink
            if not is_same_gws(dest_path, BASE_OUTPUT_DIR):
                primary_path_dir = os.path.join(
                    BASE_OUTPUT_DIR,
                    construct_drs_path(file_to_move))
                primary_path = os.path.join(primary_path_dir,
                                            file_to_move.name)
                if os.path.lexists(primary_path):
                    if not os.path.islink(primary_path):
                        logger.error("{} exists and isn't a symbolic "
                                     "link.".format(primary_path))
                        sys.exit(1)
                    else:
                        # it is a link so remove it
                        os.remove(primary_path)
                if not os.path.exists(primary_path_dir):
                    os.makedirs(primary_path_dir)
                os.symlink(dest, primary_path)

        delete_drs_dir(exist_dir)
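
# `construct_drs_path` appears in every example but is not defined in them. It
# evidently builds the DRS sub-path for a DataFile from its related metadata.
# The sketch below is only a guess at its shape, using fields that appear in
# these excerpts (climate_model, experiment, variable_request and version); the
# real path will contain more components and a different ordering.
def construct_drs_path(data_file):
    """Return an illustrative DRS sub-directory for `data_file`."""
    return os.path.join(
        data_file.climate_model.short_name,
        data_file.experiment.short_name,
        data_file.variable_request.table_name,
        data_file.version,
    )
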
def main(args):
    """
    Main entry point
    """
    logger.debug('Starting incoming_to_drs.py')

    data_sub = _get_submission_object(os.path.normpath(args.directory))

    if not args.alternative:
        drs_base_dir = BASE_OUTPUT_DIR
    else:
        drs_base_dir = args.alternative

    errors_encountered = False

    for data_file in data_sub.datafile_set.order_by('name'):
        # make full path of existing file
        existing_path = os.path.join(data_file.directory, data_file.name)

        # make full path of where it will live
        drs_sub_path = construct_drs_path(data_file)
        drs_dir = os.path.join(drs_base_dir, drs_sub_path)
        drs_path = os.path.join(drs_dir, data_file.name)

        # check the destination directory exists
        if not os.path.exists(drs_dir):
            os.makedirs(drs_dir)

        # move the file into the DRS structure
        this_file_error = False
        try:
            os.rename(existing_path, drs_path)
        except OSError as exc:
            logger.error('Unable to move {} to {}. {}'.format(
                existing_path, drs_path, str(exc)))
            errors_encountered = True
            this_file_error = True

        # update the file's location in the database
        if not this_file_error:
            data_file.directory = drs_dir
            if not data_file.online:
                data_file.online = True
            data_file.save()

        # if storing the files in an alternative location, create a sym link
        # from the primary DRS structure to the file
        if not is_same_gws(BASE_OUTPUT_DIR, drs_base_dir):
            primary_path = os.path.join(BASE_OUTPUT_DIR, drs_sub_path)
            try:
                if not os.path.exists(primary_path):
                    os.makedirs(primary_path)

                os.symlink(drs_path, os.path.join(primary_path,
                                                  data_file.name))
            except OSError as exc:
                logger.error('Unable to link from {} to {}. {}'.format(
                    drs_path, os.path.join(primary_path, data_file.name),
                    str(exc)))
                errors_encountered = True

    # summarise what happened and keep the DB updated
    if not errors_encountered:
        logger.debug('All files moved with no errors. Data submission '
                     'incoming directory can be deleted.')
    else:
        logger.error('Errors were encountered. Please fix these before '
                     'deleting the incoming directory.')

    logger.debug('Completed incoming_to_drs.py')
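
# `_get_submission_object` is not included in this excerpt. From its call above
# it looks up the submission record for an incoming directory. A hypothetical
# sketch, assuming a `DataSubmission` model with an `incoming_directory` field
# (both names are guesses):
def _get_submission_object(incoming_directory):
    """Return the data submission whose files arrived in `incoming_directory`."""
    try:
        return DataSubmission.objects.get(incoming_directory=incoming_directory)
    except django.core.exceptions.ObjectDoesNotExist:
        logger.error('No data submission found for directory {}'.format(
            incoming_directory))
        sys.exit(1)
    except django.core.exceptions.MultipleObjectsReturned:
        logger.error('Multiple data submissions found for directory {}'.format(
            incoming_directory))
        sys.exit(1)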
Example #9
    def test_slightly_bad_path(self):
        path1 = '/group_workspaces/jasmin2/primavera2/some/dir'
        path2 = '/group_workspaces/jasmin1/primavera1/some/dir'

        self.assertFalse(is_same_gws(path1, path2))
Example #10
    def test_archive_second(self):
        path1 = '/group_workspaces/jasmin2/primavera2/some/dir'
        path2 = '/badc/cmip6'

        self.assertFalse(is_same_gws(path1, path2))
Example #11
    def test_new_diff(self):
        path1 = '/gws/nopw/j04/primavera1/some/dir'
        path2 = '/gws/nopw/j04/primavera5/another/dir'

        self.assertFalse(is_same_gws(path1, path2))
Example #12
    def test_same(self):
        path1 = '/group_workspaces/jasmin2/primavera1/some/dir'
        path2 = '/group_workspaces/jasmin2/primavera1/another/dir'

        self.assertTrue(is_same_gws(path1, path2))
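
# `is_same_gws` is exercised by the tests above but its definition is not part
# of this excerpt. A sketch consistent with those tests: extract the
# group-workspace root from each path, in either the old or the new style, and
# compare the two roots. The exact pattern used by the project is an assumption.
def is_same_gws(path1, path2):
    """Return True if both paths live in the same PRIMAVERA group workspace."""
    gws_pattern = r'^/(?:group_workspaces/jasmin\d|gws/nopw/j04)/primavera\d'
    match1 = re.match(gws_pattern, path1)
    match2 = re.match(gws_pattern, path2)
    if not (match1 and match2):
        return False
    return match1.group(0) == match2.group(0)
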
def main(args):
    """
    Main entry point
    """
    dreqs_hr = DataRequest.objects.filter(
        climate_model__short_name='CMCC-CM2-HR4',
        experiment__short_name__in=['hist-1950', 'control-1950'],
        variable_request__table_name__startswith='SI',
        datafile__isnull=False).distinct()

    dreqs_vhr = DataRequest.objects.filter(
        climate_model__short_name='CMCC-CM2-VHR4',
        experiment__short_name='hist-1950',
        variable_request__table_name__startswith='SI',
        datafile__isnull=False).distinct()

    dreqs = dreqs_hr | dreqs_vhr

    logger.debug(f'Found {dreqs.count()} data requests')

    for dreq in dreqs:
        logger.debug(f'Processing {dreq}')
        old_directories = []
        for df in dreq.datafile_set.order_by('name'):
            if not df.online:
                logger.error(f'Not online {df.name}')
                continue
            if df.version == NEW_VERSION:
                logger.warning(f'Already at {NEW_VERSION} {df.name}')
                continue
            # save the sym link directory before we make any changes
            if not is_same_gws(BASE_OUTPUT_DIR, df.directory):
                old_sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                                construct_drs_path(df))
            # now get back to updating the version
            df.version = NEW_VERSION
            gws = get_gws(df.directory)
            new_dir = os.path.join(gws, construct_drs_path(df))
            old_directory = df.directory
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            os.rename(os.path.join(df.directory, df.name),
                      os.path.join(new_dir, df.name))
            df.directory = new_dir
            df.save()
            if old_directory not in old_directories:
                old_directories.append(old_directory)

            # Update any sym links too
            if not is_same_gws(BASE_OUTPUT_DIR, df.directory):
                sym_link_path = os.path.join(old_sym_link_dir, df.name)
                if os.path.lexists(sym_link_path):
                    if os.path.islink(sym_link_path):
                        os.remove(sym_link_path)
                        if old_sym_link_dir not in old_directories:
                            old_directories.append(old_sym_link_dir)
                sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                            construct_drs_path(df))
                if not os.path.exists(sym_link_dir):
                    os.makedirs(sym_link_dir)
                sym_link_path = os.path.join(sym_link_dir, df.name)
                os.symlink(os.path.join(df.directory, df.name), sym_link_path)

        for directory in old_directories:
            if not os.listdir(directory):
                delete_drs_dir(directory)
            else:
                logger.error(f'Not empty {directory}')
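
# `get_gws` (used here and in an earlier example) is not defined in these
# excerpts. The DRS path is joined directly onto its return value, and the
# primary workspace is written elsewhere as '/gws/nopw/j04/primavera5/stream1',
# so it appears to return the group-workspace root of a path up to and
# including the stream component. The regex below is an assumption.
def get_gws(path):
    """Return the group-workspace root (including the stream) of `path`."""
    match = re.match(r'^(?:/group_workspaces/jasmin\d|/gws/nopw/j04)'
                     r'/primavera\d/stream\d', path)
    if not match:
        raise ValueError('Cannot determine group workspace for {}'.format(path))
    return match.group(0)
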
Example #14
def copy_et_files_into_drs(data_files, retrieval_dir, args):
    """
    Copy files from the restored data cache into the DRS structure.

    :param list data_files: The DataFile objects to copy.
    :param str retrieval_dir: The path that the files were retrieved to.
    :param argparse.Namespace args: The parsed command line arguments
        namespace.
    """
    logger.debug('Copying elastic tape files')

    for data_file in data_files:
        file_submission_dir = data_file.incoming_directory
        filename = (data_file.name if not args.incoming
                    else data_file.incoming_name)
        extracted_file_path = os.path.join(retrieval_dir,
                                           file_submission_dir.lstrip('/'),
                                           filename)
        if not os.path.exists(extracted_file_path):
            msg = ('Unable to find file {} in the extracted data at {}. The '
                   'expected path was {}'.format(filename, retrieval_dir,
                                                 extracted_file_path))
            logger.error(msg)
            sys.exit(1)

        drs_path = construct_drs_path(data_file)
        if not args.alternative:
            drs_dir = os.path.join(BASE_OUTPUT_DIR, drs_path)
        else:
            drs_dir = os.path.join(args.alternative, drs_path)
        dest_file_path = os.path.join(drs_dir, filename)

        # create the path if it doesn't exist
        if not os.path.exists(drs_dir):
            os.makedirs(drs_dir)

        if os.path.exists(dest_file_path):
            msg = 'File already exists on disk: {}'.format(dest_file_path)
            logger.warning(msg)
        else:
            os.rename(extracted_file_path, dest_file_path)

        if not args.skip_checksums:
            try:
                _check_file_checksum(data_file, dest_file_path)
            except ChecksumError:
                # warning message has already been displayed and so move on
                # to next file
                continue

        # create symbolic link from main directory if storing data in an
        # alternative directory
        if args.alternative and not is_same_gws(dest_file_path,
                                                BASE_OUTPUT_DIR):
            primary_path = os.path.join(BASE_OUTPUT_DIR, drs_path)
            if not os.path.exists(primary_path):
                os.makedirs(primary_path)
            os.symlink(dest_file_path,
                       os.path.join(primary_path, filename))

        # set directory and set status as being online
        data_file.directory = drs_dir
        data_file.online = True
        data_file.save()

    logger.debug('Finished copying elastic tape files')
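
# `_check_file_checksum` and `ChecksumError` are referenced above but defined
# elsewhere. A plausible sketch is given below, assuming the helper compares a
# freshly calculated adler32 of the file on disk with the first checksum stored
# for the DataFile and raises on a mismatch; the names and message text are
# illustrative only.
class ChecksumError(Exception):
    """Raised when a file's checksum does not match the database value."""
    pass


def _check_file_checksum(data_file, file_path):
    """Raise ChecksumError if `file_path` does not match the stored checksum."""
    db_checksum = data_file.checksum_set.first().checksum_value
    actual_checksum = adler32(file_path)
    if actual_checksum != db_checksum:
        logger.warning('Checksum mismatch for {}: database {}, actual {}'.format(
            file_path, db_checksum, actual_checksum))
        raise ChecksumError(file_path)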