示例#1
0
def get_quota_usage_from_irods(username):
    """
    Read the per-user quota usage AVU set on the bagit path collection in the iRODS data zone
    and in the iRODS user zone, and combine the two values.
    :param username: the user name to get quota usage for.
    :return: quota usage as a float; the sum of both zones when both report a value, otherwise
    the value from the only zone that reports one
    :raises ValidationError: if no usage value can be retrieved from either zone
    """
    usage_attr = username + '-usage'
    storage = IrodsStorage()

    # -1 marks "no usage value available" for a zone. That alone is not an error: the user may
    # have no resources in that zone, in which case the AVU is absent (None) or the lookup
    # fails with SessionException.
    data_zone_usage = -1
    try:
        raw_usage = storage.getAVU(settings.IRODS_BAGIT_PATH, usage_attr)
        if raw_usage is not None:
            data_zone_usage = float(raw_usage)
    except SessionException:
        data_zone_usage = -1

    # same lookup against the bagit path collection of the proxy user in the user zone
    user_zone_usage = -1
    try:
        user_zone_bagit_path = os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                                            settings.HS_IRODS_PROXY_USER_IN_USER_ZONE,
                                            settings.IRODS_BAGIT_PATH)
        raw_usage = storage.getAVU(user_zone_bagit_path, usage_attr)
        if raw_usage is not None:
            user_zone_usage = float(raw_usage)
    except SessionException:
        user_zone_usage = -1

    data_zone_missing = data_zone_usage < 0
    user_zone_missing = user_zone_usage < 0
    if data_zone_missing and user_zone_missing:
        err_msg = 'no quota size AVU in data zone and user zone for user {}'.format(
            username)
        logger.error(err_msg)
        raise ValidationError(err_msg)
    if user_zone_missing:
        return data_zone_usage
    if data_zone_missing:
        return user_zone_usage
    return data_zone_usage + user_zone_usage
示例#2
0
def download(request, path, *args, **kwargs):
    """
    Stream a file or bag stored in iRODS back to the client as an attachment.

    The resource id is taken from ``path``: for paths whose first segment is 'bags' it is the
    second segment without its extension, otherwise it is the first segment.
    :param request: the incoming request, passed to authorize() for the VIEW_RESOURCE check
    :param path: iRODS path of the file or bag to download
    :param kwargs: may contain 'environment', the primary key of a RodsEnvironment used to
    create a dedicated iRODS session for this download
    :return: a FileResponse wrapping the stdout of ``iget``; an HttpResponse carrying an HTML
    message if the requester is not authorized
    :raises KeyError: if no iRODS session can be determined
    """
    path_segments = path.split('/')
    if path_segments[0] == 'bags':
        res_id = os.path.splitext(path_segments[1])[0]
    else:
        res_id = path_segments[0]
    _res, authorized, _user = authorize(request, res_id,
                                        needed_permission=Action_To_Authorize.VIEW_RESOURCE,
                                        raises_exception=False)
    if not authorized:
        denied_response = HttpResponse()
        denied_response.content = "<h1>You do not have permission to download this resource!</h1>"
        return denied_response

    # pick the iRODS session: a per-request environment wins over the global session, which
    # wins over the currently active icommands session
    if 'environment' in kwargs:
        environment_pk = int(kwargs['environment'])
        environment = m.RodsEnvironment.objects.get(pk=environment_pk)
        session = Session("/tmp/django_irods", settings.IRODS_ICOMMANDS_PATH, session_id=uuid4())
        session.create_environment(environment)
        session.run('iinit', None, environment.auth)
    elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
        session = GLOBAL_SESSION
    elif icommands.ACTIVE_SESSION:
        session = icommands.ACTIVE_SESSION
    else:
        raise KeyError('settings must have IRODS_GLOBAL_SESSION set if there is no environment object')

    # on-demand bag creation. The existence of the res_id collection is checked before reading
    # or writing the AVU because the same resource may be deleted by another request while it
    # is being downloaded.
    istorage = IrodsStorage()
    bag_is_stale = istorage.exists(res_id) and istorage.getAVU(res_id, 'bag_modified') == "true"
    if bag_is_stale:
        create_bag_by_irods(res_id, istorage)
        if istorage.exists(res_id):
            istorage.setAVU(res_id, 'bag_modified', "false")

    # content type: guessed from the path, with a generic binary fallback
    guessed_type = mimetypes.guess_type(path)[0]
    if guessed_type is not None:
        content_type = guessed_type
    else:
        content_type = 'application-x/octet-stream'

    # the fourth whitespace-separated field of the `ils -l` output is used as Content-Length
    listing_fields = session.run("ils", None, "-l", path)[0].split()
    file_size = int(listing_fields[3])

    # '-' makes iget write the file to stdout, which is then streamed to the client
    iget_proc = session.run_safe('iget', None, path, '-')
    file_response = FileResponse(iget_proc.stdout, content_type=content_type)
    file_response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
        name=path_segments[-1])
    file_response['Content-Length'] = file_size
    return file_response
示例#3
0
    def verify_user_quota_usage_avu_in_user_zone(self, attname, qsize):
        """
        Assert that the quota usage AVU stored in the iRODS user zone has the expected value.
        The lookup runs in a session opened as HS_USER_ZONE_PROXY_USER, which has to have the
        rodsadmin role to be able to read the user type AVU in the user zone.
        :param attname: quota usage attribute name set on iRODS proxy user in user zone
        :param qsize: quota size (type string) the value stored for attname must be equal to.
        """
        user_zone_storage = IrodsStorage()
        session_kwargs = {
            'username': settings.HS_USER_ZONE_PROXY_USER,
            'password': settings.HS_USER_ZONE_PROXY_USER_PWD,
            'host': settings.HS_USER_ZONE_HOST,
            'port': settings.IRODS_PORT,
            'zone': settings.HS_USER_IRODS_ZONE,
            'sess_id': 'user_proxy_session',
        }
        user_zone_storage.set_user_session(**session_kwargs)

        # the AVU is read from the bagit path collection under the proxy user's home
        bagit_path_parts = ('/', settings.HS_USER_IRODS_ZONE, 'home',
                            settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE,
                            settings.IRODS_BAGIT_PATH)
        stored_qsize = user_zone_storage.getAVU(os.path.join(*bagit_path_parts), attname)
        self.assertEqual(qsize, stored_qsize)
示例#4
0
    def verify_user_quota_usage_avu_in_user_zone(self, attname, qsize):
        """
        Assert that the quota usage AVU stored in the iRODS user zone has the expected value.
        The lookup runs in a session opened as LINUX_ADMIN_USER_FOR_HS_USER_ZONE, which has to
        have the rodsadmin role to be able to read the user type AVU in the user zone.
        :param attname: quota usage attribute name set on iRODS proxy user in user zone
        :param qsize: quota size (type string) the value stored for attname must be equal to.
        """
        user_zone_storage = IrodsStorage()
        session_kwargs = {
            'username': settings.LINUX_ADMIN_USER_FOR_HS_USER_ZONE,
            'password': settings.LINUX_ADMIN_USER_PWD_FOR_HS_USER_ZONE,
            'host': settings.HS_USER_ZONE_HOST,
            'port': settings.IRODS_PORT,
            'zone': settings.HS_USER_IRODS_ZONE,
            'sess_id': 'user_proxy_session',
        }
        user_zone_storage.set_user_session(**session_kwargs)

        # the AVU is read from the bagit path collection under the proxy user's home
        bagit_path_parts = ('/', settings.HS_USER_IRODS_ZONE, 'home',
                            settings.HS_IRODS_PROXY_USER_IN_USER_ZONE,
                            settings.IRODS_BAGIT_PATH)
        stored_qsize = user_zone_storage.getAVU(os.path.join(*bagit_path_parts), attname)
        self.assertEqual(qsize, stored_qsize)
示例#5
0
def get_size_and_avu_for_irods_ref_files(username, password, host, port, zone,
                                         irods_fnames):
    """
    Look up the size and the iRODS AVUs of each referenced iRODS file or data object using a
    temporary iRODS user session. Only ``size`` and ``getAVU`` are called on the storage here.
    :param username: iRODS login account username used to open the user session
    :param password: iRODS login account password used to open the user session
    :param host: iRODS login host used to open the user session
    :param port: iRODS login port used to open the user session
    :param zone: iRODS login zone used to open the user session
    :param irods_fnames: comma-separated string of iRODS file or data object names
    :raises SessionException(proc.returncode, stdout, stderr) defined in django_irods/icommands.py
            to capture iRODS exceptions raised from iRODS icommand subprocess run triggered from
            any method calls from IrodsStorage() if an error or exception ever occurs
    :return: list of file sizes corresponding to irods_fnames, and a dict of extra metadata
             holding the iRODS AVUs of every file, keyed by '<file name>_<attribute name>'
    """
    irods_storage = IrodsStorage()
    irods_storage.set_user_session(username=username,
                                   password=password,
                                   host=host,
                                   port=port,
                                   zone=zone)

    ifsizes = []
    ifextra_mds = {}
    try:
        # str.split / dict.items behave the same on Python 2 and 3, unlike the removed
        # string.split() function and dict.iteritems()
        for ifname in irods_fnames.split(','):
            ifsizes.append(irods_storage.size(ifname))
            extra_md_dict = irods_storage.getAVU(ifname, type='-d')
            for key, val in extra_md_dict.items():
                # prefix the attribute with the file name so that AVUs of different files
                # do not overwrite each other
                ifextra_mds[ifname + '_' + key] = val
    finally:
        # always delete the user session once it has been set, even when an iRODS call
        # above raised, so that the session is not left behind
        irods_storage.delete_user_session()
    return ifsizes, ifextra_mds
示例#6
0
def update_quota_usage_task(username):
    """Refresh the quota usage stored in Django for a user from the usage AVUs in iRODS.

    This function runs as a celery task, invoked asynchronously with 1 minute delay to give
    the iRODS real time quota update micro-services enough time to update the quota usage AVU
    of the user before this task reads it. The micro-services update the HydroShare iRODS data
    zone and user zone independently, so the usage of both zones is aggregated here and stored
    in the Django DB as the usage for the hydroshare internal zone.
    :param
    username: the name of the user that needs to update quota usage for.
    :return: True if quota usage update succeeds;
             False if the quota row does not exist in Django or no usage AVU could be read from
             either zone. See log for details.
    """
    quota_row = UserQuota.objects.filter(user__username=username,
                                         zone="hydroshare").first()
    if quota_row is None:
        # the quota row does not exist in Django
        logger.error(
            'quota row does not exist in Django for hydroshare zone for '
            'user ' + username)
        return False

    usage_attr = username + '-usage'
    storage = IrodsStorage()

    # -1 marks "no usage value available" for a zone. That alone is not an error: the user may
    # have no resources in that zone, in which case the AVU is absent (None) or the lookup
    # fails with SessionException.
    data_zone_usage = -1
    try:
        raw_usage = storage.getAVU(settings.IRODS_BAGIT_PATH, usage_attr)
        if raw_usage is not None:
            data_zone_usage = float(raw_usage)
    except SessionException:
        data_zone_usage = -1

    # same lookup against the bagit path collection of the proxy user in the user zone
    user_zone_usage = -1
    try:
        user_zone_bagit_path = os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                                            settings.HS_IRODS_PROXY_USER_IN_USER_ZONE,
                                            settings.IRODS_BAGIT_PATH)
        raw_usage = storage.getAVU(user_zone_bagit_path, usage_attr)
        if raw_usage is not None:
            user_zone_usage = float(raw_usage)
    except SessionException:
        user_zone_usage = -1

    data_zone_missing = data_zone_usage < 0
    user_zone_missing = user_zone_usage < 0
    if data_zone_missing and user_zone_missing:
        logger.error(
            'no quota size AVU in data zone and user zone for the user ' +
            username)
        return False

    if user_zone_missing:
        total_usage = data_zone_usage
    elif data_zone_missing:
        total_usage = user_zone_usage
    else:
        total_usage = data_zone_usage + user_zone_usage

    quota_row.update_used_value(total_usage)
    return True
示例#7
0
def download(request, path, rest_call=False, use_async=True, *args, **kwargs):
    """
    Stream a resource file or a resource bag stored in iRODS back to the client.

    The resource id is taken from ``path``: for paths whose first segment is 'bags' (a bag
    download) it is the second segment without its extension, otherwise it is the first
    segment.
    :param request: the incoming request, used for the VIEW_RESOURCE authorization check and
    forwarded with the pre_download_file signal
    :param path: iRODS path of the file or bag to download
    :param rest_call: if True an unauthorized request raises PermissionDenied instead of
    getting an HTML response
    :param use_async: not used in this function body
    :param kwargs: may contain 'environment', the primary key of a RodsEnvironment used to
    create a dedicated iRODS session for this download
    :return: a FileResponse wrapping the stdout of ``iget``; an HttpResponse with status 401
    and an HTML message if the requester is not authorized and rest_call is False
    :raises PermissionDenied: if the requester is not authorized and rest_call is True
    :raises KeyError: if no iRODS session can be determined
    """
    path_segments = path.split('/')
    is_bag_download = path_segments[0] == 'bags'
    if is_bag_download:
        res_id = os.path.splitext(path_segments[1])[0]
    else:
        res_id = path_segments[0]

    res, authorized, _ = authorize(request, res_id,
                                   needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE,
                                   raises_exception=False)
    if not authorized:
        denied_response = HttpResponse(status=401)
        content_msg = "You do not have permission to download this resource!"
        if rest_call:
            raise PermissionDenied(content_msg)
        signin_html = '</h1><div class="col-xs-12"><h2 class="page-title">' \
                      '<a href="/oauth_request/"><span class ="glyphicon glyphicon-log-in"></span>' \
                      'Sign In</a></h2>'
        denied_response.content = '<h1>' + content_msg + signin_html
        return denied_response

    # for plain file downloads without "/data" in the path, drop everything in front of the
    # first '/'
    if not (is_bag_download or "/data" in path):
        path = path[path.find('/'):]

    istorage = IrodsStorage()

    # pick the iRODS session: a per-request environment wins over the global session, which
    # wins over the currently active icommands session
    if 'environment' in kwargs:
        environment_pk = int(kwargs['environment'])
        environment = m.RodsEnvironment.objects.get(pk=environment_pk)
        session = Session("/tmp/django_irods",
                          settings.IRODS_ICOMMANDS_PATH,
                          session_id=uuid4())
        session.create_environment(environment)
        session.run('iinit', None, environment.auth)
    elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
        session = GLOBAL_SESSION
    elif icommands.ACTIVE_SESSION:
        session = icommands.ACTIVE_SESSION
    else:
        raise KeyError('settings must have IRODS_GLOBAL_SESSION set '
                       'if there is no environment object')

    if istorage.exists(res_id) and is_bag_download:
        bag_flag = istorage.getAVU(res_id, 'bag_modified')
        # even when the flag does not ask for a new bag, recreate the bag if the bag file is
        # missing; this covers the flag being out-of-sync with the real bag status, which
        # would otherwise lead to the download of a nonexistent bag
        if bag_flag is None or bag_flag.lower() == "false":
            if not istorage.exists(os.path.join('bags', res_id + '.zip')):
                bag_flag = 'true'

        if bag_flag is None or bag_flag.lower() == "true":
            create_bag(res)

    resource_cls = check_resource_type(res.resource_type)

    # let listeners know that a file is about to be downloaded; the file name is the last
    # segment of the path as it was requested
    pre_download_file.send(sender=resource_cls,
                           resource=res,
                           download_file_name=path_segments[-1],
                           request=request)

    # content type: guessed from the path, with a generic binary fallback
    guessed_type = mimetypes.guess_type(path)[0]
    if guessed_type is not None:
        content_type = guessed_type
    else:
        content_type = 'application-x/octet-stream'

    # the fourth whitespace-separated field of the `ils -l` output is used as Content-Length
    listing_fields = session.run("ils", None, "-l", path)[0].split()
    file_size = int(listing_fields[3])

    # '-' makes iget write the file to stdout, which is then streamed to the client
    iget_proc = session.run_safe('iget', None, path, '-')
    file_response = FileResponse(iget_proc.stdout, content_type=content_type)
    file_response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
        name=path.split('/')[-1])
    file_response['Content-Length'] = file_size
    return file_response
    def handle(self, *args, **options):
        """
        Migrate every resource whose storage_type is 'user' from the iRODS user zone to the
        data zone: copy its collection, copy its AVUs, rewrite the file paths stored in Django
        and clear the federation path of the resource. Progress and problems are printed; a
        SessionException raised for one resource is reported and the next resource is tried.
        """
        migrated_count = 0
        storage = IrodsStorage()
        avu_names = ('bag_modified', 'metadata_dirty', 'isPublic', 'resourceType')
        for resource in BaseResource.objects.all():
            if resource.storage_type != 'user':
                # only resources stored in the user zone need to be migrated
                continue

            try:
                # copy files from iRODS user zone to data zone, replacing any collection that
                # already exists under the target name
                src_coll = resource.root_path
                tgt_coll = resource.short_id
                if storage.exists(tgt_coll):
                    storage.delete(tgt_coll)
                storage.copyFiles(src_coll, tgt_coll)

                # copy the AVUs of the resource collection over as well. Everything is copied
                # literally except bag_modified, which is forced to 'true' so that the bag is
                # regenerated in the data zone.
                for avu_name in avu_names:
                    src_value = storage.getAVU(src_coll, avu_name)
                    tgt_value = 'true' if avu_name == 'bag_modified' else src_value
                    storage.setAVU(tgt_coll, avu_name, tgt_value)

                # The source collection is intentionally NOT deleted from the user zone here,
                # in case there are issues with the migration. Once the migration has been
                # confirmed to be successful, the resource collections left in the user zone
                # need to be cleaned up separately (e.g. with the iRODS irm command).

                path_migrated = False
                for res_file in resource.files.all():
                    if res_file.resource_file.name:
                        print('The resource_file field should be empty for resource {} but '
                              'have the value of {}'.format(resource.short_id,
                                                            res_file.resource_file.name))
                        break
                    file_path = res_file.fed_resource_file.name
                    if not file_path:
                        print('The fed_resource_file field should not be empty for '
                              'resource {}'.format(resource.short_id))
                        break

                    # move the path from the federated field to the regular field, without
                    # the federation path prefix (and its trailing separator) if it has one
                    fed_prefix = resource.resource_federation_path
                    has_fed_prefix = file_path.startswith(fed_prefix)
                    if has_fed_prefix:
                        file_path = file_path[len(fed_prefix)+1:]
                    res_file.resource_file.name = file_path
                    res_file.fed_resource_file.name = ''
                    res_file.save()
                    path_migrated = True
                    if not has_fed_prefix:
                        print('fed_resource_file field does not contain absolute federation '
                              'path which is an exception but can work after migration. '
                              'file_path is {}'.format(file_path))

                if path_migrated or resource.files.count() == 0:
                    # update resource federation path to point resource to data zone
                    resource.resource_federation_path = ''
                    resource.save()
                    print("Resource {} has been moved from user zone to data zone "
                          "successfully".format(resource.short_id))
                    migrated_count += 1
            except SessionException as ex:
                print("Resource {} failed to move: {}".format(resource.short_id, ex.stderr))

        print("{} resources have been moved from user zone to data zone successfully".format(
            migrated_count))
示例#9
0
def download(request, path, rest_call=False, use_async=True, use_reverse_proxy=True,
             *args, **kwargs):
    """
    Serve a resource file, a resource bag, or an on-demand zip of a folder/aggregation
    stored in iRODS.

    The first segment of ``path`` selects the kind of download: 'bags' for a bag, 'zips' for
    a zip; any other first segment is taken as the resource id of a plain file download.

    :param request: the incoming request; used for the VIEW_RESOURCE authorization check, for
        ``request.session`` / ``request.path`` when a task is queued, and for ``request.META``
        when deciding whether to hand off to the reverse proxy
    :param path: path of the requested file, bag or zip
    :param rest_call: if True, an unauthorized request raises PermissionDenied, error messages
        are returned as plain text instead of HTML, and a queued zip/bag task is reported as
        JSON instead of a redirect
    :param use_async: if True, zip and bag creation are queued with ``apply_async`` and the
        function returns before the file is served; if False they are created synchronously
    :param use_reverse_proxy: if True, and ``settings.SENDFILE_ON`` is set and the request
        carries 'HTTP_X_DJANGO_REVERSE_PROXY', the transfer is delegated via X-Accel-Redirect
    :param kwargs: may contain 'environment', the primary key of a RodsEnvironment used to
        create a dedicated iRODS session (only consulted for non-federated resources)
    :return: an HttpResponse, HttpResponseRedirect or FileResponse, depending on the branch
    :raises PermissionDenied: if the requester is not authorized and rest_call is True
    :raises KeyError: if no iRODS session can be determined for a non-federated resource
    """
    split_path_strs = path.split('/')
    is_bag_download = False
    is_zip_download = False
    is_sf_agg_file = False
    if split_path_strs[0] == 'bags':
        res_id = os.path.splitext(split_path_strs[1])[0]
        is_bag_download = True
    elif split_path_strs[0] == 'zips':
        # for a path ending in '.zip' the resource id is the third path segment, otherwise
        # it is the second one
        if path.endswith('.zip'):
            res_id = os.path.splitext(split_path_strs[2])[0]
        else:
            res_id = os.path.splitext(split_path_strs[1])[0]
        is_zip_download = True
    else:
        res_id = split_path_strs[0]

    # if the resource does not exist in django, authorized will be false
    res, authorized, _ = authorize(request, res_id,
                                   needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE,
                                   raises_exception=False)
    if not authorized:
        response = HttpResponse(status=401)
        content_msg = "You do not have permission to download this resource!"
        if rest_call:
            raise PermissionDenied(content_msg)
        else:
            response.content = "<h1>" + content_msg + "</h1>"
            return response

    # a file of a CompositeResource that belongs to a single-file aggregation is flagged so
    # that it goes through the zip branch below
    if res.resource_type == "CompositeResource" and not path.endswith(".zip"):
        for f in ResourceFile.objects.filter(object_id=res.id):
            if path == f.storage_path:
                if f.has_logical_file and f.logical_file.is_single_file_aggregation:
                    is_sf_agg_file = True

    if res.resource_federation_path:
        # the resource is stored in federated zone
        istorage = IrodsStorage('federated')
        federated_path = res.resource_federation_path
        path = os.path.join(federated_path, path)
        session = icommands.ACTIVE_SESSION
    else:
        # TODO: From Alva: I do not understand the use case for changing the environment.
        # TODO: This seems an enormous potential vulnerability, as arguments are
        # TODO: passed from the URI directly to IRODS without verification.
        istorage = IrodsStorage()
        federated_path = ''
        if 'environment' in kwargs:
            environment = int(kwargs['environment'])
            environment = m.RodsEnvironment.objects.get(pk=environment)
            session = Session("/tmp/django_irods", settings.IRODS_ICOMMANDS_PATH,
                              session_id=uuid4())
            session.create_environment(environment)
            session.run('iinit', None, environment.auth)
        elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
            session = GLOBAL_SESSION
        elif icommands.ACTIVE_SESSION:
            session = icommands.ACTIVE_SESSION
        else:
            raise KeyError('settings must have IRODS_GLOBAL_SESSION set '
                           'if there is no environment object')

    resource_cls = check_resource_type(res.resource_type)

    # root collection of the resource, prefixed with the federation path when there is one
    if federated_path:
        res_root = os.path.join(federated_path, res_id)
    else:
        res_root = res_id

    if is_zip_download or is_sf_agg_file:
        if not path.endswith(".zip"):  # requesting folder that needs to be zipped
            # part of the path that follows the resource id
            input_path = path.split(res_id)[1]
            # the zip is written below a per-day, per-resource folder with a random name
            random_hash = random.getrandbits(32)
            daily_date = datetime.datetime.today().strftime('%Y-%m-%d')
            random_hash_path = 'zips/{daily_date}/{res_id}/{rand_folder}'.format(
                daily_date=daily_date, res_id=res_id,
                rand_folder=random_hash)
            output_path = '{random_hash_path}{path}.zip'.format(random_hash_path=random_hash_path,
                                                                path=input_path)

            if res.resource_type == "CompositeResource":
                # the aggregation name is input_path with a prefix of len('/data/contents/')
                # characters removed
                aggregation_name = input_path[len('/data/contents/'):]
                res.create_aggregation_xml_documents(aggregation_name=aggregation_name)

            if use_async:
                # queue the zip creation and its deletion, then return right away; the task
                # id and the path to download from are handed back to the caller
                task = create_temp_zip.apply_async((res_id, input_path, output_path,
                                                    is_sf_agg_file), countdown=3)
                delete_zip.apply_async((random_hash_path, ),
                                       countdown=(20 * 60))  # delete after 20 minutes
                if is_sf_agg_file:
                    download_path = request.path.split(res_id)[0] + output_path
                else:
                    download_path = request.path.split("zips")[0] + output_path
                if rest_call:
                    return HttpResponse(json.dumps({'zip_status': 'Not ready',
                                                    'task_id': task.task_id,
                                                    'download_path': download_path}),
                                        content_type="application/json")
                request.session['task_id'] = task.task_id
                request.session['download_path'] = download_path
                return HttpResponseRedirect(res.get_absolute_url())

            # synchronous path: create the zip now and continue below to serve it
            ret_status = create_temp_zip(res_id, input_path, output_path, is_sf_agg_file)
            delete_zip.apply_async((random_hash_path, ),
                                   countdown=(20 * 60))  # delete after 20 minutes
            if not ret_status:
                # NOTE(review): no explicit status code is set on this error response
                content_msg = "Zip cannot be created successfully. Check log for details."
                response = HttpResponse()
                if rest_call:
                    response.content = content_msg
                else:
                    response.content = "<h1>" + content_msg + "</h1>"
                return response

            path = output_path

    # NOTE(review): the bag_modified and metadata_dirty AVUs are read for every kind of
    # download, not only for bag downloads
    bag_modified = istorage.getAVU(res_root, 'bag_modified')
    # make sure if bag_modified is not set to true, we still recreate the bag if the
    # bag file does not exist for some reason to resolve the error to download a nonexistent
    # bag when bag_modified is false due to the flag being out-of-sync with the real bag status
    if bag_modified is None or bag_modified.lower() == "false":
        # check whether the bag file exists
        bag_file_name = res_id + '.zip'
        if res_root.startswith(res_id):
            bag_full_path = os.path.join('bags', bag_file_name)
        else:
            bag_full_path = os.path.join(federated_path, 'bags', bag_file_name)
        # set bag_modified to 'true' if the bag does not exist so that it can be recreated
        # and the bag_modified AVU will be set correctly as well subsequently
        if not istorage.exists(bag_full_path):
            bag_modified = 'true'

    metadata_dirty = istorage.getAVU(res_root, 'metadata_dirty')
    # do on-demand bag creation
    # needs to check whether res_id collection exists before getting/setting AVU on it
    # to accommodate the case where the very same resource gets deleted by another request
    # when it is getting downloaded

    if is_bag_download:
        # send signal for pre_check_bag_flag
        pre_check_bag_flag.send(sender=resource_cls, resource=res)
        if bag_modified is None or bag_modified.lower() == "true":
            if metadata_dirty is None or metadata_dirty.lower() == 'true':
                create_bag_files(res)
            if use_async:
                # task parameter has to be passed in as a tuple or list, hence (res_id,) is needed
                # Note that since we are using JSON for task parameter serialization, no complex
                # object can be passed as parameters to a celery task
                task = create_bag_by_irods.apply_async((res_id,), countdown=3)
                if rest_call:
                    return HttpResponse(json.dumps({'bag_status': 'Not ready',
                                                    'task_id': task.task_id}),
                                        content_type="application/json")

                request.session['task_id'] = task.task_id
                request.session['download_path'] = request.path
                return HttpResponseRedirect(res.get_absolute_url())
            else:
                ret_status = create_bag_by_irods(res_id)
                if not ret_status:
                    # NOTE(review): no explicit status code is set on this error response
                    content_msg = "Bag cannot be created successfully. Check log for details."
                    response = HttpResponse()
                    if rest_call:
                        response.content = content_msg
                    else:
                        response.content = "<h1>" + content_msg + "</h1>"
                    return response

    elif metadata_dirty is None or metadata_dirty.lower() == 'true':
        if path.endswith("resourcemap.xml") or path.endswith('resourcemetadata.xml'):
            # we need to regenerate the metadata xml files
            create_bag_files(res)

    # send signal for pre download file
    # (the file name is the last segment of the path as it was originally requested)
    download_file_name = split_path_strs[-1]
    pre_download_file.send(sender=resource_cls, resource=res,
                           download_file_name=download_file_name,
                           request=request)

    # obtain mime_type to set content_type
    mtype = 'application-x/octet-stream'
    mime_type = mimetypes.guess_type(path)
    if mime_type[0] is not None:
        mtype = mime_type[0]
    # retrieve file size to set up Content-Length header
    # (the fourth whitespace-separated field of the `ils -l` output is used)
    stdout = session.run("ils", None, "-l", path)[0].split()
    flen = int(stdout[3])

    # If this path is resource_federation_path, then the file is a local user file
    userpath = '/' + os.path.join(
        getattr(settings, 'HS_USER_IRODS_ZONE', 'hydroshareuserZone'),
        'home',
        getattr(settings, 'HS_LOCAL_PROXY_USER_IN_FED_ZONE', 'localHydroProxy'))

    # Allow reverse proxy if request was forwarded by nginx
    # (HTTP_X_DJANGO_REVERSE_PROXY is 'true')
    # and reverse proxy is possible according to configuration.

    if use_reverse_proxy and getattr(settings, 'SENDFILE_ON', False) and \
       'HTTP_X_DJANGO_REVERSE_PROXY' in request.META:

        # The NGINX sendfile abstraction is invoked as follows:
        # 1. The request to download a file enters this routine via the /rest_download or /download
        #    url in ./urls.py. It is redirected here from Django. The URI contains either the
        #    unqualified resource path or the federated resource path, depending upon whether
        #    the request is local or federated.
        # 2. This deals with unfederated resources by redirecting them to the uri
        #    /irods-data/{resource-id}/... on nginx. This URI is configured to read the file
        #    directly from the iRODS vault via NFS, and does not work for direct access to the
        #    vault due to the 'internal;' declaration in NGINX.
        # 3. This deals with federated resources by reading their path, matching local vaults, and
        #    redirecting to URIs that are in turn mapped to read from appropriate iRODS vaults. At
        #    present, the only one of these is /irods-user, which handles files whose federation
        #    path is stored in the variable 'userpath'.
        # 4. If there is no vault available for the resource, the file is transferred without
        #    NGINX, exactly as it was transferred previously.

        # stop NGINX targets that are non-existent from hanging forever.
        if not istorage.exists(path):
            content_msg = "file path {} does not exist in iRODS".format(path)
            response = HttpResponse(status=404)
            if rest_call:
                response.content = content_msg
            else:
                response.content = "<h1>" + content_msg + "</h1>"
            return response

        if not res.is_federated:
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
                name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = '/'.join([
                getattr(settings, 'IRODS_DATA_URI', '/irods-data'), path])
            return response

        elif res.resource_federation_path == userpath:  # this guarantees a "user" resource
            # invoke X-Accel-Redirect on physical vault file in nginx
            # if path is full user path; strip federation prefix
            if path.startswith(userpath):
                path = path[len(userpath)+1:]
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
                name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = os.path.join(
                getattr(settings, 'IRODS_USER_URI', '/irods-user'), path)
            return response

    # if we get here, none of the above conditions are true
    # NOTE(review): FILE_SIZE_LIMIT is defined outside this block; the message in the else
    # branch states 1GB -- confirm the two agree
    if flen <= FILE_SIZE_LIMIT:
        options = ('-',)  # we're redirecting to stdout.
        # this unusual way of calling works for federated or local resources
        proc = session.run_safe('iget', None, path, *options)
        response = FileResponse(proc.stdout, content_type=mtype)
        response['Content-Disposition'] = 'attachment; filename="{name}"'.format(
            name=path.split('/')[-1])
        response['Content-Length'] = flen
        return response

    else:
        content_msg = "File larger than 1GB cannot be downloaded directly via HTTP. " \
                      "Please download the large file via iRODS clients."
        response = HttpResponse(status=403)
        if rest_call:
            response.content = content_msg
        else:
            response.content = "<h1>" + content_msg + "</h1>"
        return response
示例#10
0
def update_quota_usage_task(username):
    """Refresh the aggregated quota usage stored in Django for one user.

    Intended to run as a celery task, invoked asynchronously with a 1 minute delay so
    that the iRODS real time quota update micro-services have time to refresh the
    quota usage AVU before it is read here. The micro-services update the HydroShare
    iRODS data zone and user zone independently, so this function adds the usage of
    both zones together and stores the sum on the Django quota row of the hydroshare
    internal zone.

    :param username: the name of the user whose quota usage needs to be updated.
    :return: True if the quota usage was updated;
             False if the Django quota row does not exist or no usage AVU could be
             read from either zone. See log for details.
    """
    quota_row = UserQuota.objects.filter(user__username=username,
                                         zone="hydroshare").first()
    if quota_row is None:
        # nothing to update: the quota row does not exist in Django
        logger.error('quota row does not exist in Django for hydroshare zone for '
                     'user ' + username)
        return False

    usage_attr = username + '-usage'
    storage = IrodsStorage()

    def _zone_usage(bagit_path):
        # Read the usage AVU set on a bagit path collection. -1 marks "no usage AVU":
        # the user may have no resources in that zone, in which case the AVU is absent
        # (None) or the lookup raises SessionException.
        try:
            raw_usage = storage.getAVU(bagit_path, usage_attr)
        except SessionException:
            return -1
        if raw_usage is None:
            return -1
        return float(raw_usage)

    # usage reported for the iRODS data zone
    data_zone_usage = _zone_usage(settings.IRODS_BAGIT_PATH)
    # usage reported for the iRODS user zone
    user_zone_usage = _zone_usage(
        os.path.join('/', settings.HS_USER_IRODS_ZONE, 'home',
                     settings.HS_LOCAL_PROXY_USER_IN_FED_ZONE,
                     settings.IRODS_BAGIT_PATH))

    data_zone_missing = data_zone_usage < 0
    user_zone_missing = user_zone_usage < 0

    if data_zone_missing and user_zone_missing:
        logger.error('no quota size AVU in data zone and user zone for the user ' + username)
        return False

    # only zones that actually reported a usage contribute to the total
    if user_zone_missing:
        total_usage = data_zone_usage
    elif data_zone_missing:
        total_usage = user_zone_usage
    else:
        total_usage = data_zone_usage + user_zone_usage

    quota_row.update_used_value(total_usage)
    return True
示例#11
0
def download(request,
             path,
             rest_call=False,
             use_async=True,
             use_reverse_proxy=True,
             *args,
             **kwargs):
    """Serve a resource file, a resource bag, or an on-demand zip stored in iRODS.

    The first segment of ``path`` selects the download mode:

    * ``bags/<res_id>.<ext>`` - bag download; the bag is (re)created on demand when the
      ``bag_modified`` AVU is unset/true or the bag file is missing.
    * ``zips/...`` - zip download; a path not ending in ``.zip`` requests that a folder be
      zipped first, a path ending in ``.zip`` fetches a previously created zip.
    * anything else - ``<res_id>/...``, a file inside the resource. For a
      CompositeResource, a file that is a single file aggregation is zipped like a folder.

    :param request: the Django request; ``request.session``, ``request.path`` and
        ``request.META`` are used.
    :param path: the requested path as described above.
    :param rest_call: if True, error messages are returned as plain text instead of being
        wrapped in ``<h1>``, a failed authorization raises ``PermissionDenied``, and the
        status of an asynchronous zip/bag creation is returned as JSON.
    :param use_async: if True, zip/bag creation is queued with ``apply_async`` and this
        function returns before the file exists; if False, creation runs inline.
    :param use_reverse_proxy: if True, and ``settings.SENDFILE_ON`` is truthy, and
        ``HTTP_X_DJANGO_REVERSE_PROXY`` is present in ``request.META``, the file transfer
        is delegated to nginx through an ``X-Accel-Redirect`` header.
    :param kwargs: may contain ``environment``, the primary key of a ``RodsEnvironment``
        used to build a dedicated iRODS session (non-federated resources only).
    :return: one of
        - ``HttpResponse`` with status 401 when not authorized (non-REST call);
        - ``HttpResponse`` with a JSON body, or ``HttpResponseRedirect`` to the resource
          page, when zip/bag creation was queued asynchronously;
        - ``HttpResponse`` carrying an error message when inline zip/bag creation fails;
        - ``HttpResponse`` with status 404 when the reverse proxy is used and the path
          does not exist in iRODS;
        - ``HttpResponse`` with an ``X-Accel-Redirect`` header (reverse proxy);
        - ``FileResponse`` streaming the output of ``iget`` when the file size does not
          exceed ``FILE_SIZE_LIMIT``;
        - ``HttpResponse`` with status 403 when the file is larger than that.
    :raises PermissionDenied: when ``rest_call`` is True and the user is not authorized.
    :raises KeyError: when no iRODS session can be determined for a non-federated
        resource.
    """
    split_path_strs = path.split('/')
    is_bag_download = False
    is_zip_download = False
    is_sf_agg_file = False
    # work out the resource id and the download mode from the leading path segments
    if split_path_strs[0] == 'bags':
        # bags/<res_id>.<ext>
        res_id = os.path.splitext(split_path_strs[1])[0]
        is_bag_download = True
    elif split_path_strs[0] == 'zips':
        if path.endswith('.zip'):
            # an already generated zip: the resource id is the third segment, which
            # matches the 'zips/{daily_date}/{res_id}/{rand_folder}' layout built below
            res_id = os.path.splitext(split_path_strs[2])[0]
        else:
            # a request to zip a folder: the resource id is the second segment
            res_id = os.path.splitext(split_path_strs[1])[0]
        is_zip_download = True
    else:
        res_id = split_path_strs[0]

    # if the resource does not exist in django, authorized will be false
    res, authorized, _ = authorize(
        request,
        res_id,
        needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE,
        raises_exception=False)
    if not authorized:
        # NOTE(review): this response object is only used on the non-REST path; the REST
        # path raises instead and discards it.
        response = HttpResponse(status=401)
        content_msg = "You do not have permission to download this resource!"
        if rest_call:
            raise PermissionDenied(content_msg)
        else:
            response.content = "<h1>" + content_msg + "</h1>"
            return response

    # a single file aggregation in a composite resource is downloaded as a zip, so flag
    # it when the requested path matches the storage path of such a file
    if res.resource_type == "CompositeResource" and not path.endswith(".zip"):
        # NOTE(review): the loop keeps scanning after a match; presumably storage paths
        # are unique so a break would be safe - confirm before changing.
        for f in ResourceFile.objects.filter(object_id=res.id):
            if path == f.storage_path:
                if f.has_logical_file and f.logical_file.is_single_file_aggregation:
                    is_sf_agg_file = True

    if res.resource_federation_path:
        # the resource is stored in federated zone
        istorage = IrodsStorage('federated')
        federated_path = res.resource_federation_path
        # from here on, path is the full federated path
        path = os.path.join(federated_path, path)
        session = icommands.ACTIVE_SESSION
    else:
        # TODO: From Alva: I do not understand the use case for changing the environment.
        # TODO: This seems an enormous potential vulnerability, as arguments are
        # TODO: passed from the URI directly to IRODS without verification.
        istorage = IrodsStorage()
        federated_path = ''
        # pick the iRODS session: an explicitly requested environment first, then the
        # global session, then the active icommands session
        if 'environment' in kwargs:
            environment = int(kwargs['environment'])
            environment = m.RodsEnvironment.objects.get(pk=environment)
            session = Session("/tmp/django_irods",
                              settings.IRODS_ICOMMANDS_PATH,
                              session_id=uuid4())
            session.create_environment(environment)
            session.run('iinit', None, environment.auth)
        elif getattr(settings, 'IRODS_GLOBAL_SESSION', False):
            session = GLOBAL_SESSION
        elif icommands.ACTIVE_SESSION:
            session = icommands.ACTIVE_SESSION
        else:
            raise KeyError('settings must have IRODS_GLOBAL_SESSION set '
                           'if there is no environment object')

    resource_cls = check_resource_type(res.resource_type)

    # res_root is the collection that carries the bag_modified / metadata_dirty AVUs
    if federated_path:
        res_root = os.path.join(federated_path, res_id)
    else:
        res_root = res_id

    if is_zip_download or is_sf_agg_file:
        if not path.endswith(
                ".zip"):  # requesting folder that needs to be zipped
            # the part of path that follows the first occurrence of the resource id
            input_path = path.split(res_id)[1]
            # a random folder name keeps separate zip requests from colliding
            # NOTE(review): `random` is not a cryptographic source; fine if the folder
            # name only needs to be unique rather than unguessable - confirm.
            random_hash = random.getrandbits(32)
            daily_date = datetime.datetime.today().strftime('%Y-%m-%d')
            random_hash_path = 'zips/{daily_date}/{res_id}/{rand_folder}'.format(
                daily_date=daily_date, res_id=res_id, rand_folder=random_hash)
            output_path = '{random_hash_path}{path}.zip'.format(
                random_hash_path=random_hash_path, path=input_path)

            if res.resource_type == "CompositeResource":
                # drops the first len('/data/contents/') characters without checking
                # them - assumes input_path starts with '/data/contents/'; TODO confirm
                aggregation_name = input_path[len('/data/contents/'):]
                res.create_aggregation_xml_documents(
                    aggregation_name=aggregation_name)

            if use_async:
                # queue the zip creation, and its removal 20 minutes later, then return
                # right away with enough information for the client to poll for the zip
                task = create_temp_zip.apply_async(
                    (res_id, input_path, output_path, is_sf_agg_file),
                    countdown=3)
                delete_zip.apply_async(
                    (random_hash_path, ),
                    countdown=(20 * 60))  # delete after 20 minutes
                # rebuild the URL at which the finished zip can be downloaded
                if is_sf_agg_file:
                    download_path = request.path.split(res_id)[0] + output_path
                else:
                    download_path = request.path.split("zips")[0] + output_path
                if rest_call:
                    return HttpResponse(json.dumps({
                        'zip_status':
                        'Not ready',
                        'task_id':
                        task.task_id,
                        'download_path':
                        download_path
                    }),
                                        content_type="application/json")
                # non-REST: stash the task info in the session and send the browser
                # back to the resource page
                request.session['task_id'] = task.task_id
                request.session['download_path'] = download_path
                return HttpResponseRedirect(res.get_absolute_url())

            # synchronous path: create the zip inline; its removal is still queued
            ret_status = create_temp_zip(res_id, input_path, output_path,
                                         is_sf_agg_file)
            delete_zip.apply_async(
                (random_hash_path, ),
                countdown=(20 * 60))  # delete after 20 minutes
            if not ret_status:
                content_msg = "Zip cannot be created successfully. Check log for details."
                # NOTE(review): no status is passed, so the framework default applies -
                # presumably 200; confirm whether an error status is intended here.
                response = HttpResponse()
                if rest_call:
                    response.content = content_msg
                else:
                    response.content = "<h1>" + content_msg + "</h1>"
                return response

            # serve the freshly created zip from here on
            path = output_path

    bag_modified = istorage.getAVU(res_root, 'bag_modified')
    # make sure if bag_modified is not set to true, we still recreate the bag if the
    # bag file does not exist for some reason to resolve the error to download a nonexistent
    # bag when bag_modified is false due to the flag being out-of-sync with the real bag status
    if bag_modified is None or bag_modified.lower() == "false":
        # check whether the bag file exists
        bag_file_name = res_id + '.zip'
        if res_root.startswith(res_id):
            # res_root is the bare resource id, i.e. no federation prefix
            bag_full_path = os.path.join('bags', bag_file_name)
        else:
            bag_full_path = os.path.join(federated_path, 'bags', bag_file_name)
        # set bag_modified to 'true' if the bag does not exist so that it can be recreated
        # and the bag_modified AVU will be set correctly as well subsequently
        if not istorage.exists(bag_full_path):
            bag_modified = 'true'

    metadata_dirty = istorage.getAVU(res_root, 'metadata_dirty')
    # do on-demand bag creation
    # needs to check whether res_id collection exists before getting/setting AVU on it
    # to accommodate the case where the very same resource gets deleted by another request
    # when it is getting downloaded

    if is_bag_download:
        # send signal for pre_check_bag_flag
        pre_check_bag_flag.send(sender=resource_cls, resource=res)
        if bag_modified is None or bag_modified.lower() == "true":
            # regenerate the bag metadata files first when they are missing or stale
            if metadata_dirty is None or metadata_dirty.lower() == 'true':
                create_bag_files(res)
            if use_async:
                # task parameter has to be passed in as a tuple or list, hence (res_id,) is needed
                # Note that since we are using JSON for task parameter serialization, no complex
                # object can be passed as parameters to a celery task
                task = create_bag_by_irods.apply_async((res_id, ), countdown=3)
                if rest_call:
                    return HttpResponse(json.dumps({
                        'bag_status': 'Not ready',
                        'task_id': task.task_id
                    }),
                                        content_type="application/json")

                # non-REST: stash the task info in the session and send the browser
                # back to the resource page
                request.session['task_id'] = task.task_id
                request.session['download_path'] = request.path
                return HttpResponseRedirect(res.get_absolute_url())
            else:
                ret_status = create_bag_by_irods(res_id)
                if not ret_status:
                    content_msg = "Bag cannot be created successfully. Check log for details."
                    # NOTE(review): no status is passed, so the framework default
                    # applies - presumably 200; confirm whether an error status is
                    # intended here.
                    response = HttpResponse()
                    if rest_call:
                        response.content = content_msg
                    else:
                        response.content = "<h1>" + content_msg + "</h1>"
                    return response

    elif metadata_dirty is None or metadata_dirty.lower() == 'true':
        if path.endswith("resourcemap.xml") or path.endswith(
                'resourcemetadata.xml'):
            # we need to regenerate the metadata xml files
            create_bag_files(res)

    # send signal for pre download file
    # the file name is taken from the path as originally requested, not from any
    # federated or zip path computed above
    download_file_name = split_path_strs[-1]
    pre_download_file.send(sender=resource_cls,
                           resource=res,
                           download_file_name=download_file_name,
                           request=request)

    # obtain mime_type to set content_type
    # NOTE(review): the fallback below is not the registered generic type
    # 'application/octet-stream'; confirm whether the 'application-x/' spelling is
    # intentional before changing it.
    mtype = 'application-x/octet-stream'
    mime_type = mimetypes.guess_type(path)
    if mime_type[0] is not None:
        mtype = mime_type[0]
    # retrieve file size to set up Content-Length header
    # assumes the fourth whitespace-separated token of the `ils -l` output is the file
    # size - TODO confirm against the iRODS version in use
    stdout = session.run("ils", None, "-l", path)[0].split()
    flen = int(stdout[3])

    # If this path is resource_federation_path, then the file is a local user file
    userpath = '/' + os.path.join(
        getattr(settings, 'HS_USER_IRODS_ZONE', 'hydroshareuserZone'), 'home',
        getattr(settings, 'HS_LOCAL_PROXY_USER_IN_FED_ZONE',
                'localHydroProxy'))

    # Allow reverse proxy if request was forwarded by nginx
    # (HTTP_X_DJANGO_REVERSE_PROXY is 'true')
    # and reverse proxy is possible according to configuration.
    # Only the presence of the header is tested below, not its value.

    if use_reverse_proxy and getattr(settings, 'SENDFILE_ON', False) and \
       'HTTP_X_DJANGO_REVERSE_PROXY' in request.META:

        # The NGINX sendfile abstraction is invoked as follows:
        # 1. The request to download a file enters this routine via the /rest_download or /download
        #    url in ./urls.py. It is redirected here from Django. The URI contains either the
        #    unqualified resource path or the federated resource path, depending upon whether
        #    the request is local or federated.
        # 2. This deals with unfederated resources by redirecting them to the uri
        #    /irods-data/{resource-id}/... on nginx. This URI is configured to read the file
        #    directly from the iRODS vault via NFS, and does not work for direct access to the
        #    vault due to the 'internal;' declaration in NGINX.
        # 3. This deals with federated resources by reading their path, matching local vaults, and
        #    redirecting to URIs that are in turn mapped to read from appropriate iRODS vaults. At
        #    present, the only one of these is /irods-user, which handles files whose federation
        #    path is stored in the variable 'userpath'.
        # 4. If there is no vault available for the resource, the file is transferred without
        #    NGINX, exactly as it was transferred previously.

        # stop NGINX targets that are non-existent from hanging forever.
        # NOTE(review): this check runs after the `ils` call above has already used the
        # path - confirm how `session.run` behaves for a missing path.
        if not istorage.exists(path):
            content_msg = "file path {} does not exist in iRODS".format(path)
            response = HttpResponse(status=404)
            if rest_call:
                response.content = content_msg
            else:
                response.content = "<h1>" + content_msg + "</h1>"
            return response

        if not res.is_federated:
            # invoke X-Accel-Redirect on physical vault file in nginx
            # NOTE(review): the file name is placed in the header without escaping -
            # confirm that quotes or control characters cannot occur in path.
            response = HttpResponse(content_type=mtype)
            response[
                'Content-Disposition'] = 'attachment; filename="{name}"'.format(
                    name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = '/'.join(
                [getattr(settings, 'IRODS_DATA_URI', '/irods-data'), path])
            return response

        elif res.resource_federation_path == userpath:  # this guarantees a "user" resource
            # invoke X-Accel-Redirect on physical vault file in nginx
            # if path is full user path; strip federation prefix
            if path.startswith(userpath):
                # the + 1 also drops the '/' that follows the prefix
                path = path[len(userpath) + 1:]
            # invoke X-Accel-Redirect on physical vault file in nginx
            response = HttpResponse(content_type=mtype)
            response[
                'Content-Disposition'] = 'attachment; filename="{name}"'.format(
                    name=path.split('/')[-1])
            response['Content-Length'] = flen
            response['X-Accel-Redirect'] = os.path.join(
                getattr(settings, 'IRODS_USER_URI', '/irods-user'), path)
            return response

    # if we get here, none of the above conditions are true
    # (reverse proxy disabled or not applicable): stream the file through Django
    if flen <= FILE_SIZE_LIMIT:
        options = ('-', )  # we're redirecting to stdout.
        # this unusual way of calling works for federated or local resources
        proc = session.run_safe('iget', None, path, *options)
        # the stdout of the iget process is handed to FileResponse as the body
        response = FileResponse(proc.stdout, content_type=mtype)
        response[
            'Content-Disposition'] = 'attachment; filename="{name}"'.format(
                name=path.split('/')[-1])
        response['Content-Length'] = flen
        return response

    else:
        # NOTE(review): the message hard-codes "1GB" while the actual threshold is
        # FILE_SIZE_LIMIT, defined elsewhere - confirm the two agree.
        content_msg = "File larger than 1GB cannot be downloaded directly via HTTP. " \
                      "Please download the large file via iRODS clients."
        response = HttpResponse(status=403)
        if rest_call:
            response.content = content_msg
        else:
            response.content = "<h1>" + content_msg + "</h1>"
        return response