Example No. 1
def test_get_mime_type(self):
    test_file = 'my_file.txt'
    self.assertEqual(utils.get_file_mime_type(test_file), 'text/plain')
    test_file = 'my_file.tif'
    self.assertEqual(utils.get_file_mime_type(test_file), 'image/tiff')
    test_file = 'my_file.abc'
    self.assertEqual(utils.get_file_mime_type(test_file), 'application/abc')
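These assertions imply that utils.get_file_mime_type falls back to 'application/<extension>' when the extension is not a registered MIME type. A minimal sketch consistent with the test, assuming Python's standard mimetypes module backs the lookup:

import mimetypes
import os

def get_file_mime_type(file_name):
    # guess the MIME type from the file extension
    # (e.g. .txt -> text/plain, .tif -> image/tiff)
    file_format_type = mimetypes.guess_type(file_name)[0]
    if not file_format_type:
        # unknown extension: fall back to 'application/<extension>',
        # so 'my_file.abc' yields 'application/abc'
        file_format_type = 'application/%s' % os.path.splitext(file_name)[1][1:]
    return file_format_type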
Example No. 3
def link_irods_file_to_django(resource, filepath):
    """
    Link a newly created irods file to Django resource model

    :param filepath: full path to file
    """
    # link the newly created file (**filepath**) to Django resource model
    b_add_file = False
    # TODO: folder is an abstract concept... utilize short_path for whole API
    if resource:
        folder, base = ResourceFile.resource_path_is_acceptable(resource, filepath,
                                                                test_exists=False)
        try:
            ResourceFile.get(resource=resource, file=base, folder=folder)
        except ObjectDoesNotExist:
            # this does not copy the file from anywhere; it must exist already
            ResourceFile.create(resource=resource, file=base, folder=folder)
            b_add_file = True

        if b_add_file:
            file_format_type = get_file_mime_type(filepath)
            if file_format_type not in [mime.value for mime in resource.metadata.formats.all()]:
                resource.metadata.create_element('format', value=file_format_type)
            # this should assign a logical file object to this new file
            # if this resource supports logical file
            resource.set_default_logical_file()
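A hedged usage sketch: the iRODS path below is hypothetical, and the file must already exist in iRODS because the function only registers it with Django (it does not copy data):

# hypothetical path inside the resource's data/contents collection
link_irods_file_to_django(
    resource, '/myZone/home/proxy/abc123/data/contents/results.csv')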
Example No. 4
def add_resource_files(pk, *files):
    """
    Called by clients to update a resource in HydroShare by adding one or more files.

    REST URL:  PUT /resource/{pid}/files/{file}

    Parameters:
    pid - Unique HydroShare identifier for the resource that is to be updated.
    file - The data bytes of the file that will be added to the existing resource identified by pid

    Returns:    The pid assigned to the updated resource

    Return Type:    pid

    Raises:
    Exceptions.NotAuthorized - The user is not authorized
    Exceptions.InvalidContent - The content of the file is invalid
    Exception.ServiceFailure - The service is unable to process the request

    Notes:
    For mutable resources (resources not formally published), the update adds the files that are passed
    to this method to the resource. For immutable resources (formally published resources), this method creates a new
    resource that is a new version of the formally published resource. HydroShare will record the update by storing the
    SystemMetadata.obsoletes and SystemMetadata.obsoletedBy fields for the respective resources in their system metadata.
    HydroShare MUST check or set the values of SystemMetadata.obsoletes and SystemMetadata.obsoletedBy so that they
    accurately represent the relationship between the new and old objects. HydroShare MUST also set
    SystemMetadata.dateSysMetadataModified. The modified system metadata entries must then be available in
    HydroShare.listObjects() to ensure that any cataloging systems pick up the changes when filtering on
    SystemMetadata.dateSysMetadataModified. A formally published resource can only be obsoleted by one newer version.
    Once a resource is obsoleted, no other resources can obsolete it.

    """
    resource = utils.get_resource_by_shortkey(pk)
    ret = []
    for file in files:
        ret.append(ResourceFile.objects.create(
            content_object=resource,
            resource_file=File(file) if not isinstance(file, UploadedFile) else file
        ))

        # add format metadata element if necessary
        file_format_type = utils.get_file_mime_type(file.name)
        if file_format_type not in [mime.value for mime in resource.metadata.formats.all()]:
            resource.metadata.create_element('format', value=file_format_type)

    return ret
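A usage sketch, assuming ordinary open file objects (UploadedFile instances pass through unchanged) and a hypothetical resource short id:

# 'abc0123456789def' stands in for a real HydroShare resource short id
with open('temperature.csv', 'rb') as f1, open('readme.txt', 'rb') as f2:
    new_files = add_resource_files('abc0123456789def', f1, f2)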
Example No. 5
def delete_format_metadata_after_delete_file(resource, file_name):
    """
    Delete format metadata as appropriate after a file is deleted.
    :param resource: BaseResource object representing a HydroShare resource
    :param file_name: name of the file that was deleted
    :return:
    """
    delete_file_mime_type = utils.get_file_mime_type(file_name)
    delete_file_extension = os.path.splitext(file_name)[1]

    # if there is no other resource file with the same extension as the
    # file just deleted then delete the matching format metadata element for the resource
    resource_file_extensions = [os.path.splitext(get_resource_file_name(f))[1] for f in
                                resource.files.all()]
    if delete_file_extension not in resource_file_extensions:
        format_element = resource.metadata.formats.filter(value=delete_file_mime_type).first()
        if format_element:
            resource.metadata.delete_element(format_element.term, format_element.id)
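The extension check works because files sharing an extension map to the same MIME type, so the format element must survive as long as any file with that extension remains. A small self-contained illustration of the guard:

import os

remaining_files = ['readme.txt', 'table.csv']  # files still in the resource
deleted_file = 'old.csv'

delete_file_extension = os.path.splitext(deleted_file)[1]
resource_file_extensions = [os.path.splitext(name)[1] for name in remaining_files]

# '.csv' is still present, so the 'text/csv' format element is kept
assert delete_file_extension in resource_file_extensions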
Example No. 6
def raster_pre_delete_file_from_resource_trigger(sender, **kwargs):
    res = kwargs['resource']
    del_file = kwargs['file']
    del_res_fname = get_resource_file_name(del_file)
    # delete core metadata coverage now that the only file is deleted
    res.metadata.coverages.all().delete()

    # delete all other resource specific metadata
    res.metadata.originalCoverage.delete()
    res.metadata.cellInformation.delete()
    res.metadata.bandInformations.all().delete()

    # delete all the files that are not the user-selected file
    for f in ResourceFile.objects.filter(object_id=res.id):
        fname = get_resource_file_name(f)
        if fname != del_res_fname:
            delete_resource_file_only(res, f)

    # delete the format elements of the files that are not the user-selected file
    del_file_format = utils.get_file_mime_type(del_res_fname)
    for format_element in res.metadata.formats.all():
        if format_element.value != del_file_format:
            res.metadata.delete_element(format_element.term, format_element.id)
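The handler reads resource and file out of kwargs, which matches the send call shown in a later example (signals.pre_delete_file_from_resource.send(sender=res_cls, file=f, resource=resource)). A hedged wiring sketch; the import paths and the RasterResource sender class are assumptions:

# assumed module paths; adjust to the actual project layout
from hs_core.signals import pre_delete_file_from_resource
from hs_app_raster.models import RasterResource

pre_delete_file_from_resource.connect(
    raster_pre_delete_file_from_resource_trigger, sender=RasterResource)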
Example No. 8
def create_bag_files(resource):
    """
    Create and update the files needed by the bagit operation conducted on the iRODS server;
    no bagit operation is performed here; only the files that will be included in the bag are
    created or updated.

    Parameters:
    :param resource: A resource whose files will be created or updated to be included in the
    resource bag.
    :return: istorage, an IrodsStorage object that will be used by subsequent operations to
    create a bag on demand as needed.
    """
    from hs_core.hydroshare.utils import current_site_url, get_file_mime_type

    istorage = resource.get_irods_storage()

    # the temp_path is a temporary holding path to make the files available to iRODS
    # we have to make temp_path unique even for the same resource with same update time
    # to accommodate asynchronous multiple file move operations for the same resource

    # TODO: This is always in /tmp; otherwise code breaks because open() is called on the result!
    temp_path = istorage.getUniqueTmpPath

    try:
        os.makedirs(temp_path)
    except OSError as ex:
        # TODO: there might be concurrent operations.
        if ex.errno == errno.EEXIST:
            shutil.rmtree(temp_path)
            os.makedirs(temp_path)
        else:
            raise Exception(str(ex))

    # an empty visualization directory will not be put into the zipped bag file by ibun command,
    # so creating an empty visualization directory to be put into the zip file as done by the two
    # statements below does not work. However, if visualization directory has content to be
    # uploaded, it will work. This is to be implemented as part of the resource model in the future.
    # The following two statements are placeholders serving as reminder
    # to_file_name = '{res_id}/data/visualization/'.format(res_id=resource.short_id)
    # istorage.saveFile('', to_file_name, create_directory=True)

    # create resourcemetadata.xml in local directory and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemetadata.xml')
    with open(from_file_name, 'w') as out:
        # write resource level metadata
        out.write(resource.get_metadata_xml())
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemetadata.xml')
    istorage.saveFile(from_file_name, to_file_name, True)

    # URLs are found in the /data/ subdirectory to comply with bagit format assumptions
    current_site_url = current_site_url()
    # This is the qualified resource url.
    hs_res_url = os.path.join(current_site_url, 'resource', resource.short_id, 'data')
    # this is the path to the resourcemetadata file for download
    metadata_url = os.path.join(hs_res_url, 'resourcemetadata.xml')
    # this is the path to the resourcemap file for download
    res_map_url = os.path.join(hs_res_url, 'resourcemap.xml')

    # make the resource map:
    utils.namespaces['citoterms'] = Namespace('http://purl.org/spar/cito/')
    utils.namespaceSearchOrder.append('citoterms')

    ag_url = os.path.join(hs_res_url, 'resourcemap.xml#aggregation')
    a = Aggregation(ag_url)

    # Set properties of the aggregation
    a._dc.title = resource.metadata.title.value
    a._dcterms.type = URIRef(resource.metadata.type.url)
    a._citoterms.isDocumentedBy = metadata_url
    a._ore.isDescribedBy = res_map_url

    res_type_aggregation = AggregatedResource(resource.metadata.type.url)
    res_type_aggregation._rdfs.label = resource._meta.verbose_name
    res_type_aggregation._rdfs.isDefinedBy = current_site_url + "/terms"

    a.add_resource(res_type_aggregation)

    # Create a description of the metadata document that describes the whole resource and add it
    # to the aggregation
    resMetaFile = AggregatedResource(metadata_url)
    resMetaFile._dc.title = "Dublin Core science metadata document describing the HydroShare " \
                            "resource"
    resMetaFile._citoterms.documents = ag_url
    resMetaFile._ore.isAggregatedBy = ag_url
    resMetaFile._dc.format = "application/rdf+xml"
    a.add_resource(resMetaFile)

    # Add the resource files to the aggregation
    files = ResourceFile.objects.filter(object_id=resource.id)

    for f in files:
        # only the files that are not part of file type aggregation (logical file)
        # should be added to the resource level map xml file
        if f.logical_file is None:
            res_uri = u'{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
                hs_url=current_site_url,
                res_id=resource.short_id,
                file_name=f.short_path)
            ar = AggregatedResource(res_uri)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = get_file_mime_type(os.path.basename(f.short_path))
            a.add_resource(ar)

    # handle collection resource type
    # save contained resource urls into resourcemap.xml
    if resource.resource_type == "CollectionResource" and resource.resources:
        for contained_res in resource.resources.all():
            contained_res_id = contained_res.short_id
            resource_map_url = '{hs_url}/resource/{res_id}/data/resourcemap.xml'.format(
                    hs_url=current_site_url,
                    res_id=contained_res_id)

            ar = AggregatedResource(resource_map_url)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = "application/rdf+xml"
            a.add_resource(ar)
    elif resource.resource_type == "CompositeResource":
        # add file type aggregations to resource aggregation
        for logical_file in resource.logical_files:
            if logical_file.has_parent:
                # skip nested aggregations
                continue
            aggr_uri = u'{hs_url}/resource/{res_id}/data/contents/{map_file_path}#aggregation'
            aggr_uri = aggr_uri.format(
                hs_url=current_site_url,
                res_id=resource.short_id,
                map_file_path=logical_file.map_short_file_path)
            agg = Aggregation(aggr_uri)
            agg._ore.isAggregatedBy = ag_url
            agg_type_url = "{site}/terms/{aggr_type}"
            agg_type_url = agg_type_url.format(site=current_site_url,
                                               aggr_type=logical_file.get_aggregation_type_name())
            agg._dcterms.type = URIRef(agg_type_url)
            a.add_resource(agg)

    # Register a serializer with the aggregation, which creates a new ResourceMap that needs a URI
    serializer = RdfLibSerializer('xml')
    resMap = a.register_serialization(serializer, res_map_url)
    resMap._dc.identifier = resource.short_id

    # Fetch the serialization
    remdoc = a.get_serialization()

    # change the namespace for the 'creator' element from 'dcterms' to 'dc'
    xml_string = remdoc.data.replace('dcterms:creator', 'dc:creator')

    # delete this extra element
    # <ore:aggregates rdf:resource="[hydroshare domain]/terms/[Resource class name]"/>
    xml_string = xml_string.replace(
        '<ore:aggregates rdf:resource="%s"/>\n' % str(resource.metadata.type.url), '')

    # create resourcemap.xml and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemap.xml')
    with open(from_file_name, 'w') as out:
        out.write(xml_string)
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemap.xml')
    istorage.saveFile(from_file_name, to_file_name, False)

    # if the resource is a composite resource generate aggregation metadata
    # and map xml documents
    if resource.resource_type == "CompositeResource":
        resource.create_aggregation_xml_documents()

    res_coll = resource.root_path
    istorage.setAVU(res_coll, 'metadata_dirty', "false")
    shutil.rmtree(temp_path)
    return istorage
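A brief usage sketch; per the code above, the call leaves the 'metadata_dirty' AVU on the resource collection set to 'false' and hands back the storage object for subsequent on-demand bag creation:

# rebuild resourcemetadata.xml and resourcemap.xml for the bag payload
istorage = create_bag_files(resource)
# resource.root_path now carries metadata_dirty == 'false'; istorage can be
# reused by the follow-up operation that zips the bag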
Example No. 9
def create_bag_files(resource):
    """
    Create and update the files needed by the bagit operation conducted on the iRODS server;
    no bagit operation is performed here; only the files that will be included in the bag are
    created or updated.

    Parameters:
    :param resource: A resource whose files will be created or updated to be included in the
    resource bag.
    :return: istorage, an IrodsStorage object that will be used by subsequent operations to
    create a bag on demand as needed.
    """
    from hs_core.hydroshare.utils import current_site_url, get_file_mime_type

    istorage = resource.get_irods_storage()

    # the temp_path is a temporary holding path to make the files available to iRODS
    # we have to make temp_path unique even for the same resource with same update time
    # to accommodate asynchronous multiple file move operations for the same resource

    # TODO: This is always in /tmp; otherwise code breaks because open() is called on the result!
    temp_path = os.path.join(getattr(settings, 'IRODS_ROOT', '/tmp'),
                             uuid4().hex)

    try:
        os.makedirs(temp_path)
    except OSError as ex:
        # TODO: there might be concurrent operations.
        if ex.errno == errno.EEXIST:
            shutil.rmtree(temp_path)
            os.makedirs(temp_path)
        else:
            raise Exception(str(ex))

    # an empty visualization directory will not be put into the zipped bag file by ibun command,
    # so creating an empty visualization directory to be put into the zip file as done by the two
    # statements below does not work. However, if visualization directory has content to be
    # uploaded, it will work. This is to be implemented as part of the resource model in the future.
    # The following two statements are placeholders serving as reminder
    # to_file_name = '{res_id}/data/visualization/'.format(res_id=resource.short_id)
    # istorage.saveFile('', to_file_name, create_directory=True)

    # create resourcemetadata.xml in local directory and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemetadata.xml')
    with open(from_file_name, 'w') as out:
        # for resource types that don't support file types, this writes only resource-level
        # metadata; for resource types that do support file types, this writes resource-level
        # metadata as well as file type metadata
        out.write(resource.get_metadata_xml())
    to_file_name = os.path.join(resource.root_path, 'data',
                                'resourcemetadata.xml')
    istorage.saveFile(from_file_name, to_file_name, True)

    # URLs are found in the /data/ subdirectory to comply with bagit format assumptions
    current_site_url = current_site_url()
    # This is the qualified resource url.
    hs_res_url = os.path.join(current_site_url, 'resource', resource.short_id,
                              'data')
    # this is the path to the resourcemetadata file for download
    metadata_url = os.path.join(hs_res_url, 'resourcemetadata.xml')
    # this is the path to the resourcemap file for download
    res_map_url = os.path.join(hs_res_url, 'resourcemap.xml')

    # make the resource map:
    utils.namespaces['citoterms'] = Namespace('http://purl.org/spar/cito/')
    utils.namespaceSearchOrder.append('citoterms')

    ag_url = os.path.join(hs_res_url, 'resourcemap.xml#aggregation')
    a = Aggregation(ag_url)

    # Set properties of the aggregation
    a._dc.title = resource.metadata.title.value
    a._dcterms.type = URIRef(resource.metadata.type.url)
    a._citoterms.isDocumentedBy = metadata_url
    a._ore.isDescribedBy = res_map_url

    res_type_aggregation = AggregatedResource(resource.metadata.type.url)
    res_type_aggregation._rdfs.label = resource._meta.verbose_name
    res_type_aggregation._rdfs.isDefinedBy = current_site_url + "/terms"

    a.add_resource(res_type_aggregation)

    # Create a description of the metadata document that describes the whole resource and add it
    # to the aggregation
    resMetaFile = AggregatedResource(metadata_url)
    resMetaFile._dc.title = "Dublin Core science metadata document describing the HydroShare " \
                            "resource"
    resMetaFile._citoterms.documents = ag_url
    resMetaFile._ore.isAggregatedBy = ag_url
    resMetaFile._dc.format = "application/rdf+xml"

    # Create a description of the content file and add it to the aggregation
    files = ResourceFile.objects.filter(object_id=resource.id)
    resFiles = []
    for n, f in enumerate(files):
        res_uri = '{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
            hs_url=current_site_url,
            res_id=resource.short_id,
            file_name=f.short_path)
        resFiles.append(AggregatedResource(res_uri))
        resFiles[n]._ore.isAggregatedBy = ag_url
        resFiles[n]._dc.format = get_file_mime_type(
            os.path.basename(f.short_path))

    # Add the resource files to the aggregation
    a.add_resource(resMetaFile)
    for f in resFiles:
        a.add_resource(f)

    # handle collection resource type
    # save contained resource urls into resourcemap.xml
    if resource.resource_type == "CollectionResource" and resource.resources:
        for contained_res in resource.resources.all():
            contained_res_id = contained_res.short_id
            resource_map_url = '{hs_url}/resource/{res_id}/data/resourcemap.xml'.format(
                hs_url=current_site_url, res_id=contained_res_id)

            ar = AggregatedResource(resource_map_url)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = "application/rdf+xml"
            a.add_resource(ar)

    # Register a serializer with the aggregation, which creates a new ResourceMap that needs a URI
    serializer = RdfLibSerializer('xml')
    resMap = a.register_serialization(serializer, res_map_url)
    resMap._dc.identifier = resource.short_id

    # Fetch the serialization
    remdoc = a.get_serialization()

    # change the namespace for the 'creator' element from 'dcterms' to 'dc'
    xml_string = remdoc.data.replace('dcterms:creator', 'dc:creator')

    # delete this extra element
    # <ore:aggregates rdf:resource="[hydroshare domain]/terms/[Resource class name]"/>
    xml_string = xml_string.replace(
        '<ore:aggregates rdf:resource="%s"/>\n' %
        str(resource.metadata.type.url), '')

    # create resourcemap.xml and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemap.xml')
    with open(from_file_name, 'w') as out:
        out.write(xml_string)
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemap.xml')
    istorage.saveFile(from_file_name, to_file_name, False)

    res_coll = resource.root_path
    istorage.setAVU(res_coll, 'metadata_dirty', "false")
    shutil.rmtree(temp_path)
    return istorage
Example No. 10
def data_store_structure(request):
    """
    Get file hierarchy (collection of subcollections and data objects) for the requested directory
    in hydroshareZone or any federated zone used for CommonsShare resource backend store.
    It is invoked by an AJAX call and returns a JSON object that holds content for files
    and folders under the requested directory/collection/subcollection.
    The AJAX request must be a POST request with input data passed in for res_id and store_path
    where store_path is the relative path under res_id collection/directory
    """
    res_id = request.POST.get('res_id', None)
    if res_id is None:
        logger.error("no resource id in request")
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    res_id = str(res_id).strip()
    try:
        resource, _, _ = authorize(
            request,
            res_id,
            needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    except NotFound:
        logger.error("resource {} not found".format(res_id))
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        logger.error("permission denied for resource {}".format(res_id))
        return HttpResponse('Permission denied',
                            status=status.HTTP_401_UNAUTHORIZED)

    store_path = request.POST.get('store_path', None)
    if store_path is None:
        logger.error("store_path not included for resource {}".format(res_id))
        return HttpResponse('Bad request - store_path is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    store_path = str(store_path).strip()
    if not store_path:
        logger.error("store_path empty for resource {}".format(res_id))
        return HttpResponse('Bad request - store_path cannot be empty',
                            status=status.HTTP_400_BAD_REQUEST)

    if not store_path.startswith('data/contents'):
        logger.error(
            "store_path doesn't start with data/contents for resource {}".format(res_id))
        return HttpResponse(
            'Bad request - store_path must start with data/contents/',
            status=status.HTTP_400_BAD_REQUEST)

    if store_path.find('/../') >= 0 or store_path.endswith('/..'):
        logger.error(
            "store_path cannot contain .. for resource {}".format(res_id))
        return HttpResponse('Bad request - store_path cannot contain /../',
                            status=status.HTTP_400_BAD_REQUEST)

    istorage = resource.get_irods_storage()
    res_coll = os.path.join(resource.root_path, store_path)
    try:
        store = istorage.listdir(res_coll)
        files = []
        for fname in store[1]:  # files
            fname = fname.decode('utf-8')
            name_with_full_path = os.path.join(res_coll, fname)
            size = istorage.size(name_with_full_path)
            mtype = get_file_mime_type(fname)
            idx = mtype.find('/')
            if idx >= 0:
                mtype = mtype[idx + 1:]
            f_pk = ''
            f_url = ''
            logical_file_type = ''
            logical_file_id = ''
            for f in ResourceFile.objects.filter(object_id=resource.id):
                if name_with_full_path == f.storage_path:
                    f_pk = f.pk
                    f_url = get_resource_file_url(f)
                    if resource.resource_type == "CompositeResource":
                        f_logical = f.get_or_create_logical_file
                        logical_file_type = f.logical_file_type_name
                        logical_file_id = f_logical.id
                    break

            if f_pk:  # file is found in Django
                files.append({
                    'name': fname,
                    'size': size,
                    'type': mtype,
                    'pk': f_pk,
                    'url': f_url,
                    'logical_type': logical_file_type,
                    'logical_file_id': logical_file_id
                })
            else:  # file is not found in Django
                logger.error(
                    "data_store_structure: filename {} in iRODs has no analogue in Django"
                    .format(name_with_full_path))
        # show reference file links if any which don't have physical presence in iRODS
        for f in ResourceFile.objects.filter(object_id=resource.id):
            if not f.resource_file and not f.fed_resource_file and f.reference_file_path:
                files.append({
                    'name': f.reference_file_path,
                    'size': f.reference_file_size,
                    'type': 'Reference',
                    'pk': f.pk,
                    'url': '',
                    'logical_type': '',
                    'logical_file_id': ''
                })
    except SessionException as ex:
        logger.error("session exception querying store_path {} for {}".format(
            store_path, res_id))
        return HttpResponse(ex.stderr,
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    return_object = {
        'files': files,
        'folders': store[0],
        'can_be_public': resource.can_be_public_or_discoverable
    }

    if resource.resource_type == "CompositeResource":
        spatial_coverage_dict = get_coverage_data_dict(resource)
        temporal_coverage_dict = get_coverage_data_dict(
            resource, coverage_type='temporal')
        return_object['spatial_coverage'] = spatial_coverage_dict
        return_object['temporal_coverage'] = temporal_coverage_dict
    return HttpResponse(json.dumps(return_object),
                        content_type="application/json")
Example No. 11
def data_store_structure(request):
    """
    Get file hierarchy (collection of subcollections and data objects) for the requested directory
    in hydroshareZone or any federated zone used for HydroShare resource backend store.
    It is invoked by an AJAX call and returns a JSON object that holds content for files
    and folders under the requested directory/collection/subcollection.
    The AJAX request must be a POST request with input data passed in for res_id and store_path
    where store_path is the relative path to res_id/data/contents
    """
    res_id = request.POST.get('res_id', None)
    if res_id is None:
        logger.error("no resource id in request")
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    res_id = str(res_id).strip()
    try:
        resource, _, _ = authorize(
            request,
            res_id,
            needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    except NotFound:
        logger.error("resource {} not found".format(res_id))
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        logger.error("permission denied for resource {}".format(res_id))
        return HttpResponse('Permission denied',
                            status=status.HTTP_401_UNAUTHORIZED)

    store_path = request.POST.get('store_path', None)

    try:
        store_path = _validate_path(store_path,
                                    'store_path',
                                    check_path_empty=False)
    except ValidationError as ex:
        return HttpResponse(str(ex), status=status.HTTP_400_BAD_REQUEST)

    istorage = resource.get_irods_storage()
    directory_in_irods = resource.get_irods_path(store_path)

    try:
        store = istorage.listdir(directory_in_irods)
    except SessionException as ex:
        logger.error("session exception querying store_path {} for {}".format(
            store_path, res_id))
        return HttpResponse(ex.stderr,
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    files = []
    dirs = []
    aggregations = []
    # folder path relative to 'data/contents/' needed for the UI
    folder_path = store_path[len("data/contents/"):]
    for dname in store[0]:  # directories
        d_pk = dname
        d_store_path = os.path.join(store_path, d_pk)
        d_url = resource.get_url_of_path(d_store_path)
        main_file = ''
        folder_aggregation_type = ''
        folder_aggregation_name = ''
        folder_aggregation_id = ''
        folder_aggregation_type_to_set = ''
        if resource.resource_type == "CompositeResource":
            dir_path = resource.get_irods_path(d_store_path)
            # find if this folder *dir_path* represents (contains) an aggregation object
            aggregation_object = resource.get_folder_aggregation_object(
                dir_path)
            # folder aggregation type is not relevant for single file aggregation types - which
            # are: GenericLogicalFile, and RefTimeseriesLogicalFile
            if aggregation_object is not None:
                folder_aggregation_type = aggregation_object.get_aggregation_class_name()
                folder_aggregation_name = aggregation_object.get_aggregation_display_name()
                folder_aggregation_id = aggregation_object.id
                if not aggregation_object.is_fileset:
                    main_file = aggregation_object.get_main_file.file_name
            else:
                # find if a FileSet aggregation type can be created from this folder
                if resource.can_set_folder_to_fileset(dir_path):
                    folder_aggregation_type_to_set = FileSetLogicalFile.__name__
                else:
                    folder_aggregation_type_to_set = ""
        dirs.append({
            'name': d_pk,
            'url': d_url,
            'main_file': main_file,
            'folder_aggregation_type': folder_aggregation_type,
            'folder_aggregation_name': folder_aggregation_name,
            'folder_aggregation_id': folder_aggregation_id,
            'folder_aggregation_type_to_set': folder_aggregation_type_to_set,
            'folder_short_path': os.path.join(folder_path, d_pk)
        })

    is_federated = resource.is_federated
    for index, fname in enumerate(store[1]):  # files
        f_store_path = os.path.join(store_path, fname)
        file_in_irods = resource.get_irods_path(f_store_path)
        size = store[2][index]
        mtype = get_file_mime_type(fname)
        idx = mtype.find('/')
        if idx >= 0:
            mtype = mtype[idx + 1:]

        if is_federated:
            f = ResourceFile.objects.filter(
                object_id=resource.id,
                fed_resource_file=file_in_irods).first()
        else:
            f = ResourceFile.objects.filter(
                object_id=resource.id, resource_file=file_in_irods).first()

        if not f:
            # skip metadata files
            continue

        f_ref_url = ''
        logical_file_type = ''
        logical_file_id = ''
        aggregation_name = ''
        if f.has_logical_file:
            main_extension = f.logical_file.get_main_file_type()
            if not main_extension:
                # accept any extension
                main_extension = ""
            if main_extension.endswith(f.extension):
                aggregations.append({
                    'logical_file_id': f.logical_file.id,
                    'name': f.logical_file.dataset_name,
                    'logical_type': f.logical_file.get_aggregation_class_name(),
                    'aggregation_name': f.logical_file.get_aggregation_display_name(),
                    'main_file': f.logical_file.get_main_file.file_name,
                    'url': f.logical_file.url
                })
            logical_file_type = f.logical_file_type_name
            logical_file_id = f.logical_file.id
            aggregation_name = f.aggregation_display_name
            if 'url' in f.logical_file.extra_data:
                f_ref_url = f.logical_file.extra_data['url']

        files.append({
            'name': fname,
            'size': size,
            'type': mtype,
            'pk': f.pk,
            'url': f.url,
            'reference_url': f_ref_url,
            'aggregation_name': aggregation_name,
            'logical_type': logical_file_type,
            'logical_file_id': logical_file_id
        })

    return_object = {
        'files': files,
        'folders': dirs,
        'aggregations': aggregations,
        'can_be_public': resource.can_be_public_or_discoverable
    }

    if resource.resource_type == "CompositeResource":
        return_object['spatial_coverage'] = get_coverage_data_dict(resource)
        return_object['temporal_coverage'] = get_coverage_data_dict(
            resource, coverage_type='temporal')
    return HttpResponse(json.dumps(return_object),
                        content_type="application/json")
Example No. 12
def data_store_structure(request):
    """
    Get file hierarchy (collection of subcollections and data objects) for the requested directory
    in hydroshareZone or any federated zone used for HydroShare resource backend store.
    It is invoked by an AJAX call and returns a JSON object that holds content for files
    and folders under the requested directory/collection/subcollection.
    The AJAX request must be a POST request with input data passed in for res_id and store_path
    where store_path is the relative path to res_id/data/contents
    """
    res_id = request.POST.get('res_id', None)
    if res_id is None:
        logger.error("no resource id in request")
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    res_id = str(res_id).strip()
    try:
        resource, _, _ = authorize(request, res_id,
                                   needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    except NotFound:
        logger.error("resource {} not found".format(res_id))
        return HttpResponse('Bad request - resource not found', status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        logger.error("permission denied for resource {}".format(res_id))
        return HttpResponse('Permission denied', status=status.HTTP_401_UNAUTHORIZED)

    store_path = request.POST.get('store_path', None)

    try:
        store_path = _validate_path(store_path, 'store_path', check_path_empty=False)
    except ValidationError as ex:
        return HttpResponse(ex.message, status=status.HTTP_400_BAD_REQUEST)

    istorage = resource.get_irods_storage()
    directory_in_irods = resource.get_irods_path(store_path)

    try:
        store = istorage.listdir(directory_in_irods)
    except SessionException as ex:
        logger.error("session exception querying store_path {} for {}".format(store_path, res_id))
        return HttpResponse(ex.stderr, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    files = []
    dirs = []
    # folder path relative to 'data/contents/' needed for the UI
    folder_path = store_path[len("data/contents/"):]
    for dname in store[0]:     # directories
        d_pk = dname.decode('utf-8')
        d_store_path = os.path.join(store_path, d_pk)
        d_url = resource.get_url_of_path(d_store_path)
        main_file = ''
        folder_aggregation_type = ''
        folder_aggregation_name = ''
        folder_aggregation_id = ''
        folder_aggregation_type_to_set = ''
        if resource.resource_type == "CompositeResource":
            dir_path = resource.get_public_path(d_store_path)
            # find if this folder *dir_path* represents (contains) an aggregation object
            aggregation_object = resource.get_folder_aggregation_object(dir_path)
            # folder aggregation type is not relevant for single file aggregation types - which
            # are: GenericLogicalFile, and RefTimeseriesLogicalFile
            if aggregation_object is not None and not \
                    aggregation_object.is_single_file_aggregation:
                folder_aggregation_type = aggregation_object.get_aggregation_class_name()
                folder_aggregation_name = aggregation_object.get_aggregation_display_name()
                folder_aggregation_id = aggregation_object.id
                main_file = ''
                if not aggregation_object.is_fileset:
                    main_file = aggregation_object.get_main_file.file_name
            else:
                # find if any aggregation type can be created from this folder
                folder_aggregation_type_to_set = \
                    resource.get_folder_aggregation_type_to_set(dir_path)
                if folder_aggregation_type_to_set is None:
                    folder_aggregation_type_to_set = ""
        dirs.append({'name': d_pk,
                     'url': d_url,
                     'main_file': main_file,
                     'folder_aggregation_type': folder_aggregation_type,
                     'folder_aggregation_name': folder_aggregation_name,
                     'folder_aggregation_id': folder_aggregation_id,
                     'folder_aggregation_type_to_set': folder_aggregation_type_to_set,
                     'folder_short_path': os.path.join(folder_path, d_pk)})

    is_federated = resource.is_federated
    for index, fname in enumerate(store[1]):  # files
        fname = fname.decode('utf-8')
        f_store_path = os.path.join(store_path, fname)
        file_in_irods = resource.get_irods_path(f_store_path)
        size = store[2][index]
        mtype = get_file_mime_type(fname)
        idx = mtype.find('/')
        if idx >= 0:
            mtype = mtype[idx + 1:]

        if is_federated:
            f = ResourceFile.objects.filter(object_id=resource.id,
                                            fed_resource_file=file_in_irods).first()
        else:
            f = ResourceFile.objects.filter(object_id=resource.id,
                                            resource_file=file_in_irods).first()

        if not f:
            # skip metadata files
            continue

        f_ref_url = ''
        logical_file_type = ''
        logical_file_id = ''
        aggregation_name = ''
        is_single_file_aggregation = ''
        if resource.resource_type == "CompositeResource":
            if f.has_logical_file:
                logical_file_type = f.logical_file_type_name
                logical_file_id = f.logical_file.id
                aggregation_name = f.aggregation_display_name
                is_single_file_aggregation = f.logical_file.is_single_file_aggregation
                if 'url' in f.logical_file.extra_data:
                    f_ref_url = f.logical_file.extra_data['url']

        files.append({'name': fname, 'size': size, 'type': mtype, 'pk': f.pk, 'url': f.url,
                      'reference_url': f_ref_url,
                      'aggregation_name': aggregation_name,
                      'logical_type': logical_file_type,
                      'logical_file_id': logical_file_id,
                      'is_single_file_aggregation': is_single_file_aggregation})

    return_object = {'files': files,
                     'folders': dirs,
                     'can_be_public': resource.can_be_public_or_discoverable}

    if resource.resource_type == "CompositeResource":
        return_object['spatial_coverage'] = get_coverage_data_dict(resource)
        return_object['temporal_coverage'] = get_coverage_data_dict(resource,
                                                                    coverage_type='temporal')
    return HttpResponse(
        json.dumps(return_object),
        content_type="application/json"
    )
Example No. 13
def delete_resource_file(pk, filename_or_id, user):
    """
    Deletes an individual file from a HydroShare resource. If the file does not exist, the Exceptions.NotFound exception
    is raised.

    REST URL:  DELETE /resource/{pid}/files/{filename}

    Parameters:
    pid - The unique HydroShare identifier for the resource from which the file will be deleted
    filename - Name of the file to be deleted from the resource

    Returns:    The pid of the resource from which the file was deleted

    Return Type:    pid

    Raises:
    Exceptions.NotAuthorized - The user is not authorized
    Exceptions.NotFound - The resource identified by pid does not exist or the file identified by file does not exist
    Exception.ServiceFailure - The service is unable to process the request

    Note:  For mutable resources (resources that have not been formally published), this method modifies the resource by
    deleting the file. For immutable resources (formally published resources), this method creates a new resource that
    is a new version of the formally published resource. HydroShare will record the update by storing the
    SystemMetadata.obsoletes and SystemMetadata.obsoletedBy fields for the respective resources in their system metadata.
    HydroShare MUST check or set the values of SystemMetadata.obsoletes and SystemMetadata.obsoletedBy so that they
    accurately represent the relationship between the new and old objects. HydroShare MUST also set
    SystemMetadata.dateSysMetadataModified. The modified system metadata entries must then be available in
    HydroShare.listObjects() to ensure that any cataloging systems pick up the changes when filtering on
    SystemMetadata.dateSysMetadataModified. A formally published resource can only be obsoleted by one newer
    version. Once a resource is obsoleted, no other resources can obsolete it.
    """
    resource = utils.get_resource_by_shortkey(pk)
    res_cls = resource.__class__

    try:
        file_id = int(filename_or_id)
        filter_condition = lambda fl: fl.id == file_id
    except ValueError:
        filter_condition = lambda fl: os.path.basename(fl.resource_file.name) == filename_or_id

    for f in ResourceFile.objects.filter(object_id=resource.id):
        if filter_condition(f):
            # send signal
            signals.pre_delete_file_from_resource.send(sender=res_cls, file=f, resource=resource)

            file_name = f.resource_file.name
            f.resource_file.delete()
            f.delete()
            delete_file_mime_type = utils.get_file_mime_type(file_name)
            delete_file_extension = os.path.splitext(file_name)[1]

            # if there is no other resource file with the same extension as the
            # file just deleted then delete the matching format metadata element for the resource
            # use 'fl' so the comprehension does not rebind the enclosing loop variable 'f'
            resource_file_extensions = [os.path.splitext(fl.resource_file.name)[1]
                                        for fl in resource.files.all()]
            if delete_file_extension not in resource_file_extensions:
                format_element = resource.metadata.formats.filter(value=delete_file_mime_type).first()
                if format_element:
                    resource.metadata.delete_element(format_element.term, format_element.id)
            break
    else:
        raise ObjectDoesNotExist(filename_or_id)

    if resource.public:
        if not resource.can_be_public:
            resource.public = False
            resource.save()

    # generate bag
    utils.resource_modified(resource, user)

    return filename_or_id
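Because of the int() probe, a purely numeric argument is treated as a file id and anything else as a base filename; a usage sketch with hypothetical values:

# delete by Django file id
delete_resource_file(resource.short_id, '42', request.user)
# delete by base filename
delete_resource_file(resource.short_id, 'observations.csv', request.user)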
Example No. 14
def data_store_structure(request):
    """
    Get file hierarchy (collection of subcollections and data objects) for the requested directory
    in hydroshareZone or any federated zone used for HydroShare resource backend store.
    It is invoked by an AJAX call and returns a JSON object that holds content for files
    and folders under the requested directory/collection/subcollection.
    The AJAX request must be a POST request with input data passed in for res_id and store_path
    where store_path is the relative path under res_id collection/directory
    """
    res_id = request.POST.get('res_id', None)
    if res_id is None:
        return HttpResponse('Bad request - resource id is not included',
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    res_id = str(res_id).strip()
    try:
        resource, _, _ = authorize(
            request,
            res_id,
            needed_permission=ACTION_TO_AUTHORIZE.VIEW_RESOURCE)
    except NotFound:
        return HttpResponse('Bad request - resource not found',
                            status=status.HTTP_400_BAD_REQUEST)
    except PermissionDenied:
        return HttpResponse('Permission denied',
                            status=status.HTTP_401_UNAUTHORIZED)

    store_path = request.POST.get('store_path', None)
    if store_path is None:
        return HttpResponse('Bad request - store_path is not included',
                            status=status.HTTP_400_BAD_REQUEST)
    store_path = str(store_path).strip()
    if not store_path:
        return HttpResponse('Bad request - store_path cannot be empty',
                            status=status.HTTP_400_BAD_REQUEST)
    # this is federated if warranted, automatically, by choosing an appropriate session.
    istorage = resource.get_irods_storage()
    if resource.resource_federation_path:
        res_coll = os.path.join(resource.resource_federation_path, res_id,
                                store_path)
        rel_path = store_path
    else:
        res_coll = os.path.join(res_id, store_path)
        rel_path = res_coll
    try:
        store = istorage.listdir(res_coll)
        files = []
        for fname in store[1]:
            name_with_full_path = os.path.join(res_coll, fname)
            name_with_rel_path = os.path.join(rel_path, fname)
            size = istorage.size(name_with_full_path)
            mtype = get_file_mime_type(fname)
            idx = mtype.find('/')
            if idx >= 0:
                mtype = mtype[idx + 1:]
            f_pk = ''
            f_url = ''
            logical_file_type = ''
            logical_file_id = ''
            for f in ResourceFile.objects.filter(object_id=resource.id):
                if name_with_rel_path == get_resource_file_name_and_extension(f)[0]:
                    f_pk = f.pk
                    f_url = get_resource_file_url(f)
                    if resource.resource_type == "CompositeResource":
                        logical_file_type = f.logical_file_type_name
                        logical_file_id = f.logical_file.id
                    break

            files.append({
                'name': fname,
                'size': size,
                'type': mtype,
                'pk': f_pk,
                'url': f_url,
                'logical_type': logical_file_type,
                'logical_file_id': logical_file_id
            })
    except SessionException as ex:
        return HttpResponse(ex.stderr,
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    return_object = {
        'files': files,
        'folders': store[0],
        'can_be_public': resource.can_be_public_or_discoverable
    }

    if resource.resource_type == "CompositeResource":
        spatial_coverage_dict = get_coverage_data_dict(resource)
        temporal_coverage_dict = get_coverage_data_dict(
            resource, coverage_type='temporal')
        return_object['spatial_coverage'] = spatial_coverage_dict
        return_object['temporal_coverage'] = temporal_coverage_dict
    return HttpResponse(json.dumps(return_object),
                        content_type="application/json")