Example no. 1
0
def setCitation(pkg):
    """Build a human-readable citation string from package metadata and
    store it back on the package under the 'Citation' extra.

    The citation is assembled, in order, from: authors, year, title, a
    fixed EcoSIS availability statement, and a DOI when present.  Missing
    pieces are silently skipped.

    Args:
        pkg: CKAN package dict with 'title', 'author' and extras
            readable via getPackageExtra.
    """
    citation = []

    title = pkg.get('title')
    authors = pkg.get('author')
    year = getPackageExtra('Year', pkg)

    # Prefer the EcoSIS-minted DOI; fall back to a user-supplied one.
    doi = getPackageExtra('EcoSIS DOI', pkg)
    if doi is None or doi == '':
        doi = getPackageExtra('Citation DOI', pkg)

    if authors is not None:
        # Authors arrive as a single comma-separated string.  Strip via the
        # instance method — unlike the previous unbound unicode.strip this
        # works for both str and unicode values, and the list comprehension
        # keeps len()/indexing working under Python 3 as well.
        authors = [a.strip() for a in authors.split(',')]
        if len(authors) == 1:
            citation.append(authors[0])
        elif len(authors) == 2:
            citation.append(' and '.join(authors))
        elif len(authors) > 2:
            # "A, B and C" style: comma-join all but the last author.
            last = authors.pop()
            citation.append('%s and %s' % (', '.join(authors), last))

    if year is not None:
        citation.append(year)

    if title is not None:
        citation.append(title)

    citation.append('Data set. Available on-line [http://ecosis.org] from the Ecosystem Spectral Information System (EcoSIS)')

    if doi is not None:
        citation.append(doi)

    citation = '. '.join(citation)
    setPackageExtra('Citation', citation, pkg)
Example no. 2
0
def getDoiStatus(pkg):
    """Return the package's EcoSIS DOI value together with its status.

    The 'EcoSIS DOI Status' extra is stored as a JSON string; an unset or
    empty status is normalized to an empty dict so callers can treat the
    result uniformly.

    Args:
        pkg: CKAN package dict whose extras are read via getPackageExtra.

    Returns:
        dict with keys 'status' (parsed dict) and 'value' (DOI string or None).
    """
    rawStatus = getPackageExtra('EcoSIS DOI Status', pkg)
    value = getPackageExtra('EcoSIS DOI', pkg)

    if rawStatus is None or rawStatus == "":
        status = {}
    else:
        status = json.loads(rawStatus)

    return {
        'status' : status,
        'value' : value
    }
Example no. 3
0
def process():
    """Prepare one or more datasheet resources for parsing and return the
    refreshed resource state as a JSON string.

    Request params read:
        package_id   - package owning the resources (access is checked)
        resource_id  - single resource to prepare
        resource_ids - JSON array of resource ids (bulk mode)
        sheet_id     - optional sheet within the resource ('' -> None)
        options      - JSON object of parse options, filtered against the
                       parseOptions whitelist

    Side effects: increments the package's 'resourceUpdateCount' extra and
    re-saves the package so its metadata_modified timestamp changes
    (required by the 'updated since last pushed' UI).
    """
    response.headers["Content-Type"] = "application/json"

    package_id = request.params.get('package_id')
    hasAccess(package_id)  # presumably aborts/raises on failure — TODO confirm

    # packages with an applied DOI are frozen; refuse edits
    if hasAppliedDoi(package_id):
        return {'error':True, 'message':'Cannot edit resource of package with applied DOI'}

    sheet_id = request.params.get('sheet_id')
    resource_id = request.params.get('resource_id')
    ids = request.params.get('resource_ids')

    if sheet_id == "":
        sheet_id = None

    # 'options' may be absent (json.loads(None) -> TypeError) or malformed
    # (ValueError); fall back to no options either way.  The previous bare
    # except also swallowed KeyboardInterrupt/SystemExit.
    try:
        options = json.loads(request.params.get('options'))
    except (TypeError, ValueError):
        options = {}

    # option, if a resource id and a datasheet id are passed, then the full 'merged' view will be return

    # only allow specified options
    safeOptions = {}
    for option in parseOptions:
        if option in options:
            safeOptions[option] = options[option]

    # see if we are editing multiple files or just one
    result = []
    if ids is not None:
        ids = json.loads(ids)
        for resource_id in ids:
            workspace.prepareFile(package_id, resource_id, sheet_id, safeOptions)
            result.append(query.getResource(resource_id))
    else:
        workspace.prepareFile(package_id, resource_id, sheet_id, safeOptions)
        result = query.getResource(resource_id, sheet_id)

    # update the dataset, so the metadata timestamp changes
    context = {'model': model, 'user': c.user}
    pkg = logic.get_action('package_show')(context, {'id': package_id})

    # use this counter to poke the dataset.  This will update the last modified timestamps
    # required for 'updated since last pushed UI'
    resourceUpdateCount = utils.getPackageExtra('resourceUpdateCount', pkg)
    if resourceUpdateCount is None:
        resourceUpdateCount = 1
    else:
        resourceUpdateCount = int(resourceUpdateCount) + 1
    utils.setPackageExtra('resourceUpdateCount', resourceUpdateCount, pkg)
    pkg = logic.get_action('package_update')(context, pkg)

    result = {
        'metadata_modified' : pkg.get('metadata_modified'),
        'result' : result
    }

    return jsonStringify(result)
Example no. 4
0
def processGeoJson(bboxInfo, pkg):
    """Assemble a GeoJSON GeometryCollection for a package.

    Combines (a) a bounding-box polygon derived from the spectra, when
    bboxInfo['use'] is truthy, and (b) any user-supplied 'geojson' package
    extra that parses as JSON.

    Args:
        bboxInfo: dict with a 'use' flag plus 'minlat'/'maxlat'/
            'minlng'/'maxlng' bounds.
        pkg: CKAN package dict whose extras are read via getPackageExtra.

    Returns:
        dict: a GeoJSON GeometryCollection (geometries may be empty).
    """
    result = {
        "type": "GeometryCollection",
        "geometries": []
    }

    # if we found bbox info in the spectra, add it as a closed polygon ring
    # (first and last coordinates repeat, per the GeoJSON spec)
    if bboxInfo['use']:
        result['geometries'].append({
            "type": "Polygon",
            "coordinates" : [[
                [bboxInfo["maxlng"], bboxInfo["maxlat"]],
                [bboxInfo["minlng"], bboxInfo["maxlat"]],
                [bboxInfo["minlng"], bboxInfo["minlat"]],
                [bboxInfo["maxlng"], bboxInfo["minlat"]],
                [bboxInfo["maxlng"], bboxInfo["maxlat"]]
            ]]
        })

    geojson = getPackageExtra("geojson", pkg)
    if geojson is not None:  # PEP 8: identity test, not != None
        try:
            # TODO: add checks for valid geojson
            result['geometries'].append(json.loads(geojson))
        except Exception:
            # best-effort: ignore unparseable user-supplied geojson
            pass

    return result
Example no. 5
0
def processAttribute(name, input, pkg, mrValue, setValues, keywords):
    """Resolve the value(s) for one schema attribute and stage them into
    the MongoDB update document being built.

    Value resolution order:
        1. 'Keywords'        -> the precomputed keywords list
        2. schemaMap alias   -> first-class package field
        3. otherwise         -> the package extra of the same name

    The value is normalized to a list, merged with any spectra-derived
    (map-reduced) values found in mrValue, cleaned (except for free-form
    'geojson'/'Citation'), and written to setValues['$set']['value.<name>'].

    Args:
        name: attribute name from the schema.
        input: schema input type ('controlled', 'split-text', ...).
        pkg: CKAN package dict.
        mrValue: map-reduce 'value' document for this package.
        setValues: MongoDB update document (mutated in place).
        keywords: precomputed list of package tag names.
    """
    # first we need to get the values we are working with
    if name == 'Keywords': # this is the keywords attribute, special case
        val = keywords
    elif schemaMap.get(name) is not None: # if the schemaMap has alias set, lookup value based on alias name
        val = pkg.get(schemaMap.get(name))
    else: # otherwise just use the provided attribute name
        val = getPackageExtra(name, pkg)

    # if we don't have values to process, do nothing
    if val is None or val == '':
        return

    # if attribute schema type is 'controlled', split to multiple values
    if name == 'Keywords':
        pass  # already a list
    elif input == "controlled" or input == "split-text" or name == 'Author':
        val = val.split(",")
    else: # we store everything as an array, easier to handle on other end
        val = [val]

    # now we have an dataset value, see if we have spectra value and join if we do
    # what does this mean?  So spectra resource attributes were mapreduced into
    # this single 'mrValue' dict.  If the attribute name is found as a first class
    # citizen, then it was provided by the spectra and we need to include it
    if mrValue.get(name) is not None:
        spValues = mrValue.get(name)

        # merge the above values with the spectra-provided values, no duplicates
        for v in val:
            if v not in spValues:
                spValues.append(v)
        val = spValues

    # finally, clean all values (strip and set to lower case), except the
    # free-form attributes which must be preserved verbatim
    if name != 'geojson' and name != 'Citation':
        val = [cleanValue(it) for it in val]

    setValues['$set']['value.'+name] = val
Example no. 6
0
def updateEcosisNs(pkg, spectra_count, bboxInfo):
    """Rebuild the 'ecosis' metadata namespace for a package inside the
    map-reduced 'search_package' MongoDB collection.

    Gathers package-level metadata (dates, organization, resources,
    keywords, units, linked data, DOI, aliases, sort config, geojson)
    into an 'ecosis' dict, then — when a map-reduce document exists for
    the package — bubbles schema attributes from that document via
    processAttribute() and writes everything back with one
    collection.update().

    Args:
        pkg: CKAN package dict ('id', 'resources', 'tags', 'groups',
            'organization', plus extras read via getPackageExtra).
        spectra_count: spectra count stored under 'spectra_count'.
        bboxInfo: bounding-box info forwarded to processGeoJson().

    Side effects: updates (or removes) the package's document in the
    'search_package' collection; pkg itself is not modified.
    """
    # get the package workspace object, contains config
    # NOTE(review): 'config' is never referenced below — looks like dead code.
    config = collections.get("package").find_one({"packageId": pkg.get("id")})
    if config is None:
        config = {}

    collection = collections.get('search_package')



    # sort settings are stored as a JSON string in the 'sort' extra
    sort = getPackageExtra("sort", pkg)
    if sort is None:
        sort = {}
    else:
        sort = json.loads(sort)

    # store these as dates
    # metadata_created/modified may be missing or unparseable; best-effort
    created = None
    modified = None
    try:
        created = dateparser.parse(pkg.get("metadata_created"))
    except Exception as e:
        pass
    try:
        modified = dateparser.parse(pkg.get("metadata_modified"))
    except Exception as e:
        pass

    # default ecosis information we are going to add to the package
    ecosis = {
        # TODO: change to ISO string, check this doesn't break 'updated since last push check'
        "pushed" : datetime.datetime.utcnow(),
        "organization" : "",
        "organization_id" : "",
        "description" : pkg.get('notes'),
        "groups" : [],
        "package_id" : pkg.get("id"),
        "package_name" : pkg.get("name"),
        "package_title" : pkg.get("title"),
        "created" : created,
        "modified" : modified,
        "version" : pkg.get("version"),
        "license" : pkg.get("license_title"),
        "spectra_count" : spectra_count,
        "spectra_metadata_schema" : {
            "wavelengths" : [],
            "metadata" : [],
            "units" : {},
            "aliases" : None,
        },
        "resources" : [],
        "linked_data" : [],
        "geojson" : None,
        "sort_on" : sort.get("on"),
        "sort_type": sort.get("type"),
        "sort_description" : sort.get("description")
    }

    # append the units
    units = query.allUnits(pkg.get("id"))
    if units != None:
         ecosis["spectra_metadata_schema"]["units"] = units

    # append the linked data
    # the 'LinkedData' extra is stored as a JSON string
    linkeddata = getPackageExtra('LinkedData', pkg)
    if linkeddata != None:
        ecosis["linked_data"] = json.loads(linkeddata)

    # append the EcoSIS DOI
    doi = getPackageExtra('EcoSIS DOI', pkg)
    if doi != None:
        ecosis["doi"] = doi

    # append the list of resources (active ones only)
    for item in pkg['resources']:
        if item.get("state") != "active":
            continue

        ecosis["resources"].append({
            "type" : item.get('url_type'),
            "mimetype" : item.get("mimetype"),
            "name" : item.get("name"),
            "url" : item.get("url")
        })

    # append the list of keywords (CKAN tag display names)
    keywords = []
    for item in pkg['tags']:
        keywords.append(item['display_name'])

    # add metadata aliases
    # 'aliases' extra is a JSON object mapping alias -> canonical name
    aliases = getPackageExtra('aliases', pkg)
    if aliases is not None:
        try:
            ecosis["spectra_metadata_schema"]["aliases"] = json.loads(aliases)

            # map units for aliases
            # an alias inherits the unit registered for its canonical name
            for key, value in ecosis["spectra_metadata_schema"]["aliases"].iteritems():
                unit = ecosis["spectra_metadata_schema"]["units"].get(value)
                if unit is not None:
                    ecosis["spectra_metadata_schema"]["units"][key] = unit

        except Exception:
            pass

    # append the data groups
    for item in pkg['groups']:
        ecosis["groups"].append(item['display_name'])

    # append the organizations
    if 'organization' in pkg:
        if pkg['organization'] != None:
            ecosis["organization"] = pkg['organization']['title']
            ecosis["organization_id"] = pkg['organization']['id']
        else:
            ecosis['organization'] = 'None'
    else:
        ecosis['organization'] = 'None'

    # make sure the map reduce did not create a null collection, if so, remove
    # This means there is no spectra
    item = collection.find_one({'_id': pkg['id']})

    # now see if we have a group by attribute...
    if item is None:
        pass
    elif item.get('value') is None:
        collection.remove({'_id': pkg['id']})
    else:
        # NOTE(review): this re-fetch returns the same document as the
        # find_one above — appears redundant.
        item = collection.find_one({'_id': pkg['id']})

        # the update document built up below: $set stores the ecosis ns and
        # per-attribute values; $unset removes the temporary schema object
        setValues = {'$set' : { 'value.ecosis': ecosis }, '$unset' : {}}

        # grab the mapreduce value
        mrValue = item.get('value')

        # process ecosis schema
        # bubble attributes from mapreduce
        names = []
        for category, items in schema.iteritems():
            for item in items:
                name = item.get('name')
                input = item.get('input')

                # ignore these attributes, they should not be processed.
                # TODO: make this a global list
                if name == 'Latitude' or name == 'Longitude' or name == 'geojson' or name == 'NASA GCMD Keywords':
                    continue

                # processAttribute does all sorts-o-stuff, see function definition below
                processAttribute(name, input, pkg, mrValue, setValues, keywords)
                names.append(name)

                # attributes that allow a free-text "Other" companion value
                if item.get('allowOther') == True:
                    processAttribute(name+" Other", "split-text", pkg, mrValue, setValues, keywords)
                    names.append(name+" Other")

        # set the known data attributes
        # the mapreduce function created these objects, storing all know wavelengths and metadata attributes
        # for the function.  Here we transform those objects (dicts) into arrays, we only care about the keys
        # Finally, MongoDB does not allow '.' in attribute name, so names were stored with commas instead,
        # transpose the ',' back to '.'
        for key in mrValue['tmp__schema__']['wavelengths']:
            ecosis['spectra_metadata_schema']['wavelengths'].append(re.sub(r',', '.', key))
        for key in mrValue['tmp__schema__']['metadata']:
            ecosis['spectra_metadata_schema']['metadata'].append(re.sub(r',', '.', key))

        # tell MongoDB to remove the object storing our schema information processed above
        setValues['$unset']['value.tmp__schema__'] = ''

        # append the gcmd keywords
        # 'NASA GCMD Keywords' extra is a JSON array of {label: "a > b > c"} items
        gcmd = getPackageExtra('NASA GCMD Keywords', pkg)
        if gcmd is not None and gcmd != '':
            arr = json.loads(gcmd)
            setValues['$set']['value.ecosis']['nasa_gcmd_keywords'] = arr
            keywords = []

            # create unique array of all gcmd keywords to be searched on
            # each label is a ' > '-delimited hierarchy; index every segment
            for item in arr:
                parts = item.get('label').split('>')
                parts =  map(unicode.strip, parts)
                for key in parts:
                    if key not in keywords:
                        keywords.append(key)

            setValues['$set']['value.NASA GCMD Keywords'] = keywords

        # finally, let's handle geojson
        geojson = processGeoJson(bboxInfo, pkg);
        if len(geojson.get('geometries')) == 0:
            setValues['$set']['value.ecosis']['geojson'] = None
        else:
            setValues['$set']['value.ecosis']['geojson'] = geojson

        # really, finally, update the collection with the 'setValues' dict we have been creating
        collection.update(
            {'_id': pkg['id']},
            setValues
        )