def prepare(package_id, force=False):
    """Run every active resource of a package through the importer.

    Loads the stored workspace record for ``package_id`` (a fresh record is
    started when none exists), processes each active resource -- zip
    archives via ``extractZip``, everything else via
    ``importer.processFile`` -- and upserts the record with the collected
    results.

    Args:
        package_id: Id of the package to prepare.
        force: When true, run again even if the stored record is already
            flagged as prepared.

    Returns:
        The updated workspace record, or a short success/message dict when
        the package is already prepared and ``force`` is not set.

    Raises:
        Exception: If the CKAN package's state is ``'deleted'``.
    """
    lookup = {"packageId": package_id}
    packageInfo = collections.get("package").find_one(lookup, {"_id": 0})

    # no workspace record yet: start a new one
    if packageInfo is None:
        packageInfo = {"packageId": package_id}

    # nothing to do unless the caller forces a re-run
    if not force:
        if packageInfo.get("prepared") == True:
            return {
                "success": True,
                "message": "already prepared, use force flag to force prepare"
            }

    # refuse to prepare a package that has been deleted
    ckanPackage = ckanPackageQuery.get(package_id)
    if ckanPackage.get('state') == 'deleted':
        raise Exception('Package has been deleted')

    status = []
    for resource in ckanResourceQuery.active(package_id):
        resource_id = resource.get('id')
        resource_name = resource.get('name')

        # path on disk for the file, plus its extension
        filepath = resourceUtil.get_path(resource_id)
        ext = _getFileExtension(resource_name)

        if ext != "zip":
            # 'normal' (non-zip) file: one result per resource
            status.append(
                importer.processFile(filepath, package_id, resource_id, resource=resource)
            )
            continue

        # zip archive: one result per extracted entry
        # TODO: we should be checking a zip hash before we go unzipping every time
        status.extend(extractZip(package_id, resource_id, filepath, resource_name))

    # record what we did (or did not) do
    packageInfo.update(
        runInfo=status,
        lastTouched=datetime.utcnow(),
        prepared=True,
    )
    collections.get("package").update({"packageId": package_id}, packageInfo, upsert=True)

    return packageInfo
def get(package_id):
    """Assemble the workspace view of a package.

    Args:
        package_id: Id of the package to describe.

    Returns:
        A dict with the keys:

        * ``package``   -- stored workspace record without ``runInfo`` and
          ``_id`` (``{}`` when no record exists),
        * ``resources`` -- sheet entries of all active resources, skipping
          entries flagged ``excel`` or ``isZip``,
        * ``ckan``      -- the CKAN package and its active resources,
        * ``pushed``    -- result of ``isPushed(package_id)``.

        Each CKAN resource dict additionally receives a ``file_size`` entry
        (``0`` when the file is not on disk).
    """
    # all active resources of the package
    resources = ckanResourceQuery.active(package_id)

    workspace_package = collections.get("package").find_one(
        {"packageId": package_id},
        {"runInfo": 0, "_id": 0},
    )
    ckan_info = {
        "package": ckanPackageQuery.get(package_id),
        "resources": resources,
    }
    pushed = isPushed(package_id)

    collected_sheets = []
    response = {
        "package": {} if workspace_package is None else workspace_package,
        "resources": collected_sheets,
        "ckan": ckan_info,
        "pushed": pushed,
    }

    # add information about the dataset resources to the response
    for resource in resources:
        sheets = getResource(resource.get('id'))

        # size of the uploaded file on disk, 0 when it is missing
        upload = uploader.ResourceUpload(resource)
        path = upload.get_path(resource['id'])
        resource['file_size'] = os.path.getsize(path) if os.path.exists(path) else 0

        # we don't care about root excel files (or zips), only the sheets
        collected_sheets.extend(
            sheet for sheet in sheets
            if not (sheet.get('excel') == True or sheet.get('isZip') == True)
        )

    return response
def get(
    packageId="",
    resourceId=None,
    sheetId=None,
    index=0,
    showProcessInfo=False,
    must_be_valid=False,
    clean_wavelengths=True,
):
    """Fetch one spectra of a package and decorate it with metadata.

    Args:
        packageId: Id of the package the spectra belongs to.
        resourceId: Optional resource id to narrow the lookup.
        sheetId: Optional sheet id to narrow the lookup.
        index: Number of matching documents to skip, ordered ascending by
            their ``index`` field.
        showProcessInfo: When true, the collected attribute process info is
            handed to ``addEcosisNamespace``.
        must_be_valid: When true, ``{}`` is returned for a spectra that has
            no ``datapoints`` entry or an empty one.
        clean_wavelengths: Passed through to ``moveWavelengths``.

    Returns:
        The decorated spectra, or ``{}`` when ``must_be_valid`` is set and
        the spectra has no datapoints.

    Raises:
        Exception: If no spectra document matches the query at ``index``.
    """
    # build out the query; resource and sheet id are optional limits
    query = {"type": "data", "packageId": packageId}
    for field, value in (("resourceId", resourceId), ("sheetId", sheetId)):
        if value is not None:
            query[field] = value

    # get spectra at index
    main = collections.get('spectra').find_one(
        query,
        skip=index,
        sort=[("index", pymongo.ASCENDING)],
    )
    if main is None:
        raise Exception('Unabled to get spectra from package_id: %s at index %s' % (packageId, index))

    # the document also holds config information about the spectra,
    # only the 'spectra' attribute is wanted here
    spectra = main.get('spectra')

    # this also replaces , with .
    # also moves measurement wavelength keys to the 'datapoints' object
    moveWavelengths(spectra, clean_wavelengths)

    if must_be_valid and ('datapoints' not in spectra or len(spectra['datapoints']) == 0):
        return {}

    # information for the sheet this spectra came from
    sheet_query = {
        "packageId": packageId,
        "resourceId": main.get("resourceId"),
        "sheetId": main.get("sheetId"),
    }
    sheetInfo = collections.get('resource').find_one(sheet_query)

    # package information for the package this spectra came from
    package = ckanPackageQuery.get(packageId)

    attributeProcessInfo = []

    # join together metadata to this spectra
    join(packageId, spectra, attributeProcessInfo)

    config = collections.get('package').find_one({"packageId": packageId})
    if config is None:
        config = {}

    # set the spectra attribute aliases
    mapNames(spectra, config, attributeProcessInfo, package)

    # lookup any usda code given
    usda.setCodes(spectra, info=attributeProcessInfo)

    # strip controlled vocab fields. Remove any values that are not part of
    # the controlled vocabulary
    controlledVocab.enforce(spectra)

    # add 'spectra.ecosis' attribute with package and sheet info
    namespace_options = {"processInfo": attributeProcessInfo} if showProcessInfo else {}
    addEcosisNamespace(spectra, package, main, sheetInfo, **namespace_options)

    # set the sort information. This data needs to be of the correct type
    # (string, number, date) for proper sorting in mongodb
    setSort(spectra, config, package)

    # set the location information. Needs to be proper geojson if it's going
    # to be used
    setLocation(spectra)

    # set photo
    setPhoto(packageId, spectra)

    return spectra