def prepare(package_id, force=False):
    """Process every active resource of a package and persist the run result.

    Looks up the workspace record for ``package_id`` in the "package"
    collection (starting from a minimal in-memory record when none exists),
    runs each active CKAN resource through either the zip extractor or the
    file importer, then upserts the record together with the collected
    per-resource results.

    Args:
        package_id: Id of the package to prepare.
        force: When true, run again even if the record is already flagged
            as prepared.

    Returns:
        The updated workspace record (``runInfo``, ``lastTouched`` and
        ``prepared`` set), or a small ``success``/``message`` dict when the
        package was already prepared and ``force`` is false.

    Raises:
        Exception: If the CKAN package reports the state ``'deleted'``.
    """
    workspace_pkg = collections.get("package").find_one(
        {"packageId": package_id},
        {"_id": 0},
    )

    # No workspace record yet: start from a minimal one.
    if workspace_pkg is None:
        workspace_pkg = {"packageId": package_id}

    # Nothing to do unless the caller explicitly forces a re-run.
    if not force and workspace_pkg.get("prepared") == True:
        return {
            "success": True,
            "message": "already prepared, use force flag to force prepare",
        }

    # Refuse to prepare a package that has been deleted in CKAN.
    ckan_pkg = ckanPackageQuery.get(package_id)
    if ckan_pkg.get('state') == 'deleted':
        raise Exception('Package has been deleted')

    run_log = []
    for resource in ckanResourceQuery.active(package_id):
        resource_id = resource.get('id')
        resource_name = resource.get('name')

        # Location of the file on disk and its extension.
        filepath = resourceUtil.get_path(resource_id)
        ext = _getFileExtension(resource_name)

        # 'Normal' (non-zip) file: a single import result.
        if ext != "zip":
            run_log.append(
                importer.processFile(
                    filepath, package_id, resource_id, resource=resource
                )
            )
            continue

        # Zip archive: one result per extracted entry.
        # TODO: we should be checking a zip hash before we go unzipping every time
        run_log.extend(
            extractZip(package_id, resource_id, filepath, resource_name)
        )

    # Record what was (or was not) done on the workspace record.
    workspace_pkg.update({
        "runInfo": run_log,
        "lastTouched": datetime.utcnow(),
        "prepared": True,
    })

    collections.get("package").update(
        {"packageId": package_id}, workspace_pkg, upsert=True
    )
    return workspace_pkg
def get(package_id):
    """Assemble the workspace view of a package and its active resources.

    Args:
        package_id: Id of the package to describe.

    Returns:
        A dict with these keys:

        * ``package`` -- the workspace record from the "package" collection
          (without ``runInfo`` and ``_id``), or ``{}`` when none exists.
        * ``resources`` -- the entries returned by ``getResource`` for each
          active resource, skipping those flagged ``excel`` or ``isZip``.
        * ``ckan`` -- the CKAN package plus its active resources; each
          resource dict is annotated in place with ``file_size``.
        * ``pushed`` -- the result of ``isPushed(package_id)``.
    """
    active_resources = ckanResourceQuery.active(package_id)

    workspace_pkg = collections.get("package").find_one(
        {"packageId": package_id},
        {"runInfo": 0, "_id": 0},
    )
    ckan_pkg = ckanPackageQuery.get(package_id)
    pushed = isPushed(package_id)

    visible_sheets = []
    response = {
        "package": {} if workspace_pkg is None else workspace_pkg,
        "resources": visible_sheets,
        "ckan": {
            "package": ckan_pkg,
            "resources": active_resources,
        },
        "pushed": pushed,
    }

    for resource in active_resources:
        sheets = getResource(resource.get('id'))

        # Size of the uploaded file on disk; 0 when the file is missing.
        path = uploader.ResourceUpload(resource).get_path(resource['id'])
        resource['file_size'] = (
            os.path.getsize(path) if os.path.exists(path) else 0
        )

        # Root excel files and zip containers are skipped; only their
        # sheets / contents are reported.
        visible_sheets.extend(
            sheet for sheet in sheets
            if not (sheet.get('excel') == True or sheet.get('isZip') == True)
        )

    return response