示例#1
0
def prepare(package_id, force=False):
    """Run every active resource of a package through the importer.

    Loads the workspace record stored for ``package_id`` in the "package"
    collection (starting a fresh one when nothing is stored), processes each
    active CKAN resource -- zip resources via ``extractZip``, everything else
    via ``importer.processFile`` -- and upserts the record together with the
    collected results.

    Args:
        package_id: id of the package to prepare.
        force: when true, prepare again even if the stored record is already
            flagged as prepared.

    Returns:
        ``{"success": True, "message": ...}`` when the package was already
        prepared and ``force`` is not set; otherwise the workspace record that
        was written, carrying "runInfo", "lastTouched" and "prepared".

    Raises:
        Exception: if the CKAN package's state is 'deleted'.
    """
    package_info = collections.get("package").find_one(
        {"packageId": package_id},
        {"_id": 0},
    )

    # nothing stored yet: start a fresh workspace record
    if package_info is None:
        package_info = {"packageId": package_id}

    # bail out early when the work is already done and no re-run was requested
    nothing_to_do = (not force) and package_info.get("prepared") == True
    if nothing_to_do:
        return {
            "success" : True,
            "message" : "already prepared, use force flag to force prepare"
        }

    # refuse to prepare a package that has been deleted
    ckan_package = ckanPackageQuery.get(package_id)
    if ckan_package.get('state') == 'deleted':
        raise Exception('Package has been deleted')

    run_info = []
    for resource in ckanResourceQuery.active(package_id):
        # location of the file on disk and its extension
        resource_id = resource.get('id')
        filepath = resourceUtil.get_path(resource_id)
        resource_name = resource.get('name')
        ext = _getFileExtension(resource_name)

        # 'normal' (non-zip) file: a single result
        if ext != "zip":
            run_info.append(
                importer.processFile(filepath, package_id, resource_id, resource=resource)
            )
            continue

        # zip file: one result per extracted entry
        # TODO: we should be checking a zip hash before we go unzipping every time
        run_info.extend(extractZip(package_id, resource_id, filepath, resource_name))

    # record what we did (or did not) do, then persist it
    package_info["runInfo"] = run_info
    package_info["lastTouched"] = datetime.utcnow()
    package_info["prepared"] = True

    collections.get("package").update({"packageId": package_id}, package_info, upsert=True)
    return package_info
示例#2
0
def get(package_id):
    """Collect the workspace and CKAN view of a package.

    Args:
        package_id: id of the package to describe.

    Returns:
        dict with the keys:
            "package": the stored workspace record without "runInfo" and
                "_id" (``{}`` when nothing is stored),
            "resources": the entries returned by ``getResource`` for every
                active resource, minus those flagged "excel" or "isZip",
            "ckan": ``{"package": ..., "resources": ...}`` as returned by the
                CKAN queries; every resource additionally gets a "file_size"
                key (0 when the file cannot be stat'ed),
            "pushed": the result of ``isPushed(package_id)``.
    """
    # get all package resources
    resources = ckanResourceQuery.active(package_id)

    package_info = collections.get("package").find_one(
        {"packageId": package_id},
        {"runInfo": 0, "_id": 0},
    )
    if package_info is None:
        package_info = {}

    response = {
        "package" : package_info,
        "resources" : [],
        "ckan" : {
            "package" : ckanPackageQuery.get(package_id),
            "resources" : resources
        },
        "pushed" : isPushed(package_id)
    }

    # append information about the dataset resources to response
    for resource in resources:
        sheets = getResource(resource.get('id'))

        upload = uploader.ResourceUpload(resource)
        path = upload.get_path(resource['id'])
        # Ask for the size directly instead of exists() + getsize(): one stat
        # call, and a file that vanishes in between no longer raises.
        # os.path.exists() reports False for exactly these errors, so the
        # fallback value is unchanged.
        try:
            resource['file_size'] = os.path.getsize(path)
        except (OSError, ValueError):
            resource['file_size'] = 0

        # we don't care about root excel files, only the sheets
        response['resources'].extend(
            sheet for sheet in sheets
            if not (sheet.get('excel') == True or sheet.get('isZip') == True)
        )

    return response