Example #1
def get_available_checksums_from_disk(channel_id, drive_id):
    """
    Return the set of LocalFile ids for the given channel that already
    exist as files on the mounted drive, caching the result per disk
    and per disk/channel pair.
    """
    try:
        basepath = get_mounted_drive_by_id(drive_id).datafolder
    except KeyError:
        raise LocationError("Drive with id {} does not exist".format(drive_id))
    PER_DISK_CACHE_KEY = "DISK_AVAILABLE_CHECKSUMS_{basepath}".format(
        basepath=basepath)
    PER_DISK_PER_CHANNEL_CACHE_KEY = "DISK_AVAILABLE_CHECKSUMS_{basepath}_{channel_id}".format(
        basepath=basepath, channel_id=channel_id)
    if PER_DISK_PER_CHANNEL_CACHE_KEY not in cache:
        if PER_DISK_CACHE_KEY not in cache:
            content_dir = get_content_storage_dir_path(datafolder=basepath)

            disk_checksums = []

            for _, _, files in os.walk(content_dir):
                for name in files:
                    checksum = os.path.splitext(name)[0]
                    # Only keep names that match our standard checksum filename format
                    if checksum_regex.match(checksum):
                        disk_checksums.append(checksum)
            # Cache is per device, so a relatively long lived one should
            # be fine.
            cache.set(PER_DISK_CACHE_KEY, disk_checksums, 3600)
        else:
            disk_checksums = cache.get(PER_DISK_CACHE_KEY)
        checksums = set(
            LocalFile.objects.filter(
                files__contentnode__channel_id=channel_id).values_list(
                    "id", flat=True)).intersection(set(disk_checksums))
        cache.set(PER_DISK_PER_CHANNEL_CACHE_KEY, checksums, 3600)
    else:
        checksums = cache.get(PER_DISK_PER_CHANNEL_CACHE_KEY)
    return checksums
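
A minimal usage sketch (both id values below are hypothetical placeholders):

available = get_available_checksums_from_disk(
    "95a52b386f2c485cb2f26ff17b325676",  # channel_id (placeholder)
    "bcbd3b7b6f3f11ee8a4d",              # drive_id (placeholder)
)
# Repeat calls within the hour are served from the per-disk/per-channel
# cache instead of re-walking the content directory.
print("{} content files already on this drive".format(len(available)))
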
Example #2
    def channeldiffstats(self, request):
        job_metadata = {}
        channel_id = request.data.get("channel_id")
        method = request.data.get("method")
        drive_id = request.data.get("drive_id")
        baseurl = request.data.get("baseurl")

        # Validate the request and collect job metadata
        if not channel_id:
            raise serializers.ValidationError("The channel_id field is required.")
        if not method:
            raise serializers.ValidationError("The method field is required.")

        if method == "network":
            baseurl = baseurl or conf.OPTIONS["Urls"]["CENTRAL_CONTENT_BASE_URL"]
            job_metadata["baseurl"] = baseurl
            # get channel version metadata
            url = get_channel_lookup_url(baseurl=baseurl, identifier=channel_id)
            resp = requests.get(url)
            channel_metadata = resp.json()
            job_metadata["new_channel_version"] = channel_metadata[0]["version"]
        elif method == "disk":
            if not drive_id:
                raise serializers.ValidationError(
                    "The drive_id field is required when using 'disk' method."
                )
            job_metadata = _add_drive_info(job_metadata, request.data)
            # get channel version metadata
            drive = get_mounted_drive_by_id(drive_id)
            channel_metadata = read_channel_metadata_from_db_file(
                get_content_database_file_path(channel_id, drive.datafolder)
            )
            job_metadata["new_channel_version"] = channel_metadata.version
        else:
            raise serializers.ValidationError(
                "'method' field should either be 'network' or 'disk'."
            )

        job_metadata.update(
            {
                "type": "CHANNELDIFFSTATS",
                "started_by": request.user.pk,
                "channel_id": channel_id,
            }
        )

        job_id = priority_queue.enqueue(
            diff_stats,
            channel_id,
            method,
            drive_id=drive_id,
            baseurl=baseurl,
            extra_metadata=job_metadata,
            track_progress=False,
            cancellable=True,
        )

        resp = _job_to_response(priority_queue.fetch_job(job_id))

        return Response(resp)
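
A sketch of the request body this endpoint expects, per the validation above (the channel_id value is a placeholder):

payload = {
    "channel_id": "95a52b386f2c485cb2f26ff17b325676",  # required
    "method": "network",  # "network" or "disk"
    # "baseurl": optional; defaults to conf.OPTIONS["Urls"]["CENTRAL_CONTENT_BASE_URL"]
    # "drive_id": required only when method == "disk"
}
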
Example #3
    def checksums_from_drive_id(self, drive_id, instance):
        try:
            datafolder = get_mounted_drive_by_id(drive_id).datafolder
        except KeyError:
            raise serializers.ValidationError(
                "The external drive with given drive id {} does not exist.".format(
                    drive_id
                )
            )

        return get_available_checksums_from_disk(instance.channel_id, datafolder)
Example #4
    def startdiskcontentimport(self, request):

        try:
            channel_id = request.data["channel_id"]
        except KeyError:
            raise serializers.ValidationError(
                "The channel_id field is required.")

        try:
            drive_id = request.data["drive_id"]
        except KeyError:
            raise serializers.ValidationError(
                "The drive_id field is required.")

        try:
            drive = get_mounted_drive_by_id(drive_id)
        except KeyError:
            raise serializers.ValidationError(
                "That drive_id was not found in the list of drives.")

        # optional arguments
        node_ids = request.data.get("node_ids", None)
        exclude_node_ids = request.data.get("exclude_node_ids", None)

        if node_ids and not isinstance(node_ids, list):
            raise serializers.ValidationError("node_ids must be a list.")

        if exclude_node_ids and not isinstance(exclude_node_ids, list):
            raise serializers.ValidationError(
                "exclude_node_ids must be a list.")

        job_metadata = {
            "type": "DISKCONTENTIMPORT",
            "started_by": request.user.pk
        }

        job_id = get_queue().enqueue(
            call_command,
            "importcontent",
            "disk",
            channel_id,
            drive.datafolder,
            node_ids=node_ids,
            exclude_node_ids=exclude_node_ids,
            extra_metadata=job_metadata,
            track_progress=True,
            cancellable=True,
        )

        resp = _job_to_response(get_queue().fetch_job(job_id))

        return Response(resp)
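
A sketch of the request body, per the validation above (the id values are placeholders):

payload = {
    "channel_id": "95a52b386f2c485cb2f26ff17b325676",
    "drive_id": "bcbd3b7b6f3f11ee8a4d",
    "node_ids": ["2e8e9c1d6f3f11ee8a4d"],  # optional; must be a list if present
    # "exclude_node_ids": also optional; must be a list if present
}
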
Example #5
def _add_drive_info(import_task, task_description):
    """
    Validate the drive_id in task_description and copy the drive's id
    and datafolder into the import_task metadata dict.
    """
    try:
        drive_id = task_description["drive_id"]
    except KeyError:
        raise serializers.ValidationError("The drive_id field is required.")

    try:
        drive = get_mounted_drive_by_id(drive_id)
    except KeyError:
        raise serializers.ValidationError(
            "That drive_id was not found in the list of drives.")

    import_task.update({"drive_id": drive_id, "datafolder": drive.datafolder})

    return import_task
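
A minimal usage sketch (the drive_id value is a placeholder):

job_metadata = _add_drive_info({}, {"drive_id": "bcbd3b7b6f3f11ee8a4d"})
# job_metadata now holds both "drive_id" and the drive's "datafolder",
# as used by the "disk" branch of channeldiffstats above.
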
Example #6
    def startdiskchannelimport(self, request):

        # Load the required parameters
        try:
            channel_id = request.data["channel_id"]
        except KeyError:
            raise serializers.ValidationError(
                "The channel_id field is required.")

        try:
            drive_id = request.data["drive_id"]
        except KeyError:
            raise serializers.ValidationError(
                "The drive_id field is required.")

        try:
            drive = get_mounted_drive_by_id(drive_id)
        except KeyError:
            raise serializers.ValidationError(
                "That drive_id was not found in the list of drives.")

        job_metadata = {
            "type": "DISKCHANNELIMPORT",
            "started_by": request.user.pk
        }

        job_id = get_queue().enqueue(
            call_command,
            "importchannel",
            "disk",
            channel_id,
            drive.datafolder,
            extra_metadata=job_metadata,
            cancellable=True,
        )

        resp = _job_to_response(get_queue().fetch_job(job_id))
        return Response(resp)
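
This view follows the same validation pattern as startdiskcontentimport above, but it enqueues the importchannel command (channel database only) rather than importcontent, so it takes no node selection arguments.
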
Example #7
def _localexport(
    channel_id,
    drive_id,
    update_progress=None,
    check_for_cancel=None,
    node_ids=None,
    exclude_node_ids=None,
    extra_metadata=None,
):
    """
    Export the given channel's database and content files to the drive
    identified by drive_id, removing the exported channel database from
    the drive if the content export is cancelled.
    """
    drive = get_mounted_drive_by_id(drive_id)

    call_command(
        "exportchannel",
        channel_id,
        drive.datafolder,
        update_progress=update_progress,
        check_for_cancel=check_for_cancel,
    )
    try:
        call_command(
            "exportcontent",
            channel_id,
            drive.datafolder,
            node_ids=node_ids,
            exclude_node_ids=exclude_node_ids,
            update_progress=update_progress,
            check_for_cancel=check_for_cancel,
        )
    except UserCancelledError:
        # The export was cancelled partway through; remove the partially
        # written channel database from the drive before re-raising.
        try:
            os.remove(
                get_content_database_file_path(channel_id,
                                               datafolder=drive.datafolder))
        except OSError:
            pass
        raise
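
The enqueue call site for this task is not shown in these examples; the sketch below mirrors the enqueue pattern of the import views above, and the metadata shape and id values are assumptions:

job_id = get_queue().enqueue(
    _localexport,
    "95a52b386f2c485cb2f26ff17b325676",  # channel_id (placeholder)
    "bcbd3b7b6f3f11ee8a4d",              # drive_id (placeholder)
    extra_metadata={"type": "DISKEXPORT"},  # "DISKEXPORT" type name is an assumption
    track_progress=True,
    cancellable=True,
)
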
Example #8
def diff_stats(channel_id, method, drive_id=None, baseurl=None):
    """
    Download the channel database to an upgraded path.
    Annotate the local file availability of the upgraded channel db.
    Calculate diff stats comparing default db and annotated channel db.
    """
    # upgraded content database path
    source_path = paths.get_upgrade_content_database_file_path(channel_id)
    # annotated db to be used for calculating diff stats
    destination_path = paths.get_annotated_content_database_file_path(channel_id)
    try:
        if method == "network":
            call_command(
                "importchannel", "network", channel_id, baseurl=baseurl, no_upgrade=True
            )
        elif method == "disk":
            drive = get_mounted_drive_by_id(drive_id)
            call_command(
                "importchannel", "disk", channel_id, drive.datafolder, no_upgrade=True
            )

        # create all fields/tables at the annotated destination db, based on the current schema version
        bridge = Bridge(
            sqlite_file_path=destination_path, schema_version=CURRENT_SCHEMA_VERSION
        )
        bridge.Base.metadata.create_all(bridge.engine)

        # initialize import manager based on annotated destination path, pulling from source db path
        import_manager = channel_import.initialize_import_manager(
            channel_id,
            cancel_check=False,
            source=source_path,
            destination=destination_path,
        )

        # import channel data from source db path
        import_manager.import_channel_data()
        import_manager.end()

        # annotate file availability on destination db
        annotation.set_local_file_availability_from_disk(destination=destination_path)
        # get the diff count between what's in the default db and the annotated db
        new_resources_count = count_new_resources_available_for_import(
            destination_path, channel_id
        )
        # get the count of leaf nodes which are in the default db, but not in the annotated db
        resources_to_be_deleted_count = count_removed_resources(
            destination_path, channel_id
        )
        # get the ids of leaf nodes which are now incomplete due to missing local files
        updated_resources_ids = automatically_updated_resource_ids(
            destination_path, channel_id
        )
        # remove the annotated database
        try:
            os.remove(destination_path)
        except OSError as e:
            logger.info(
                "Tried to remove {}, but exception {} occurred.".format(
                    destination_path, e
                )
            )
        # annotate job metadata with diff stats
        job = get_current_job()
        if job:
            job.extra_metadata["new_resources_count"] = new_resources_count
            job.extra_metadata[
                "deleted_resources_count"
            ] = resources_to_be_deleted_count
            job.extra_metadata["updated_node_ids"] = updated_resources_ids
            job.save_meta()

    except UserCancelledError:
        # remove the annotated database
        try:
            os.remove(destination_path)
        except OSError:
            pass
        raise
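
Example #2 above enqueues this function on priority_queue; called directly it looks like this (id values are placeholders):

diff_stats(
    "95a52b386f2c485cb2f26ff17b325676",  # channel_id (placeholder)
    "disk",
    drive_id="bcbd3b7b6f3f11ee8a4d",  # placeholder
)
# The function returns nothing; when run as a job, the computed counts are
# written to the job's extra_metadata (see the get_current_job() block above).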