Example #1
    def __fetch_object(self, opts):
        """
        Upload a file to Diffgram from an Azure Blob

        :param opts: Dictionary with parameters for object fetching.
        :return: created Input object if the file was enqueued, else None (or a dict with an error log)
        """
        spec_list = [{'bucket_name': str, 'path': str}]
        log = regular_log.default()
        log, input = regular_input.input_check_many(untrusted_input = opts,
                                                    spec_list = spec_list,
                                                    log = log)
        if len(log["error"].keys()) >= 1:
            return {'log': log}
        shared_access_signature = BlobSharedAccessSignature(
            account_name = self.connection_client.account_name,
            account_key = self.connection_client.credential.account_key
        )

        expiration_offset = 40368000  # SAS lifetime in seconds (~467 days)
        blob_name = opts['path']
        container = opts['bucket_name']
        added_seconds = datetime.timedelta(0, expiration_offset)
        expiry_time = datetime.datetime.utcnow() + added_seconds
        filename = blob_name.split("/")[-1]
        sas = shared_access_signature.generate_blob(
            container_name = container,
            blob_name = blob_name,
            start = datetime.datetime.utcnow(),
            expiry = expiry_time,
            permission = BlobSasPermissions(read = True),
            content_disposition = 'attachment; filename=' + filename,
        )
        sas_url = 'https://{}.blob.core.windows.net/{}/{}?{}'.format(
            self.connection_client.account_name,
            container,
            blob_name,
            sas
        )

        with sessionMaker.session_scope() as session:

            project = Project.get_by_string_id(session, self.config_data.get('project_string_id'))
            member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first()
            # Deduce media type:
            extension = Path(opts['path']).suffix
            extension = extension.lower()
            media_type = None
            if extension in images_allowed_file_names:
                media_type = 'image'
            elif extension in videos_allowed_file_names:
                media_type = 'video'
            else:
                # TODO: decide whether to raise an exception here or just skip.
                log = regular_log.default()
                log['error']['invalid_type'] = 'File must be of type: {} {}'.format(str(images_allowed_file_names),
                                                                                    str(videos_allowed_file_names))
                log['error']['file_name'] = opts['path']
                log['opts'] = opts
                Event.new(
                    session = session,
                    member_id = opts['event_data']['request_user'],
                    kind = 'microsoft_azure_new_import_warning',
                    description = 'Skipped import for {}, invalid file type.'.format(opts['path']),
                    error_log = log,
                    project_id = project.id,
                    member = member,
                    success = False
                )
                return None

            # metadata = self.connection_client.head_object(Bucket=self.config_data['bucket_name'], Key=path)
            created_input = packet.enqueue_packet(self.config_data['project_string_id'],
                                                  session = session,
                                                  media_url = sas_url,
                                                  media_type = media_type,
                                                  job_id = opts.get('job_id'),
                                                  batch_id = opts.get('batch_id'),
                                                  file_name = opts.get('path'),
                                                  video_split_duration = opts.get('video_split_duration'),
                                                  directory_id = opts.get('directory_id'),
                                                  extract_labels_from_batch = True)
            log = regular_log.default()
            log['opts'] = opts
            Event.new(
                session = session,
                member_id = opts['event_data']['request_user'],
                kind = 'microsoft_azure_new_import_success',
                description = 'New cloud import for {}'.format(opts['path']),
                error_log = log,
                project_id = project.id,
                member = member,
                success = True
            )
        return created_input
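
A minimal sketch of the opts payload this method expects, inferred from the spec_list and the opts reads above; all values are placeholders, and the name-mangled call at the end is hypothetical since the owning class name is not shown in this excerpt:

    # Hypothetical payload, inferred from spec_list and the opts reads above.
    opts = {
        'bucket_name': 'my-container',        # Azure container name
        'path': 'images/cat.jpg',             # blob path inside the container
        'event_data': {'request_user': 1},    # user id recorded on the Event rows
        'job_id': None,                       # optional: attach the input to a job
        'batch_id': None,
        'directory_id': None,
        'video_split_duration': None,
    }
    # The method is private, so calling it from outside the (unnamed) class
    # would need name mangling, e.g. connector._Connector__fetch_object(opts).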
Example #2
    def __send_export(self, opts):
        spec_list = [{'project_string_id': dict}]
        log = regular_log.default()
        log, input = regular_input.input_check_many(untrusted_input = self.config_data,
                                                    spec_list = spec_list,
                                                    log = log)
        if len(log["error"].keys()) >= 1:
            return {'log': log}
        spec_list = [
            {'path': str},
            {"format": {
                'default': 'JSON',
                'kind': str,
                'valid_values_list': ['JSON', 'YAML']
            }},
            {'export_id': str},
            {'bucket_name': str},
        ]
        log = regular_log.default()
        log, input = regular_input.input_check_many(untrusted_input = opts,
                                                    spec_list = spec_list,
                                                    log = log,
                                                    string_len_not_zero = False)
        if len(log["error"].keys()) >= 1:
            return {'log': log}
        if not opts['path'].endswith('/') and opts['path'] != '':
            log['error']['path'] = 'Path on bucket must be a folder, not a filename.'
            return {'log': log}

        with sessionMaker.session_scope() as session:
            project = Project.get_by_string_id(session, self.config_data['project_string_id'])
            member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first()
            export = session.query(Export).filter(Export.id == opts['export_id']).first()
            # Check perms and export status.
            export_check_result = check_export_permissions_and_status(export,
                                                                      self.config_data['project_string_id'],
                                                                      session)
            if len(export_check_result['error'].keys()) >= 1:
                return export_check_result

            result = export_view_core(
                export = export,
                format = opts['format'],
                return_type = 'bytes')
            filename = generate_file_name_from_export(export, session)

            if opts['path'] != '':
                key = '{}{}.{}'.format(opts['path'], filename, opts['format'].lower())
            else:
                key = '{}.{}'.format(filename, opts['format'].lower())

            file = io.BytesIO(result)
            blob_client = self.connection_client.get_blob_client(container = opts['bucket_name'], blob = key)
            content_type = mimetypes.guess_type(key)[0]  # guess from the full key, which includes the extension
            my_content_settings = ContentSettings(content_type = content_type)
            blob_client.upload_blob(file, content_settings = my_content_settings)
            log = regular_log.default()
            log['opts'] = opts
            Event.new(
                session = session,
                member_id = opts['event_data']['request_user'],
                kind = 'microsoft_azure_new_export_success',
                description = 'New cloud export for {}{}'.format(opts['path'], filename),
                error_log = log,
                member = member,
                project_id = project.id,
                success = True
            )
            return {'result': True}
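
For reference, a hypothetical payload for this export path, following the spec_list and the folder check above; all values are placeholders:

    # Hypothetical payload; 'path' must be '' or end with '/', per the check above.
    opts = {
        'path': 'exports/2021/',            # folder on the bucket, or '' for the root
        'format': 'JSON',                   # 'JSON' (default) or 'YAML'
        'export_id': '42',
        'bucket_name': 'my-container',
        'event_data': {'request_user': 1},
    }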
Example #3
def job_launch_list_core(session, metadata_proposed, output_mode="serialize"):
    """
        Get the job_launch objects based on filters in
        metadata_proposed.

    """

    meta = default_metadata(metadata_proposed)
    output_job_launch_list = []
    limit_counter = 0

    # CAUTION
    # Multiple "modes", for output and trainer builder, maybe more in future
    builder_or_trainer_mode = meta['builder_or_trainer']['mode']

    # It doesn't really make sense to have this check here;
    # it should be part of some other metadata validation.
    if builder_or_trainer_mode not in ["builder", "trainer"]:
        raise Forbidden("Invalid builder_or_trainer_mode mode.")

    query = session.query(JobLaunch).join(Job)

    user = User.get(session)

    if user.last_builder_or_trainer_mode != builder_or_trainer_mode:
        raise Forbidden(
            "Invalid user relation to builder_or_trainer_mode mode.")

    ### START FILTERS ###

    if meta["status"]:
        if meta["status"] != "All":
            query = query.filter(JobLaunch.status == meta["status"])
    if meta["date_from"]:
        date_from = datetime.datetime.strptime(meta["date_from"], "%Y-%m-%d")
        date_from = date_from.replace(hour=0,
                                      minute=0,
                                      second=0,
                                      microsecond=0)
        query = query.filter(JobLaunch.time_created >= date_from)
    if meta["date_to"]:
        date_to = datetime.datetime.strptime(meta["date_to"], "%Y-%m-%d")
        date_to = date_to.replace(hour=0, minute=0, second=0, microsecond=0)

        query = query.filter(JobLaunch.time_created <= date_to)

    if meta["job_ids"]:
        query = query.filter(Job.id.in_(meta["job_ids"]))

    # Also assumes org is None.
    # Actually this should still be complementary.
    if meta["project_string_id"]:
        project = Project.get_by_string_id(
            session=session, project_string_id=meta["project_string_id"])
        query = query.filter(Job.project_id == project.id)
    ### END FILTERS ###

    query = query.order_by(Job.time_created.desc())
    query = query.limit(meta["limit"])
    query = query.offset(meta["start_index"])

    job_launch_list = query.all()

    if output_mode == "serialize":

        for job_launch in job_launch_list:
            serialized = job_launch.serialize_for_list_view(session=session)

            output_job_launch_list.append(serialized)
            limit_counter += 1

    meta['end_index'] = meta['start_index'] + len(output_job_launch_list)
    meta['length_current_page'] = len(output_job_launch_list)

    if limit_counter == 0:
        meta['no_results_match_meta'] = True

    return output_job_launch_list, meta
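
A minimal sketch of calling this list view, assuming default_metadata() fills in any omitted keys; the filter values below are placeholders:

    # Hypothetical call; the metadata keys mirror the filters applied above.
    with sessionMaker.session_scope() as session:
        job_launch_list, meta = job_launch_list_core(
            session=session,
            metadata_proposed={
                'builder_or_trainer': {'mode': 'builder'},
                'status': 'All',                  # or a specific JobLaunch status
                'date_from': '2021-01-01',        # parsed with "%Y-%m-%d"
                'date_to': '2021-12-31',
                'job_ids': None,
                'project_string_id': 'my_project',
                'limit': 25,
                'start_index': 0,
            },
            output_mode="serialize")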
Example #4
    def __fetch_folder(self, opts):
        result = []

        if self.config_data.get('project_string_id') is None:
            return {'result': 'error'}
        paths = opts['path']
        if not isinstance(paths, list):
            paths = [paths]
        with sessionMaker.session_scope() as session:
            project = Project.get_by_string_id(
                session, self.config_data.get('project_string_id'))
            member = session.query(Member).filter(
                Member.user_id == opts['event_data']['request_user']).first()
            for path in paths:
                blobs = self.connection_client.list_blobs(opts['bucket_name'],
                                                          prefix=path)
                for blob in blobs:
                    # Deduce media type:
                    if blob.name.endswith('/'):
                        continue

                    blob_expiry = int(time.time() + (60 * 60 * 24 * 30))  # signed URL lifetime: 30 days
                    signed_url = blob.generate_signed_url(
                        expiration=blob_expiry)
                    extension = Path(blob.name).suffix.lower()  # lowercase to match the allowed-extension lists
                    media_type = None
                    if extension in images_allowed_file_names:
                        media_type = 'image'
                    elif extension in videos_allowed_file_names:
                        media_type = 'video'
                    else:
                        logging.warning('File: {} must be of type: {} {}'.format(
                            blob.name, str(images_allowed_file_names),
                            str(videos_allowed_file_names)))

                        log = regular_log.default()
                        log['error']['invalid_type'] = 'File must be of type: {} {}'.format(
                            str(images_allowed_file_names),
                            str(videos_allowed_file_names))
                        log['error']['file_name'] = path
                        log['opts'] = opts
                        Event.new(
                            session=session,
                            member_id=opts['event_data']['request_user'],
                            kind='google_cloud_new_import_warning',
                            description='Skipped import for {}, invalid file type.'.format(blob.name),
                            error_log=log,
                            project_id=project.id,
                            member=member,
                            success=False)
                        continue
                    # TODO: check Input() table for duplicate file?
                    created_input = packet.enqueue_packet(
                        self.config_data['project_string_id'],
                        session=session,
                        media_url=signed_url,
                        media_type=media_type,
                        job_id=opts.get('job_id'),
                        batch_id=opts.get('batch_id'),
                        file_name=path,
                        video_split_duration=opts.get('video_split_duration'),
                        directory_id=opts.get('directory_id'),
                        extract_labels_from_batch=True)
                    log = regular_log.default()
                    log['opts'] = opts
                    Event.new(session=session,
                              member_id=opts['event_data']['request_user'],
                              kind='google_cloud_new_import_success',
                              description='New cloud import for {}'.format(blob.name),
                              error_log=log,
                              project_id=project.id,
                              member=member,
                              success=True)
                    result.append(created_input)
        return result
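
A hypothetical payload for this folder import; per the isinstance check at the top of the method, 'path' accepts either a single prefix or a list of prefixes:

    # Hypothetical payload; every blob under each prefix is considered.
    opts = {
        'bucket_name': 'my-gcs-bucket',
        'path': ['training/images/', 'training/videos/'],
        'event_data': {'request_user': 1},
        'job_id': None,
        'batch_id': None,
        'directory_id': None,
        'video_split_duration': None,
    }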
Example #5
    def __fetch_object(self, opts):
        """Upload a file to diffgram from an S3 bucket

        :param s3_file_key: path of file to fetch from
        :return: file obj if file was uploaded, else False
        """
        spec_list = [{'bucket_name': str, 'path': str}]
        log = regular_log.default()
        log, input = regular_input.input_check_many(untrusted_input=opts,
                                                    spec_list=spec_list,
                                                    log=log)
        if len(log["error"].keys()) >= 1:
            return {'log': log}
        # Note: S3 presigned URLs cannot be permanent; the maximum lifetime is 1 week.
        signed_url = self.connection_client.generate_presigned_url('get_object',
                                                                   Params={'Bucket': opts['bucket_name'],
                                                                           'Key': opts['path']},
                                                                   ExpiresIn=3600 * 24 * 6)  # 6 days

        with sessionMaker.session_scope() as session:

            project = Project.get_by_string_id(session, self.config_data.get('project_string_id'))
            member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first()
            # Deduce media type:
            extension = Path(opts['path']).suffix
            extension = extension.lower()
            media_type = None
            if extension in images_allowed_file_names:
                media_type = 'image'
            elif extension in videos_allowed_file_names:
                media_type = 'video'
            else:
                # TODO: decide whether to raise an exception here or just skip.
                log = regular_log.default()
                log['error']['invalid_type'] = 'File must be of type: {} {}'.format(str(images_allowed_file_names),
                                                                                    str(videos_allowed_file_names))
                log['error']['file_name'] = opts['path']
                log['opts'] = opts
                Event.new(
                    session=session,
                    member_id=opts['event_data']['request_user'],
                    kind='aws_s3_new_import_warning',
                    description='Skipped import for {}, invalid file type.'.format(opts['path']),
                    error_log=log,
                    project_id=project.id,
                    member=member,
                    success=False
                )
                return None
            # metadata = self.connection_client.head_object(Bucket=self.config_data['bucket_name'], Key=path)
            created_input = packet.enqueue_packet(self.config_data['project_string_id'],
                                                  session=session,
                                                  media_url=signed_url,
                                                  media_type=media_type,
                                                  job_id=opts.get('job_id'),
                                                  video_split_duration=opts.get('video_split_duration'),
                                                  directory_id=opts.get('directory_id'))
            log = regular_log.default()
            log['opts'] = opts
            Event.new(
                session=session,
                member_id=opts['event_data']['request_user'],
                kind='aws_s3_new_import_success',
                description='New cloud import for {}'.format(opts['path']),
                error_log=log,
                project_id=project.id,
                member=member,
                success=True
            )
        return created_input
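
For reference, the presigned-URL step above in isolation, as a self-contained boto3 sketch; the bucket and key are placeholders, and credentials are assumed to come from the environment:

    import boto3

    s3 = boto3.client('s3')
    signed_url = s3.generate_presigned_url(
        'get_object',
        Params={'Bucket': 'my-bucket', 'Key': 'images/cat.jpg'},
        ExpiresIn=3600 * 24 * 6)  # 6 days; S3 caps presigned URLs at 7 days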