Пример #1
0
async def upload_to_table(wf_module, uploaded_file):
    """
    Parse an uploaded file and commit the outcome to `wf_module`.

    The MIME type is looked up in `_ExtensionMimeTypes`, keyed by '.' plus
    the text after the last '.' in `uploaded_file.name`. When a type is
    found, the object at (`uploaded_file.bucket`, `uploaded_file.key`) is
    opened with `open_for_read` and passed to `parse_bytesio`. An unknown
    type, or a `ResponseError` raised along the way, yields a
    `ProcessResult` that carries only an error message.

    When the result has an error, `uploaded_file.delete()` is called. The
    result is then truncated and sanitized in place and handed to
    `ModuleImpl.commit_result` together with the file's uuid and name.
    """
    ext = '.' + uploaded_file.name.split('.')[-1]
    mime_type = _ExtensionMimeTypes.get(ext, None)
    if mime_type:
        try:
            with open_for_read(uploaded_file.bucket, uploaded_file.key) as s3:
                with TempfileBackedReader(s3) as tempio:
                    with BufferedReader(tempio) as bufio:
                        result = parse_bytesio(bufio, mime_type, None)
        except ResponseError as err:
            result = ProcessResult(error=str(err))
    else:
        # Report `uploaded_file.name`: it is the attribute this function
        # already uses for the extension lookup and for the committed
        # metadata, so the error path relies on nothing extra.
        result = ProcessResult(
            error=(f'Error parsing {uploaded_file.name}: '
                   'unknown content type'))

    if result.error:
        # delete uploaded file, we probably can't ever use it
        uploaded_file.delete()

    result.truncate_in_place_if_too_big()
    result.sanitize_in_place()

    # uuid and name are read from the in-memory object, also after delete().
    await ModuleImpl.commit_result(wf_module,
                                   result,
                                   stored_object_json=[{
                                       'uuid':
                                       uploaded_file.uuid,
                                       'name':
                                       uploaded_file.name
                                   }])

    # When parsing succeeded the UploadedFile is kept, so that we can reparse
    # later or allow higher row limit or download original, etc.
Пример #2
0
async def parse_uploaded_file(uploaded_file) -> ProcessResult:
    """
    Build a ProcessResult from an UploadedFile.

    The MIME type is chosen from `_ExtensionMimeTypes` by the file name's
    extension. An unknown extension, or a `ResponseError` raised while
    reading or parsing, produces a ProcessResult holding only an error
    message. Otherwise the parsed result is truncated and sanitized in place
    before being returned.

    This is a coroutine because parsing can take a long time: `parse_bytesio`
    is run through the event loop's default executor and awaited.

    TODO make this synchronous, and move it somewhere sensible. See comments
    surrounding "upload_DELETEME".
    """
    suffix = '.' + uploaded_file.name.split('.')[-1]
    content_type = _ExtensionMimeTypes.get(suffix, None)
    event_loop = asyncio.get_event_loop()

    if not content_type:
        return ProcessResult(error=(
            f'Error parsing {uploaded_file.name}: unknown content type'))

    try:
        # One `with` statement; the readers are still entered left to right
        # and closed in reverse order, exactly like nested blocks.
        with open_for_read(uploaded_file.bucket, uploaded_file.key) as s3, \
                TempfileBackedReader(s3) as tempio, \
                BufferedReader(tempio) as bufio:
            parsed = await event_loop.run_in_executor(
                None, parse_bytesio, bufio, content_type, None)
    except ResponseError as err:
        return ProcessResult(error=str(err))

    parsed.truncate_in_place_if_too_big()
    parsed.sanitize_in_place()

    # The UploadedFile is deliberately left alone: it may be reparsed later,
    # read with a higher row limit, or downloaded in its original form.
    return parsed
Пример #3
0
def _external_module_get_html_bytes(id_name: str,
                                    version: str) -> Optional[bytes]:
    """
    Read the HTML file stored for one version of an external module.

    Keys under the prefix '<id_name>/<version>/' in
    `minio.ExternalModulesBucket` are listed; the first one ending in
    '.html' is opened and its contents returned. Returns None when no key
    has that suffix.
    """
    key_prefix = '%s/%s/' % (id_name, version)
    candidate_keys = minio.list_file_keys(minio.ExternalModulesBucket,
                                          key_prefix)
    html_key = next(
        (key for key in candidate_keys if key.endswith('.html')),
        None,
    )
    if html_key is None:
        return None  # there is no HTML file

    with minio.open_for_read(minio.ExternalModulesBucket,
                             html_key) as html_file:
        return html_file.read()
Пример #4
0
async def parse_uploaded_file(uploaded_file) -> ProcessResult:
    """
    Build a ProcessResult from an UploadedFile.

    The MIME type is chosen from `_ExtensionMimeTypes` by the file name's
    extension. An unknown extension, or a `ResponseError` raised while
    reading or parsing, produces a ProcessResult holding only an error
    message. Otherwise the parsed result is truncated and sanitized in place
    before being returned.

    NOTE(review): `parse_bytesio` is called directly, not awaited, so the
    coroutine does not yield while parsing.
    """
    suffix = '.' + uploaded_file.name.split('.')[-1]
    content_type = _ExtensionMimeTypes.get(suffix, None)

    if not content_type:
        return ProcessResult(error=(
            f'Error parsing {uploaded_file.name}: unknown content type'
        ))

    try:
        # One `with` statement; the readers are still entered left to right
        # and closed in reverse order, exactly like nested blocks.
        with open_for_read(uploaded_file.bucket, uploaded_file.key) as s3, \
                TempfileBackedReader(s3) as tempio, \
                BufferedReader(tempio) as bufio:
            parsed = parse_bytesio(bufio, content_type, None)
    except ResponseError as err:
        return ProcessResult(error=str(err))

    parsed.truncate_in_place_if_too_big()
    parsed.sanitize_in_place()

    # The UploadedFile is deliberately left alone: it may be reparsed later,
    # read with a higher row limit, or downloaded in its original form.
    return parsed