Example #1
def write_csv_to_s3(data, is_owner, **kwargs):  # pragma: no cover
    # cd_temp, csv_file_for, get_downloads_path, and S3 are project helpers
    # assumed to be in scope; os and zipfile come from the stdlib.
    cwd = cd_temp()  # switch into a temp directory, keeping the old cwd
    csv_file = csv_file_for(data, **kwargs)
    csv_file.close()
    zip_file_name = csv_file.name + '.zip'
    with zipfile.ZipFile(zip_file_name, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        zip_file.write(csv_file.name)

    # The downloads path doubles as the S3 key prefix for the zip.
    file_path = get_downloads_path(is_owner) + zip_file_name
    S3.upload_file(file_path)
    os.chdir(cwd)  # restore the original working directory
    return S3.url_for(file_path)
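
The S3 object used above is a project helper, not boto3 itself. A minimal sketch of what such a wrapper might look like on top of boto3 (the bucket name and function bodies here are hypothetical, not from the original module):

import boto3

_client = boto3.client('s3')
_BUCKET = 'my-exports-bucket'  # hypothetical bucket


def upload_file(key, file_path=None, **_ignored):
    # Upload a local file to S3 under the given key.
    _client.upload_file(file_path or key, _BUCKET, key)


def url_for(key, expires=3600):
    # Presigned GET URL for the uploaded object.
    return _client.generate_presigned_url(
        'get_object',
        Params={'Bucket': _BUCKET, 'Key': key},
        ExpiresIn=expires,
    )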
Example #2
def write_csv_to_s3(data, is_owner, **kwargs):  # pragma: no cover
    # Same flow as Example #1, but with an explicit S3 key and
    # content-type metadata on the upload.
    cwd = cd_temp()  # switch into a temp directory, keeping the old cwd
    csv_file = csv_file_for(data, **kwargs)
    csv_file.close()
    zip_file_name = csv_file.name + '.zip'
    with zipfile.ZipFile(zip_file_name, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        zip_file.write(csv_file.name)

    # ZipFile.filename is still readable after the with-block closes it.
    key = get_downloads_path(is_owner) + zip_file.filename
    S3.upload_file(key=key,
                   file_path=os.path.abspath(zip_file.filename),
                   binary=True,
                   metadata=dict(ContentType='application/zip'),
                   headers={'content-type': 'application/zip'})
    os.chdir(cwd)  # restore the original working directory
    return S3.url_for(key)
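
Compared with Example #1, this version passes an explicit key, the absolute local path, and content-type metadata. With plain boto3, the equivalent upload would set the content type through ExtraArgs (a sketch reusing the hypothetical _client and _BUCKET from the note above):

import os

_client.upload_file(
    os.path.abspath(zip_file_name), _BUCKET, key,
    ExtraArgs={'ContentType': 'application/zip'},
)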
Example #3
def write_export_file(version, resource_type, resource_serializer_type,
                      logger):  # pylint: disable=too-many-statements,too-many-locals
    cwd = cd_temp()
    logger.info('Writing export file to tmp directory: %s' % cwd)

    logger.info('Found %s version %s.  Looking up resource...' %
                (resource_type, version.version))
    resource = version.head
    logger.info('Found %s %s.  Serializing attributes...' %
                (resource_type, resource.mnemonic))

    resource_serializer = get_class(resource_serializer_type)(version)
    data = resource_serializer.data
    resource_string = json.dumps(data, cls=encoders.JSONEncoder)
    logger.info('Done serializing attributes.')

    batch_size = 1000
    concepts_qs = version.concepts
    mappings_qs = version.mappings
    if resource_type != 'collection':
        concepts_qs = concepts_qs.filter(is_active=True)
        mappings_qs = mappings_qs.filter(is_active=True)

    total_concepts = concepts_qs.count()
    total_mappings = mappings_qs.count()

    # Strip the serialized resource's closing '}' so the concepts and
    # mappings arrays can be spliced into the same JSON object.
    with open('export.json', 'w') as out:
        out.write('%s, "concepts": [' % resource_string[:-1])

    if total_concepts:
        logger.info('%s has %d concepts. Getting them in batches of %d...' %
                    (resource_type.title(), total_concepts, batch_size))
        concept_serializer_class = get_class(
            'core.concepts.serializers.ConceptVersionDetailSerializer')
        for start in range(0, total_concepts, batch_size):
            end = min(start + batch_size, total_concepts)
            logger.info('Serializing concepts %d - %d...' % (start + 1, end))
            concept_versions = concepts_qs.prefetch_related(
                'names',
                'descriptions').select_related('parent__organization',
                                               'parent__user')[start:end]
            concept_serializer = concept_serializer_class(concept_versions,
                                                          many=True)
            concept_data = concept_serializer.data
            concept_string = json.dumps(concept_data, cls=encoders.JSONEncoder)
            # Strip the surrounding '[' and ']' so successive batches
            # concatenate into a single JSON array.
            concept_string = concept_string[1:-1]
            with open('export.json', 'a') as out:
                out.write(concept_string)
                if end != total_concepts:
                    out.write(', ')
        logger.info('Done serializing concepts.')
    else:
        logger.info('%s has no concepts to serialize.' %
                    (resource_type.title()))

    with open('export.json', 'a') as out:
        out.write('], "mappings": [')

    if total_mappings:
        logger.info('%s has %d mappings. Getting them in batches of %d...' %
                    (resource_type.title(), total_mappings, batch_size))
        mapping_serializer_class = get_class(
            'core.mappings.serializers.MappingDetailSerializer')
        for start in range(0, total_mappings, batch_size):
            end = min(start + batch_size, total_mappings)
            logger.info('Serializing mappings %d - %d...' % (start + 1, end))
            mappings = mappings_qs.select_related(
                'parent__organization',
                'parent__user',
                'from_concept',
                'to_concept',
                'from_source__organization',
                'from_source__user',
                'to_source__organization',
                'to_source__user',
            )[start:end]
            mapping_serializer = mapping_serializer_class(mappings, many=True)
            mapping_data = mapping_serializer.data
            mapping_string = json.dumps(mapping_data, cls=encoders.JSONEncoder)
            mapping_string = mapping_string[1:-1]  # strip '[' and ']' as above
            with open('export.json', 'a') as out:
                out.write(mapping_string)
                if end != total_mappings:
                    out.write(', ')
        logger.info('Done serializing mappings.')
    else:
        logger.info('%s has no mappings to serialize.' %
                    (resource_type.title()))

    # Close the mappings array and the root JSON object.
    with open('export.json', 'a') as out:
        out.write(']}')

    with zipfile.ZipFile('export.zip', 'w', zipfile.ZIP_DEFLATED) as _zip:
        _zip.write('export.json')

    file_path = os.path.abspath('export.zip')
    logger.info(file_path)
    logger.info('Done compressing.  Uploading...')

    s3_key = version.export_path
    S3.upload_file(key=s3_key, file_path=file_path, binary=True)
    uploaded_path = S3.url_for(s3_key)
    logger.info('Uploaded to %s.' % uploaded_path)
    os.chdir(cwd)
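
The export never holds the full payload in memory: it writes the serialized resource minus its closing brace, then each batch of concepts or mappings with the surrounding '[' and ']' stripped, and finally the closing brackets. A stripped-down sketch of the same splice-and-append pattern (write_batched_array and its arguments are illustrative, not part of the original code):

import json


def write_batched_array(out_path, header_obj, batches):
    # Write '{<header fields>, "items": [<batch items...>]}' without
    # materializing the whole list in memory. header_obj must be a
    # non-empty dict so that dropping its closing '}' leaves valid JSON.
    with open(out_path, 'w') as out:
        out.write(json.dumps(header_obj)[:-1] + ', "items": [')
        first = True
        for batch in batches:
            chunk = json.dumps(batch)[1:-1]  # strip '[' and ']'
            if chunk and not first:
                out.write(', ')
            if chunk:
                first = False
            out.write(chunk)
        out.write(']}')


# write_batched_array('export.json', {'id': 'v1'},
#                     ([{'n': i}] for i in range(3)))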