def populate_system_with_items(upload_id, data_dir, db, storage_client=None):
    """Ingest an unpacked upload directory: insert item records into Mongo and
    push the corresponding files to blob storage.

    Parameters:
        upload_id: identifier of the upload batch, stamped onto each item.
        data_dir: directory containing the uploaded files/metadata.
        db: database handle exposing an ``items`` collection with ``insert_one``.
        storage_client: optional blob-storage client; when ``None`` all blob
            operations are skipped (DB inserts still happen).

    Returns a summary dict with image/duplicate/broken counts and name lists.
    """
    df = upload_data_dir_to_dataframe(data_dir)
    items, filepaths = upload_data_to_items_and_filepaths(data_dir, df, upload_id)
    # Guard: an empty upload has no items[0] to derive a container from;
    # report an empty summary instead of raising IndexError.
    if not items:
        return dict(image_count=0,
                    duplicate_image_count=0,
                    duplicate_filenames=[],
                    broken_record_count=0,
                    broken_records=[])
    # All items in one upload share a group, so the container name is derived
    # from the first item.  NOTE(review): assumes homogeneous group_id — confirm.
    container_name = group_id_to_container_name(items[0].group_id)
    if storage_client and not exists(storage_client, container_name):
        create_container(storage_client, container_name)
    duplicates = []
    # Only the broken-filename list is needed here; the valid list is unused.
    _, brokens = split_items_to_valids_and_brokens(items)
    for item, filepath in zip(items, filepaths):
        if item.filename in brokens:
            continue  # skip records flagged as broken
        try:
            result = db.items.insert_one(item.get_dict())
        except DuplicateKeyError:
            # Already ingested previously: record and move on (best-effort).
            duplicates.append(item.filename)
            continue
        item._id = result.inserted_id
        blob_name = item_id_and_extension_to_blob_name(item._id, item.extension)
        blob_meta = dict(filename=item.filename)
        if storage_client:
            upload_blob(storage_client, container_name, blob_name, filepath, blob_meta)
    # image_count deliberately counts every parsed item, including broken ones,
    # matching the original behavior.
    return dict(image_count=len(items),
                duplicate_image_count=len(duplicates),
                duplicate_filenames=duplicates,
                broken_record_count=len(brokens),
                broken_records=brokens)
def upload_package_from_stream(filename, stream, db, storage_client):
    """Store an uploaded package stream in blob storage and track it in the DB.

    Creates the 'upload' container on first use, records an upload document,
    streams the payload into a blob named after that document's id, then marks
    the upload as 'uploaded'.

    Returns the blob name (the stringified upload-document id).
    """
    container = blob.group_id_to_container_name('upload')
    container_missing = not blob.exists(storage_client, container)
    if container_missing:
        blob.create_container(storage_client, container)

    # Register the upload first so the DB id can serve as the blob name.
    created = upload.create(db, filename)
    target_blob = str(created.inserted_id)
    metadata = dict(filename=filename)

    blob.create_blob_from_stream(storage_client, container, target_blob,
                                 stream, metadata)
    upload.update_state(db, target_blob, 'uploaded')
    return target_blob
def upload_export_file(client, local_filepath):
    """Upload a local TSV export to the 'download' container and return a
    time-limited SAS download URL for it.

    Parameters:
        client: blob-storage client.
        local_filepath: path to the TSV file to publish.

    Returns the full blob URL including a SAS token that expires after
    EXPORT_SAS_EXPIRY_MINUTES.
    """
    container_name = group_id_to_container_name('download')
    if not exists(client, container_name):
        create_container(client, container_name)
    # uuid4 instead of uuid1: uuid1 embeds the host MAC address and timestamp,
    # which would leak into a publicly shared URL; uuid4 is fully random.
    blob_name = str(uuid.uuid4()) + '.tsv'
    upload_blob(client, container_name, blob_name, local_filepath, None)
    sas = generate_download_sas(client, container_name, blob_name,
                                EXPORT_SAS_EXPIRY_MINUTES)
    return make_blob_url(client, container_name, blob_name, sas)