import json
import os
from datetime import date
from StringIO import StringIO

import gcs_wrapper


def upload_etl_file(config, key_name, barcode2field2value, log, type_bio, remove_keys=[]):
    log.info('\tstart upload_etl_file(%s)' % (key_name))
    output_file = StringIO()
    for field2value in barcode2field2value.itervalues():
        # check the exclusion rules and skip this sample if any of them match
        skip = False
        for remove_key in remove_keys:
            if remove_key[0] in field2value and field2value[remove_key[0]] in remove_key[1]:
                log.warning("\t\tWARNING: %s samples should be excluded. Skipped sample: %s"
                            % (remove_key[0], field2value['SampleBarcode']))
                skip = True
                break
        if skip:
            continue
        output_file.write(json.dumps(field2value) + "\n")

    # write the newline-delimited JSON to a scratch file before uploading
    tmp_dir_parent = os.environ.get('ISB_TMP', '/tmp/')
    path = os.path.join(tmp_dir_parent, type_bio + '/')
    if not os.path.isdir(path):
        os.makedirs(path)
    file_path = path + type_bio + '.json'
    with open(file_path, 'w') as bio_file:
        bio_file.write(output_file.getvalue())
    output_file.close()

    bucket_name = config['buckets']['open']
    if config['upload_etl_files']:
        gcs_wrapper.upload_file(file_path, bucket_name, key_name, log)
        log.info('\tuploaded etl file')
    else:
        log.info('\tnot uploading etl file')
    log.info('\tfinish upload_etl_file')

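# Illustrative sketch (not part of the pipeline) of how a caller might invoke
# upload_etl_file. The config keys and gcs_wrapper call match the code above;
# the barcode, field names, GCS key, and exclusion values are made-up placeholders.
#
#   barcode2field2value = {
#       'TCGA-XX-XXXX-01A': {'SampleBarcode': 'TCGA-XX-XXXX-01A', 'Study': 'XXXX'},
#   }
#   # remove_keys is a list of (field_name, excluded_values) pairs; a sample whose
#   # field value appears in excluded_values is logged and skipped.
#   upload_etl_file(config, 'etl/clinical/clinical.json', barcode2field2value,
#                   log, 'clinical', remove_keys=[('Study', set(['XXXX']))])
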
def upload_latestarchive_file(config, archive_file_path, log):
    bucket_name = config['buckets']['open']
    key_name = '/%s/%s' % (config['latestarchive_folder'],
                           str(date.today()).replace('-', '_') + '_' + 'latestarchive.txt')
    if config['upload_files'] and config['upload_open']:
        log.info('\tuploading %s to %s' % (archive_file_path, key_name))
        gcs_wrapper.upload_file(archive_file_path, bucket_name, key_name, log)
    else:
        log.info('\tnot uploading %s to %s' % (archive_file_path, key_name))

def upload_files(config, archive_path, file2metadata, log):
    # TODO: for the DatafileNameKey, use the value already in the metadata
    files = os.listdir(archive_path)
    if 0 < len(files):
        bucket_name, key_prefix = get_bucket_key_prefix(config, file2metadata[files[0]])
        for file_name in files:
            metadata = file2metadata[file_name]
            key_name = key_prefix + metadata['DataLevel'].replace(' ', '_') + '/' + file_name
            # record the object key back onto the metadata so the caller sees the final GCS key
            metadata['DatafileNameKey'] = key_name
            if config['upload_files']:
                gcs_wrapper.upload_file(archive_path + file_name, bucket_name, key_name, log)
    else:
        log.warning('\tno files for %s' % (archive_path))

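# Illustrative sketch (not part of the pipeline) of the shape upload_files() expects.
# file2metadata maps each file name in archive_path to its metadata dict; only the
# 'DataLevel' field is read here, and 'DatafileNameKey' is written back in place.
# All literal values below are placeholders.
#
#   file2metadata = {
#       'some_data_file.txt': {'DataLevel': 'Level 3', 'Platform': 'SomePlatform'},
#   }
#   upload_files(config, '/tmp/some_archive_dir/', file2metadata, log)
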
def upload_sdrf_file(config, archive_path, file_name, metadata, log):
    center2platform = config['upload_archives']['mage-tab']
    try:
        if (metadata['DataCenterName'] not in center2platform or
                metadata['Platform'] not in center2platform[metadata['DataCenterName']]):
            log.info('\t\tskipping uploading %s from sdrf archive' % (file_name))
            return
    except Exception as e:
        log.exception('problem checking whether to upload SDRF file')
        raise e
    bucket_name = config['buckets']['open']
    key_name = getSDRFKeyName(file_name, metadata, log)
    if config['upload_files'] and config['upload_open']:
        gcs_wrapper.upload_file(archive_path + file_name, bucket_name, key_name, log)
    else:
        log.info('\t\tnot uploading %s from sdrf archive to %s' % (file_name, key_name))

def upload_file(config, file_path, key_name, log):
    bucket_name = config['buckets']['open']
    if config['upload_files']:
        log.info('\tuploading %s' % (key_name))
        gcs_wrapper.upload_file(file_path, bucket_name, key_name, log)

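# Illustrative sketch (not part of the pipeline) of the config keys these upload
# helpers read. The structure is inferred from the lookups above; the bucket and
# folder values are placeholders, and the real config likely carries more keys.
#
#   config = {
#       'buckets': {'open': 'my-open-bucket'},
#       'upload_files': True,
#       'upload_open': True,
#       'upload_etl_files': True,
#       'latestarchive_folder': 'latestarchive',
#       'upload_archives': {
#           'mage-tab': {'some.center.org': ['SomePlatform']},
#       },
#   }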