def add_seqr_sample_to_locals3(sample: SeqrSample) -> str:
    """Stage a sample's VCF from S3 into HDFS, if it is not already there.

    Downloads the VCF locally with the AWS CLI, pushes it into HDFS under
    "vcfs/<project>/<filename>", removes the local copy, and returns that
    HDFS-relative path.

    NOTE(review): the existence check looks under hdfs:///user/hdfs/ while
    the `hdfs dfs -put` destination is relative — confirm both resolve to
    the same directory for the user this job runs as.
    """
    import subprocess

    parts = parse_vcf_s3_path(sample.path_to_vcf)
    local_filename = f"vcfs/{sample.project}/{parts['filename']}"
    if not hl.hadoop_is_file("hdfs:///user/hdfs/" + local_filename):
        # Argument-list form (shell=False) so S3 paths containing spaces or
        # shell metacharacters cannot inject commands; check=True raises on
        # failure instead of silently ignoring the exit code as os.system did.
        subprocess.run(['aws', 's3', 'cp', sample.path_to_vcf, '.'], check=True)
        subprocess.run(
            ['hdfs', 'dfs', '-put', parts['filename'], local_filename],
            check=True,
        )
        # Remove the temporary local download.
        os.remove(parts['filename'])
    return local_filename
def add_vcf_to_hdfs(s3path_to_vcf) -> str:
    """Download a VCF from S3 and stage it into the HDFS home directory.

    Skips the transfer when hdfs:///user/hadoop/<filename> already exists.
    Returns the bare filename in either case.

    NOTE(review): the existence check is under /user/hadoop/ but the put
    uses a relative destination — confirm the job runs as the `hadoop` user.
    """
    import subprocess

    parts = parse_vcf_s3_path(s3path_to_vcf)
    if hl.utils.hadoop_exists("hdfs:///user/hadoop/" + parts['filename']):
        return parts['filename']
    s3buckets = boto3.resource('s3')
    s3bucket = s3buckets.Bucket(parts['bucket'])
    s3bucket.download_file(parts['path'], parts['filename'])
    # Argument-list form (shell=False) prevents command injection via the
    # filename; check=True surfaces a failed put instead of ignoring the
    # exit status as os.system did.
    subprocess.run(['hdfs', 'dfs', '-put', parts['filename']], check=True)
    print(parts['filename'])
    # Remove the temporary local download.
    os.remove(parts['filename'])
    return parts['filename']
def add_seqr_sample_to_locals3(sample: SeqrSample) -> str:
    """Ensure the sample's VCF exists in the local 'seqr-data' S3 bucket.

    If the key "vcfs/<project>/<filename>" is missing, performs a
    server-side S3 copy from the source bucket (no local download/upload
    round-trip). Returns that key either way.

    NOTE(review): this redefines add_seqr_sample_to_locals3 declared
    earlier in the file (the HDFS-staging variant); at import time this
    later definition wins. Consider renaming one of them.
    """
    parts = parse_vcf_s3_path(sample.path_to_vcf)
    local_bucket = "seqr-data"
    local_filename = f"vcfs/{sample.project}/{parts['filename']}"
    s3 = boto3.client('s3')
    # list_objects_v2 supersedes the deprecated list_objects API;
    # KeyCount > 0 means at least one object matches the prefix.
    listing = s3.list_objects_v2(
        Bucket=local_bucket,
        EncodingType='url',
        Prefix=local_filename,
        RequestPayer='requester',
    )
    if listing.get('KeyCount', 0) == 0:
        copy_source = {'Bucket': parts['bucket'], 'Key': parts['path']}
        s3.copy(copy_source, local_bucket, local_filename)
    return local_filename