def sync(Lochness, subject, dry):
    delete = lochness.dropbox.delete_on_success(Lochness, Basename)
    logger.debug('delete_on_success for {0} is {1}'.format(Basename, delete))
    for dbx_sid in subject.dropbox[Module]:
        logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = cryptease.kdf(_passphrase)
        api_token = keyring.dropbox_api_token(Lochness, Module)
        client = dropbox.Dropbox(api_token)
        for datatype, products in CONFIG.items():
            dbx_head = os.path.join(os.sep, datatype, subject.study, dbx_sid)
            dbx_head_len = len(dbx_head)
            logger.debug('walking %s', dbx_head)
            for root, dirs, files in lochness.dropbox.walk(client, dbx_head):
                for f in files:
                    dbx_tail = os.path.join(root, f)[dbx_head_len:].lstrip(os.sep)
                    dbx_file = (dbx_head, dbx_tail)
                    product = _find_product(dbx_tail, products, subject=dbx_sid)
                    if not product:
                        continue
                    protect = product.get('protect', False)
                    compress = product.get('compress', False)
                    key = enc_key if protect else None
                    output_base = subject.protected_folder \
                        if protect else subject.general_folder
                    output_base = tree.get(datatype, output_base)
                    lochness.dropbox.save(client, dbx_file, output_base,
                                          key=key, compress=compress,
                                          delete=delete, dry=dry)
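# `_find_product` is called above (and again in the Box module below) but is not
# part of this listing. A minimal sketch, assuming it returns the first product
# definition whose regex pattern matches the path tail; the 'pattern' key and the
# '{subject}' placeholder are assumptions inferred from the call sites, not
# confirmed from the source.
import re

def _find_product(path_tail, products, subject=None):
    for product in products:
        # substitute the subject id into the pattern before matching
        pattern = product['pattern'].format(subject=subject)
        if re.match(pattern, path_tail):
            return product
    return None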
def sync(Lochness, subject, dry):
    delete = lochness.dropbox.delete_on_success(Lochness, Basename)
    logger.debug('delete_on_success for {0} is {1}'.format(Basename, delete))
    for dbx_sid in subject.dropbox[Module]:
        logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = cryptease.kdf(_passphrase)
        api_token = keyring.dropbox_api_token(Lochness, Module)
        client = dropbox.Dropbox(api_token)
        patterns = _batch_compile(CONFIG, dbx_sid)
        for category, datatype in _iterate(CONFIG):
            output_base = subject.protected_folder \
                if category == 'PROTECTED' else subject.general_folder
            output_base = tree.get(datatype, output_base)
            dbx_head = os.path.join(os.sep, datatype, subject.study)
            # shim the dropbox head for certain data types
            if datatype == 'onsite_interview':
                dbx_head = os.path.join(dbx_head, 'output')
            elif datatype == 'behav_qc':
                dbx_head = os.path.join(dbx_head, dbx_sid)
            dbx_head_len = len(dbx_head)
            for root, dirs, files in lochness.dropbox.walk(client, dbx_head):
                for f in files:
                    dbx_tail = os.path.join(root, f)[dbx_head_len:].lstrip(os.sep)
                    dbx_file = (dbx_head, dbx_tail)
                    if patterns[datatype].match(dbx_tail):
                        key = enc_key if category == 'PROTECTED' else None
                        lochness.dropbox.save(client, dbx_file, output_base,
                                              key=key, delete=delete, dry=dry)
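# `_batch_compile` and `_iterate` are module helpers that do not appear in this
# listing. A minimal sketch of `_batch_compile`, assuming CONFIG maps a category
# ('PROTECTED' or 'GENERAL') to data types and their pattern lists; that layout
# is an assumption inferred from _iterate's (category, datatype) pairs and from
# the patterns[datatype].match(...) call above.
import re

def _batch_compile(config, subject_id):
    patterns = {}
    for category, datatypes in config.items():
        for datatype, pattern_list in datatypes.items():
            # substitute the subject id, then join all patterns for one data
            # type into a single alternation so one match() call suffices
            joined = '|'.join(p.format(subject=subject_id) for p in pattern_list)
            patterns[datatype] = re.compile(joined)
    return patterns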
def write_keyring_and_encrypt(self):
    with open(self.keyring_loc, 'w') as f:
        json.dump(self.keyring, f)
    # re-open the plain-text keyring and encrypt it with an empty passphrase
    with open(self.keyring_loc, 'rb') as keyring_content:
        key = crypt.kdf('')
        crypt.encrypt(keyring_content, key,
                      filename=self.tmp_lochness_dir / '.lochness.enc')
def main():
    parser = ap.ArgumentParser(description='File encryption/decryption utility')
    group1 = parser.add_mutually_exclusive_group(required=True)
    group1.add_argument('--decrypt', action='store_true', help='Decrypt file')
    group1.add_argument('--encrypt', action='store_true', help='Encrypt file')
    parser.add_argument('-o', '--output-file', help='Output file')
    parser.add_argument('--debug', action='store_true',
                        help='Enable debug messages')
    parser.add_argument('file', help='File to encrypt or decrypt')
    args = parser.parse_args()

    # read passphrase (ask twice for --encrypt)
    if 'ENCRYPT_PASS' in os.environ:
        passphrase = os.environ['ENCRYPT_PASS']
    else:
        passphrase = gp.getpass('enter passphrase: ')
        if args.encrypt:
            reentered = gp.getpass('re-enter passphrase: ')
            if passphrase != reentered:
                logger.critical('passphrases do not match')
                sys.exit(1)

    # get file handle
    raw = get(args.file)

    # get key using file header, or build a new one
    if args.decrypt:
        key = crypt.key_from_file(raw, passphrase)
    else:
        key = crypt.kdf(passphrase)

    # lock or unlock the file
    if args.decrypt:
        if not args.output_file:
            stdout = os.fdopen(sys.stdout.fileno(), 'wb')
            for chunk in crypt.decrypt(raw, key):
                stdout.write(chunk)
        elif overwrite(args.output_file):
            logger.info('saving {}'.format(args.output_file))
            crypt.decrypt(raw, key, filename=args.output_file)
    elif args.encrypt:
        if not args.output_file:
            stdout = os.fdopen(sys.stdout.fileno(), 'wb')
            for chunk in crypt.encrypt(raw, key):
                stdout.write(chunk)
        elif overwrite(args.output_file):
            logger.info('saving {}'.format(args.output_file))
            crypt.encrypt(raw, key, filename=args.output_file)
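# `get` and `overwrite` are small helpers of this script that are not shown.
# A minimal sketch of `overwrite`, assuming it guards against clobbering an
# existing output file by prompting the user; this shape is an assumption, not
# the script's confirmed implementation.
def overwrite(path):
    if os.path.exists(path):
        answer = input('{0} exists, overwrite? [y/N] '.format(path))
        return answer.strip().lower() == 'y'
    return True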
def update_keyring_and_encrypt_DPACC(tmp_lochness_dir: str):
    keyring_loc = Path(tmp_lochness_dir) / 'lochness.json'
    with open(keyring_loc, 'r') as f:
        keyring = json.load(f)

    keyring['lochness_sync']['PATH_IN_HOST'] = '.'

    with open(keyring_loc, 'w') as f:
        json.dump(keyring, f)

    # re-encrypt the updated keyring with an empty passphrase
    with open(keyring_loc, 'rb') as keyring_content:
        key = crypt.kdf('')
        crypt.encrypt(keyring_content, key,
                      filename=Path(tmp_lochness_dir) / '.lochness.enc')
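# Round-trip companion to the two keyring helpers above: a minimal sketch of
# reading the encrypted keyring back with the same empty passphrase. The
# function name is hypothetical; key_from_file followed by decrypt mirrors the
# pattern in test_write_file below.
def read_encrypted_keyring(tmp_lochness_dir):
    with open(Path(tmp_lochness_dir) / '.lochness.enc', 'rb') as f:
        # the KDF salt lives in the file header, so derive the key from it
        key = crypt.key_from_file(f, '')
        content = b''.join(crypt.decrypt(f, key))
    return json.loads(content)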
def main():
    parser = ap.ArgumentParser(description='File encryption/decryption utility')
    parser.add_argument('--keyring', default='~/.nrg-keyring.enc',
                        type=os.path.expanduser, help='Keyring file')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--phoenix-study',
                       help='Return passphrase for PHOENIX study')
    group.add_argument('--beiwe-study',
                       help='Return passphrase for Beiwe study')
    parser.add_argument('--debug', action='store_true',
                        help='Enable debug messages')
    args = parser.parse_args()

    # read encrypted keyring file
    raw = open(args.keyring, 'rb')

    # get salt from raw file header
    header, _ = crypt.read_header(raw)
    salt = header['kdf'].salt

    # read passphrase from the environment or the command line
    if 'NRG_KEYRING_PASS' in os.environ:
        passphrase = os.environ['NRG_KEYRING_PASS']
    else:
        passphrase = gp.getpass('enter passphrase: ')

    # construct decryption key
    key = crypt.kdf(passphrase, salt=salt)

    # decrypt the keyring content (chunks are bytes)
    content = b''
    for chunk in crypt.decrypt(raw, key):
        content += chunk
    js = json.loads(content)

    # return what the user requested
    try:
        if args.phoenix_study:
            sys.stdout.write(js['kitchen']['SECRETS'][args.phoenix_study])
        elif args.beiwe_study:
            sys.stdout.write(js['beiwe']['SECRETS'][args.beiwe_study])
    except KeyError as e:
        logger.critical('key not found {0}'.format(e))
        sys.exit(1)
def test_write_file():
    original = b'''Lorem ipsum dolor sit amet, consectetur adipiscing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim
ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip
ex ea commodo consequat.'''
    passphrase = 'foo bar biz bat'
    key = crypt.kdf(passphrase)
    with tf.NamedTemporaryFile(dir=DIR, prefix='enc', delete=False) as enc_tmp:
        crypt.encrypt_to_file(enc_tmp.name, io.BytesIO(original), key)
    key = None
    with tf.NamedTemporaryFile(dir=DIR, prefix='dec', delete=False) as dec_tmp:
        with open(enc_tmp.name, 'rb') as fo:
            # re-derive the key from the encrypted file header
            key = crypt.key_from_file(fo, passphrase)
            crypt.decrypt_to_file(dec_tmp.name, fo, key)
    os.remove(enc_tmp.name)
    with open(dec_tmp.name, 'rb') as fo:
        decrypted = fo.read()
    os.remove(dec_tmp.name)
    assert decrypted == original
def save(Keyring, archive, user_id, output_dir, lock=None, passphrase=None):
    '''
    The order of operations here is important to ensure the ability to reach
    a state of consistency:
      1. Save the file
      2. Update the local registry
    '''
    num_saved = 0
    if not archive:
        return num_saved
    encoding = locale.getpreferredencoding()
    if not lock:
        lock = list()
    elif not passphrase:
        raise SaveError('if you wish to lock a data type, '
                        'you need a passphrase')
    lock_ext = LOCK_EXT.lstrip('.')

    # open registry file in downloaded archive
    logger.debug('reading registry file from beiwe archive')
    with archive.open('registry', 'r') as fo:
        registry = json.loads(fo.read().decode('utf-8'))

    # if archive registry contains any entries, process them
    if registry:
        # iterate over archive members
        for member in archive.namelist():
            # skip over the registry file
            if member == 'registry':
                continue
            # get information about the current archive member
            info = archive.getinfo(member)
            # parse the data type name from the current archive member
            member_datatype = _parse_datatype(member, user_id)
            # check if data type should be encrypted
            encrypt = member_datatype in lock
            logger.debug('processing archive member: %s (lock=%s)',
                         member, encrypt)
            # create target name, adding lock extension if necessary
            target = member
            if encrypt:
                target = '{0}.{1}'.format(target, lock_ext)
            target_abs = os.path.join(output_dir, target)
            target_dir = os.path.dirname(target_abs)
            # remove a stale target if it exists
            if os.path.exists(target_abs):
                os.remove(target_abs)
            # create target directory
            if not os.path.exists(target_dir):
                _makedirs(target_dir, umask=0o5022)
            # read archive member content and encrypt it if necessary
            with archive.open(member) as content:
                if encrypt:
                    key = crypt.kdf(passphrase)
                    crypt.encrypt(content, key, filename=target_abs,
                                  permissions=0o0644)
                else:
                    # write content to persistent storage
                    _atomic_write(target_abs, content.read())
            num_saved += 1

    # update local registry file to avoid re-downloading these files
    local_registry_file = os.path.join(output_dir, user_id, '.registry')
    local_registry = dict()
    if os.path.exists(local_registry_file):
        with open(local_registry_file, 'r') as fo:
            local_registry = json.load(fo)
    local_registry.update(registry)
    local_registry_str = json.dumps(local_registry, indent=2)
    _atomic_write(local_registry_file, local_registry_str.encode(encoding))

    # return the number of saved files
    return num_saved
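# `_atomic_write` is used above but not shown in this listing. A minimal
# sketch, assuming the usual temp-file-plus-rename pattern so readers never
# observe a partially written file; the temp naming and default permissions
# here are assumptions.
import os
import tempfile

def _atomic_write(filename, content, permissions=0o0644):
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(filename), prefix='.tmp-')
    try:
        with os.fdopen(fd, 'wb') as fo:
            fo.write(content)
        os.chmod(tmp, permissions)
        os.replace(tmp, filename)  # atomic on POSIX within one filesystem
    except Exception:
        os.remove(tmp)
        raise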
def sync_module(Lochness: 'lochness.config',
                subject: 'subject.metadata',
                study_name: 'mediaflux.study_name',
                dry: bool):
    '''Sync mediaflux data for the subject'''
    if dry:
        raise NotImplementedError('--dry option is not implemented')

    study_basename = study_name.split('.')[1]

    for mf_subid in subject.mediaflux[study_name]:
        logger.debug(f'exploring {subject.study}/{subject.id}')
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = enc.kdf(_passphrase)
        mflux_cfg = keyring.mediaflux_api_token(Lochness, study_name)
        mf_base = base(Lochness, study_basename)
        logger.debug(f'mediaflux base: {mf_base}')

        file_patterns = Lochness['mediaflux'][study_basename]['file_patterns']
        # file_patterns example:
        #   actigraphy:
        #       - vendor: Philips
        #         product: Actiwatch 2
        #         data_dir: all_BWH_actigraphy
        #         pattern: 'accel/*csv'
        #         protect: True
        #       - vendor: Activinsights
        #         product: GENEActiv
        #         data_dir: all_BWH_actigraphy
        #         pattern: 'GENEActiv/*bin,GENEActiv/*csv'
        for datatype, products in file_patterns.items():
            logger.debug(f'{datatype}: {products}')
            for prod in products:
                for patt in prod['pattern'].split(','):
                    # tolerate spaces after the comma, e.g.
                    # pattern: 'GENEActiv/*bin, GENEActiv/*csv'
                    patt = patt.strip()
                    if '*' not in patt:
                        raise PatternError('Mediaflux pattern must include an '
                                           'asterisk, e.g. *csv or GENEActiv/*csv')

                    # construct mediaflux remote dir
                    mf_remote_pattern = pjoin(mf_base, prod['data_dir'],
                                              mf_subid, patt)
                    mf_remote_dir = dirname(mf_remote_pattern)

                    # obtain mediaflux remote paths
                    with tempfile.TemporaryDirectory() as tmpdir:
                        diff_path = pjoin(tmpdir, 'diff.csv')
                        cmd = ' '.join(['unimelb-mf-check',
                                        '--mf.config', mflux_cfg,
                                        '--nb-retries 5',
                                        '--direction down', tmpdir,
                                        mf_remote_dir,
                                        '-o', diff_path])
                        p = Popen(cmd, shell=True)
                        p.wait()
                        # ENH if dry: exit()

                        if not isfile(diff_path):
                            continue

                        df = pd.read_csv(diff_path)
                        for remote in df['SRC_PATH'].values:
                            if pd.isna(remote):
                                continue
                            if not re.search(patt.replace('*', '(.+?)'), remote):
                                continue
                            remote = remote.split(':')[1]

                            # construct local path
                            protect = prod.get('protect', True)
                            processed = prod.get('processed', False)
                            key = enc_key if protect else None
                            subj_dir = subject.protected_folder \
                                if protect else subject.general_folder
                            mf_local = str(tree.get(datatype, subj_dir,
                                                    processed=processed,
                                                    BIDS=Lochness['BIDS']))
                            # ENH set different permissions
                            # GENERAL: 0o755, PROTECTED: 0o700
                            os.makedirs(mf_local, exist_ok=True)

                            # subprocess call unimelb-mf-download
                            cmd = ' '.join(['unimelb-mf-download',
                                            '--mf.config', mflux_cfg,
                                            '-o', mf_local,
                                            '--nb-retries 5',
                                            f'"{remote}"'])
                            p = Popen(cmd, shell=True)
                            p.wait()

                            # verify checksum after download completes;
                            # if it does not match, data will be downloaded again
                            # ENH should we verify checksum 5 times?
                            cmd += ' --csum-check'
                            p = Popen(cmd, shell=True)
                            p.wait()
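# Note on the glob-to-regex conversion above: patt.replace('*', '(.+?)') leaves
# other regex metacharacters in the pattern unescaped. If the patterns are meant
# as shell-style globs, fnmatch.translate is a stricter alternative; a small
# illustration (not part of the module):
import fnmatch
import re

regex = re.compile(fnmatch.translate('GENEActiv/*csv'))
assert regex.match('GENEActiv/subject01.csv')
assert not regex.match('GENEActiv/subject01.bin')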
def sync_module(Lochness: 'lochness.config',
                subject: 'subject.metadata',
                module_name: 'box.module_name',
                dry: bool):
    '''Sync box data for the subject'''
    # only the module_name string without 'box.'
    module_basename = module_name.split('.')[1]

    # delete on success
    delete = delete_on_success(Lochness, module_basename)
    logger.debug(f'delete_on_success for {module_basename} is {delete}')

    for bx_sid in subject.box[module_name]:
        logger.debug(f'exploring {subject.study}/{subject.id}')
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = enc.kdf(_passphrase)
        client_id, client_secret, api_token = keyring.box_api_token(
            Lochness, module_name)

        # box authentication
        auth = OAuth2(
            client_id=client_id,
            client_secret=client_secret,
            access_token=api_token,
        )
        client = Client(auth)

        bx_base = base(Lochness, module_basename)

        # get the id of the bx_base path in box
        bx_base_obj = get_box_object_based_on_name(client, bx_base, '0')
        if bx_base_obj is None:
            logger.debug('Root of the box is not found')
            continue

        # loop through the items defined for the BOX data
        file_patterns = Lochness['box'][module_basename]['file_patterns']
        for datatype, products in file_patterns.items():
            subject_obj = get_box_object_based_on_name(client, bx_sid,
                                                       bx_base_obj.id)
            if subject_obj is None:
                logger.debug(f'{bx_sid} is not found under {bx_base_obj}')
                continue

            datatype_obj = get_box_object_based_on_name(client, datatype,
                                                        subject_obj.id)

            # full path
            bx_head = join(bx_base, datatype, bx_sid)
            logger.debug('walking %s', bx_head)

            # if the directory is empty
            if datatype_obj is None:
                continue

            # walk through the root directory
            for root, dirs, files in walk_from_folder_object(bx_head,
                                                             datatype_obj):
                for box_file_object in files:
                    bx_tail = join(basename(root), box_file_object.name)
                    product = _find_product(bx_tail, products, subject=bx_sid)
                    if not product:
                        continue

                    protect = product.get('protect', False)
                    output_base = subject.protected_folder \
                        if protect else subject.general_folder

                    encrypt = product.get('encrypt', False)
                    key = enc_key if encrypt else None

                    processed = product.get('processed', False)

                    # For DPACC, get processed from the config.yml
                    output_base = tree.get(datatype, output_base,
                                           processed=processed,
                                           BIDS=Lochness['BIDS'])

                    compress = product.get('compress', False)
                    save(box_file_object,
                         (root, box_file_object.name),
                         output_base,
                         key=key,
                         compress=compress,
                         delete=delete,
                         dry=dry)
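# `walk_from_folder_object` and `get_box_object_based_on_name` are helpers of
# this module that do not appear in the listing. A minimal sketch of
# `walk_from_folder_object`, assuming an os.walk-style traversal built on
# boxsdk's get_items(); the recursion and yield shape are inferred from the
# (root, dirs, files) unpacking above, not confirmed from the source.
from os.path import join

def walk_from_folder_object(root, folder_object):
    dirs, files = [], []
    for item in folder_object.get_items():
        (dirs if item.type == 'folder' else files).append(item)
    yield root, dirs, files
    for d in dirs:
        # descend into each subfolder, extending the pseudo-path
        yield from walk_from_folder_object(join(root, d.name), d)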