def test_platform_icons():
    """platform icons"""
    assert get_file('platform/unknown.svg')
    assert get_file('platform/linux.svg')
    assert get_file('platform/darwin.svg')
    assert get_file('platform/windows.svg')
    assert get_file('platform/bsd.svg')
def __init__(self, settings, debug_output):
    username = settings['omop_user']
    password = settings['omop_passwd']
    host = settings['omop_host']
    port = settings['omop_port']
    database = settings['omop_db']
    engine = settings.get('omop_engine', 'postgresql')
    self._parents = {}
    self._codes = {}
    if settings['omop_use_alt_hierarchies']:
        if 'ccs_diag' in settings:
            self._codes['Condition_ICD9CM'] = {}
            self._parents['Condition_ICD9CM'] = util.read_CCS(
                util.get_file(settings['ccs_diag'], debug_output),
                self._codes['Condition_ICD9CM'])
        if 'ccs_proc' in settings:
            self._codes['Procedure_ICD9CM'] = {}
            self._parents['Procedure_ICD9CM'] = util.read_CCS(
                util.get_file(settings['ccs_proc'], debug_output),
                self._codes['Procedure_ICD9CM'])
    # self.schema = settings['omop_schema']
    # self.db = sqlalchemy.create_engine('{0}://{1}:{2}@{3}:{4}/{5}'.format(
    #     engine, username, password, host, port, database))
    self.schema = 'DIKB_DEV'
    # Hard-coded Oracle DSN (credentials masked); the settings-based
    # connection string above is commented out.
    self.db = sqlalchemy.create_engine(
        'oracle://*****:*****@130.49.206.154:1521/dbmi01')
def main():
    file_name = util.get_file("Select mtx or npz file")
    isMtx = file_name.split('.')[-1] == 'mtx'
    file_row_head = util.get_file("Select gene names")
    file_barcodes = util.get_file("Select barcodes file")
    spr, row_heads, barcodes, output_file = get_data_from_files(
        file_name, file_row_head, file_barcodes, isMtx)
    split_and_convert(spr, row_heads, barcodes, output_file)
def get_file(settings, key, default):
    if key in settings:
        file = settings[key]
    else:
        file = default
    settings[key] = file
    return util.get_file(file, debugOutput)
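# Usage sketch for the wrapper above (hypothetical settings dict; note the
# side effect that the resolved value is cached back into the settings dict):
# settings = {'ccs_diag': 'diag.csv'}
# fd = get_file(settings, 'ccs_proc', 'proc_default.csv')  # falls back to default
# assert settings['ccs_proc'] == 'proc_default.csv'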
def __init__(self): self.sheets = get_file("/kf16/singatakoronakensa.html", "xlsx", True)["Sheet1"] self.inspections_count = 2 self._inspections_json = {} self._inspections_summary_json = {} self.get_inspections()
def genes_to_ensg(genes):
    '''Convert the list of gene names to a list of ENSG identifiers.

    Arguments:
        genes {list} -- list of genes to be converted to ENSG

    Returns:
        list -- list of ENSG values converted from genes
    '''
    try:
        f = open(GENE_TO_ENSG)
    except FileNotFoundError:
        f = util.get_file("Select mappings file")
    m = list(map(lambda x: x.replace('\n', '').split(','), f.readlines()))
    from_map = [line[1] for line in m]
    to_map = [line[0] for line in m]
    ensg = []
    for gene in genes:
        try:
            ensg.append(to_map[from_map.index(gene)])
        except ValueError:
            # Gene not found in the mapping; keep the original name.
            ensg.append(gene)
    f.close()
    return ensg
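# Usage sketch (hypothetical call; ENSG00000141510 is the Ensembl ID for TP53,
# but the actual result depends on the GENE_TO_ENSG mapping file):
# >>> genes_to_ensg(['TP53', 'NOT_A_GENE'])
# ['ENSG00000141510', 'NOT_A_GENE']  # unmapped names pass through unchanged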
def main(sysargv):
    # open PDB to convert
    pdb_file = util.get_file(sysargv, 0, "r", "PDB")
    # open the RES file, using the name of the PDB
    res_file = open(os.path.splitext(pdb_file.name)[0] + ".res", "w")
    line_dict = {}
    # read the PDB data
    pdb_lines = pdb_file.readlines()
    for line in pdb_lines:
        if re.match("ATOM", line) or re.match("HETATM", line):
            # does not find the right columns when there is no whitespace
            # separating them
            cols = re.findall(r"\S+", line)
            try:
                key = (cols[4], int(cols[5]))
                if key not in line_dict:
                    line_dict[key] = "NATAA"
            except (IndexError, ValueError):
                pass
    # write data to RES file
    res_file.write("USE_INPUT_SC\nEX 1 EX 2\nstart\n")
    for key in sorted(line_dict.keys()):
        line = str(key[1]).rjust(4) + key[0].rjust(3) + " " + line_dict[key] + "\n"
        res_file.write(line)
    pdb_file.close()
    res_file.close()
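# The comment above notes that whitespace splitting fails when adjacent PDB
# columns run together. A sketch of a fixed-width alternative, assuming the
# standard PDB layout (chain ID in column 22, residue number in columns 23-26):
# chain = line[21]
# resnum = int(line[22:26])
# key = (chain, resnum)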
def crawler():
    # auto_url = AutoGernateUrlQueue(22958, 1, "http://shooter.cn/xml/list/sub/%s/%s.xml", 100)
    # auto_url = AutoGernateUrlQueue(253574, 1, "http://shooter.cn/xml/sub/%s/%s.xml", 100)
    auto_url = AutoGernateUrlQueue(100000, 1, "http://shooter.cn/xml/sub/%s/%s.xml", 100)
    while True:
        url = auto_url.pop()
        if url:
            url_id = get_url_id(url)
            data = get_html_data(url)
            if data:
                caption_id = get_caption_id(data)
                write_data("%s%s" % (path, url_id), data)
                util.get_file(caption_id, "%s%s" % (path, url_id))
        else:
            print("crawler end")
            break
def __init__(self):
    # Fetch the data files up front so that, if a fetch fails, processing
    # can be aborted at this point.
    self.patients_sheet = get_file(
        "/kk03/corona_kanjyajyokyo.html", "xlsx", True).worksheets[0]
    self.inspections_sheet = requests_file(
        "/kk03/documents/pcr.xlsx", "xlsx", True).worksheets[0]
    # self.pdf_texts = get_file('/kk03/corona_hasseijyokyo.html', "pdf")
    self.summary_sheet = requests_file(
        "/kk03/documents/yousei.xlsx", "xlsx", True).worksheets[0]
    # First rows at which to start counting the amount of data (row counts)
    self.patients_count = patients_first_cell
    self.clusters_count = clusters_first_cell
    self.inspections_count = inspections_first_cell
    self.data_count = main_summary_first_cell
    # List holding the cluster overview
    self.clusters = []
    # Total number of sickbeds. TODO: needs occasional manual updates;
    # automating this would be desirable.
    self.sickbeds_count = 246
    # List holding values such as inspection and hospitalization counts
    self.summary_values = []
    # Internal variables
    self._patients_json = {}
    self._patients_summary_json = {}
    self._clusters_json = {}
    self._clusters_summary_json = {}
    self._age_json = {}
    self._age_summary_json = {}
    self._inspections_json = {}
    self._inspections_summary_json = {}
    self._main_summary_json = {}
    self._sickbeds_summary_json = {}
    # Initialization (find the maximum row counts)
    self.get_patients()
    self.get_clusters()
    self.get_inspections()
    self.get_data_count()
def __init__(self, settings, debug_output):
    username = settings['omop_user']
    password = settings['omop_passwd']
    host = settings['omop_host']
    port = settings['omop_port']
    database = settings['omop_db']
    engine = settings.get('omop_engine', 'postgresql')
    self._parents = {}
    self._codes = {}
    if settings['omop_use_alt_hierarchies']:
        if 'ccs_diag' in settings:
            self._codes['Condition_ICD9CM'] = {}
            self._parents['Condition_ICD9CM'] = util.read_CCS(
                util.get_file(settings['ccs_diag'], debug_output),
                self._codes['Condition_ICD9CM'])
        if 'ccs_proc' in settings:
            self._codes['Procedure_ICD9CM'] = {}
            self._parents['Procedure_ICD9CM'] = util.read_CCS(
                util.get_file(settings['ccs_proc'], debug_output),
                self._codes['Procedure_ICD9CM'])
    self.schema = settings['omop_schema']
    self.db = sqlalchemy.create_engine('{0}://{1}:{2}@{3}:{4}/{5}'.format(
        engine, username, password, host, port, database))
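# Building the DSN with str.format breaks if the password contains characters
# such as '@' or '/'. A sketch of a safer alternative, assuming SQLAlchemy
# 1.4+ (the original code may target an older version; make_engine is a
# hypothetical helper, not part of the project):
import sqlalchemy
from sqlalchemy.engine import URL

def make_engine(engine, username, password, host, port, database):
    # URL.create escapes special characters in each component.
    url = URL.create(drivername=engine, username=username, password=password,
                     host=host, port=port, database=database)
    return sqlalchemy.create_engine(url)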
def delete_file(item: DeleteFile, q: str = Query(None)):
    filepath = os.path.join(q, item.name)
    safe_path = safe_path_join(path=filepath, root=os.environ["ROOT_DIR"])
    if not os.path.exists(safe_path):
        raise HTTPException(status_code=404, detail="File does not exist.")
    resp = util.get_file(safe_path)
    os.remove(safe_path)
    return JSONResponse(content=jsonable_encoder(resp))
def add_file(item: AddFile, q: str = Query(None)):
    safe_path = safe_path_join(path=q, root=os.environ["ROOT_DIR"])
    if not os.path.isdir(safe_path):
        raise HTTPException(
            status_code=403, detail="Specified location is not a directory."
        )
    filepath = util.write_file(safe_path, item)
    resp = util.get_file(filepath)
    return JSONResponse(content=jsonable_encoder(resp))
def update_file(item: AddFile, q: str = Query(None)):
    safe_path = safe_path_join(path=q, root=os.environ["ROOT_DIR"])
    filepath = util.write_file(safe_path, item)
    if not os.path.exists(filepath):
        raise HTTPException(
            status_code=403, detail="Failed to update or create a file."
        )
    resp = util.get_file(filepath)
    return JSONResponse(content=jsonable_encoder(resp))
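# safe_path_join is referenced by the three handlers above but not shown.
# A minimal sketch of what it likely does (an assumption, not the project's
# actual implementation): join the user-supplied path onto the root and
# reject anything that escapes it.
import os
from fastapi import HTTPException

def safe_path_join(path: str, root: str) -> str:
    root = os.path.realpath(root)
    joined = os.path.realpath(os.path.join(root, path.lstrip(os.sep)))
    if joined != root and not joined.startswith(root + os.sep):
        raise HTTPException(status_code=403, detail="Path escapes root.")
    return joined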
def test_get_file_success(self):
    with mock.patch(
            "builtins.open", mock.mock_open(read_data=self.content)
    ) as mock_file:
        res = util.get_file(self.path)
        item = [{
            "name": self.filename,
            "owner": mock.ANY,
            "size": mock.ANY,
            "permissions": mock.ANY,
            "content": self.content,
        }]
        self.assertEqual(res, item)
def __init__(self): self.sheets = get_file("/kk03/corona_kanjyajyokyo.html", "xlsx", True)["公表"] self.patients_count = 5 self.clusters_count = 11 self.clusters = [] self._patients_json = {} self._patients_summary_json = {} self._clusters_json = {} self._clusters_summary_json = {} self._age_json = {} self._age_summary_json = {} self.get_patients() self.get_clusters()
def main():
    fn = util.get_file()
    num = 1
    print('num', num)
    s = time.time()
    l = time.time()
    for _ in range(num):
        compress_file(fn)
        print(time.time() - l)
        l = time.time()
    e = time.time()
    print('average time', (e - s) / num)
    print('total time', e - s)
def get_summary_json(self) -> Dict:
    pdf_texts = get_file('/kk03/corona_hasseijyokyo.html', "pdf")
    # Set summary values
    content = ''.join(pdf_texts[3:])
    self.values = get_numbers_in_text(content)
    self.set_summary_values(self.summary)
    # Set last update
    caption = pdf_texts[0]
    dt_vals = get_numbers_in_text(caption)
    last_update = datetime(
        datetime.now().year, dt_vals[0], dt_vals[1]) + timedelta(hours=dt_vals[2])
    self.summary['last_update'] = datetime.strftime(
        last_update, '%Y-%m-%dT%H:%M:%S+09:00')
    return self.summary
def main():
    fn = util.get_file()
    mat = sp.load_npz(fn)
    num = 3
    print('num', num)
    s = time.time()
    l = time.time()
    for _ in range(num):
        _ = corr_coef(mat)
        print(time.time() - l)
        l = time.time()
    e = time.time()
    print('average time', (e - s) / num)
    print('total time', e - s)
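# The hand-rolled timing loop above can also be expressed with the standard
# library; a sketch using timeit (not part of the original script):
# import timeit
# total = timeit.timeit(lambda: corr_coef(mat), number=num)
# print('average time', total / num)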
def buildhck_server(request):
    # FIXME: terrible hacks
    port = randint(49152, 65535)
    util.SERVER = 'http://localhost:{}'.format(port)
    p = Process(target=run, kwargs={'port': port})
    p.start()
    for _ in range(30):
        if p.is_alive() and get_file(''):
            break
        sleep(0.25)
    else:
        raise TimeoutError('failed to start buildhck')

    def fin():
        p.terminate()
        p.join()

    request.addfinalizer(fin)
    return p
def get_path_items(path: str):
    """
    Get directory or file details at the given filepath
    """
    if not os.path.exists(path):
        host_path = get_host_path(path)
        raise HTTPException(
            status_code=404,
            detail="The path `{}` does not exist or you don't have permissions to access it.".format(
                host_path
            ),
        )
    _, file_extension = os.path.splitext(path)
    if file_extension and file_extension != ".txt":
        raise HTTPException(status_code=403, detail="Can only read .txt files")
    items = []
    if os.path.isdir(path):
        items = util.get_dir(path)
    else:
        items = util.get_file(path)
    return items
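# Behavior sketch for the handler above (hypothetical paths):
# get_path_items("/data")            -> util.get_dir("/data") listing
# get_path_items("/data/notes.txt")  -> util.get_file(...) details
# get_path_items("/data/img.png")    -> HTTPException 403 (only .txt readable)
# get_path_items("/missing")         -> HTTPException 404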
def test_index():
    """index page"""
    assert get_file('')
def test_favicon():
    """favicon"""
    assert get_file('favicon.ico')
def __init__(self):
    self.pdf_texts = get_file('/kk03/corona_hasseijyokyo.html', "pdf")
    self.sickbeds_count = 154
    self.values = []
    self._main_summary = {}
    self._sickbeds_summary_json = {}
def new(name=None, role='workspace'):
    """
    Initializes a workspace with the specified name.
    """
    current_path = os.getcwdu()

    # Initialize the workspace directory if the name is available
    workspace_path = os.path.join(current_path, name)
    # if os.path.exists(workspace_path):
    #     print "The
    if not os.path.exists(workspace_path):
        os.makedirs(workspace_path)
        print "Initialized empty workspace in %s." % workspace_path

    # Initialize the '.gesso' directory in the workspace directory.
    gesso_path = os.path.join(workspace_path, '.gesso')
    if not os.path.exists(gesso_path):
        os.makedirs(gesso_path)
        print "Initialized Glue directory in %s." % gesso_path
    gesso_dirs = [
        os.path.join(gesso_path, 'components'),
        os.path.join(gesso_path, 'logs')
    ]
    for gesso_dir in gesso_dirs:
        if not os.path.exists(gesso_dir):
            os.makedirs(gesso_dir)

    # Initialize workspace with README.md file
    readme_path = os.path.join(workspace_path, "README.md")
    if not os.path.exists(readme_path):
        with open(readme_path, 'w+') as file:
            file.write("# %s" % name)

    # TODO:
    # - .gesso/config
    # - .gesso/components/*
    # - .gesso/logs/*
    # - README.md
    # - git repo?
    return

    # NOTE: the code below is unreachable due to the return above.
    # Check if cwd contains .gesso, or if any of its parents contain one
    # (if so, we're in a gesso repository).
    # If not, create it and populate it with a 'config' file that will
    # contain the initial config.
    # If so, print error 'Current/parent directory already contains a
    # .gesso folder'.
    parent_path = util.parent_contains('.gesso')
    if parent_path is not None:
        print 'Error: I can\'t do that.'
        print 'Reason: There is already a .gesso directory at %s.' % parent_path
        print 'Hint: Use `cd` to change to a different directory.'
    else:
        gesso_root = os.getcwdu()

        # Initialize the gesso root file system
        gesso_path = os.path.join(gesso_root, '.gesso')
        if not os.path.exists(gesso_path):
            print 'mkdir %s' % gesso_path
            os.makedirs(gesso_path)

        # Initialize gesso config file
        # gesso_config = collections.OrderedDict()
        # gesso_config['name'] = petname.Generate(2) if name == None else name
        # gesso_config['uuid'] = str(uuid.uuid4())  # TODO: Read UUID from device via Gesso API
        # gesso_config['role'] = str(role)
        # gesso_config['project'] = 'none'
        gesso_config = {
            'name': petname.Generate(2) if name == None else name,
            'uuid': str(uuid.uuid4()),  # TODO: read UUID from hardware!
            'role': str(role),
            'project': 'none'
        }
        gesso_config_json = json.dumps(gesso_config, indent=4, sort_keys=False)
        gesso_config_path = os.path.join(gesso_root, '.gesso', 'config')
        if not os.path.exists(gesso_config_path):
            print 'touch %s' % gesso_config_path
            with open(gesso_config_path, 'w+') as file:
                file.write(gesso_config_json)

        # create interface configuration
        system_controller_source = 'import gesso\nimport gesso.api\nprint gesso.api.version()'
        system_controller_path = os.path.join(gesso_root, 'system.py')
        if not os.path.exists(system_controller_path):
            logger.info('touch %s' % system_controller_path)
            with open(system_controller_path, 'w+') as file:
                file.write(system_controller_source)

        # gesso new zesty-koala --virtual
        if role == 'gesso':
            # This is done only for gesso devices. Not dev machines!
            # Add insecure pre-shared SSH public key (to bootstrap communications)
            insecure_ssh_public_key = util.get_file('public_key')
            os.system('echo "%s" | cat >> ~/.ssh/authorized_keys' % insecure_ssh_public_key)
        elif role == 'workspace':
            insecure_ssh_private_key = util.get_data_filename('private_key')
            os.system('ssh-add %s' % insecure_ssh_private_key)
def __init__(self, dataset_settings_dict):
    self.batch_size = dataset_settings_dict['batch_size']
    self.pad_noise_bool = dataset_settings_dict['pad_noise_bool']
    # self.wrangle_features = wrangle_features
    self.feature_dict_list = dataset_settings_dict['feature_dict_list']
    self.pred_task_dict = dataset_settings_dict['pred_task_dict']
    self.file_list = dataset_settings_dict['file_list']
    self.file_to_idx = {f: i for i, f in enumerate(self.file_list)}
    self.ab_to_idx = {a: i for i, a in enumerate(['A', 'B'])}
    self.set_type = dataset_settings_dict['set_type']
    # self.prediction_length = self.pred_task_dict['VA']['pred_len']
    self.active_outputs = self.pred_task_dict['active_outputs']
    # self.data_select_list = dataset_settings_dict['data_select_list']
    # data_select_list = dataset_settings_dict['data_select_list']
    # self.g_f_str_to_idx = {g_f: idx for idx, g_f in enumerate(self.data_select_list)}
    self.device = dataset_settings_dict['device']
    # self.device_2 = dataset_settings_dict['device_2']
    self.output_order = dataset_settings_dict['output_order']
    self.use_ling = dataset_settings_dict['use_ling']
    # self.use_da_enc = dataset_settings_dict['use_da_enc']
    self.ling_timings = dataset_settings_dict['ling_timings']
    self.update_annots = dataset_settings_dict['update_annots']
    self.num_preloader_workers = dataset_settings_dict['num_preloader_workers']
    self.num_feat_per_person = dataset_settings_dict['num_feat_per_person']
    self.lstm_sets_dict = dataset_settings_dict['lstm_sets_dict']
    self.time_out_length = dataset_settings_dict['lstm_sets_dict']['time_out_length']
    self.extra_pad_frames = dataset_settings_dict['extra_pad_frames']
    self.pad_all_max_len_bool = dataset_settings_dict['lstm_sets_dict']['pad_all_max_len_bool']
    self.pad_max_len = dataset_settings_dict['lstm_sets_dict']['pad_max_len']
    # self.turn_batch = dataset_settings_dict['turn_batch']
    self.encoder_settings = dataset_settings_dict['lstm_sets_dict']['encoder_settings']
    self.unspec_tok = dataset_settings_dict['lstm_sets_dict']['unspec_tok']
    self.sil_tok = dataset_settings_dict['lstm_sets_dict']['sil_tok']
    self.wait_tok = dataset_settings_dict['lstm_sets_dict']['wait_tok']
    self.stop_tok = dataset_settings_dict['lstm_sets_dict']['stop_tok']
    self.pad_tok = dataset_settings_dict['lstm_sets_dict']['pad_tok']
    # self.dataset_settings_dict = dataset_settings_dict
    n_pre = self.lstm_sets_dict['pred_task_dict']['n_pre']
    if dataset_settings_dict['set_type'] == 'train' and self.lstm_sets_dict['two_sys_turn']:
        self.two_sys_turn = True
    else:
        self.two_sys_turn = False

    # Variables to fill
    self.len = 0
    self.dataset = []
    # self.file_update_lengths = []
    self.results_lengths = {}
    self.feature_size = 0
    # self.cont_hold_shift_points_count = 0
    # self.bc_points_count = 0
    self.use_saved_data_bool = dataset_settings_dict['use_saved_data_bool']
    self.use_saved_data_fold = dataset_settings_dict['use_saved_data_fold']
    self.sil_cov_matrices = {filename: {} for filename in self.file_list}
    self.sil_means = {filename: {} for filename in self.file_list}
    if self.use_ling:
        self.ling_size = 300
        self.tot_num_feat_per_person = self.num_feat_per_person['acous'] + self.ling_size
    else:
        self.ling_size = 0
        self.tot_num_feat_per_person = self.num_feat_per_person['acous']
    self.files_ab = [
        [filename] + a_b
        for filename in self.file_list
        for a_b in [['A', 'B'], ['B', 'A']]]
    print('saved_data:' + self.use_saved_data_fold + self.set_type + '.p')
    if self.use_saved_data_bool and os.path.exists(self.use_saved_data_fold + self.set_type + '.p'):
        print('using saved data: ' + self.use_saved_data_fold)
        pdat = pickle.load(
            open(self.use_saved_data_fold + self.set_type + '.p', 'rb'))
        self.files_ab = pdat['files_ab']
        self.dataset = pdat['dataset']
        self.sil_means = pdat['sil_means']
        self.sil_cov_matrices = pdat['sil_cov_matrices']
    else:
        data_f_dict, data_g_dict = {}, {}
        args = []
        for file_name in self.file_list:
            file_args = [file_name, dataset_settings_dict]
            args.append(file_args)
        mult_out = []
        for arg in args:
            get_file_out = get_file(arg)
            mult_out.append(get_file_out)
        for mult in mult_out:
            f, g, set_obj_train = mult
            data_f_dict.update(f)
            data_g_dict.update(g)
        data_dict = {'A': data_f_dict, 'B': data_g_dict}

        # get annotations
        list_in = []
        for f_idx, lst in enumerate(self.files_ab):
            file, a_usr, b_sys = lst
            annots_usr = self.update_annots[file][a_usr]
            annots_sys = self.update_annots[file][b_sys]
            data_usr = data_dict[a_usr][file]['acous']
            data_sys = data_dict[b_sys][file]['acous']
            num_feat_per_person = self.num_feat_per_person['acous']
            pad_noise_bool = self.pad_noise_bool
            list_in.append([f_idx, file, a_usr, b_sys, annots_usr, annots_sys,
                            data_usr, data_sys, num_feat_per_person,
                            pad_noise_bool, n_pre])
        print('reached pre multiprocessing in dataloader')

        # get updates
        new_files_ab = []
        data_strt_idx = 0
        for f_idx, ls in enumerate(list_in):
            data_pts, sil_means, cov, file, a_usr, b_sys, file_length = get_update_annots(ls)
            for data_pt in data_pts:
                self.dataset.append(data_pt)
            # self.dataset.extend(data_pts)
            # note: mean and cov for the opposite speaker's files are used
            self.sil_means[file][b_sys] = sil_means
            self.sil_cov_matrices[file][b_sys] = cov
            new_files_ab.append([f_idx, file[0], a_usr[0], b_sys[0],
                                 file_length, data_strt_idx,
                                 data_strt_idx + file_length])
            data_strt_idx += file_length
        self.files_ab = new_files_ab

        # Save data
        if self.use_saved_data_bool and not os.path.exists(self.use_saved_data_fold + self.set_type + '.p'):
            print('Saving DATA: ' + self.set_type)
            if not os.path.exists(self.use_saved_data_fold):
                os.makedirs(self.use_saved_data_fold)
            pickle.dump(
                {
                    'dataset': self.dataset,
                    'files_ab': self.files_ab,
                    'sil_cov_matrices': self.sil_cov_matrices,
                    'sil_means': self.sil_means
                },
                open(self.use_saved_data_fold + self.set_type + '.p', 'wb')
            )
        temp += i
        w.write('%d,%d,%d\n' % (index, i, temp))
        index *= 2
    w.write('\nnum of row pos gene, num of cell,,\n')
    w.write('0,%d,%d\n' % (row_pos_genes[0], row_pos_genes[0]))
    temp = row_pos_genes[0]
    index = 1
    for i in row_pos_genes[1:]:
        temp += i
        w.write('%d,%d,%d\n' % (index, i, temp))
        index *= 2


if __name__ == '__main__':
    filename = util.get_file()
    print(filename)
    try:
        if os.path.splitext(filename)[1] == '.csv':
            print('Compressing...')
            data = compress.compress_file(filename, save=False)
            print('Statistic...')
            calculate(filename, data)
        elif os.path.splitext(filename)[1] == '.mtx':
            print('Statistic...')
            calculate(filename, scipy.io.mmread(filename))
        else:
            print('Statistic...')
            calculate(filename, sparse.load_npz(filename))
    except Exception as _:
        print('WARNING*******************************************\n', filename)
#!/usr/bin/env python3
from my_plot import save_plot_os, save_plot_comparison
from util import get_file, extract_columns

# Directories
# macos_out = get_file("macos", "matlabOutput.csv")
matlab_ubuntu = get_file("matlab/output/ubuntu", "matlabOutput.csv")
matlab_windows = get_file("matlab/output/windows", "matlabOutput.csv")
cpp_ubuntu_native_32 = get_file("cpp/output/ubuntu", "cppOutput-native-32.csv")
cpp_ubuntu_native_64 = get_file("cpp/output/ubuntu", "cppOutput-native-64.csv")
cpp_ubuntu_mkl = get_file("cpp/output/ubuntu", "cppOutput-mkl.csv")
cpp_windows_mkl = get_file("cpp/output/windows", "cppOutput-mkl.csv")
cpp_windows_native_64 = get_file("cpp/output/windows", "cppOutput-native-64.csv")

# columns_macos = extract_columns(macos_out)
columns_matlab_ubuntu = extract_columns(matlab_ubuntu)
columns_matlab_windows = extract_columns(matlab_windows)
columns_cpp_ubuntu_native_32 = extract_columns(cpp_ubuntu_native_32)
columns_cpp_ubuntu_native_64 = extract_columns(cpp_ubuntu_native_64)
columns_cpp_ubuntu_mkl = extract_columns(cpp_ubuntu_mkl)
columns_cpp_windows_mkl = extract_columns(cpp_windows_mkl)
columns_cpp_windows_native_64 = extract_columns(cpp_windows_native_64)

## Single plot
### NO
# save_plot_os(columns_macos, "rows", "Matrix Size", "results/macos_OnSize.pdf")
# save_plot_os(columns_macos, "nonZeros", "Non Zeros", "results/macos_OnNonZeros.pdf")
### NO
def main():
    input_file = util.get_file('Select h5 file')
    h5_to_csv(input_file)
from util import get_file, extract_columns, reshape_columns
from visualisation import comparison_result
import numpy as np

# renamed from `input` to avoid shadowing the builtin
input_file = get_file("results", "dct2_comparison.2.csv")
columns = extract_columns(input_file)
n = columns["n"][-1:]
my = columns["my"][-4:]
orig = columns["orig"][-4:]
reshaped_columns = reshape_columns(columns)
my_mean = []
orig_mean = []
for i in range(len(reshaped_columns["iteration"])):
    my_mean.append(np.mean(reshaped_columns["my"][i, :]))
    orig_mean.append(np.mean(reshaped_columns["orig"][i, :]))
size = np.append(reshaped_columns["n"][:, 0], np.array([n]))
my_mean.append(np.mean(my))
orig_mean.append(np.mean(orig))
comparison_result(size, [my_mean, orig_mean], ["Our dctn", "SciPy dctn"],
                  "DCT2", "results/presentation.pdf")