def dose_plot(df, err, cols, scale='linear'):
    n_rows = int(np.ceil(len(cols) / 3.0))
    plt.figure(figsize=(20, 4 * n_rows))
    subs = gridspec.GridSpec(n_rows, 3)
    plt.subplots_adjust(hspace=0.54, wspace=0.27)
    for col, sub in zip(cols, subs):
        plt.subplot(sub)
        for base in df['Base'].unique():
            for drug in get_drugs_with_multiple_doses(filter_rows(df, 'Base', base)):
                data = thread_first(df,
                                    (filter_rows, 'Drug', drug),
                                    (filter_rows, 'Base', base),
                                    (DF.sort, 'Dose'))
                error = thread_first(err,
                                     (filter_rows, 'Drug', drug),
                                     (filter_rows, 'Base', base),
                                     (DF.sort, 'Dose'))
                if scale == 'linear':
                    plt.errorbar(data['Dose'], data[col], yerr=error[col])
                    title = "{} vs. Dose".format(col)
                else:
                    plt.errorbar(data['Dose'], data[col], yerr=error[col])
                    plt.xscale('log')
                    title = "{} vs. Dose (Log Scale)".format(col)
                plt.xticks(data['Dose'].values, data['Dose'].values)
                plt.xlim(0.06, 15)
                label('Dose ({})'.format(data.Unit.values[0]), col, title, fontsize=15)
        plt.legend(df['Base'].unique(), loc=0)
def regenerate():
    '''recreates all index files. This will parse all notes, and might take some time.'''
    print('Regenerate index, this may take some time...')
    for pf in [c.title_idx_path, c.tag_idx_path, c.group_idx_path]:
        c.unlink_if_existing(pf)
    tag_idx = {}
    title_idx = {}
    group_idx = {}
    doi_idx = {}
    empty_set = set()
    files = list(Path(c.load_config().save_path, 'md').iterdir())
    for file in tqdm(files):
        title, tags, group, doi = c.parse_file(file.read_text())
        id = int(file.stem)
        title_idx = c.insert_index_entry(title_idx, title, id)
        tag_idx = c.update_multi_index(tag_idx, tags, empty_set, id)
        group_idx = c.insert_index_entry(group_idx, group, id)
        if doi is not None:
            doi_idx = c.insert_index_entry(doi_idx, doi, id)
    c.store_group_index(group_idx)
    c.store_title_index(title_idx)
    c.store_tag_index(tag_idx)
    c.store_doi_index(doi_idx)
    t.thread_first(c.load_state(),
                   (t.assoc, 'next_index', max(map(int, [f.stem for f in files])) + 1),
                   c.save_state)
def new(template: Path, doi: str, pdf: Path, pdf_asset_path: Path, reload: bool):
    '''creates a new note'''
    save_path = Path(c.load_config().save_path)
    state = c.load_state()
    md_folder = save_path / 'md'
    md_folder.mkdir(0o755, True, True)
    new_file_path = md_folder / f'{state.next_index}.md'
    c.assert_new_file_does_not_exist(new_file_path)
    if doi is not None:
        bibtex = c.load_bibtex_cached(doi, reload)
        title, author, link = c.get_title_author_and_link(bibtex)
        template = doi_template.format(author, doi, title, link)
    elif template is not None:
        template = template.read_text()
    else:
        template = new_md_template
    template += c.get_pdf_template(pdf, pdf_asset_path)
    new_file_path.write_text(template)
    t.thread_first(state,
                   (t.assoc, 'next_index', state.next_index + 1),
                   (t.assoc, 'last_created', state.next_index),
                   c.save_state)
    c.store_group_index(c.insert_index_entry(c.load_group_index(), 'None', state.next_index))
    c.store_title_index(c.insert_index_entry(c.load_title_index(), 'None', state.next_index))
    sp.run(f'mdn -c {c.config_path} edit', shell=True)
def format_filename(pair):
    """ Given a pair of (directory,filename) return a string with the date and filename."""
    directory = pair[0]
    filename = pair[1]
    return thread_first(directory,
                        os.path.split,
                        snd,
                        lambda date: "{} {}".format(date, filename))
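# Illustrative call (hypothetical paths, not from the original data): the last
# component of the directory is taken to be the date, so
#   format_filename(("/data/2015-06-01", "plate1")) -> "2015-06-01 plate1"
# assuming snd returns the second element of the (head, tail) pair produced by
# os.path.split.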
def handle_httpexception(err: HTTPException) -> Response:
    """Return JSON instead of HTML for HTTP errors."""
    # start with the correct headers and status code from the error
    response = err.get_response()
    try:
        validation_messages = err.data.get("messages", None)
    except AttributeError:
        validation_messages = None
    error_body = ServerError(response.status_code).error_body
    if validation_messages:
        error_body_with_validation_errors = toolz.thread_first(
            error_body,
            # Remove description from dict
            (toolz.dissoc, "description"),
            # Merge other fields into the dict
            lambda x: {
                **x,
                "hint": "Errors with query params",
                "code": err.code,
                "message": "Validation errors",
                "errors": validation_messages
            })
        response.data = json.dumps(error_body_with_validation_errors)
    else:
        response.data = json.dumps(error_body)
    response.content_type = "application/json"
    return response
def thread_first_repeat(x, f, args):
    """ Execute thread_first with f applied once for each set of args. """
    # Need to improve the documentation for this function, and maybe change its
    # implementation. It's really confusing. Try using foldl. I think that's the
    # better option.
    return thread_first(x,
                        *map2(lambda x, y: tuple([x] + y),
                              repeat(f, len(args)),
                              args))
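# Illustrative expansion (made-up values; assumes toolz.thread_first, a map2
# that maps a function over two sequences, and repeat(f, n) yielding f n times):
#   from operator import add
#   thread_first_repeat(1, add, [[2], [3], [4]])
#   # == thread_first(1, (add, 2), (add, 3), (add, 4)) == 10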
def full_config(configs, base_config):
    if 'type' in base_config:
        return base_config
    prototype = full_config(configs, configs[base_config['prototype']])
    return t.thread_first(prototype,
                          (t.merge, base_config),
                          (t.dissoc, 'prototype'))
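# Illustrative resolution (made-up configs, not from the original project):
# full_config follows the 'prototype' chain until it reaches a config that has
# a 'type', merges each child over its prototype, and drops the 'prototype' key.
#   configs = {
#       'base':  {'type': 'sgd', 'lr': 0.1, 'momentum': 0.9},
#       'small': {'prototype': 'base', 'lr': 0.01},
#   }
#   full_config(configs, configs['small'])
#   # -> {'type': 'sgd', 'lr': 0.01, 'momentum': 0.9}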
def create_id(input_hashes, input_hash_fn, name, version):
    return t.thread_first(
        input_hashes,
        input_hash_fn,
        (t.merge, {
            'name': name,
            'version': version
        }),
        hash)
def get_files(path):
    """ Given a path, recursively find all files beneath it, and return a dictionary
    with a display string as the key and a tuple of (path,stripped filename). """
    # Note: the single-argument map(...) steps only work if `map` is a curried
    # version (e.g. toolz.curried.map) imported in this module.
    return thread_first(path,
                        os.walk,
                        list,
                        map(get_dataset_in_dir),
                        concatenate,
                        map(lambda pair: (format_filename(pair), pair)),
                        lambda x: sorted(x, key=fst, reverse=True),
                        OrderedDict)
def get_plate_data_from_file_with_multiple_plates(path, c):
    # Splits a multi-plate export on the plate delimiter, parses each plate,
    # concatenates them, then cleans, renames, and normalizes the columns.
    # The one-argument map/pd.read_csv/df.* steps appear to rely on curried
    # wrappers defined elsewhere in this module.
    return thread_first(path,
                        open,
                        file.read,
                        (str.replace, '\r', ''),
                        (str.split, c['plate_delimiter']),
                        tail,
                        map(StringIO),
                        map(pd.read_csv(delimiter=c['delimiter'],
                                        skiprows=c['skiprows'])),
                        pd.concat,
                        df.dropna(axis=1, how='all'),
                        (drop_matching_columns, c['dropcols']),
                        df.rename(columns=c['colrename']),
                        (add_normalized_columns, c['normcols']))
def get_plate_data(path, c):
    """ Get plate data, drop empty columns, drop selected columns, rename columns,
    add normalized columns. """
    return thread_first(path,
                        from_file,
                        (str.replace, '\r', ''),
                        StringIO,
                        pd.read_csv(delimiter=c['delimiter'], skiprows=c['skiprows']),
                        df.dropna(axis=1, how='all'),
                        (drop_matching_columns, c['dropcols']),
                        df.rename(columns=c['colrename']),
                        (add_normalized_columns, c['normcols']))
def calc_signature(secret_key, _, datestamp, region, service, aws_request):
    # assert service == 's3'  # this shouldn't be a problem if AMI was setup right
    return thread_first(
        f'AWS4{secret_key}'.encode('utf-8'),
        (sign, datestamp),
        (sign, region),
        (sign, service),
        (sign, aws_request))
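# `sign` is not shown in this snippet; a minimal sketch of the conventional
# AWS Signature Version 4 key-derivation helper it appears to expect
# (HMAC-SHA256 keyed with the previous step's digest) could look like this:
import hashlib
import hmac

def sign(key: bytes, msg: str) -> bytes:
    # HMAC-SHA256 over the UTF-8 encoded message, returned as raw bytes so the
    # result can key the next step of the derivation chain.
    return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()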
def __init__(self, filename=None, spec=CONFIG_SPEC_PATH, options=None,
             default_spec=None):
    """Initializer"""
    super(Config, self).__init__()
    self.read_from_file = None
    self.filename = None
    # Merge specs, giving precedence to the user spec, then before_init_spec,
    # then default_spec
    if (spec == self.CONFIG_SPEC_PATH
            and not os.path.isfile(self.CONFIG_SPEC_PATH)):
        spec = None
    user_spec_ohm = thread_first(spec, convert_spec, conf_to_ohm)
    before_init_spec_ohm = thread_first(self.DEFAULT_CONFIG, ohm_to_spec,
                                        convert_spec, conf_to_ohm)
    default_spec_ohm = thread_first(default_spec, convert_spec, conf_to_ohm)
    user_spec_ohm.merge(before_init_spec_ohm)
    user_spec_ohm.merge(default_spec_ohm)
    full_spec = ohm_to_spec_list(user_spec_ohm)
    # Load the configuration and overload it with the options
    if filename is not None:
        self.load(filename, full_spec, options)
    else:
        self.base = configobj.ConfigObj()
    # Unless the options are already there, overload them with the defaults
    # set before initialization
    for key, value in self.DEFAULT_CONFIG.items():
        if key not in self:
            self[key] = value
def get_dataset_in_dir(dir_triple):
    """ Given a triple of (path,subdirectories,files), return list of tuples of
    (directory,filename)."""
    directory = dir_triple[0]
    filenames = dir_triple[2]
    # NB: str.rstrip removes any trailing characters from the given set rather
    # than the literal suffix, so this trims '-well.csv'/'-conditions.csv' but
    # can also eat extra trailing characters that happen to be in those sets.
    return thread_first(
        filenames,
        map(lambda filename: filename.rstrip('-well.csv').rstrip('-conditions.csv')),
        set,
        list,
        map(lambda trimmed_filename: (directory, trimmed_filename)))
def get_info(record, keys_wanted):
    return thread_first(
        record,  # (id_with_side, JSON)
        get_paths,
        list,
        # print,
        lambda paths: tree_select_kv(record, paths, keys_wanted),
        m_parse_flat_pubmed)
def process_location(location):
    return thread_first(
        location,
        # Remove leading and trailing spaces.
        lambda x: x.strip(),
        # Some of the locations have digits at the end if there are
        # duplicates in the same city/county.
        lambda x: x[:-1] if x[-1].isdigit() else x,
        # After removing the digits, strip spaces one more time (only needs
        # to be at the end since the beginning wasn't changed).
        lambda x: x.rstrip())
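# Illustrative call (hypothetical input, not from the original data set):
#   process_location("  Travis County 2") -> "Travis County"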
def elasticsearch_query(start, end, targets, profanity_mapping):
    return thread_first(
        {"size": 0},
        (assoc, "query", time_range(start, end)),
        (assoc, "aggregations", tweets_per_minute),
        (assoc_in,
         ["aggregations", "tweets_per_minute", "aggregations"],
         profanity_filter(profanity_mapping)),
        (assoc_in,
         ["aggregations", "tweets_per_minute", "aggregations",
          "profanity", "aggregations"],
         target_filter(targets)))
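# Rough shape of the query body this builds (payloads elided; time_range,
# tweets_per_minute, profanity_filter and target_filter are defined elsewhere
# in the module, and the nesting below just follows the assoc_in paths above):
#   {"size": 0,
#    "query": time_range(start, end),
#    "aggregations": {
#        "tweets_per_minute": {
#            ...,
#            "aggregations": {
#                "profanity": {..., "aggregations": target_filter(targets)}}}}}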
def takecycle(elements, n):
    """ Return first n elements of infinite cycle given by elements. """
    return thread_first(elements, cycle, (islice, n), list)
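# Illustrative usage (assuming itertools.cycle/islice and toolz.thread_first
# are imported in this module):
#   takecycle([1, 2, 3], 5) -> [1, 2, 3, 1, 2]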
def __repr__(self):
    """Get a representation of the mapping"""
    return thread_first(self, iteritems, list, repr)
    return df  # tail of the preceding function (presumably cre_col), not shown in full here

## function that maps cre_col across its arguments to create a random dataset
def cre_df(nb_rows=1000, percent_na=[0.01, 0.1, 0.9], name=range(3)):
    df = pd.concat(list(map(cre_col, 3 * [nb_rows], percent_na, name)), axis=1)
    df.index = pd.date_range('2001/01/01', periods=nb_rows, freq='H')
    return df

## function that removes columns that have too many NA values
def nas_remover(df, na_percentage=0.2):
    na_df = df.isna().sum() / len(df)
    list_col_to_keep = na_df[na_df < na_percentage].index
    return df[list_col_to_keep]

## function that resamples data at user-defined hour intervals
def cre_resampler(df, resampling_str):
    return df.resample(resampling_str).mean()

## function to fill NA values
def fill_na(df):
    return df.interpolate().ffill().bfill()

## pipe data through all functions in the list
if __name__ == '__main__':
    res = thread_first(cre_df(),
                       nas_remover,
                       (cre_resampler, '2H'),
                       fill_na)
def update_single_index(index: Index, new: str, old: str, id: int) -> Index:
    return t.thread_first(index,
                          (remove_index_entry, old, id),
                          (insert_index_entry, new, id))
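# Illustrative call (hypothetical data; assumes an Index maps a key to the note
# ids filed under it): update_single_index(idx, 'papers', 'inbox', 7) removes
# id 7 from the 'inbox' entry and then inserts it under 'papers'.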