Example #1
def dose_plot(df,err,cols,scale='linear'):
    n_rows = int(np.ceil(len(cols)/3.0))
    plt.figure(figsize=(20,4 * n_rows))
    subs = gridspec.GridSpec(n_rows, 3) 
    plt.subplots_adjust(hspace=0.54,wspace=0.27)

    for col,sub in zip(cols,subs):
        plt.subplot(sub)
        for base in df['Base'].unique():
            for drug in get_drugs_with_multiple_doses(filter_rows(df,'Base',base)):
                data = thread_first(df,
                                    (filter_rows,'Drug',drug),
                                    (filter_rows,'Base',base),
                                    (DF.sort, 'Dose'))
                error = thread_first(err,
                                     (filter_rows,'Drug',drug),
                                     (filter_rows,'Base',base),
                                     (DF.sort, 'Dose'))
                if scale == 'linear':
                    plt.errorbar(data['Dose'],data[col],yerr=error[col])
                    title = "{} vs. Dose".format(col)
                else: 
                    plt.errorbar(data['Dose'],data[col],yerr=error[col])
                    plt.xscale('log')
                    title = "{} vs. Dose (Log Scale)".format(col)
                    plt.xticks(data['Dose'].values,data['Dose'].values)
                    plt.xlim(0.06,15)
                label('Dose ({})'.format(data.Unit.values[0]), col, title, fontsize=15)

                plt.legend(df['Base'].unique(), loc=0)
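For reference, toolz.thread_first threads a value through a sequence of forms, inserting it as the first argument of each step; a tuple form (f, a) means f(value, a). A minimal illustration, straight from the toolz documentation:

from operator import add
from toolz import thread_first

# thread_first(1, (add, 4), (pow, 2)) threads 1 through each step:
# add(1, 4) -> 5, then pow(5, 2) -> 25
thread_first(1, (add, 4), (pow, 2))   # -> 25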
Example #2
def regenerate():
    '''recreates all index files.
    This will parse all notes, and might take some time.'''
    print('Regenerate index, this may take some time...')
    for pf in [c.title_idx_path, c.tag_idx_path, c.group_idx_path]:
        c.unlink_if_existing(pf)
    tag_idx = {}
    title_idx = {}
    group_idx = {}
    doi_idx = {}
    empty_set = set()
    files = list(Path(c.load_config().save_path, 'md').iterdir())

    for file in tqdm(files):
        title, tags, group, doi = c.parse_file(file.read_text()) 
        id = int(file.stem)
        title_idx = c.insert_index_entry(title_idx, title, id)
        tag_idx = c.update_multi_index(tag_idx, tags, empty_set, id)
        group_idx = c.insert_index_entry(group_idx, group, id)
        if doi is not None:
            doi_idx = c.insert_index_entry(doi_idx, doi, id)
    c.store_group_index(group_idx)
    c.store_title_index(title_idx)
    c.store_tag_index(tag_idx)
    c.store_doi_index(doi_idx)

    t.thread_first(c.load_state(),
        (t.assoc, 'next_index', 
                  max(map(int, [f.stem for f in files])) + 1),
        c.save_state)
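For readers new to toolz, t.assoc returns a new mapping with one key set, so the tail above is equivalent to this unthreaded form:

# Equivalent to the thread_first tail above:
# state = c.load_state()
# c.save_state(t.assoc(state, 'next_index',
#                      max(map(int, [f.stem for f in files])) + 1))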
Example #3
def new(template: Path, doi: str, pdf: Path, pdf_asset_path: Path, 
        reload: bool):
    '''creates a new note'''
    save_path = Path(c.load_config().save_path)
    state = c.load_state()
    md_folder = save_path / 'md' 
    md_folder.mkdir(0o755, True, True)
    new_file_path = md_folder / f'{state.next_index}.md'
    c.assert_new_file_does_not_exist(new_file_path)
    if doi is not None:
        bibtex = c.load_bibtex_cached(doi, reload)
        title, author, link = c.get_title_author_and_link(bibtex)
        template = doi_template.format(author, doi, title, link)
    elif template is not None:
        template = template.read_text()
    else:
        template = new_md_template

    template += c.get_pdf_template(pdf, pdf_asset_path)
    new_file_path.write_text(template)
    t.thread_first(state,
        (t.assoc, 'next_index', state.next_index + 1),
        (t.assoc, 'last_created', state.next_index),
        c.save_state)
    c.store_group_index(c.insert_index_entry(c.load_group_index(), 
                      'None', state.next_index))
    c.store_title_index(c.insert_index_entry(c.load_title_index(), 
                      'None', state.next_index))
    sp.run(f'mdn -c {c.config_path} edit', shell=True)
Example #4
def format_filename(pair):
    """ Given a pair of (directory,filename)
        return a string with the date and filename."""
    directory = pair[0]
    filename = pair[1]
    return thread_first(directory, os.path.split, snd,
                        lambda date: "{} {}".format(date, filename))
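Unrolled, the threading above reduces to a single expression (snd picks the second element of the (head, tail) pair returned by os.path.split):

# Equivalent without thread_first:
# return "{} {}".format(os.path.split(directory)[1], filename)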
Example #5
def handle_httpexception(err: HTTPException) -> Response:
    """Return JSON instead of HTML for HTTP errors."""
    # start with the correct headers and status code from the error
    response = err.get_response()

    try:
        validation_messages = err.data.get("messages", None)
    except AttributeError:
        validation_messages = None

    error_body = ServerError(response.status_code).error_body

    if validation_messages:
        error_body_with_validation_errors = toolz.thread_first(
            error_body,
            # Remove description from dict
            (toolz.dissoc, "description"),
            # Merge other fields into the dict
            lambda x: {
                **x, "hint": "Errors with query params",
                "code": err.code,
                "message": "Validation errors",
                "errors": validation_messages
            })
        response.data = json.dumps(error_body_with_validation_errors)
    else:
        response.data = json.dumps(error_body)

    response.content_type = "application/json"
    return response
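Unthreaded, the dissoc-and-merge step above is equivalent to:

# body = toolz.dissoc(error_body, "description")   # new dict without the key
# response.data = json.dumps({**body,
#                             "hint": "Errors with query params",
#                             "code": err.code,
#                             "message": "Validation errors",
#                             "errors": validation_messages})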
Example #6
def thread_first_repeat(x,f,args):
    """ Execute thread first with f applied once for each set of args. """
    # Need to improve the documentation for this function, and maybe change its implementation.
    # It's really confusing. Try using foldl. I think that's the better option.
    return thread_first(x,*map2(lambda x,y: tuple([x] + y),
                               repeat(f,len(args)),
                               args))
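The comment above suggests a fold; a minimal sketch of the same behavior using functools.reduce (assuming each element of args is a list of positional arguments, as the map2 call implies):

from functools import reduce

def thread_first_repeat_folded(x, f, args):
    """Apply f once per argument list, threading the result through."""
    return reduce(lambda acc, a: f(acc, *a), args, x)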
Example #7
def full_config(configs, base_config):
    if 'type' in base_config:
        return base_config
    prototype = full_config(configs, configs[base_config['prototype']])
    return t.thread_first(prototype,
                          (t.merge, base_config),
                          (t.dissoc, 'prototype'))
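A hypothetical configs mapping showing how the prototype chain resolves (entries invented for illustration):

configs = {
    'base':  {'type': 'sgd', 'lr': 0.1},
    'child': {'prototype': 'base', 'lr': 0.01},
}
# full_config(configs, configs['child'])
#   -> t.merge(resolved 'base' config, child), then dissoc 'prototype'
#   -> {'type': 'sgd', 'lr': 0.01}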
Example #8
def create_id(input_hashes, input_hash_fn, name, version):
    return t.thread_first(
        input_hashes, input_hash_fn, (t.merge, {
            'name': name,
            'version': version
        }), hash
    )
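Unthreaded, this is:

# hash(t.merge(input_hash_fn(input_hashes),
#              {'name': name, 'version': version}))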
Example #9
def get_files(path):
    """ Given a path, recursively find all files beneath it, and return
        a dictionary with a display string as the key and a tuple of
        (path,stripped filename). """
    return thread_first(path, os.walk, list, map(get_dataset_in_dir),
                        concatenate,
                        map(lambda pair: (format_filename(pair), pair)),
                        lambda x: sorted(x, key=fst, reverse=True),
                        OrderedDict)
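Note that map is called here with a single argument, which only works with a curried variant; snippets like this presumably rely on something along the lines of:

# Required for the one-argument map(...) steps above:
from toolz.curried import map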
Example #10
def get_plate_data_from_file_with_multiple_plates(path, c):
    return thread_first(
        path, open, file.read, (str.replace, '\r', ''),
        (str.split, c['plate_delimiter']), tail, map(StringIO),
        map(pd.read_csv(delimiter=c['delimiter'],
                        skiprows=c['skiprows'])), pd.concat,
        df.dropna(axis=1, how='all'), (drop_matching_columns, c['dropcols']),
        df.rename(columns=c['colrename']),
        (add_normalized_columns, c['normcols']))
Example #11
def get_plate_data(path, c):
    """ Get plate data, drop empty columns, drop selected columns, 
        rename columns, add normalized columns. """
    return thread_first(
        path, from_file, (str.replace, '\r', ''), StringIO,
        pd.read_csv(delimiter=c['delimiter'], skiprows=c['skiprows']),
        df.dropna(axis=1, how='all'), (drop_matching_columns, c['dropcols']),
        df.rename(columns=c['colrename']),
        (add_normalized_columns, c['normcols']))
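Plain pandas would raise here, since pd.read_csv(...) is called without a file; the snippet evidently assumes curried wrappers (pd, df) around pandas. With the standard library, the same step could be built explicitly:

from functools import partial
import pandas as pd

c = {'delimiter': ',', 'skiprows': 0}   # example config values (assumption)
# A partially applied reader, usable as a thread_first step:
read = partial(pd.read_csv, delimiter=c['delimiter'], skiprows=c['skiprows'])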
Example #12
def calc_signature(secret_key, _, datestamp, region, service, aws_request):
    # assert service == 's3'  # this shouldn't be a problem if AMI was setup right
    return thread_first(
        f'AWS4{secret_key}'.encode('utf-8'),
        (sign, datestamp),
        (sign, region),
        (sign, service),
        (sign, aws_request),
    )
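The sign helper is not shown; a minimal sketch following the standard AWS Signature Version 4 key derivation (an assumption about the original helper):

import hashlib
import hmac

def sign(key: bytes, msg: str) -> bytes:
    # HMAC-SHA256 of msg under key, as in SigV4 key derivation
    return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()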
Example #13
    def __init__(self,
                 filename=None,
                 spec=CONFIG_SPEC_PATH,
                 options=None,
                 default_spec=None):
        """Initializer
        """
        super(Config, self).__init__()
        self.read_from_file = None
        self.filename = None

        # Merge specs, giving precedence to user spec, then, before_init_spec,
        # then default_spec
        if ((spec == self.CONFIG_SPEC_PATH and
             not os.path.isfile(self.CONFIG_SPEC_PATH))):
            spec = None

        user_spec_ohm = thread_first(spec, convert_spec, conf_to_ohm)
        before_init_spec_ohm = thread_first(self.DEFAULT_CONFIG,
                                            ohm_to_spec,
                                            convert_spec,
                                            conf_to_ohm)
        default_spec_ohm = thread_first(default_spec,
                                        convert_spec,
                                        conf_to_ohm)

        user_spec_ohm.merge(before_init_spec_ohm)
        user_spec_ohm.merge(default_spec_ohm)
        full_spec = ohm_to_spec_list(user_spec_ohm)

        # Load the configuration and overload it with the options
        if filename is not None:
            self.load(filename,
                      full_spec,
                      options)
        else:
            self.base = configobj.ConfigObj()

        # Unless the options are already there, overload them with the defaults
        # set before initialization
        for key, value in self.DEFAULT_CONFIG.items():
            if key not in self:
                self[key] = value
Example #14
def format_filename(pair):
    """ Given a pair of (directory,filename)
        return a string with the date and filename."""
    directory = pair[0]
    filename = pair[1]
    return thread_first(
        directory,
        os.path.split,
        snd,
        lambda date: "{} {}".format(date,filename))
Example #15
def get_dataset_in_dir(dir_triple):
    """ Given a triple of (path,subdirectories,files),
        return list of tuples of (directory,filename)."""
    directory = dir_triple[0]
    filenames = dir_triple[2]
    return thread_first(
        filenames,
        map(lambda filename: filename.rstrip('-well.csv').rstrip(
            '-conditions.csv')), set, list,
        map(lambda trimmed_filename: (directory, trimmed_filename)))
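Note that str.rstrip strips a trailing set of characters rather than a literal suffix, so rstrip('-well.csv') can over-trim filenames that merely end in those characters. A safer helper (Python 3.9+) might look like:

def strip_known_suffixes(filename):
    # removesuffix strips an exact suffix, unlike rstrip
    for suffix in ('-well.csv', '-conditions.csv'):
        filename = filename.removesuffix(suffix)
    return filename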
Example #16
def get_info(record, keys_wanted):
    return thread_first(
        record,
        # (id_with_side, JSON),
        get_paths,
        list,
        # print,
        lambda paths: tree_select_kv(record, paths, keys_wanted),
        m_parse_flat_pubmed,
    )
Example #17
def process_location(location):
    return thread_first(
        location,
        # Remove leading and trailing spaces.
        lambda x: x.strip(),
        # Some of the locations have digits at the end if there are
        # duplicates in the same city/county.
        lambda x: x[:-1] if x[-1].isdigit() else x,
        # After removing the digits, strip spaces one more time (only needs
        # to be at the end since the beginning wasn't changed).
        lambda x: x.rstrip())
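A quick check of the behavior:

process_location('  Dallas 2 ')   # -> 'Dallas'
process_location('Austin')        # -> 'Austin'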
Example #18
def get_dataset_in_dir(dir_triple):
    """ Given a triple of (path,subdirectories,files), 
        return list of tuples of (directory,filename)."""
    directory = dir_triple[0]
    filenames = dir_triple[2]
    return thread_first(
        filenames,
        map(lambda filename: filename.rstrip('-well.csv').rstrip('-conditions.csv')),
        set,
        list,
        map(lambda trimmed_filename: (directory,trimmed_filename)))  
Example #19
def get_plate_data(path,c):
    """ Get plate data, drop empty columns, drop selected columns, 
        rename columns, add normalized columns. """
    return thread_first(path,
                        from_file,
                        (str.replace,'\r',''),
                        StringIO,
                        pd.read_csv(delimiter=c['delimiter'], skiprows=c['skiprows']),
                        df.dropna(axis=1,how='all'),
                        (drop_matching_columns,c['dropcols']),
                        df.rename(columns=c['colrename']),
                        (add_normalized_columns,c['normcols']))
Example #20
def get_files(path):
    """ Given a path, recursively find all files beneath it, and return 
        a dictionary with a display string as the key and a tuple of 
        (path,stripped filename). """
    return thread_first(
        path,
        os.walk,
        list,
        map(get_dataset_in_dir),
        concatenate,
        map(lambda pair: (format_filename(pair),pair)),
        lambda x: sorted(x, key = fst, reverse=True),
        OrderedDict)
Example #21
    def __init__(self,
                 filename=None,
                 spec=CONFIG_SPEC_PATH,
                 options=None,
                 default_spec=None):
        """Initializer
        """
        super(Config, self).__init__()
        self.read_from_file = None
        self.filename = None

        # Merge specs, giving precedence to user spec, then, before_init_spec,
        # then default_spec
        if ((spec == self.CONFIG_SPEC_PATH
             and not os.path.isfile(self.CONFIG_SPEC_PATH))):
            spec = None

        user_spec_ohm = thread_first(spec, convert_spec, conf_to_ohm)
        before_init_spec_ohm = thread_first(self.DEFAULT_CONFIG, ohm_to_spec,
                                            convert_spec, conf_to_ohm)
        default_spec_ohm = thread_first(default_spec, convert_spec,
                                        conf_to_ohm)

        user_spec_ohm.merge(before_init_spec_ohm)
        user_spec_ohm.merge(default_spec_ohm)
        full_spec = ohm_to_spec_list(user_spec_ohm)

        # Load the configuration and overload it with the options
        if filename is not None:
            self.load(filename, full_spec, options)
        else:
            self.base = configobj.ConfigObj()

        # Unless the options are already there, overload them with the defaults
        # set before initialization
        for key, value in self.DEFAULT_CONFIG.items():
            if key not in self:
                self[key] = value
Example #22
def get_plate_data_from_file_with_multiple_plates(path,c):
    return thread_first(path,
                        open,
                        file.read,
                        (str.replace,'\r',''),
                        (str.split,c['plate_delimiter']),
                        tail,
                        map(StringIO),
                        map(pd.read_csv(delimiter=c['delimiter'], skiprows=c['skiprows'])),
                        pd.concat,
                        df.dropna(axis=1,how='all'),
                        (drop_matching_columns,c['dropcols']),
                        df.rename(columns=c['colrename']),
                        (add_normalized_columns,c['normcols']))
Example #23
def elasticsearch_query(start, end, targets, profanity_mapping):
    return thread_first(
        {"size": 0},
        (assoc, "query", time_range(start, end)),
        (assoc, "aggregations", tweets_per_minute),
        (
            assoc_in,
            ["aggregations", "tweets_per_minute", "aggregations"],
            profanity_filter(profanity_mapping),
        ),
        (
            assoc_in,
            [
                "aggregations",
                "tweets_per_minute",
                "aggregations",
                "profanity",
                "aggregations",
            ],
            target_filter(targets),
        ),
    )
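A sketch of the resulting query shape (the helper payloads are assumptions; tweets_per_minute and profanity_filter(...) are presumed to be single-key dicts named tweets_per_minute and profanity):

# {
#   "size": 0,
#   "query": <time_range(start, end)>,
#   "aggregations": {
#     "tweets_per_minute": {
#       ...,
#       "aggregations": {
#         "profanity": {
#           ...,
#           "aggregations": <target_filter(targets)>
#         }
#       }
#     }
#   }
# }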
Example #24
def takecycle(elements, n):
    """ Return first n elements of infinite cycle given by elements. """
    return thread_first(elements, cycle, (islice, n), list)    
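A quick usage check (imports assumed):

from itertools import cycle, islice
from toolz import thread_first

takecycle([1, 2, 3], 5)   # -> [1, 2, 3, 1, 2]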
Example #25
def thread_first_repeat(x, f, args):
    """ Execute thread first with f applied once for each set of args. """
    # Need to improve the documentation for this function, and maybe change its implementation.
    # It's really confusing. Try using foldl. I think that's the better option.
    return thread_first(
        x, *map2(lambda x, y: tuple([x] + y), repeat(f, len(args)), args))
Example #26
import numpy as np
import pandas as pd
from toolz import thread_first


## function that builds one random column with a given fraction of NA values
## (a reconstruction; only the trailing `return df` survived in the source)
def cre_col(nb_rows, percent_na, name):
    col = np.random.randn(nb_rows)
    col[np.random.rand(nb_rows) < percent_na] = np.nan
    df = pd.DataFrame({name: col})
    return df

## function that maps cre_col across its arguments to build a random dataset
def cre_df(nb_rows=1000, percent_na=[0.01, 0.1, 0.9], name=range(3)):
    df = pd.concat(list(map(cre_col, 3 * [nb_rows], percent_na, name)), axis=1)
    df.index = pd.date_range('2001/01/01', periods=nb_rows, freq='H')
    return df


## function that removes columns that have too many NA values
def nas_remover(df, na_percentage=0.2):
    na_df = df.isna().sum() / len(df)
    list_col_to_keep = na_df[na_df < na_percentage].index
    return df[list_col_to_keep]


## function that resamples data at user-defined hour intervals
def cre_resampler(df, resampling_str):
    return df.resample(resampling_str).mean()


## function to fill NA values
def fill_na(df):
    return df.interpolate().ffill().bfill()


## pipe data through all functions in list
if __name__ == '__main__':
    res = thread_first(cre_df(), nas_remover, (cre_resampler, '2H'), fill_na)
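The same pipeline could also be written with pandas' own pipe, for comparison:

# Equivalent pandas-native chaining:
# res = cre_df().pipe(nas_remover).pipe(cre_resampler, '2H').pipe(fill_na)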
Example #27
def update_single_index(index: Index, new: str, old: str, id: int) -> Index:
    return t.thread_first(index, (remove_index_entry, old, id),
                          (insert_index_entry, new, id))
Example #28
def __repr__(self):
    """Get a representation of the mapping"""
    return thread_first(self, iteritems, list, repr)