def test_mention_context_batch_sampler():
    """Check that the sampler yields disjoint batches, in page order, covering every mention."""
    mentions_by_page = {
        0: [40, 50, 60, 70, 80, 90],
        1: [100],
        2: [0, 10, 20, 30]
    }
    batch_size = 5
    page_id_order = [2, 0, 1]
    cursor = get_mock_cursor(mentions_by_page)
    expected_mentions = _.mapcat(
        page_id_order, lambda page_id: mentions_by_page[page_id])
    sampler = MentionContextBatchSampler(cursor, page_id_order, batch_size)
    # mentions of page 2, which comes first in page_id_order
    first_page_mentions = {0, 10, 20, 30}
    seen_batch_nums = []
    seen_indexes = []
    for batch_num, batch_indexes in enumerate(sampler):
        # an index must never be yielded twice
        assert _.is_empty(_.intersection(batch_indexes, seen_indexes))
        seen_indexes.extend(batch_indexes)
        if batch_num == 0:
            # exactly one mention spills over from the next page (page 0)
            spillover = set(batch_indexes) - first_page_mentions
            assert len(spillover) == 1
            assert any(
                mention in spillover for mention in mentions_by_page[0])
            assert len(batch_indexes) == batch_size
        elif batch_num == 1:
            # second batch is drawn entirely from page 0
            assert _.is_empty(set(batch_indexes) - set(mentions_by_page[0]))
            assert len(batch_indexes) == batch_size
        elif batch_num == 2:
            assert batch_indexes == [100]
            assert len(batch_indexes) == 1
        seen_batch_nums.append(batch_num)
    # every mention was yielded, in exactly three batches
    assert _.is_empty(_.difference(expected_mentions, seen_indexes))
    assert seen_batch_nums == [0, 1, 2]
Пример #2
0
def plot_experiment(experiment_spec, experiment_df):
    '''
    Plot the variable specs vs fitness vector of an experiment, where each point is a trial.
    Builds a grid of scatter subplots: one row per fitness column ('fitness' + FITNESS_COLS)
    and one column per remaining column of experiment_df. Markers are colored by 'fitness'.
    ref colors: https://plot.ly/python/heatmaps-contours-and-2dhistograms-tutorial/#plotlys-predefined-color-scales
    @param {dict} experiment_spec Spec of the experiment; its 'name' is used in the figure title.
    @param {DataFrame} experiment_df Trial data containing 'fitness', FITNESS_COLS and the variable columns.
    @returns {Figure} fig The figure that was plotted.
    '''
    y_cols = ['fitness'] + FITNESS_COLS
    x_cols = ps.difference(experiment_df.columns.tolist(), y_cols)

    fig = viz.tools.make_subplots(rows=len(y_cols), cols=len(x_cols), shared_xaxes=True, shared_yaxes=True)
    fitness_sr = experiment_df['fitness']
    min_fitness = fitness_sr.values.min()
    max_fitness = fitness_sr.values.max()
    for row_idx, y in enumerate(y_cols):
        for col_idx, x in enumerate(x_cols):
            x_sr = experiment_df[x]
            # cast object columns to str so they are plotted as categories
            guard_cat_x = x_sr.astype(str) if x_sr.dtype == 'object' else x_sr
            trace = viz.go.Scatter(
                y=experiment_df[y], yaxis=f'y{row_idx+1}',
                x=guard_cat_x, xaxis=f'x{col_idx+1}',
                showlegend=False, mode='markers',
                marker={
                    'symbol': 'circle-open-dot', 'color': fitness_sr, 'opacity': 0.5,
                    # push cmin below the data range so the too-bright end of the colorscale goes unused
                    'cmin': min_fitness - 0.50 * (max_fitness - min_fitness), 'cmax': max_fitness,
                    # the scale name is 'YlGnBu' with a lowercase L (Yellow-Green-Blue)
                    'colorscale': 'YlGnBu', 'reversescale': True
                },
            )
            fig.append_trace(trace, row_idx + 1, col_idx + 1)
            # break long variable names into 20-char lines for the axis title
            fig.layout[f'xaxis{col_idx+1}'].update(title='<br>'.join(ps.chunk(x, 20)), zerolinewidth=1, categoryarray=sorted(guard_cat_x.unique()))
        fig.layout[f'yaxis{row_idx+1}'].update(title=y, rangemode='tozero')
    fig.layout.update(title=f'experiment graph: {experiment_spec["name"]}', width=max(600, len(x_cols) * 300), height=700)
    viz.plot(fig)
    return fig
Пример #3
0
def retro_analyze_trials(predir):
    '''Retro analyze every trial spec found directly under predir, dispatched through util.parallelize'''
    logger.info('Running retro_analyze_trials')
    session_spec_paths = glob(f'{predir}/*_s*_spec.json')
    trial_pattern_paths = glob(f'{predir}/*_t*_spec.json')
    # keep only trial specs by dropping any session spec paths
    trial_spec_paths = ps.difference(trial_pattern_paths, session_spec_paths)
    # each job receives a single-element argument tuple: the spec path
    job_args = [(spec_path,) for spec_path in trial_spec_paths]
    util.parallelize(_retro_analyze_trial, job_args, num_cpus=util.NUM_CPUS)
Пример #4
0
def plot_experiment(experiment_spec, experiment_df):
    '''
    Plot the variable specs vs fitness vector of an experiment, where each point is a trial.
    Builds a grid of scatter subplots: one row per fitness column ('fitness' + FITNESS_COLS)
    and one column per remaining column of experiment_df. Markers are colored by 'fitness'.
    ref colors: https://plot.ly/python/heatmaps-contours-and-2dhistograms-tutorial/#plotlys-predefined-color-scales
    @param {dict} experiment_spec Spec of the experiment; its 'name' is used in the figure title.
    @param {DataFrame} experiment_df Trial data containing 'fitness', FITNESS_COLS and the variable columns.
    @returns {Figure} fig The figure that was plotted.
    '''
    y_cols = ['fitness'] + FITNESS_COLS
    x_cols = ps.difference(experiment_df.columns.tolist(), y_cols)

    fig = viz.tools.make_subplots(rows=len(y_cols), cols=len(x_cols), shared_xaxes=True, shared_yaxes=True)
    fitness_sr = experiment_df['fitness']
    min_fitness = fitness_sr.values.min()
    max_fitness = fitness_sr.values.max()
    for row_idx, y in enumerate(y_cols):
        for col_idx, x in enumerate(x_cols):
            x_sr = experiment_df[x]
            # cast object columns to str so they are plotted as categories
            guard_cat_x = x_sr.astype(str) if x_sr.dtype == 'object' else x_sr
            trace = viz.go.Scatter(
                y=experiment_df[y], yaxis=f'y{row_idx+1}',
                x=guard_cat_x, xaxis=f'x{col_idx+1}',
                showlegend=False, mode='markers',
                marker={
                    'symbol': 'circle-open-dot', 'color': fitness_sr, 'opacity': 0.5,
                    # push cmin below the data range so the too-bright end of the colorscale goes unused
                    'cmin': min_fitness - 0.50 * (max_fitness - min_fitness), 'cmax': max_fitness,
                    # the scale name is 'YlGnBu' with a lowercase L (Yellow-Green-Blue)
                    'colorscale': 'YlGnBu', 'reversescale': True
                },
            )
            fig.append_trace(trace, row_idx + 1, col_idx + 1)
            # break long variable names into 20-char lines for the axis title
            fig.layout[f'xaxis{col_idx+1}'].update(title='<br>'.join(ps.chunk(x, 20)), zerolinewidth=1, categoryarray=sorted(guard_cat_x.unique()))
        fig.layout[f'yaxis{row_idx+1}'].update(title=y, rangemode='tozero')
    fig.layout.update(title=f'experiment graph: {experiment_spec["name"]}', width=max(600, len(x_cols) * 300), height=700)
    viz.plot(fig)
    return fig
Пример #5
0
def retro_analyze_trials(predir):
    '''Retro analyze every trial spec found directly under predir, one after another'''
    logger.info('Running retro_analyze_trials')
    session_spec_paths = glob(f'{predir}/*_s*_spec.json')
    trial_pattern_paths = glob(f'{predir}/*_t*_spec.json')
    # keep only trial specs by dropping any session spec paths
    for spec_path in ps.difference(trial_pattern_paths, session_spec_paths):
        _retro_analyze_trial(spec_path)
Пример #6
0
def plot_experiment(experiment_spec, experiment_df, metrics_cols):
    '''
    Scatter-plot every metric against every spec parameter of an experiment; each point is a trial.
    The grid has one row per entry of metrics_cols and one column per remaining column of
    experiment_df (excluding 'trial'). Markers are colored by the 'strength' column.
    The figure is plotted, saved as PNG under both graph_prepath and prepath, and returned.
    ref colors: https://plot.ly/python/heatmaps-contours-and-2dhistograms-tutorial/#plotlys-predefined-color-scales
    '''
    y_cols = metrics_cols
    x_cols = ps.difference(experiment_df.columns.tolist(), y_cols + ['trial'])
    num_rows, num_cols = len(y_cols), len(x_cols)
    fig = subplots.make_subplots(
        rows=num_rows, cols=num_cols,
        shared_xaxes=True, shared_yaxes=True,
        print_grid=False)
    strength_sr = experiment_df['strength']
    min_strength = strength_sr.min()
    max_strength = strength_sr.max()
    for row_num, y in enumerate(y_cols, start=1):
        for col_num, x in enumerate(x_cols, start=1):
            x_sr = experiment_df[x]
            # cast object columns to str so they are plotted as categories
            if x_sr.dtype == 'object':
                guard_cat_x = x_sr.astype(str)
            else:
                guard_cat_x = x_sr
            marker = {
                'symbol': 'circle-open-dot',
                'color': strength_sr,
                'opacity': 0.5,
                # lower cmin below the data so the too-bright start of the colorscale is skipped
                'cmin': min_strength - 0.5 * (max_strength - min_strength),
                'cmax': max_strength,
                'colorscale': 'YlGnBu',
                'reversescale': False
            }
            trace = go.Scatter(
                y=experiment_df[y], yaxis=f'y{row_num}',
                x=guard_cat_x, xaxis=f'x{col_num}',
                showlegend=False, mode='markers',
                marker=marker,
            )
            fig.add_trace(trace, row_num, col_num)
            # x-axis titles are set on the bottom row only
            fig.update_xaxes(
                title_text='<br>'.join(ps.chunk(x, 20)),
                zerolinewidth=1,
                categoryarray=sorted(guard_cat_x.unique()),
                row=num_rows, col=col_num)
        # y-axis titles are set on the first column only
        fig.update_yaxes(title_text=y, rangemode='tozero', row=row_num, col=1)
    fig.layout.update(
        title=f'experiment graph: {experiment_spec["name"]}',
        width=100 + 300 * num_cols,
        height=200 + 300 * num_rows)
    plot(fig)
    meta_spec = experiment_spec['meta']
    graph_prepath = meta_spec['graph_prepath']
    save_image(fig, f'{graph_prepath}_experiment_graph.png')
    # also keep a copy of this important graph directly under prepath
    prepath = meta_spec['prepath']
    save_image(fig, f'{prepath}_experiment_graph.png')
    return fig
Пример #7
0
 def get_not_registered_imeis(cls, imeis):
     """Return the normalized IMEIs that are not already registered.

     An IMEI counts as registered when an ApprovedImeis row exists for it
     with status 'whitelist'. Any exception raised while normalizing or
     querying propagates to the caller unchanged.

     :param imeis: IMEIs to check; normalized via ``cls.bulk_normalize``
     :return: list of normalized IMEIs with no whitelisted ApprovedImeis row
     """
     normalized_imeis = cls.bulk_normalize(imeis)
     whitelisted_rows = ApprovedImeis.query.filter(ApprovedImeis.imei.in_(normalized_imeis)). \
         filter(ApprovedImeis.status == 'whitelist').all()
     registered_imeis = [row.imei for row in whitelisted_rows]
     return pydash.difference(normalized_imeis, registered_imeis)
Пример #8
0
def calc_experiment_df(trial_data_dict, info_prepath=None):
    '''
    Build the experiment dataframe from trial_data_dict (transposed so its keys become rows).
    Columns are ordered as sorted config columns followed by METRICS_COLS; rows are sorted by 'strength' descending.
    When info_prepath is given, the dataframe is also written to CSV in two locations.
    '''
    experiment_df = pd.DataFrame(trial_data_dict).transpose()
    metric_cols = METRICS_COLS
    config_cols = sorted(ps.difference(experiment_df.columns.tolist(), metric_cols))
    experiment_df = experiment_df.reindex(config_cols + metric_cols, axis=1)
    experiment_df = experiment_df.sort_values(by=['strength'], ascending=False)
    if info_prepath is None:
        return experiment_df
    util.write(experiment_df, f'{info_prepath}_experiment_df.csv')
    # also write a copy to the same path with the "info/" segment removed
    util.write(experiment_df, f'{info_prepath.replace("info/", "")}_experiment_df.csv')
    return experiment_df
Пример #9
0
def retro_analyze_experiment(predir):
    '''
    Retro analyze an experiment from the spec and trial data saved under predir.
    The experiment spec is the first *_spec.json file in predir that is not a trial-level spec.
    Analysis is skipped when the experiment's trial_data_dict file does not exist.
    '''
    logger.info('Running retro_analyze_experiment')
    trial_spec_paths = glob(f'{predir}/*_t*_spec.json')
    # remove trial and session spec paths
    experiment_spec_paths = ps.difference(glob(f'{predir}/*_spec.json'), trial_spec_paths)
    experiment_spec_path = experiment_spec_paths[0]
    spec = util.read(experiment_spec_path)
    info_prepath = spec['meta']['info_prepath']
    trial_data_dict_path = f'{info_prepath}_trial_data_dict.json'
    # only run analysis if experiment had been ran, i.e. its trial data was saved
    if not os.path.exists(trial_data_dict_path):
        logger.info('Skipping retro_analyze_experiment since no experiment was ran.')
        return
    trial_data_dict = util.read(trial_data_dict_path)
    analysis.analyze_experiment(spec, trial_data_dict)
Пример #10
0
def retro_analyze_experiment(predir):
    '''
    Retro analyze an experiment from the spec and trial data saved under predir.
    Returns early when no *_trial_data_dict.json file exists under predir/info.
    '''
    logger.info('Running retro_analyze_experiment')
    trial_data_dict_paths = glob(f'{predir}/info/*_trial_data_dict.json')
    if ps.is_empty(trial_data_dict_paths):
        # only run analysis if experiment had been ran
        logger.info(
            'Skipping retro_analyze_experiment since no experiment was ran.')
        return
    trial_spec_paths = glob(f'{predir}/*_t*_spec.json')
    all_spec_paths = glob(f'{predir}/*_spec.json')
    # whatever is left after dropping trial-level specs is the experiment spec
    experiment_spec_path = ps.difference(all_spec_paths, trial_spec_paths)[0]
    spec = util.read(experiment_spec_path)
    info_prepath = spec['meta'].get('info_prepath')
    trial_data_dict = util.read(f'{info_prepath}_trial_data_dict.json')
    analysis.analyze_experiment(spec, trial_data_dict)
Пример #11
0
def analyze_experiment(experiment):
    '''
    Turn experiment.trial_data_dict into experiment_df, then log, plot and save it.
    The dict is transposed so its keys become rows. Columns are ordered as sorted
    config columns followed by FITNESS_COLS and 'fitness'. Rows are sorted by 'fitness' descending.
    @returns {DataFrame} experiment_df The ordered and sorted trial data.
    '''
    logger.info('Analyzing experiment')
    raw_df = pd.DataFrame(experiment.trial_data_dict).transpose()
    fitness_cols = FITNESS_COLS + ['fitness']
    config_cols = sorted(ps.difference(raw_df.columns.tolist(), fitness_cols))
    experiment_df = raw_df.reindex(config_cols + fitness_cols, axis=1)
    experiment_df = experiment_df.sort_values(by=['fitness'], ascending=False)
    logger.info(f'Experiment data:\n{experiment_df}')
    experiment_fig = plot_experiment(experiment.spec, experiment_df)
    save_experiment_data(experiment.spec, experiment.info_space, experiment_df, experiment_fig)
    return experiment_df
Пример #12
0
def analyze_experiment(experiment):
    '''
    Turn experiment.trial_data_dict into experiment_df, then log, plot and save it.
    The dict is transposed so its keys become rows. Columns are ordered as sorted
    config columns followed by FITNESS_COLS and 'fitness'. Rows are sorted by 'fitness' descending.
    @returns {DataFrame} experiment_df The ordered and sorted trial data.
    '''
    logger.info('Analyzing experiment')
    raw_df = pd.DataFrame(experiment.trial_data_dict).transpose()
    fitness_cols = FITNESS_COLS + ['fitness']
    config_cols = sorted(ps.difference(raw_df.columns.tolist(), fitness_cols))
    experiment_df = raw_df.reindex(config_cols + fitness_cols, axis=1)
    experiment_df = experiment_df.sort_values(by=['fitness'], ascending=False)
    logger.info(f'Experiment data:\n{experiment_df}')
    experiment_fig = plot_experiment(experiment.spec, experiment_df)
    save_experiment_data(experiment.spec, experiment.info_space, experiment_df, experiment_fig)
    return experiment_df
Пример #13
0
def test_difference(case, expected):
    """Check that `_.difference`, called with the unpacked `case` arguments, equals `expected`."""
    actual = _.difference(*case)
    assert actual == expected
Пример #14
0
# Load the four gene lists, one list entry per line of each file.
# Key abbreviations used below: BG = benignGain, BL = benignLoss,
# PG = pathoGain, PL = pathoLoss.
with open('files/gene lists/dbVarCNGsBenignPCGenes.txt', 'r') as dbVarCNGsBenignPCGenes:
    benignGain = dbVarCNGsBenignPCGenes.read().splitlines()

with open('files/gene lists/dbVarCNLsBenignPCGenes.txt', 'r') as dbVarCNLsBenignPCGenes:
    benignLoss = dbVarCNLsBenignPCGenes.read().splitlines()

with open('files/gene lists/dbVarCNGPeaksPathogenicPCGenes.txt', 'r') as dbVarCNGPeaksPathogenicPCGenes:
    pathoGain = dbVarCNGPeaksPathogenicPCGenes.read().splitlines()

with open('files/gene lists/dbVarCNLPeaksPathogenicPCGenes.txt', 'r') as dbVarCNLPeaksPathogenicPCGenes:
    pathoLoss = dbVarCNLPeaksPathogenicPCGenes.read().splitlines()

# Each lists[...] entry holds the genes present in every list named in its key
# and, for the entries below the first, absent from the lists not named.

# Genes present in all four lists.
lists['BGBLPGPL'] = pydash.intersection(benignGain, benignLoss, pathoGain, pathoLoss)

# Genes present in exactly three of the lists (excluded list is subtracted).
_BGBLPG = pydash.intersection(benignGain, benignLoss, pathoGain)
lists['BGBLPG'] = pydash.difference(_BGBLPG, pathoLoss)

_BGBLPL = pydash.intersection(benignGain, benignLoss, pathoLoss)
lists['BGBLPL'] = pydash.difference(_BGBLPL, pathoGain)

_BGPGPL = pydash.intersection(benignGain, pathoGain, pathoLoss)
lists['BGPGPL'] = pydash.difference(_BGPGPL, benignLoss)

_BLPGPL = pydash.intersection(benignLoss, pathoGain, pathoLoss)
lists['BLPGPL'] = pydash.difference(_BLPGPL, benignGain)

# Genes present in exactly two of the lists (both other lists are subtracted).
_BGBL = pydash.intersection(benignGain, benignLoss)
lists['BGBL'] = pydash.difference(_BGBL, pathoGain, pathoLoss)

_BGPG = pydash.intersection(benignGain, pathoGain)
lists['BGPG'] = pydash.difference(_BGPG, benignLoss, pathoLoss)
Пример #15
0
def enum_envs():
    '''List the ids of all gym-registered envs plus INCLUDE_ENVS, minus EXCLUDE_ENVS'''
    env_ids = [env_spec.id for env_spec in gym.envs.registration.registry.all()]
    env_ids.extend(INCLUDE_ENVS)
    return ps.difference(env_ids, EXCLUDE_ENVS)
Пример #16
0
async def _bulk_delete_with_fallback(client, messages):
    '''Bulk delete messages in chunks of 100; delete one by one those whose bulk delete failed.'''
    import asyncio

    not_bulk_deleted = []
    for chunk in pydash.chunk(messages, 100):
        try:
            await client.delete_messages(chunk)
        except asyncio.CancelledError:
            # never swallow task cancellation
            raise
        except Exception:
            # NOTE(review): the client library's own exception types are not in
            # scope here, so any failure of the bulk call triggers the fallback.
            # A persistent error will resurface from delete_message below.
            logger.exception('Bulk delete failed, deleting messages individually')
            not_bulk_deleted.extend(chunk)

    # Individually delete each message which could not be deleted via bulk
    for message_ in not_bulk_deleted:
        await client.delete_message(message_)


async def command_clear(message, client, *args, **kwargs):
    '''
    Handle the `!clear` command by deleting messages from the message's channel.

    With no `command_args` in kwargs, deletes the messages returned by
    `client.logs_from(message.channel)`. With a single int argument of at
    most 100, deletes that many messages. With a single int argument above
    100, sends an apology to the channel instead.

    Returns True when the command was handled (including the "more than one
    argument, do nothing" and "over 100" cases). Returns False when the
    message is not a `!clear` command or its single argument is not an int.
    '''
    if not message.content.startswith('!clear'):
        return False

    logger.info('Run command !clear')

    # If no arguments were found for the command, clear the entire channel
    command_args = kwargs.pop('command_args', [])
    if not command_args:
        messages = [m async for m in client.logs_from(message.channel)]
        await _bulk_delete_with_fallback(client, messages)
        return True

    command_args = list(command_args)

    # If there is more than one argument, do nothing
    if len(command_args) > 1:
        return True

    clear_amount = command_args[0]

    # Only an exact int is treated as a message count (bool is excluded)
    if type(clear_amount) is not int:
        return False

    # TODO: Support retrieving more than 100 messages
    if clear_amount > 100:
        await client.send_message(
            message.channel,
            'I can only clear 100 messages at a time.'
        )
        return True

    messages = [
        m async for m in client.logs_from(message.channel, limit=clear_amount)
    ]
    await _bulk_delete_with_fallback(client, messages)
    return True
def get_recon_managed_regions():
    """Return the airport codes of all regions, minus those of regions within ninety business days post launch."""
    region_metadata = get_region_metadata()
    # we only want to mail on regions 90 days post launch
    rms_managed_regions = get_regions_within_ninety_business_days_post_launch(region_metadata)
    all_airport_codes = get_airport_codes(region_metadata)
    rms_managed_airport_codes = get_airport_codes(rms_managed_regions)
    return pydash.difference(all_airport_codes, rms_managed_airport_codes)