Example No. 1
def load_SVM_models(settings):
    pickle_path = settings['paths']['pickle']
    SVM3 = load_pickle(os.path.join(pickle_path, 'SVM_poly_3.pkl'))
    SVM4 = load_pickle(os.path.join(pickle_path, 'SVM_poly_4.pkl'))
    SVMr = load_pickle(os.path.join(pickle_path, 'SVM_rbf.pkl'))

    return SVM3, SVM4, SVMr
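
All of the snippets on this page lean on small pickle helper functions (load_pickle, store_pickle, lib.dump_to_pickle) whose definitions are not shown. A minimal sketch of what they might look like, with the names taken from the snippets and the implementation assumed:

import pickle


def load_pickle(path):
    # Assumed helper: read and return the object stored in the pickle file at `path`.
    with open(path, 'rb') as f:
        return pickle.load(f)


def store_pickle(obj, path):
    # Assumed helper: serialize `obj` to the pickle file at `path`.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)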
Example No. 2
def process_livestream_folder(audio_folder=AUDIO_FOLDER):

    print(f'Processing {audio_folder}...')
    infer_from_wav = infer.make_infer()  # Create inference function

    pkl_name = 'total_stream.pkl'

    list_ = lib.load_pickle(
        '../data/live_stream_predictions/'+pkl_name)

    for file in os.listdir(audio_folder):
        if file.endswith('.wav'):
            print(f'File: {file}')

            pred_ = process_livestream_file(audio_folder+file, infer_from_wav)
            print(pred_)

            list_.append(pred_)

            # Move the processed file to the archive subfolder
            # (assumes audio_folder + 'archive/' already exists):
            os.rename(audio_folder+file, audio_folder+'archive/'+file)

    lib.dump_to_pickle(list_, '../data/live_stream_predictions/'+pkl_name)

    return list_
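
Example No. 5 below builds a DataFrame with 'Timestamp' and 'Prediction' columns from this list, which suggests that process_livestream_file returns a (timestamp, prediction) pair. A hedged sketch of such a helper, assuming infer_from_wav maps a WAV path to a 0/1 cry label (the real implementation is not shown on this page):

import os
from datetime import datetime


def process_livestream_file(wav_path, infer_from_wav):
    # Hypothetical sketch only: derive a timestamp from the file's
    # modification time and run the inference function on the audio file.
    # The real helper may parse the timestamp from the filename instead.
    timestamp = datetime.fromtimestamp(os.path.getmtime(wav_path))
    prediction = infer_from_wav(wav_path)  # assumed to return 1 (cry) or 0 (no cry)
    return timestamp, prediction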
Example No. 3
def load_model():
    """
    Load a dictionary containing a trained cry/no-cry classification model.
    Keys: 'clf', 'scaler', 'classification_report'.

    :return: dict.
    """

    return lib.load_pickle(MODEL_DIR)
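
A short usage sketch for the returned dictionary, assuming 'scaler' is a fitted scikit-learn transformer and 'clf' a fitted classifier (the feature matrix X below is hypothetical):

import numpy as np

model = load_model()
X = np.zeros((1, 10))  # hypothetical feature matrix; the real feature count is not documented here
X_scaled = model['scaler'].transform(X)
prediction = model['clf'].predict(X_scaled)
print(model['classification_report'])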
Example No. 4
def main(operons, path, mibig_dict, settings):
    logger.debug('Calculating gene cluster pairwise distances')
    ssn_file = path + 'operon_pairs.ssn'
    ssn_file_filtered = path + 'operon_pairs_filtered.ssn'
    pfile = os.path.join(settings['paths']['pickles'], 'jaccard_dict.pkl')
    if settings['calculate_jaccard']:
        t0 = time.time()
        organized_operons, set_dict = organize_operons_by_domains(
            operons, mibig_dict)
        t1 = time.time()
        jaccard_dict_domains = calculate_all_jaccard_prefiltered(
            organized_operons, set_dict, settings['jaccard_cutoff'])
        t2 = time.time()
        jaccard_dict = convert_domainscore_to_operonscore(
            organized_operons, jaccard_dict_domains)
        t3 = time.time()
        logger.debug('Jaccard prefilter calculation time: %.4f' % (t1 - t0))
        logger.debug('Jaccard actual calculation time: %.4f' % (t2 - t1))
        logger.debug('Jaccard conversion time: %.4f' % (t3 - t2))
        if not os.path.isfile(ssn_file):
            write_ssn(jaccard_dict, ssn_file, i=0)
        store_pickle(jaccard_dict, pfile)
    else:
        jaccard_dict = load_pickle(pfile)
    if settings['jaccard_mcl']:
        mcl_file = path + 'jaccard_groups.mcl'
        if not os.path.isfile(mcl_file):
            r1 = run_mcl(ssn_file, mcl_file, settings['cores'])
        groups = read_mcl(mcl_file, settings['min_group_size'])
    else:
        groups = pairs_to_groups(jaccard_dict, settings['min_group_size'])
    collection_name = 'jaccard_groups'
    logger.debug('Setting gene cluster pairs')
    group_names, operon_collections = assign_groups_operons(
        operons, groups, mibig_dict, collection_name)
    filtered_pairs = filter_operon_pairs(jaccard_dict, groups)
    write_ssn(filtered_pairs, ssn_file_filtered, i=0)
    set_jaccard_operon_pairs(operons, mibig_dict, filtered_pairs)
    return group_names, operon_collections, jaccard_dict, filtered_pairs
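
The jaccard_cutoff setting above filters gene cluster pairs by the Jaccard similarity of their domain sets. For reference, a minimal sketch of the index itself; the prefiltered, all-pairs implementation in calculate_all_jaccard_prefiltered is not shown on this page:

def jaccard_similarity(domains_a, domains_b):
    # Jaccard index of two sets: |A ∩ B| / |A ∪ B|.
    union = domains_a | domains_b
    if not union:
        return 0.0
    return len(domains_a & domains_b) / len(union)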
Example No. 5
def plot_heatmap(save_filename=None):

    list_ = lib.load_pickle('../data/live_stream_predictions/total_stream.pkl')

    df = pd.DataFrame(list_, columns=['Timestamp', 'Prediction'])

    df['Hour'] = df.Timestamp.apply(lambda t: t.hour)
    df['Minute'] = df.Timestamp.apply(lambda t: t.minute)
    df['Day'] = df.Timestamp.apply(lambda t: t.day)
    df['Month'] = df.Timestamp.apply(lambda t: t.month)

    dfg = (df.groupby(['Month', 'Day', 'Hour', 'Minute'],
                      as_index=False)['Prediction'].sum())

    dfg['Min_cried'] = dfg['Prediction'] >= 2
    dfg['Hour_frac'] = dfg['Hour'] + dfg['Minute'] / 60

    dfg_hr = dfg.groupby(['Month', 'Day', 'Hour'],
                         as_index=False)['Min_cried'].sum()

    # Outer-merging these rows (Month=6, Day=22, hours 0-23) guarantees that
    # every hour column exists in the pivot below, even if no predictions
    # fell within a given hour.
    merge_frame = pd.DataFrame(np.concatenate(
        ([np.ones((24, ), dtype=int) * 6], [np.ones(
            (24, ), dtype=int) * 22], [np.arange(24)]),
        axis=0).transpose(),
                               columns=['Month', 'Day', 'Hour'])

    pivoted = (pd.merge(dfg_hr,
                        merge_frame,
                        how='outer',
                        on=['Month', 'Day', 'Hour'])
               .pivot(index='Day', columns='Hour', values='Min_cried')
               .fillna(0))

    labels = (
        pivoted[pivoted > 3].fillna(0)  # filter
        .astype(int).astype(str).replace('0', '').values)

    # labels = (pivoted[pivoted > 3]   # filter
    #           .astype(str)
    #           .replace('nan', '', regex=True)
    #           .replace('.0', '', regex=True)
    #           .values
    #           )

    with sns.plotting_context("poster"):
        plt.figure(figsize=(20, 6))

        # plt.title('Minutes cried')

        g = sns.heatmap(pivoted,
                        annot=labels,
                        fmt="",
                        linewidths=5,
                        cmap=sns.cubehelix_palette(5),
                        cbar_kws=dict(use_gridspec=False, location="top"))

        g.set_yticklabels([
            'Fri',
            'Sat',
            'Sun',
            'Mon',
            'Tue',
            'Wed',
        ],
                          rotation=0,
                          fontsize=25)

        g.set_xticks(range(25))  # 25 to label the interstices.
        g.set_xticklabels(
            (['mid-\nnight\n'] + [str(t) + 'a' for t in list(range(1, 12))] +
             ['noon'] + [str(t) + 'p'
                         for t in list(range(1, 12))] + ['mid-\nnight\n']),
            rotation=0,
            fontsize=20,
        )

        g.set_xlabel('Time of Day')

        fig = g.get_figure()

        if save_filename is not None:
            fig.savefig('../docs/' + save_filename)
Example No. 6
from lib import load_pickle, store_pickle
import os


def read_pfam(p):
    """Parse a Pfam-A.clans.tsv file and map each Pfam accession to its description."""
    d = {}
    with open(p) as f:
        for line in f:
            tabs = line.strip().split('\t')
            pfam_acc, clan_acc, clan_ID, pfam_ID, pfam_descr = tabs
            d[pfam_acc] = pfam_descr

    return d


if __name__ == '__main__':
    import sys
    args = sys.argv
    if len(args) < 2:
        print('USAGE: python update_pfam.py /path/to/Pfam-A.clans.tsv')
        sys.exit()
    else:
        path = args[1]
        d = read_pfam(path)
        data_path = '../data/Pickles/'
        descr_path = os.path.join(data_path, 'domain_descr.pkl')
        old_d = load_pickle(descr_path)
        old_d.update(d)
        store_pickle(old_d, descr_path)
Example No. 7
def get_training_data_dict():
    """
    Load training_data from pickle.
    :return: dict. keys: 'label', 'raw', 'vec', 'mat'
    """
    return lib.load_pickle(PICKLE_PATH)
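
A brief usage sketch, assuming 'mat' holds a feature matrix whose rows align with the 'label' entries (both are assumptions; only the key names are documented above):

data = get_training_data_dict()
X = data['mat']     # assumed: 2-D feature matrix, one row per sample
y = data['label']   # assumed: labels aligned with the rows of X
print(len(y), 'training samples loaded')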