def load_SVM_models(settings):
    """Load the three pre-trained SVM classifiers from the pickle directory.

    :param settings: dict; settings['paths']['pickle'] is the directory
        holding the serialized models.
    :return: tuple (poly-degree-3 SVM, poly-degree-4 SVM, RBF SVM).
    """
    base = settings['paths']['pickle']
    model_files = ('SVM_poly_3.pkl', 'SVM_poly_4.pkl', 'SVM_rbf.pkl')
    poly3, poly4, rbf = (load_pickle(os.path.join(base, fname))
                         for fname in model_files)
    return poly3, poly4, rbf
def process_livestream_folder(audio_folder=AUDIO_FOLDER):
    """Run cry inference on every .wav file in *audio_folder*.

    Each file's prediction is appended to the running list pickled at
    ../data/live_stream_predictions/total_stream.pkl, and each processed
    wav is then moved into the folder's 'archive' subdirectory.

    :param audio_folder: directory to scan for .wav files.
    :return: list. The updated (and re-pickled) prediction list.
    """
    print(f'Processing {audio_folder}...')
    infer_from_wav = infer.make_infer()  # Build the inference function once.
    pkl_path = '../data/live_stream_predictions/total_stream.pkl'
    predictions = lib.load_pickle(pkl_path)
    for fname in os.listdir(audio_folder):
        if not fname.endswith('.wav'):
            continue
        print(f'File: {fname}')
        # os.path.join is robust to audio_folder missing a trailing
        # slash; the previous string concatenation was not.
        wav_path = os.path.join(audio_folder, fname)
        pred = process_livestream_file(wav_path, infer_from_wav)
        print(pred)
        predictions.append(pred)
        # Move the processed file to archive so it is not handled twice.
        os.rename(wav_path, os.path.join(audio_folder, 'archive', fname))
    lib.dump_to_pickle(predictions, pkl_path)
    return predictions
def load_model():
    """Load the trained cry/no-cry classification bundle from MODEL_DIR.

    The pickled dict carries the keys 'clf', 'scaler' and
    'classification_report'.

    :return: dict.
    """
    model_bundle = lib.load_pickle(MODEL_DIR)
    return model_bundle
def main(operons, path, mibig_dict, settings):
    """Compute pairwise Jaccard distances between gene clusters, group them,
    and write the resulting similarity network (SSN) files.

    :param operons: operon/gene-cluster data passed through to the helpers.
    :param path: output directory prefix for the .ssn/.mcl files
        (presumably ends with a separator — concatenated, not joined).
    :param mibig_dict: MIBiG reference data passed through to the helpers.
    :param settings: dict of run options ('calculate_jaccard',
        'jaccard_cutoff', 'jaccard_mcl', 'cores', 'min_group_size',
        'paths').
    :return: tuple (group_names, operon_collections, jaccard_dict,
        filtered_pairs).
    """
    logger.debug('Calculating gene cluster pairwise distances')
    ssn_file = path + 'operon_pairs.ssn'
    ssn_file_filtered = path + 'operon_pairs_filtered.ssn'
    # Cached Jaccard scores live alongside the other pickles.
    pfile = os.path.join(settings['paths']['pickles'], 'jaccard_dict.pkl')
    if settings['calculate_jaccard']:
        t0 = time.time()
        organized_operons, set_dict = organize_operons_by_domains(
            operons, mibig_dict)
        t1 = time.time()
        # Prefilter on domain sets before scoring, then convert the
        # domain-level scores to operon-level scores.
        jaccard_dict_domains = calculate_all_jaccard_prefiltered(
            organized_operons, set_dict, settings['jaccard_cutoff'])
        t2 = time.time()
        jaccard_dict = convert_domainscore_to_operonscore(
            organized_operons, jaccard_dict_domains)
        t3 = time.time()
        logger.debug('Jaccard prefilter calculation time: %.4f' % (t1 - t0))
        logger.debug('Jaccard actual calculation time: %.4f' % (t2 - t1))
        logger.debug('Jaccard conversion time: %.4f' % (t3 - t2))
        # NOTE(review): the freshly computed scores are only written to
        # the SSN and pickle when the SSN does not exist yet — a rerun
        # with an existing SSN recomputes but does not persist. Confirm
        # this is intentional.
        if not os.path.isfile(ssn_file):
            write_ssn(jaccard_dict, ssn_file, i=0)
            store_pickle(jaccard_dict, pfile)
    else:
        # Reuse the cached scores from a previous run.
        jaccard_dict = load_pickle(pfile)
    if settings['jaccard_mcl']:
        # Group clusters with MCL, reusing an existing .mcl file if present.
        mcl_file = path + 'jaccard_groups.mcl'
        if not os.path.isfile(mcl_file):
            # NOTE(review): r1 (the run_mcl result) is never used.
            r1 = run_mcl(ssn_file, mcl_file, settings['cores'])
        groups = read_mcl(mcl_file, settings['min_group_size'])
    else:
        # Fall back to simple transitive grouping of the scored pairs.
        groups = pairs_to_groups(jaccard_dict, settings['min_group_size'])
    collection_name = 'jaccard_groups'
    logger.debug('Setting gene cluster pairs')
    group_names, operon_collections = assign_groups_operons(
        operons, groups, mibig_dict, collection_name)
    # Keep only pairs whose members ended up in the same group, and write
    # the filtered network plus the per-operon pair annotations.
    filtered_pairs = filter_operon_pairs(jaccard_dict, groups)
    write_ssn(filtered_pairs, ssn_file_filtered, i=0)
    set_jaccard_operon_pairs(operons, mibig_dict, filtered_pairs)
    return group_names, operon_collections, jaccard_dict, filtered_pairs
def plot_heatmap(save_filename=None):
    """Plot a day-by-hour heatmap of minutes cried from the live-stream log.

    Loads the accumulated (timestamp, prediction) records, aggregates them
    to minutes-cried per hour, and renders a seaborn heatmap. Cells with
    more than 3 minutes cried are annotated with their value.

    :param save_filename: if given, the figure is saved to
        ../docs/<save_filename>.
    """
    records = lib.load_pickle(
        '../data/live_stream_predictions/total_stream.pkl')
    df = pd.DataFrame(records, columns=['Timestamp', 'Prediction'])
    df['Hour'] = df.Timestamp.apply(lambda t: t.hour)
    df['Minute'] = df.Timestamp.apply(lambda t: t.minute)
    df['Day'] = df.Timestamp.apply(lambda t: t.day)
    df['Month'] = df.Timestamp.apply(lambda t: t.month)
    dfg = (df.groupby(['Month', 'Day', 'Hour', 'Minute'],
                      as_index=False)['Prediction'].sum())
    # A minute counts as "cried" when at least 2 positive predictions
    # fell inside it.
    dfg['Min_cried'] = dfg['Prediction'] >= 2
    dfg['Hour_frac'] = dfg['Hour'] + dfg['Minute'] / 60
    dfg_hr = dfg.groupby(['Month', 'Day', 'Hour'],
                         as_index=False)['Min_cried'].sum()
    # Pad with all 24 hours of month 6, day 22 so the pivot always spans
    # a full day of columns (presumably a known boundary day of the
    # recording period — TODO confirm).
    merge_frame = pd.DataFrame(np.concatenate(
        ([np.ones((24, ), dtype=int) * 6],
         [np.ones((24, ), dtype=int) * 22],
         [np.arange(24)]), axis=0).transpose(),
        columns=['Month', 'Day', 'Hour'])
    # Keyword arguments: positional pivot(index, columns, values) was
    # deprecated in pandas 1.1 and removed in pandas 2.0.
    pivoted = (pd.merge(dfg_hr, merge_frame, how='outer',
                        on=['Month', 'Day', 'Hour'])
               .pivot(index='Day', columns='Hour', values='Min_cried')
               .fillna(0))
    # Annotation labels: only cells with more than 3 minutes cried get a
    # number; everything else is blank.
    labels = (pivoted[pivoted > 3].fillna(0)
              .astype(int).astype(str).replace('0', '').values)
    with sns.plotting_context("poster"):
        plt.figure(figsize=(20, 6))
        g = sns.heatmap(pivoted, annot=labels, fmt="", linewidths=5,
                        cmap=sns.cubehelix_palette(5),
                        cbar_kws=dict(use_gridspec=False, location="top"))
        g.set_yticklabels([
            'Fri',
            'Sat',
            'Sun',
            'Mon',
            'Tue',
            'Wed',
        ], rotation=0, fontsize=25)
        g.set_xticks(range(25))  # 25 to label the interstices.
        g.set_xticklabels(
            (['mid-\nnight\n'] + [str(t) + 'a' for t in list(range(1, 12))] +
             ['noon'] + [str(t) + 'p' for t in list(range(1, 12))] +
             ['mid-\nnight\n']),
            rotation=0,
            fontsize=20,
        )
        g.set_xlabel('Time of Day')
        fig = g.get_figure()
        if save_filename is not None:
            fig.savefig('../docs/' + save_filename)
from lib import load_pickle, store_pickle
import os


def read_pfam(p):
    """Parse a Pfam-A clans TSV file into {pfam_accession: description}.

    :param p: path to Pfam-A.clans.tsv; each line has 5 tab-separated
        columns: pfam_acc, clan_acc, clan_ID, pfam_ID, pfam_descr.
    :return: dict mapping Pfam accession to its description.
    :raises ValueError: if a line does not have exactly 5 columns.
    """
    descriptions = {}
    with open(p) as f:
        # 'line' instead of the ambiguous single-letter 'l'.
        for line in f:
            pfam_acc, _clan_acc, _clan_id, _pfam_id, pfam_descr = \
                line.strip().split('\t')
            descriptions[pfam_acc] = pfam_descr
    return descriptions


if __name__ == '__main__':
    import sys
    args = sys.argv
    if len(args) < 2:
        print('USAGE: python update_pfam.py /path/to/Pfam-A.clans.tsv')
        exit()
    path = args[1]
    d = read_pfam(path)
    data_path = '../data/Pickles/'
    descr_path = os.path.join(data_path, 'domain_descr.pkl')
    # Merge the new descriptions into the existing pickle rather than
    # overwriting it, so accessions absent from this TSV are preserved.
    old_d = load_pickle(descr_path)
    old_d.update(d)
    store_pickle(old_d, descr_path)
def get_training_data_dict():
    """Load training_data from pickle.

    :return: dict. keys: 'label', 'raw', 'vec', 'mat'
    """
    training_data = lib.load_pickle(PICKLE_PATH)
    return training_data