Example No. 1
def main(args):
    if len(args.files) > 0:  # Data files passed on command line.
        if len(args.files) > 1:  # Demultiplexed data files.
            ac, oc, meta = process_fastq_files(args)
            expdefs = {None: [(file2col(f),) for f in args.files]}  # Use file names.
        else:  # Single multiplexed data file.
            if args.expdefs is None:
                print "Experiment definitions required."
                return 1
            with open(args.expdefs, 'rb') as f:
                expdefs = pik.load(f)
            ac, oc, meta = process_fastq_files(args, [v[0][0] for v in expdefs.values()])
        if args.cache is not None:  # Use cache to store parsed data.
            store = pytables.HDFStore(args.cache)
            store['metadata'] = meta
            store['allele_counts'] = ac
            store['other_counts'] = oc
            store.close()
    elif args.cache is not None:  # Load data from cache.
        cache = pytables.HDFStore(args.cache)
        ac = cache['allele_counts']
        oc = cache['other_counts']
        meta = cache['metadata']
        cache.close()
        if args.expdefs is None:
            expdefs = {None: [(c,) for c in list(ac.columns)]}
        else:
            with open(args.expdefs, 'rb') as f:
                expdefs = pik.load(f)
    else:
        print "Specify at least one data source."
        return 1

    data = convert_data(ac, meta)

    if args.cache is not None:
        cache = pytables.HDFStore(args.cache)
        if 'fitness' in cache and not args.recompute:  # Load cached fitness data.
            fitness = cache['fitness']
        else:  # Recompute and cache fitness data.
            fitness = pd.Panel(compute_fitness(args, data, expdefs))
            cache['fitness'] = fitness
        cache.close()
    else:  # Compute fitness data on-the-fly.
        fitness = compute_fitness(args, data, expdefs)

    if args.out is not None:  # Draw figures in output directory.
        draw_hist(fitness, os.path.join(args.out, 'fitmap_hist.png'))
        idx = meta.reset_index().set_index(['Pos', 'AA', 'Codon'])
        aa = list(set(meta['AA']) - {None, np.nan})
        cod = list(set(meta['Codon']) - {None, np.nan})
        pos = list(set(meta['Pos']) - {None, np.nan, 0})
        aamaps = compute_hmap(fitness['slope'], pos, aa, 'Pos', 'AA', idx, [np.nanmedian, np.nanstd])
        codmaps = compute_hmap(fitness['slope'], pos, cod, 'Pos', 'Codon', idx, [np.nanmedian, np.nanstd])
        draw_hmap(aamaps[0], aa, os.path.join(args.out, 'fitmap_aa_median.png'))
        draw_hmap(codmaps[0], cod, os.path.join(args.out, 'fitmap_cod_median.png'))

    return 0
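
The caching above follows HDFStore's dict-style protocol: assignment writes a frame under a key, indexing reads it back, and an 'in' test detects a previously cached result. A minimal sketch of that pattern (the file name and keys here are illustrative, not the original code):

import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3]})

# Write: dict-style assignment stores the frame under a key.
store = pd.HDFStore('cache_demo.h5')
store['allele_counts'] = df
store.close()

# Read back, mirroring the "'fitness' in cache" check above.
store = pd.HDFStore('cache_demo.h5')
if 'allele_counts' in store:
    cached = store['allele_counts']
store.close()
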
Example No. 2
def time_data_retrieval(number_of_tests=1):
    """Runs timeit tests on one day of GitHub data stored as
    - a CSV file
    - a Python pickle file
    - a PyTables HDF5 store
    - a MySQL table
    - a MongoDB collection
    """
    gh_csv = 'data/oneday.csv'
    gh_pick = 'data/oneday.pyd'
    gh_hd5 = 'data/git.h5'

    csv_timer = timeit.Timer(lambda: load_csv_df(gh_csv))
    print('csv:', csv_timer.timeit(number_of_tests))

    pickle_timer = timeit.Timer(lambda: load_pickle_df(gh_pick))
    print('pickle:', pickle_timer.timeit(number_of_tests))

    con = mysql_setup()
    query = 'select * from git.oneday;'
    mysql_timer = timeit.Timer(lambda: load_mysql_df(con, query))
    print('mysql:', mysql_timer.timeit(number_of_tests))

    store = pyt.HDFStore(gh_hd5)
    hd5_timer = timeit.Timer(lambda: load_hd5_df(store))
    print('hd5:', hd5_timer.timeit(number_of_tests))

    client = MongoClient()
    dbm = client['git']
    mong_timer = timeit.Timer(lambda: load_mongo_df(dbm))
    print('mongodb:', mong_timer.timeit(number_of_tests))
    
    test_df = load_hd5_df(store)
    store.close()
    print('Test data size:', test_df.shape)
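
The timing code assumes one small loader per backend; none are shown here. Plausible minimal versions with current pandas (these are assumptions matching the call signatures above, not the original helpers):

import pandas as pd

def load_csv_df(path):
    return pd.read_csv(path)

def load_pickle_df(path):
    return pd.read_pickle(path)

def load_hd5_df(store):
    return store['oneday']  # key written by setup_test_data()

def load_mysql_df(con, query):
    return pd.read_sql(query, con)

def load_mongo_df(dbm):
    return pd.DataFrame(list(dbm['gittest'].find()))
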
Example No. 3
def setup_test_data():
    """Replicates sample githubarchive data 100 times and saves it as a
    CSV file, a Python pickle file, an HDF5 store, a MySQL table, and a
    MongoDB collection. Run this first if you haven't run the timing
    tests before.
    """
    print('use one hour of sample data and replicate it 100 times')
    # Use only the repository data.
    onehr_df = ghd.load_local_archive_dataframe()
    onehr_json = ghd.load_local_archive_json()
    one_hr_repo_df = ghd.unnest_git_json(onehr_df)['repository']
    many_hr_repo_df = pn.DataFrame()
    for _ in range(100):
        many_hr_repo_df = many_hr_repo_df.append(one_hr_repo_df)
    print('saving dataframe with shape', many_hr_repo_df.shape)
    print('saving data to a csv file')
    many_hr_repo_df.to_csv('data/oneday.csv', encoding='utf-8')
    print('dumping data to python pickle')
    with open('data/oneday.pyd', 'wb') as f:
        pickle.dump(many_hr_repo_df, f)
    print('dumping data to mysql database')
    con = mysql_setup()
    many_hr_repo_df_clean = many_hr_repo_df.fillna('')
    sql.write_frame(many_hr_repo_df_clean, 'oneday', con, 'mysql')
    print('saving data to hdf5 filestore')
    store = pyt.HDFStore('data/git.h5')
    store.put('oneday', many_hr_repo_df)
    store.close()
    print('saving data to mongodb')
    client = MongoClient()
    dbm = client['git']
    collection = dbm['gittest']
    for _ in range(100):
        # Insert fresh copies so each batch gets new _id values.
        collection.insert(copy.deepcopy(onehr_json))
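
Both functions also call a mysql_setup() helper that is not shown. One plausible minimal version, assuming the MySQLdb driver and a local 'git' database (credentials are placeholders):

import MySQLdb

def mysql_setup():
    # Placeholder credentials; adjust for your server.
    return MySQLdb.connect(host='localhost', user='root', passwd='', db='git')
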
Example No. 4
@classmethod
def load_saved(cls, save_path):
    obj = cls()
    s = pt.HDFStore(save_path)
    # HDFStore keys come back with a leading '/', so strip it for the dict.
    obj.dframes = {k[1:]: s[k] for k in s.keys()}
    s.close()
    return obj
Example No. 5
def save(self, save_path):
    s = pt.HDFStore(save_path)
    for k, df in self.dframes.items():
        s[k] = df
    s.close()
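
Examples 4 and 5 are two halves of a round trip: save() writes every frame in self.dframes under its own key, and load_saved() rebuilds the dict from the store. A hypothetical usage sketch, assuming the two methods live on a small class (here called Frames) and that pt is the pandas alias:

import pandas as pt  # assumption: pt aliases pandas in these snippets

class Frames(object):
    """Hypothetical carrier for the save/load_saved methods above."""
    def __init__(self):
        self.dframes = {}

    def save(self, save_path):
        s = pt.HDFStore(save_path)
        for k, df in self.dframes.items():
            s[k] = df
        s.close()

    @classmethod
    def load_saved(cls, save_path):
        obj = cls()
        s = pt.HDFStore(save_path)
        obj.dframes = {k[1:]: s[k] for k in s.keys()}
        s.close()
        return obj

a = Frames()
a.dframes['prices'] = pt.DataFrame({'x': [1.0, 2.0]})
a.save('frames_demo.h5')

b = Frames.load_saved('frames_demo.h5')
assert list(b.dframes) == ['prices']
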
Example No. 6
#!/usr/bin/env python2.7
from pandas.io import pytables
import sys

store = pytables.HDFStore(sys.argv[1])
print(store)
store.close()
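
Run it as, e.g., "python show_store.py data/git.h5" (the script name is illustrative); printing the store shows its backing file path, and older pandas versions also listed the stored keys.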
Example No. 7
def get_hdf5(self):
    # Read prices back out and close the store instead of leaking the handle.
    store = tab.HDFStore('datastore.h5')
    df_prices = store['prices']
    store.close()
    return df_prices
Example No. 8
def store_hdf5(self, l_sym, start_date, end_date):
    # Fetch prices from Yahoo and persist them under the 'prices' key.
    df_prices = self.get_yahoo_data(l_sym, start_date, end_date)
    store = tab.HDFStore('datastore.h5')
    store['prices'] = df_prices
    store.close()
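
Examples 7 and 8 pair the same way: store_hdf5() fetches quotes and persists them, and get_hdf5() reloads them later. A self-contained sketch with a stubbed fetch (the class name QuoteStore and the fake data are illustrative, not the original code):

import pandas as tab  # assumption: tab aliases pandas in these snippets

class QuoteStore(object):
    """Hypothetical carrier for the two methods above."""

    def get_yahoo_data(self, l_sym, start_date, end_date):
        # Stand-in for the real Yahoo fetch: return a tiny fake frame.
        return tab.DataFrame({sym: [1.0, 2.0] for sym in l_sym})

    def store_hdf5(self, l_sym, start_date, end_date):
        df_prices = self.get_yahoo_data(l_sym, start_date, end_date)
        store = tab.HDFStore('datastore.h5')
        store['prices'] = df_prices
        store.close()

    def get_hdf5(self):
        store = tab.HDFStore('datastore.h5')
        df_prices = store['prices']
        store.close()
        return df_prices

qs = QuoteStore()
qs.store_hdf5(['AAPL', 'MSFT'], '2013-01-01', '2013-06-30')
print(qs.get_hdf5().head())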