Example #1
def process_model_fall(exp, tag, force=False):
    src_dp = DATA_PATH.joinpath("model-raw", "%s_%s.dpkg" % (exp, tag))
    dest_dp = DATA_PATH.joinpath("model", "%s_%s_fall.dpkg" % (exp, tag))

    if not src_dp.exists():
        raise IOError("Package '%s' does not exist" % src_dp.relpath())

    if dest_dp.exists() and not force:
        return

    # load the raw model data
    logger.info("Loading '%s'", src_dp)
    dp = dpkg.DataPackage.load(src_dp)
    dp.load_resources()

    # compute the number of blocks that moved
    resp = process_model_nmoved(dp)

    # the destination datapackage already exists, so just load it and
    # update it
    if dest_dp.exists():
        new_dp = dpkg.DataPackage.load(dest_dp)
        new_dp.bump_minor_version()

        r1 = new_dp.get_resource("model.csv")
        r2 = new_dp.get_resource("metadata")

    # the destination datapackage doesn't exist, so we need to create
    # it from scratch
    else:
        new_dp = dpkg.DataPackage(name=dest_dp.name, licenses=['odc-by'])
        new_dp['version'] = '1.0.0'
        new_dp.add_contributor("Jessica B. Hamrick", "*****@*****.**")
        new_dp.add_contributor("Peter W. Battaglia", "*****@*****.**")
        new_dp.add_contributor("Joshua B. Tenenbaum", "*****@*****.**")

        r1 = dpkg.Resource(name="model.csv", fmt="csv", pth="./model.csv")
        r2 = dpkg.Resource(name="metadata", fmt="json")
        r2['mediaformat'] = "application/json"

        new_dp.add_resource(r1)
        new_dp.add_resource(r2)

    # update the resource data
    r1.data = resp
    r2.data = dict(source=src_dp, nfell_min=0, nfell_max=10)

    # create destination folders, if they don't exist
    if not dest_dp.dirname().exists():
        dest_dp.dirname().makedirs_p()

    # save
    new_dp.save(dest_dp.dirname())
    logger.info("Saved to '%s'" % dest_dp.relpath())
Example #2
def get_table():
    dbpath = DATA_PATH.joinpath("human", "workers.db")

    if not dbtools.Table.exists(dbpath, "workers"):
        logger.info("Creating new table 'workers'")
        tbl = dbtools.Table.create(dbpath, "workers", [('pid', str),
                                                       ('dataset', str)])

    else:
        logger.info("Loading existing table 'workers'")
        tbl = dbtools.Table(dbpath, "workers")

    return tbl
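
The dbtools.Table object returned above supports the select/insert/update calls used in the other examples here. A hypothetical round trip (the worker id and dataset name are made up):

tbl = get_table()
tbl.insert([{'pid': 'A1B2C3', 'dataset': 'mass_inference-G.dpkg'}])
rows = tbl.select(['pid'], where=("dataset=?", 'mass_inference-G.dpkg'))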
Example #3
def save_stability(exp):
    dp_path = DATA_PATH.joinpath("human", "%s.dpkg" % exp)

    # load the datapackage
    logger.info("Loading '%s'", dp_path.relpath())
    dp = dpkg.DataPackage.load(dp_path)

    # is there a turk.csv file?
    try:
        turk = dp.load_resource("turk.csv")
    except KeyError:
        turk = None

    # is there a participants.csv file?
    try:
        parts = dp.load_resource("participants.csv")
    except KeyError:
        parts = None

    # get the worker ids
    if turk is not None:
        workers = sorted(turk.reset_index()['WorkerId'].unique())
    elif parts is not None:
        workers = sorted(parts['pid'].unique())
    else:
        logger.warning("'%s' is not a Mechanical Turk experiment", exp)
        return

    # create a new dataframe
    df = pd.DataFrame({'pid': workers})
    df['dataset'] = str(dp_path.name)

    # load the table we're saving it to
    tbl = get_table()

    KEY = ['pid']
    tbl_dupes = tbl.select(KEY, where=("dataset=?", dp_path.name))
    tbl_dupes['pid'] = map(str, tbl_dupes['pid'])
    tbl_dupes = set([x[0] for x in tbl_dupes.to_records(index=False).tolist()])
    df_dupes = set([x[0] for x in df[KEY].to_records(index=False).tolist()])

    # get the unique values and the duplicated values, because we will
    # treat them differently
    unique = pd.Index(df_dupes.difference(tbl_dupes), name="pid")
    df_idx = df.set_index(KEY)

    if len(unique) > 0:
        logger.info("Adding %d new items", len(unique))
        tbl.insert(df_idx.ix[unique].reset_index().T.to_dict().values())
Example #4
def save_stability(exp, tag, force=False):
    dp_path = DATA_PATH.joinpath("model", "%s_%s_fall.dpkg" % (exp, tag))

    # load the stability data
    logger.info("Loading '%s'", dp_path.relpath())
    fb = get_stability(dp_path)
    if fb is None:
        return
    fb['dataset'] = str(dp_path.name)

    # load the table we're saving it to
    tbl = get_table()

    KEY = ['stimulus', 'kappa']
    tbl_dupes = tbl.select(KEY, where=("dataset=?", dp_path.name))
    tbl_dupes['stimulus'] = map(str, tbl_dupes['stimulus'])
    tbl_dupes = set(tbl_dupes.to_records(index=False).tolist())
    fb_dupes = set(fb[KEY].to_records(index=False).tolist())

    # get the unique values and the duplicated values, because we will
    # treat them differently
    unique = fb_dupes.difference(tbl_dupes)
    dupes = fb_dupes.intersection(tbl_dupes)
    fb_idx = fb.set_index(KEY)

    if len(unique) > 0:
        logger.info("Adding %d new items", len(unique))
        tbl.insert(fb_idx.ix[unique].reset_index().T.to_dict().values())

    if len(dupes) > 0 and force:
        logger.info("Updating %d old items", len(dupes))
        for dupe in dupes:
            newvals = fb_idx.ix[[dupe]].reset_index().irow(0).to_dict()
            newvals['kappa'] = float(newvals['kappa'])
            newvals['nfell'] = int(newvals['nfell'])
            newvals['stable'] = bool(newvals['stable'])
            tbl.update(newvals,
                       where=("stimulus=? AND kappa=? AND dataset=?",
                              dupe + (str(dp_path.name), )))
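
A hypothetical call for the model variant above (the experiment and tag names are placeholders): new (stimulus, kappa) rows are always inserted, and existing rows are only rewritten when force=True.

save_stability("mass_inference-G", "original", force=True)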
Example #5
def fetch(site_root, filename, experiment, force=False):
    """Download `filename` from `site_root` and save it in the
    human-raw/`experiment` data folder.

    """

    # get the url
    url = path(site_root).joinpath(filename)

    # get the destination to save the data, and don't do anything if
    # it exists already
    dest = DATA_PATH.joinpath("human-raw", experiment, "%s.csv" % url.name)
    if dest.exists() and not force:
        return

    # try to open it
    try:
        handler = urllib2.urlopen(url)
    except IOError as err:
        if getattr(err, 'code', None) == 401:
            logger.error("Server authentication failed.")
            raise err
        else:
            raise

    # download the data
    data = handler.read()
    logger.info("Fetched succesfully: %s", url)

    # make the destination folder if it doesn't exist
    if not dest.dirname().exists():
        dest.dirname().makedirs_p()

    # write out the data file
    with open(dest, "w") as fh:
        fh.write(data)
    logger.info("Saved to '%s'", dest.relpath())
Example #6
def find_bad_participants(exp, data):
    """Check participant data to make sure they pass the following
    conditions:

    1. No duplicated trials
    2. They finished the whole experiment
    3. They passed the posttest
    4. They had not already completed another version of the experiment

    Returns a list of dictionaries, one per participant; the 'note'
    field records why a participant failed, or is None if they passed
    all of the checks.

    """

    participants = []
    for (assignment, pid), df in data.groupby(['assignment', 'pid']):
        info = {
            'pid': pid,
            'assignment': assignment,
            'note': None,
            'timestamp': None,
            'percent': 0.0,
            'num_failed': np.nan
        }

        # go ahead and add this to our list now -- the dictionary is
        # mutable, so when we update stuff later the dictionary in the
        # list will also be updated
        participants.append(info)

        # get the time they started the experiment
        times = df['psiturk_time'].copy()
        times.sort_values(inplace=True)
        start_time = pd.to_datetime(datetime.fromtimestamp(times.iloc[0] /
                                                           1e3))
        info['timestamp'] = start_time

        # add condition/counterbalance
        cond = int(df['condition'].unique())
        cb = int(df['counterbalance'].unique())
        info['condition'] = cond
        info['counterbalance'] = cb

        # check for duplicated entries
        if exp == 'mass_inference-G':
            dupes = df.sort_values(by='psiturk_time')[['mode', 'trial', 'trial_phase']]\
                      .duplicated().any()
            if dupes:
                logger.warning("%s (%s, %s) has duplicate trials", pid, cond,
                               cb)
                info['note'] = "duplicate_trials"
                continue

        # check to make sure they actually finished
        try:
            prestim = df\
                .set_index(['mode', 'trial', 'trial_phase'])\
                .groupby(level='trial_phase')\
                .get_group('prestim')
        except IndexError:
            if df['trial_phase'].isnull().all():
                incomplete = True
            else:
                raise
        else:
            if exp == 'mass_inference-G':
                num_trials = 62
            elif exp == 'mass_inference-H':
                num_trials = 62
            elif exp == 'mass_inference-I':
                num_trials = 32
            else:
                raise ValueError("unhandled experiment: %s" % exp)

            incomplete = len(prestim) != num_trials
            info['percent'] = 100. * len(prestim) / num_trials

        if incomplete:
            # 'prestim' and 'num_trials' may be undefined when all trial
            # phases were null, so report the completion percentage instead
            logger.warning(
                "%s (%s, %s) is incomplete (completed %.1f%% of trials)",
                pid, cond, cb, info['percent'])
            info['note'] = "incomplete"
            continue

        # check to see if they passed the posttest
        posttest = df\
            .set_index(['mode', 'trial', 'trial_phase'])\
            .groupby(level=['mode', 'trial_phase'])\
            .get_group(('posttest', 'fall_response'))

        truth = (posttest['nfell'] > 0).astype(float)
        resp = (posttest['response'] > 4).astype(float)
        resp[posttest['response'] == 4] = np.nan
        failed = (truth != resp).sum()
        info['num_failed'] = failed

        if failed > 1:
            logger.warning("%s (%s, %s) failed posttest (%d wrong)", pid, cond,
                           cb, failed)
            info['note'] = "failed_posttest"
            continue

        # see if they already did (a version of) the experiment
        dbpath = DATA_PATH.joinpath("human", "workers.db")
        tbl = dbtools.Table(dbpath, "workers")
        datasets = tbl.select("dataset", where=("pid=?", pid))['dataset']
        exps = map(lambda x: path(x).namebase, datasets)
        if exp in exps:
            exps.remove(exp)
        if len(exps) > 0:
            logger.warning("%s (%s, %s) is a repeat worker", pid, cond, cb)
            info['note'] = "repeat_worker"
            continue

    return participants
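
Because every participant gets an entry (with note left as None when all checks pass), pulling out just the failures afterwards could look like this; `data` is assumed to already be a DataFrame with the columns the function expects.

participants = find_bad_participants("mass_inference-G", data)
bad = [p for p in participants if p['note'] is not None]
bad_pids = [p['pid'] for p in bad]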
Example #7
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)

    parser.add_argument("-e",
                        "--exp",
                        required=True,
                        help="Experiment version.")
    parser.add_argument("-f",
                        "--force",
                        action="store_true",
                        default=False,
                        help="Force all tasks to be put on the queue.")

    args = parser.parse_args()

    # paths to the data and where we will save it
    data_path = DATA_PATH.joinpath("human-raw", args.exp)
    dest_path = DATA_PATH.joinpath("human", "%s.dpkg" % args.exp)

    # don't do anything if the datapackage already exists
    if dest_path.exists() and not args.force:
        sys.exit(0)

    # create the directory if it doesn't exist
    if not dest_path.dirname().exists():
        dest_path.dirname().makedirs_p()

    # load the data
    meta, conds, fields = load_meta(data_path)
    data, participants = load_data(data_path, conds, fields)
    events = load_events(data_path)
    data = hashids(data)
Example #8
import os
import pickle
import re
import warnings
from copy import deepcopy
# External
import numpy as np
# Cogphysics
import cogphysics
import cogphysics.lib.hashtools as ht
# Scenesim
from scenesim.objects.sso import SSO
from scenesim.objects.pso import RBSO
# Local
from snippets import datapackage as dpkg
from mass import DATA_PATH

OLDPATH = DATA_PATH.joinpath("old/old-cogphysics-model-raw")
NEWPATH = DATA_PATH.joinpath("model-raw")

convtable_path = os.path.join(cogphysics.RESOURCE_PATH,
                              "cpobj_conv_stability.pkl")
with open(convtable_path, "r") as fh:
    convtable = pickle.load(fh)
conv_cache = {}


def convert_name(oldname):
    global conv_cache

    if oldname in conv_cache:
        newname = conv_cache[oldname]
    else:
Example #9
def process(exp, tag, overwrite=False):
    name = "%s_%s.dpkg" % (exp, tag)
    dp_path = DATA_PATH.joinpath("model-raw", name)

    if dp_path.exists() and not overwrite:
        return

    params, data = load(exp, tag)

    forces = params['forces']
    noises = params['noises']

    sim_meta = {
        'simulations': params['simulation'],
        'physics': params['physics'],
        'index_names': params['index_names'],
        'index_levels': params['index_levels'],
    }

    force_meta = {'index_names': ['phi', 'stimulus', 'sample']}
    force_meta['index_levels'] = {
        x: params['index_levels'][x]
        for x in force_meta['index_names']
    }

    noise_meta = {
        'index_names': ['sigma', 'stimulus', 'sample', 'object', 'position']
    }
    noise_meta['index_levels'] = {
        x: params['index_levels'][x]
        for x in noise_meta['index_names'][:-1]
    }
    noise_meta['index_levels']['position'] = ['x', 'y', 'z']

    # load the existing datapackage and bump the version
    if dp_path.exists():
        dp = dpkg.DataPackage.load(dp_path)
        dp.bump_minor_version()
        dp.clear_resources()

    # create the datapackage
    else:
        dp = dpkg.DataPackage(name=name, licenses=['odc-by'])
        dp['version'] = '1.0.0'
        dp.add_contributor("Jessica B. Hamrick", "*****@*****.**")
        dp.add_contributor("Peter W. Battaglia", "*****@*****.**")
        dp.add_contributor("Joshua B. Tenenbaum", "*****@*****.**")

    dp.add_resource(
        dpkg.Resource(name="simulations.npy",
                      fmt="npy",
                      data=data,
                      pth="./simulations.npy"))

    dp.add_resource(
        dpkg.Resource(name="forces.npy",
                      fmt="npy",
                      data=forces,
                      pth="./forces.npy"))

    dp.add_resource(
        dpkg.Resource(name="noises.npy",
                      fmt="npy",
                      data=noises,
                      pth="./noises.npy"))

    sm = dpkg.Resource(name="simulation_metadata", fmt="json", data=sim_meta)
    sm['mediaformat'] = 'application/json'
    dp.add_resource(sm)

    fm = dpkg.Resource(name="force_metadata", fmt="json", data=force_meta)
    fm['mediaformat'] = 'application/json'
    dp.add_resource(fm)

    fm = dpkg.Resource(name="noise_metadata", fmt="json", data=noise_meta)
    fm['mediaformat'] = 'application/json'
    dp.add_resource(fm)

    dp.save(dp_path.dirname())
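
A minimal sketch of calling process (the experiment and tag are placeholders); passing overwrite=True rebuilds the datapackage, bumping its minor version if it already exists.

process("mass_inference-G", "original", overwrite=True)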
Example #10
#!/usr/bin/env python

import pandas as pd
import sys
import os
from mass import DATA_PATH

root = os.path.join(os.path.dirname(__file__), '..', '..')
sys.path.append(os.path.join(root, 'lib'))
import datapackage as dpkg

data_G_path = DATA_PATH.joinpath("human", "mass_inference-G.dpkg")
data_H_path = DATA_PATH.joinpath("human", "mass_inference-H.dpkg")
data_I_path = DATA_PATH.joinpath("human", "mass_inference-I.dpkg")
data_path = DATA_PATH.joinpath("human", "mass_inference-merged.dpkg")

dp_G = dpkg.DataPackage.load(data_G_path)
dp_H = dpkg.DataPackage.load(data_H_path)
dp_I = dpkg.DataPackage.load(data_I_path)

dp = dpkg.DataPackage(name=data_path.name, licenses=['odc-by'])
dp['version'] = '1.0.0'
dp.add_contributor("Jessica B. Hamrick", "*****@*****.**")
dp.add_contributor("Thomas L. Griffiths", "*****@*****.**")
dp.add_contributor("Peter W. Battaglia", "*****@*****.**")
dp.add_contributor("Joshua B. Tenenbaum", "*****@*****.**")

# add event data, and save it as csv
events_G = dp_G.load_resource("events.csv")
events_G['version'] = 'G'
events_H = dp_H.load_resource("events.csv")
Example #11
import os
import pickle
from datetime import datetime
# External
import numpy as np
import pandas as pd
import yaml
import json
import dbtools
from path import path
# Cogphysics
import cogphysics
import cogphysics.lib.hashtools as ht
# Local
from snippets import datapackage as dpkg
from mass import DATA_PATH

OLDPATH = DATA_PATH.joinpath("old/old-cogphysics-human-raw-reorganized")
NEWPATH = DATA_PATH.joinpath("human")

convtable_path = os.path.join(cogphysics.RESOURCE_PATH,
                              "cpobj_conv_stability.pkl")
with open(convtable_path, "r") as fh:
    convtable = pickle.load(fh)
conv_cache = {}


def convert_name(oldname):
    global conv_cache

    if oldname in conv_cache:
        newname = conv_cache[oldname]
    else: