Exemplo n.º 1
0
 def test_load(self):
     """Check that load_results returns a 44 x 13 pandas DataFrame."""
     # The parenthesised form prints the same text on Python 2 and is
     # valid syntax on Python 3 (the bare print statement is not).
     print("TESTING: load data")
     result = Result()
     data = result.load_results(self.jsonfile)
     # Dedicated assertions report the offending value when they fail.
     self.assertIsInstance(data, pandas.DataFrame)
     self.assertEqual(data.shape[0], 44)
     self.assertEqual(data.shape[1], 13)
Exemplo n.º 2
0
 def setUp(self):
     """Create a scratch directory and load the results fixture."""
     install_root = get_installdir()
     self.pwd = install_root
     self.tmpdir = tempfile.mkdtemp()
     # The fixture path is built relative to the install directory.
     fixture_path = "%s/tests/data/results/results.json" % install_root
     self.jsonfile = os.path.abspath(fixture_path)
     loaded = Result()
     self.result = loaded
     loaded.load_results(self.jsonfile)
Exemplo n.º 3
0
 def test_load(self):
     """Check that load_results returns a 44 x 13 pandas DataFrame."""
     # The parenthesised form prints the same text on Python 2 and is
     # valid syntax on Python 3 (the bare print statement is not).
     print("TESTING: load data")
     result = Result()
     data = result.load_results(self.jsonfile)
     # Dedicated assertions report the offending value when they fail.
     self.assertIsInstance(data, pandas.DataFrame)
     self.assertEqual(data.shape[0], 44)
     self.assertEqual(data.shape[1], 13)
def download_data(data_loc,
                  access_token=None,
                  filters=None,
                  battery=None,
                  save=True,
                  url=None,
                  file_name=None):
    """Download results, clean them, and optionally write them to disk.

    Args:
        data_loc: Directory the output file is written into when saving.
        access_token: Passed positionally to ``Result``.
        filters: Passed to ``Result`` as ``filters``.
        battery: If truthy, the data is narrowed with
            ``result_filter(data, battery=battery)``.
        save: If truthy, write the cleaned data into ``data_loc``.
        url: Passed to ``Result`` as ``url``.
        file_name: Output file name; defaults to ``'mturk_data.json'``.
            Names ending in ``json`` are written with ``to_json``, names
            ending in ``pkl`` with ``to_pickle``. Any other ending is
            reported and nothing is written.

    Returns:
        The cleaned data taken from ``results.data``, with its index reset.
    """
    start_time = time()
    # Load results from the database
    results = Result(access_token, filters=filters, url=url)
    data = results.data
    # Expose fields of the per-row 'experiment' entries as their own
    # columns, but only when those columns are not already present.
    if 'experiment_exp_id' not in data.columns:
        data.loc[:, 'experiment_exp_id'] = [
            x['exp_id'] for x in data['experiment']
        ]
    if 'experiment_template' not in data.columns:
        data.loc[:, 'experiment_template'] = [
            x['template'] for x in data['experiment']
        ]
    if battery:
        data = result_filter(data, battery=battery)

    # Remove duplicates. The return value is not used, so this presumably
    # works in place -- confirm against remove_duplicates.
    remove_duplicates(data)

    # Remove a few mistakes from the data
    data = data.query('worker_id not in ["A254JKSDNE44AM", "A1O51P5O9MC5LX"]'
                      )  # Sandbox workers
    data.reset_index(drop=True, inplace=True)

    # If saving, pick the writer from the file name's ending.
    if save:
        if file_name is None:
            file_name = 'mturk_data.json'
        out_path = os.path.join(data_loc, file_name)
        if file_name.endswith('json'):
            data.to_json(out_path)
            print('Finished saving')
        elif file_name.endswith('pkl'):
            data.to_pickle(out_path)
            print('Finished saving')
        else:
            # Previously this case announced a successful save although no
            # file had been written. The data is still returned below, so
            # report the problem instead of raising.
            print('Data not saved: unsupported file extension in ' +
                  str(file_name))

    # Elapsed time divided by 60 (minutes, assuming time() returns seconds).
    finish_time = (time() - start_time) / 60
    print('Finished downloading data. Time taken: ' + str(finish_time))
    return data
Exemplo n.º 5
0
# Mark every column listed in drop_columns to be dropped by the filters.
for col in drop_columns:
    filters[col] = {'drop': True}

# Strip token from specified file. The context manager closes the file
# handle, which the bare open() call left open.
with open(token) as f:
    access_token = f.read().strip()

# Set up variables for the download request
battery = 'Self Regulation Retest Battery'
url = 'http://www.expfactory.org/new_api/results/62/'
file_name = 'mturk_retest_data.json'

fields = get_result_fields()

# Create results object
results = Result(access_token, filters=filters, url=url)

# Clean filters from results objects
results.clean_results(filters)

# Extract data from the results object
data = results.data

# Remainder of download_data
data = result_filter(data, battery=battery)
remove_duplicates(data)
data = data.query(
    'worker_id not in ["A254JKSDNE44AM", "A1O51P5O9MC5LX"]')  # Sandbox workers
data.reset_index(drop=True, inplace=True)

# Save data
Exemplo n.º 6
0
from expanalysis.experiments.utils import remove_duplicates
from expanalysis.results import get_filters, Result

# Example: download the results of one battery and pull out its data.

# default filters to clean up the downloaded results object
filters = get_filters()
# placeholder access token; get a real one at expfactory.org/token
access_token = "1111111111"
# url of the results API endpoint for the battery
# for example, if the battery url is: http://expfactory.org/batteries/999/
# the url below will be: http://www.expfactory.org/new_api/results/999/
url = 'http://www.expfactory.org/new_api/results/999/'

# create a results object from the token, filters and url
results = Result(access_token, filters=filters, url=url)

# the data itself lives on the results object's `data` attribute
data = results.data
# remove duplicates - there shouldn't be any, just a safety precaution
# (the return value is not used, so this presumably works in place - confirm)
remove_duplicates(data)
"""
The results.data object has one row per worker/experiment pair. 

The column "data" holds the data as a dictionary. Unfortunately, this
dictionary sits inside a 1-length list, and its "trialdata" entry is really what you want.

To index the first row's data you would index the results.data object like so:
>>> first_row_data = results.data.iloc[0]['data'][0]['trialdata']

Which can then be converted into a dataframe easily
>>> first_row_data = pandas.DataFrame(first_row_data)
"""
Exemplo n.º 7
0
class TestAPI(unittest.TestCase):
    """Tests for loading, filtering and extracting data with ``Result``."""

    def setUp(self):
        """Create a scratch directory and load the results fixture."""
        self.pwd = get_installdir()
        self.tmpdir = tempfile.mkdtemp()
        self.jsonfile = os.path.abspath("%s/tests/data/results/results.json" % self.pwd)
        self.result = Result()
        self.result.load_results(self.jsonfile)

    def tearDown(self):
        """Remove the scratch directory created in setUp."""
        shutil.rmtree(self.tmpdir)

    def test_check_numeric(self):
        """check_numeric accepts all-numeric lists and rejects the others."""
        not_numeric = ["hello", "goodbye"]
        numeric_float = [1.1, 2.2, 3.3]
        numeric_int = [1, 2, 3]
        not_numeric_mixed = ["hello", 2, 3.0]
        self.assertTrue(check_numeric(numeric_float))
        self.assertTrue(check_numeric(numeric_int))
        self.assertFalse(check_numeric(not_numeric))
        self.assertFalse(check_numeric(not_numeric_mixed))

    def test_filter(self):
        """Filtering on one exp_id keeps 20 rows with a single exp_id."""
        filtered = self.result.filter(field="experiment_exp_id", value="bridge_game")
        self.assertEqual(filtered.shape[0], 20)
        self.assertEqual(len(numpy.unique(filtered["experiment_exp_id"])), 1)

    def test_load(self):
        """load_results returns a 44 x 13 pandas DataFrame."""
        # print(...) with one argument behaves the same on Python 2 and is
        # valid on Python 3, unlike the bare print statement.
        print("TESTING: load data")
        result = Result()
        data = result.load_results(self.jsonfile)
        self.assertIsInstance(data, pandas.DataFrame)
        self.assertEqual(data.shape[0], 44)
        self.assertEqual(data.shape[1], 13)

    def test_experiment_extract(self):
        """Extracting the stroop experiment yields the expected frame."""
        print("TESTING: experiment extraction")
        experiment = self.result.extract_experiment(exp_id="stroop")
        experiment_columns = [
            u'block_duration', u'condition', u'correct', u'correct_response',
            u'current_trial', u'dateTime', u'feedback_duration',
            u'internal_node_id', u'key_press', u'possible_responses', u'responses',
            u'rt', u'stim_color', u'stim_duration', u'stim_word', u'stimulus',
            u'time_elapsed', u'timing_post_trial', u'trial_id', u'trial_index',
            u'trial_type', u'trialdata', u'uniqueid', u'view_history',
        ]
        self.assertIsInstance(experiment, pandas.DataFrame)
        self.assertEqual(experiment.shape[0], 747)
        # The old form, `self.assertTrue(x) in experiment.columns`, only
        # asserted that each name was truthy; membership was never checked.
        # NOTE(review): this check is now live -- confirm the expected
        # column list against the fixture if it starts failing.
        for column in experiment_columns:
            self.assertIn(column, experiment.columns)

    def test_survey_extract(self):
        """Extracting a survey yields a pandas DataFrame."""
        print("TESTING: survey extraction")
        survey = self.result.extract_experiment(exp_id="bis11_survey")
        self.assertIsInstance(survey, pandas.DataFrame)

    def test_game_extract(self):
        """Extracting the bridge game yields the expected frame."""
        print("TESTING: game extraction")
        game = self.result.extract_experiment(exp_id="bridge_game")
        game_columns = [
            u'current_trial', u'uniqueid', u'dateTime', u'ACC', u'RT', u'solution',
            u'problem_id', u'trial', u'finished', u'points', u'answer', u'n1',
            u'n2', u'problem',
        ]
        self.assertIsInstance(game, pandas.DataFrame)
        self.assertEqual(game.shape[0], 301)
        # Assert real column membership (the old comprehension did not).
        # NOTE(review): confirm the expected columns against the fixture.
        for column in game_columns:
            self.assertIn(column, game.columns)
Exemplo n.º 8
0
 def setUp(self):
     """Create a scratch directory and load the results fixture."""
     install_root = get_installdir()
     self.pwd = install_root
     self.tmpdir = tempfile.mkdtemp()
     # The fixture path is built relative to the install directory.
     fixture_path = "%s/tests/data/results/results.json" % install_root
     self.jsonfile = os.path.abspath(fixture_path)
     loaded = Result()
     self.result = loaded
     loaded.load_results(self.jsonfile)
Exemplo n.º 9
0
class TestAPI(unittest.TestCase):
    """Tests for loading, filtering and extracting data with ``Result``."""

    def setUp(self):
        """Create a scratch directory and load the results fixture."""
        self.pwd = get_installdir()
        self.tmpdir = tempfile.mkdtemp()
        self.jsonfile = os.path.abspath("%s/tests/data/results/results.json" % self.pwd)
        self.result = Result()
        self.result.load_results(self.jsonfile)

    def tearDown(self):
        """Remove the scratch directory created in setUp."""
        shutil.rmtree(self.tmpdir)

    def test_check_numeric(self):
        """check_numeric accepts all-numeric lists and rejects the others."""
        not_numeric = ["hello", "goodbye"]
        numeric_float = [1.1, 2.2, 3.3]
        numeric_int = [1, 2, 3]
        not_numeric_mixed = ["hello", 2, 3.0]
        self.assertTrue(check_numeric(numeric_float))
        self.assertTrue(check_numeric(numeric_int))
        self.assertFalse(check_numeric(not_numeric))
        self.assertFalse(check_numeric(not_numeric_mixed))

    def test_filter(self):
        """Filtering on one exp_id keeps 20 rows with a single exp_id."""
        filtered = self.result.filter(field="experiment_exp_id", value="bridge_game")
        self.assertEqual(filtered.shape[0], 20)
        self.assertEqual(len(numpy.unique(filtered["experiment_exp_id"])), 1)

    def test_load(self):
        """load_results returns a 44 x 13 pandas DataFrame."""
        # print(...) with one argument behaves the same on Python 2 and is
        # valid on Python 3, unlike the bare print statement.
        print("TESTING: load data")
        result = Result()
        data = result.load_results(self.jsonfile)
        self.assertIsInstance(data, pandas.DataFrame)
        self.assertEqual(data.shape[0], 44)
        self.assertEqual(data.shape[1], 13)

    def test_experiment_extract(self):
        """Extracting the stroop experiment yields the expected frame."""
        print("TESTING: experiment extraction")
        experiment = self.result.extract_experiment(exp_id="stroop")
        experiment_columns = [
            u"block_duration",
            u"condition",
            u"correct",
            u"correct_response",
            u"current_trial",
            u"dateTime",
            u"feedback_duration",
            u"internal_node_id",
            u"key_press",
            u"possible_responses",
            u"responses",
            u"rt",
            u"stim_color",
            u"stim_duration",
            u"stim_word",
            u"stimulus",
            u"time_elapsed",
            u"timing_post_trial",
            u"trial_id",
            u"trial_index",
            u"trial_type",
            u"trialdata",
            u"uniqueid",
            u"view_history",
        ]
        self.assertIsInstance(experiment, pandas.DataFrame)
        self.assertEqual(experiment.shape[0], 747)
        # The old form, `self.assertTrue(x) in experiment.columns`, only
        # asserted that each name was truthy; membership was never checked.
        # NOTE(review): this check is now live -- confirm the expected
        # column list against the fixture if it starts failing.
        for column in experiment_columns:
            self.assertIn(column, experiment.columns)

    def test_survey_extract(self):
        """Extracting a survey yields a pandas DataFrame."""
        print("TESTING: survey extraction")
        survey = self.result.extract_experiment(exp_id="bis11_survey")
        self.assertIsInstance(survey, pandas.DataFrame)

    def test_game_extract(self):
        """Extracting the bridge game yields the expected frame."""
        print("TESTING: game extraction")
        game = self.result.extract_experiment(exp_id="bridge_game")
        game_columns = [
            u"current_trial",
            u"uniqueid",
            u"dateTime",
            u"ACC",
            u"RT",
            u"solution",
            u"problem_id",
            u"trial",
            u"finished",
            u"points",
            u"answer",
            u"n1",
            u"n2",
            u"problem",
        ]
        self.assertIsInstance(game, pandas.DataFrame)
        self.assertEqual(game.shape[0], 301)
        # Assert real column membership (the old comprehension did not).
        # NOTE(review): confirm the expected columns against the fixture.
        for column in game_columns:
            self.assertIn(column, game.columns)
# Mark every column listed in drop_columns to be dropped by the filters.
for col in drop_columns:
    filters[col] = {'drop': True}

# Strip token from specified file. The context manager closes the file
# handle, which the bare open() call left open.
with open(token) as f:
    access_token = f.read().strip()

# Set up variables for the download request
battery = 'Self Regulation Retest Battery'
url = 'http://www.expfactory.org/new_api/results/62/'
file_name = 'mturk_retest_data.json'

fields = get_result_fields()

# Create results object
results = Result(access_token, filters=filters, url=url)

# Clean filters from results objects
results.clean_results(filters)

# Extract data from the results object
data = results.data

# Remainder of download_data
data = result_filter(data, battery=battery)
remove_duplicates(data)
data = data.query('worker_id not in ["A254JKSDNE44AM", "A1O51P5O9MC5LX"]')  # Sandbox workers
data.reset_index(drop=True, inplace=True)

# Save data as JSON inside data_dir
data.to_json(path.join(data_dir, file_name))