示例#1
0
 def setUp(self):
     """Build the per-test fixture.

     Records the install directory, creates a fresh temporary directory,
     resolves the absolute path of the bundled ``results.json`` file and
     loads that file into a ``Result`` instance stored on ``self.result``.
     """
     install_root = get_installdir()
     self.pwd = install_root
     self.tmpdir = tempfile.mkdtemp()

     # Path of the sample results file, relative to the install directory.
     sample_json = "%s/tests/data/results/results.json" % install_root
     self.jsonfile = os.path.abspath(sample_json)

     # Bind the attribute first so it exists even if loading fails.
     self.result = loader = Result()
     loader.load_results(self.jsonfile)
示例#2
0
 def test_load(self):
     """Check that ``load_results`` returns a DataFrame of the expected shape.

     A fresh ``Result`` is created (rather than reusing a shared one) and
     pointed at ``self.jsonfile``.
     """
     # Call form of print: valid on both Python 2 and Python 3.
     print("TESTING: load data")
     result = Result()
     data = result.load_results(self.jsonfile)

     # Dedicated assertions report the offending values on failure.
     self.assertIsInstance(data, pandas.DataFrame)
     # The sample file is expected to load as 44 rows by 13 columns.
     self.assertEqual(data.shape[0], 44)
     self.assertEqual(data.shape[1], 13)
def download_data(data_loc,
                  access_token=None,
                  filters=None,
                  battery=None,
                  save=True,
                  url=None,
                  file_name=None):
    """Download results, clean them up, and optionally write them to disk.

    Args:
        data_loc: Directory the output file is written into when saving.
        access_token: Forwarded positionally to ``Result``.
        filters: Forwarded to ``Result`` as ``filters``.
        battery: When truthy, the data is narrowed with
            ``result_filter(data, battery=battery)``.
        save: When truthy, write the cleaned data under ``data_loc``.
        url: Forwarded to ``Result`` as ``url``.
        file_name: Name of the output file. Defaults to
            ``'mturk_data.json'``. Names ending in ``json`` are written with
            ``to_json``; names ending in ``pkl`` with ``to_pickle``. Any
            other name is reported and nothing is written.

    Returns:
        The cleaned data (presumably a pandas DataFrame, judging by the
        methods used on it), with a fresh 0..n-1 index.
    """
    start_time = time()
    # Load results from the database.
    results = Result(access_token, filters=filters, url=url)
    data = results.data

    # Flatten the fields of the nested 'experiment' records that later
    # steps need, unless the columns are already present.
    if 'experiment_exp_id' not in data.columns:
        data.loc[:, 'experiment_exp_id'] = [
            x['exp_id'] for x in data['experiment']
        ]
    if 'experiment_template' not in data.columns:
        data.loc[:, 'experiment_template'] = [
            x['template'] for x in data['experiment']
        ]
    if battery:
        data = result_filter(data, battery=battery)

    # Remove duplicates. The return value is ignored, so this presumably
    # works in place -- TODO confirm against remove_duplicates.
    remove_duplicates(data)

    # Drop the sandbox workers, then renumber the rows. Chaining the
    # non-inplace reset_index avoids mutating the frame returned by query().
    data = data.query(
        'worker_id not in ["A254JKSDNE44AM", "A1O51P5O9MC5LX"]'
    ).reset_index(drop=True)

    if save:
        if file_name is None:
            file_name = 'mturk_data.json'
        # Choose the writer from the file name's ending.
        if file_name.endswith('json'):
            writer = data.to_json
        elif file_name.endswith('pkl'):
            writer = data.to_pickle
        else:
            writer = None

        if writer is None:
            # Previously this case printed 'Finished saving' although no
            # file had been written.
            print('Data not saved: unrecognized file extension in ' +
                  str(file_name))
        else:
            writer(os.path.join(data_loc, file_name))
            print('Finished saving')

    # Elapsed wall-clock time, in minutes.
    finish_time = (time() - start_time) / 60
    print('Finished downloading data. Time taken: ' + str(finish_time))
    return data
示例#4
0
# Mark every column listed in drop_columns to be dropped by the filters.
for col in drop_columns:
    filters[col] = {'drop': True}

# Read the access token from the specified file. The context manager closes
# the file handle as soon as the token has been read.
with open(token) as f:
    access_token = f.read().strip()

# Set up variables for the download request
battery = 'Self Regulation Retest Battery'
url = 'http://www.expfactory.org/new_api/results/62/'
file_name = 'mturk_retest_data.json'

# NOTE(review): `fields` is not used in the visible part of this script;
# kept in case later code relies on it.
fields = get_result_fields()

# Create results object
results = Result(access_token, filters=filters, url=url)

# Clean filters from results objects
results.clean_results(filters)

# Extract data from the results object
data = results.data

# Remainder of download_data: restrict to the battery, de-duplicate, drop
# the sandbox workers and renumber the rows.
data = result_filter(data, battery=battery)
# Return value ignored; presumably de-duplicates in place -- TODO confirm.
remove_duplicates(data)
# Chaining the non-inplace reset_index avoids mutating the frame returned
# by query().
data = data.query(
    'worker_id not in ["A254JKSDNE44AM", "A1O51P5O9MC5LX"]'  # Sandbox workers
).reset_index(drop=True)

# Save data