########### Loading and organising data
### Load numpy array of GS (gold-standard) data (nImages*nMeasures*nExperts).
### Also returns AllimIDs to keep track of what image each row in datExp refers to.
datExp, AllimIDs = ihc.get_gs_data_2r20_numpy(s["expertIDs"])
# Each project release stores its image IDs in a different column of AllimIDs,
# so select the right one for the configured project.
project = s["project_short_name"]
if project == "tb2-r2.0":
    imIDs = list(AllimIDs.Utask_ID_PA)
elif project == "tb2-r2.0a":
    imIDs = list(AllimIDs.Utask_ID_a)
elif project == "tb2-r2.1":
    imIDs = list(AllimIDs.Utask_ID_2dot1)
elif project == "tb2-r2.1b":
    # NOTE(review): GStask_ID apparently isn't stored as int — cast for
    # consistency with the other ID columns; confirm against the data source.
    imIDs = list(AllimIDs.GStask_ID.astype(int))
else:
    # ValueError is a subclass of Exception, so existing handlers still match;
    # include the offending name to make the failure self-explanatory.
    raise ValueError("unknown project_short_name %r: add project to list to get imIDs"
                     % (project,))
# Parenthesized single-argument print works identically under Python 2 and 3
# (the original `print "...", len(...)` statement form is Python-2-only and
# inconsistent with the print(...) calls later in this file).
print("number of images included: %d" % len(imIDs))
### Load numpy array of user data. Also returns userIDs in a list.
(datUser, userIDs) = ihc.get_user_data(imIDs, project)
### simulate user data from expert data (alternative loading path, kept for reference)
# (datUser,userIDs) = ihc.simulate_user_data(datExp)

########### Descriptive stats on user OR expert data (optional, kept for reference)
# ihc.describe_expert_agreement(datExp, imIDs, expertIDs=expertIDs, project_short_name=project_short_name)

########### Aggregate expert data to get nImages*nMeasures
# Single consensus over ALL experts: nSamples=1 with replace=False and the
# sample size equal to the number of experts (axis 2 of datExp) means every
# expert is used exactly once (mean aggregation, per the original comment);
# bias_weight=[None, None] applies no weighting.
print("*** Expert Aggregation ***")
aggrExp = ihc.bootstrap_consensus(datExp, np.ma.size(datExp, axis=2),
                                  nSamples=1, replace=False, bias_weight=[None, None])

########### Aggregate user data to get nImages*nMeasures*nSamples
# Bootstrap-resample users, drawing as many users per sample as the original
# dataset contains (axis 2 of datUser); other parameters use ihc defaults.
print("*** User Aggregation ***")
aggrUser = ihc.bootstrap_consensus(datUser, nUsersPerSample=np.ma.size(datUser, axis=2))