Example #1
def crossvalidateModels(matDataDir, pyDataDir, runId):
    runDir = 'run' + str(runId) + '/'
    matModelFn = utils.findNewestFile(
        matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
    matModel = utils.loadMatFile(matModelFn)
    pyModel = utils.loadMatFile(pyModelFn)
    selector = np.concatenate((0 * np.ones((50)), 1 * np.ones(
        (50)), 2 * np.ones((50)), 3 * np.ones((50))),
                              axis=0)
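    # selector assigns fold ids 0-3 to four consecutive blocks of 50 TRs;
    # X below is just a dummy 4-element array so KFold.split yields those
    # fold ids, which np.in1d maps back onto the per-TR selector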
    X = np.array([1, 2, 3, 4])
    nfold = 4
    kf = KFold(nfold)
    mat_cs = np.zeros((nfold, 50))
    py_cs = np.zeros((nfold, 50))
    i = 0
    for train_index, test_index in kf.split(X):
        print("TRAIN:", train_index, "TEST:", test_index)
        trTrain = np.in1d(selector, train_index)
        trTest = np.in1d(selector, test_index)
        # matlab first
        mat_lrc = LogisticRegression()
        categoryTrainLabels = np.argmax(matModel.trainLabels[trTrain, :],
                                        axis=1)
        mat_lrc.fit(matModel.trainPats[trTrain, :], categoryTrainLabels)
        mat_predict = mat_lrc.predict_proba(matModel.trainPats[trTest, :])
        categ_sep = -1 * np.diff(mat_predict, axis=1)
        C0 = np.argwhere(
            np.argmax(matModel.trainLabels[trTest, :], axis=1) == 0)
        C1 = np.argwhere(
            np.argmax(matModel.trainLabels[trTest, :], axis=1) == 1)
        C1_label = C1.flatten()
        mat_correct_subtraction = categ_sep.flatten()
        mat_correct_subtraction[
            C1_label] = -1 * mat_correct_subtraction[C1_label]
        # python second
        py_lrc = LogisticRegression()
        categoryTrainLabels = np.argmax(pyModel.trainLabels[trTrain, :],
                                        axis=1)
        py_lrc.fit(pyModel.trainPats[trTrain, :], categoryTrainLabels)
        py_predict = py_lrc.predict_proba(pyModel.trainPats[trTest, :])
        categ_sep = -1 * np.diff(py_predict, axis=1)
        C0 = np.argwhere(
            np.argmax(pyModel.trainLabels[trTest, :], axis=1) == 0)
        C1 = np.argwhere(
            np.argmax(pyModel.trainLabels[trTest, :], axis=1) == 1)
        C1_label = C1.flatten()
        py_correct_subtraction = categ_sep.flatten()
        py_correct_subtraction[
            C1_label] = -1 * py_correct_subtraction[C1_label]
        mat_cs[i, :] = mat_correct_subtraction
        py_cs[i, :] = py_correct_subtraction

        i += 1
    mat_corr = mat_cs.flatten()
    py_corr = py_cs.flatten()
    return mat_corr, py_corr
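Every example on this page calls utils.findNewestFile, whose source is not shown. A minimal sketch of a compatible implementation, assuming it globs the pattern under a base directory and returns the most recently modified match (or None when nothing matches, as Example #13 below relies on):

import glob
import os

def findNewestFile(baseDir, filePattern):
    # glob the pattern relative to baseDir; an absolute pattern
    # (Example #10) or an empty baseDir (Example #15) also works,
    # since os.path.join handles both cases
    matches = glob.glob(os.path.join(baseDir, filePattern))
    if not matches:
        return None
    # newest = most recently modified
    return max(matches, key=os.path.getmtime)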
Example #2
def validatePatternsData(matDataDir, pyDataDir, runId):
    runDir = 'run' + str(runId) + '/'
    # Check how well raw_sm_filt_z values match
    matPatternsFn = utils.findNewestFile(
        matDataDir, runDir + 'patternsdata_' + str(runId) + '*.mat')
    pyBlkGrp1Fn = utils.findNewestFile(
        pyDataDir, 'blkGroup_r' + str(runId) + '_p1_*_py.mat')
    pyBlkGrp2Fn = utils.findNewestFile(
        pyDataDir, 'blkGroup_r' + str(runId) + '_p2_*_py.mat')
    print("Validating patternrs: Matlab {}, Python {} {}".format(
        matPatternsFn, pyBlkGrp1Fn, pyBlkGrp2Fn))

    matPatterns = utils.loadMatFile(matPatternsFn)
    pyBlkGrp1 = utils.loadMatFile(pyBlkGrp1Fn)
    pyBlkGrp2 = utils.loadMatFile(pyBlkGrp2Fn)
    mat_nTRs = matPatterns.raw.shape[0]
    pyp1_nTRs = pyBlkGrp1.raw.shape[0]
    pyp2_nTRs = pyBlkGrp2.raw.shape[0]
    py_nTRs = pyp1_nTRs + pyp2_nTRs
    mat_nVoxels = matPatterns.raw.shape[1]
    py_nVoxels = pyBlkGrp1.raw.shape[1]

    if mat_nTRs != py_nTRs or mat_nVoxels != py_nVoxels:
        raise ValidationError(
            "Number of TRs or Voxels don't match: nTRs m{} p{}, nVoxels m{} p{}"
            .format(mat_nTRs, py_nTRs, mat_nVoxels, py_nVoxels))

    pyCombined_raw_sm_filt_z = np.full((py_nTRs, py_nVoxels), np.nan)
    pyCombined_raw_sm_filt_z[0:pyp1_nTRs] = pyBlkGrp1.raw_sm_filt_z
    pyCombined_raw_sm_filt_z[pyp1_nTRs:] = pyBlkGrp2.raw_sm_filt_z

    corr = vutils.pearsons_mean_corr(matPatterns.raw_sm_filt_z,
                                     pyCombined_raw_sm_filt_z)
    print("raw_sm_filt_z correlation: {}".format(corr))
    if corr < 0.99:
        raise ValidationError(
            "Pearson correlation low for raw_sm_filt_z: {}".format(corr))

    # Check how well the models match
    matModelFn = utils.findNewestFile(
        matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
    matModel = utils.loadMatFile(matModelFn)
    pyModel = utils.loadMatFile(pyModelFn)
    corr = vutils.pearsons_mean_corr(matModel.weights, pyModel.weights)
    print("model weights correlation: {}".format(corr))
    if corr < 0.99:
        raise ValidationError(
            "Pearson correlation low for model weights: {}".format(corr))
    return
Example #3
def findPatternsDesignFile(session, dataDir, runId):
    fullPathRegex = getPatternsFileRegex(session,
                                         dataDir,
                                         runId,
                                         addRunDir=True)
    baseDir, filePattern = os.path.split(fullPathRegex)
    pdesignFile = findNewestFile(baseDir, filePattern)
    if pdesignFile is not None and pdesignFile != '':
        return pdesignFile
    fullPathRegex = getPatternsFileRegex(session, dataDir, runId)
    pdesignFile = findNewestFile('', fullPathRegex)
    if pdesignFile is None or pdesignFile == '':
        raise FileNotFoundError(
            "No files found matching {}".format(fullPathRegex))
    return pdesignFile
Example #4
 def getNewestFile(self, filePattern):
     baseDir, filePattern = os.path.split(filePattern)
     if not os.path.isabs(baseDir):
         # TODO - handle relative paths
         pass
     filename = findNewestFile(baseDir, filePattern)
     if filename is None:
         # No file matching pattern
         raise FileNotFoundError('No file found matching pattern {}'.format(filePattern))
     elif not os.path.exists(filename):
         raise FileNotFoundError('File missing after match {}'.format(filePattern))
     else:
         with open(filename, 'rb') as fp:
             data = fp.read()
         return data
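The TODO above is left unresolved in this excerpt. The getNewestFile handler in Example #16 resolves relative paths against the watcher's watch directory; a similar fix could slot in here (a sketch, assuming the object carries a watchDir attribute, which is not shown in this excerpt):

     if not os.path.isabs(baseDir):
         # hypothetical: anchor relative paths at the watch directory,
         # mirroring the getNewestFile handler in Example #16
         baseDir = os.path.join(self.watchDir, baseDir)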
Example #5
 def RetrieveData(self, msg):
     """Retrieve a specfic data file.
     Sets the file path based on the session directory settings and then
     calls the BaseModel retrieve function.
     """
     fileInfo = msg.fields.cfg
     dataDir = getSubjectDataDir(self.session.serverDataDir,
                                 fileInfo.subjectNum, fileInfo.subjectDay)
     fullFileName = os.path.join(dataDir, fileInfo.filename)
     if fileInfo.findNewestPattern not in (None, ''):
         fullFileName = utils.findNewestFile(dataDir,
                                             fileInfo.findNewestPattern)
         if fullFileName is None:
             reply = self.createReplyMessage(msg, MsgResult.Error)
             reply.data = "FindNewestFile failed: %s: no matches found" % (
                 fullFileName)
             return reply
     msg.fields.cfg = fullFileName
     reply = super().RetrieveData(msg)
     reply.fields.filename = os.path.basename(fullFileName)
     return reply
Example #6
 def getTrainedModel(self, sessionId, runId):
     """Retrieve a ML model trained in a previous run (runId). First see if it
     is cached in memory, if not load it from file and add it to the cache.
     """
     model = self.modelCache.get(runId, None)
     if model is None:
         # load it from file
         logging.info("modelCache miss on runId %d", runId)
         fname = os.path.join(self.dirs.dataDir,
                              getModelFilename(sessionId, runId))
         if self.session.useSessionTimestamp is True:
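              # wildcard the time portion of the (apparently timestamped)
              # sessionId so findNewestFile matches the newest model file
              # from the same session date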
             sessionWildcard = re.sub('T.*', 'T*', sessionId)
             filePattern = getModelFilename(sessionWildcard, runId)
             fname = utils.findNewestFile(self.dirs.dataDir, filePattern)
         model = utils.loadMatFile(fname)
         # loadMatFile should either raise an exception or return a value
         if model is None:
             raise StateError("Load model returned None: {}".format(fname))
     if sessionId == self.id_fields.sessionId:
         self.modelCache[runId] = model
     return model
Example #7
 def getPrevBlkGrp(self, sessionId, runId, blkGrpId):
     """Retrieve a block group patterns data, first see if it is cached
     in memory, if not load it from file and add it to the cache.
     """
     bgKey = getBlkGrpKey(runId, blkGrpId)
     prev_bg = self.blkGrpCache.get(bgKey, None)
     if prev_bg is None:
         # load it from file
         logging.info("blkGrpCache miss on <runId, blkGrpId> %s", bgKey)
         fname = os.path.join(self.dirs.dataDir,
                              getBlkGrpFilename(sessionId, runId, blkGrpId))
         if self.session.useSessionTimestamp is True:
             sessionWildcard = re.sub('T.*', 'T*', sessionId)
             filePattern = getBlkGrpFilename(sessionWildcard, runId,
                                             blkGrpId)
             fname = utils.findNewestFile(self.dirs.dataDir, filePattern)
         prev_bg = utils.loadMatFile(fname)
         # loadMatFile should either raise an exception or return a value
         if prev_bg is None:
             raise StateError("Load blkGrp returned None: {}".format(fname))
         if sessionId == self.id_fields.sessionId:
             self.blkGrpCache[bgKey] = prev_bg
     return prev_bg
Example #8
def getCategSepData(subjectNum):

    rtAttenPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo'

    subjectDir = rtAttenPath + '/' + 'subject' + str(subjectNum)
    # this will be less annoying but will just have to average and remember padding zeros
    all_matlab_evidence = np.zeros((9, 100, 3))
    all_python_evidence = np.zeros((9, 100, 3))
    for d in np.arange(3):
        subjectDay = d + 1
        matDataDir = rtAttenPath + '/' + 'subject' + str(
            subjectNum) + '/' + 'day' + str(subjectDay)
        pyDataDir = rtAttenPath + '/' + 'subject' + str(
            subjectNum) + '/' + 'day' + str(subjectDay)

        if subjectDay == 1:
            # then we have 7 runs
            nRuns = 7
            if subjectNum == 106:
                nRuns = 6
        elif subjectDay == 2:
            nRuns = 9
        elif subjectDay == 3:
            nRuns = 8
        #n_feedback_runs = nRuns - 1 # no feedback in first run
        print(np.arange(2, nRuns + 1))

        #def getCategSepData(matDataDir, pyDataDir, runId):
        for RUN in np.arange(2, nRuns + 1):
            runId = RUN  # runs are 1-indexed; skip run 1, which has no feedback
            runDir = 'run' + str(runId) + '/'
            # Check how well raw_sm_filt_z values match
            matPatternsFn = utils.findNewestFile(
                matDataDir, runDir + 'patternsdata_' + str(runId) + '*.mat')
            pyBlkGrp1Fn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId) + '_p1_*_py.mat')
            pyBlkGrp2Fn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId) + '_p2_*_py.mat')
            print("Getting rt classifer data from: Matlab {}, Python {} {}".
                  format(matPatternsFn, pyBlkGrp1Fn, pyBlkGrp2Fn))

            matPatterns = utils.loadMatFile(matPatternsFn)
            pyBlkGrp1 = utils.loadMatFile(pyBlkGrp1Fn)
            pyBlkGrp2 = utils.loadMatFile(pyBlkGrp2Fn)
            mat_nTRs = matPatterns.raw.shape[0]
            pyp1_nTRs = pyBlkGrp1.raw.shape[0]
            pyp2_nTRs = pyBlkGrp2.raw.shape[0]
            py_nTRs = pyp1_nTRs + pyp2_nTRs
            mat_nVoxels = matPatterns.raw.shape[1]
            py_nVoxels = pyBlkGrp1.raw.shape[1]

            if mat_nTRs != py_nTRs or mat_nVoxels != py_nVoxels:
                raise ValidationError(
                    "Number of TRs or Voxels don't match: nTRs m{} p{}, nVoxels m{} p{}"
                    .format(mat_nTRs, py_nTRs, mat_nVoxels, py_nVoxels))

            relevant_TR = np.argwhere(np.sum(matPatterns.regressor, 0))[:, 0]
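            # the second half of the labeled TRs appears to be the
            # realtime-feedback portion of the run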
            RT_TR = relevant_TR[int(len(relevant_TR) / 2):]
            mat_RT_CS = matPatterns.categoryseparation[:, RT_TR][0, :]

            pyCombined_categoryseparation = np.full((py_nTRs, ), np.nan)
            pyCombined_categoryseparation[
                0:pyp1_nTRs] = pyBlkGrp1.categoryseparation
            pyCombined_categoryseparation[
                pyp1_nTRs:] = pyBlkGrp2.categoryseparation
            py_RT_CS = pyCombined_categoryseparation[RT_TR]

            all_matlab_evidence[RUN - 2, :, d] = mat_RT_CS
            all_python_evidence[RUN - 2, :, d] = py_RT_CS
            # now you have 2 (100,) arrays with the category separation from that feedback run

    outfile = subjectDir + '/realtimeevidence'
    np.savez(outfile, mat=all_matlab_evidence, py=all_python_evidence)
Example #9
def validateModelsMatlabPython(subjectNum, subjectDay, usesamedata):

    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    configFile = dataPath + 'subject' + str(
        subjectNum) + '/usedscripts/PennCfg_Day' + str(subjectDay) + '.toml'
    cfg = loadConfigFile(configFile)
    #subjectDayDir = getSubjectDayDir(cfg.session.subjectNum, cfg.session.subjectDay)
    subjectDayDir = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(
        cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
    matDataDir = subjectDayDir  #os.path.join(cfg.session.dataDir, subjectDayDir)
    pyDataDir = matDataDir
    all_vals = np.zeros((100, 2, cfg.session.Runs[-1] - 1))
    usenewmodel = 1
    #usesamedata = 1 #whether or not to use same data as with matlab
    for runId in np.arange(1, cfg.session.Runs[-1]):
        runDir = 'run' + str(runId) + '/'
        matModelFn = utils.findNewestFile(
            matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
        matModel_train = utils.loadMatFile(matModelFn)
        # to find what matModel includes use matModel.keys() --> trainedModel, trainPats, trainLabels
        # for each model we have W [ nVoxel x 2 classes], biases [ 1 x 2 classes]
        # we can't apply this model to any of the examples in this run, but let's apply it to the first 4 blocks of the next run
        # now load testing data from the next run to test it on
        pyModel_train = utils.loadMatFile(pyModelFn)
        # INSTEAD MAKE NEW MODEL
        print(runId)
        if usenewmodel:
            lrc1 = LogisticRegression(penalty='l2', solver='sag', max_iter=300)
            lrc2 = LogisticRegression(penalty='l2', solver='sag', max_iter=300)
            if usesamedata:
                lrc1.fit(matModel_train.trainPats,
                         pyModel_train.trainLabels[:, 0])
                lrc2.fit(matModel_train.trainPats,
                         pyModel_train.trainLabels[:, 1])
            else:
                lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:,
                                                                            0])
                lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:,
                                                                            1])
            newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
            newTrainedModel.trainedModel = StructDict({})
            newTrainedModel.trainedModel.weights = np.concatenate(
                (lrc1.coef_.T, lrc2.coef_.T), axis=1)
            newTrainedModel.trainedModel.biases = np.concatenate(
                (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
            newTrainedModel.trainPats = pyModel_train.trainPats
            newTrainedModel.trainLabels = pyModel_train.trainLabels
        # now load the models to test on
        matModelFn = utils.findNewestFile(
            matDataDir, 'run' + str(runId + 1) + '/' + 'trainedModel_' +
            str(runId + 1) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
        matModel_test = utils.loadMatFile(matModelFn)
        pyModel_test = utils.loadMatFile(pyModelFn)
        nTRTest = 100
        mat_test_data = matModel_test.trainPats[nTRTest:, :]
        py_test_data = pyModel_test.trainPats[nTRTest:, :]
        test_labels = matModel_test.trainLabels[nTRTest:, :]
        mat_cs = np.zeros((nTRTest, 1))
        py_cs = np.zeros((nTRTest, 1))
        for t in np.arange(nTRTest):
            categ = np.flatnonzero(test_labels[t, :])
            otherCateg = (categ + 1) % 2
            _, _, _, activations_mat = Test_L2_RLR_realtime(
                matModel_train, mat_test_data[t, :], test_labels[t, :])
            mat_cs[t] = activations_mat[categ] - activations_mat[otherCateg]
            if not usenewmodel:
                if not usesamedata:
                    _, _, _, activations_py = Test_L2_RLR_realtime(
                        pyModel_train, py_test_data[t, :], test_labels[t, :])
                else:
                    _, _, _, activations_py = Test_L2_RLR_realtime(
                        pyModel_train, mat_test_data[t, :], test_labels[t, :])
            else:
                if not usesamedata:
                    _, _, _, activations_py = Test_L2_RLR_realtime(
                        newTrainedModel, py_test_data[t, :], test_labels[t, :])
                else:
                    _, _, _, activations_py = Test_L2_RLR_realtime(
                        newTrainedModel, mat_test_data[t, :],
                        test_labels[t, :])
            py_cs[t] = activations_py[categ] - activations_py[otherCateg]
        all_vals[:, 0, runId - 1] = mat_cs[:, 0]
        all_vals[:, 1, runId - 1] = py_cs[:, 0]
        #plt.figure()
        #if usenewmodel:
        #	plt.plot(matModel_train.weights[:,0],newTrainedModel.weights[:,0], '.')
        #else:
        #	plt.plot(matModel_train.weights[:,0],pyModel_train.weights[:,0], '.')
        #plt.xlim([-.02 ,.02])
        #plt.ylim([-.02 ,.02])
        #plt.xlabel('MATLAB')
        #plt.ylabel('PYTHON')
        #plt.show()
    all_mat_ev = np.reshape(all_vals[:, 0, :],
                            ((cfg.session.Runs[-1] - 1) * 100, 1))
    all_py_ev = np.reshape(all_vals[:, 1, :],
                           ((cfg.session.Runs[-1] - 1) * 100, 1))
    fig, ax = plt.subplots(figsize=(12, 7))
    plt.plot(all_mat_ev, all_py_ev, '.')
    plt.plot([-5, 5], [-5, 5], '--k')
    plt.title('S%i MAT x PY CORR = %4.4f' %
              (cfg.session.subjectNum,
               scipy.stats.pearsonr(all_mat_ev, all_py_ev)[0][0]))
    plt.xlabel('MATLAB')
    plt.ylabel('PYTHON')
    plt.xlim([-1.5, 1.5])
    plt.ylim([-1.5, 1.5])
    plt.show()

    plt.figure()
    plt.hist(all_mat_ev, alpha=0.6, label='matlab')
    plt.hist(all_py_ev, alpha=0.6, label='python')
    plt.xlabel('Correct - Incorrect Activation')
    plt.ylabel('Frequency')
    plt.title('S%i MAT x PY CORR = %4.4f' %
              (cfg.session.subjectNum,
               scipy.stats.pearsonr(all_mat_ev, all_py_ev)[0][0]))
    plt.legend()
    plt.show()
Example #10
 def test_pathInPattern(self):
     print("Test findNewestFile path embedded in pattern:")
     filename = utils.findNewestFile('/tmp/testdir',
                                     '/tmp/testdir/file1_20170101*')
     self.assert_result_matches_filename(filename)
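The findNewestFile tests here and in Examples #12, #13, #15 and #19 assume a /tmp/testdir seeded with files such as file1_20170101*. The fixture is not shown in these excerpts; a plausible unittest-style setup might look like:

 def setUp(self):
     # hypothetical fixture: create the file the tests glob for
     os.makedirs('/tmp/testdir', exist_ok=True)
     self.filename = '/tmp/testdir/file1_20170101.txt'
     with open(self.filename, 'w') as fp:
         fp.write('test')

 def assert_result_matches_filename(self, filename):
     # hypothetical helper implied by the tests above
     assert filename == self.filename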
Example #11
    cfg = loadConfigFile(configFile)
    subjectDayDir = '/data/jux/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
    pyDataDir = subjectDayDir
    if subjectDay == 1:
        nRuns = 7
        if subjectNum == 106:
            nRuns = 6
    elif subjectDay == 2:
        nRuns = 9
    elif subjectDay == 3:
        nRuns = 8
    #nruns = len(cfg.session.Runs) - 1
    for runId in np.arange(1, nRuns):
        print(runId)
        runDir = 'run' + str(runId) + '/'
        pyModelFn = utils.findNewestFile(pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
        # to find what matModel includes use matModel.keys() --> trainedModel, trainPats, trainLabels
        # for each model we have W [ nVoxel x 2 classes], biases [ 1 x 2 classes]
        # we can't apply this model to any of the examples in this run, but let's apply it to the first 4 blocks of the next run
        # now load testing data from the next run to test it on
        pyModel_train = utils.loadMatFile(pyModelFn)
        # INSTEAD MAKE NEW MODEL
        lrc1 = LogisticRegression(penalty='l2', solver='saga', max_iter=300)
        lrc2 = LogisticRegression(penalty='l2', solver='saga', max_iter=300)

        lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 0])
        lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 1])
        newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
        newTrainedModel.trainedModel = StructDict({})
        newTrainedModel.trainedModel.weights = np.concatenate((lrc1.coef_.T, lrc2.coef_.T), axis=1)
        newTrainedModel.trainedModel.biases = np.concatenate((lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
Example #12
 def test_pathPartiallyInPattern(self):
     print("Test findNewestFile path partially in pattern:")
     filename = utils.findNewestFile('/tmp', 'testdir/file1_20170101*')
     self.assert_result_matches_filename(filename)
Example #13
 def test_noMatchingFiles(self):
     print("Test findNewestFile no matching files:")
     filename = utils.findNewestFile('/tmp/testdir/', 'no_such_file')
     assert filename is None
Example #14
def validateModelsMatlabPython(subjectNum, subjectDay):

    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    configFile = dataPath + 'subject' + str(
        subjectNum) + '/usedscripts/PennCfg_Day' + str(subjectDay) + '.toml'
    cfg = loadConfigFile(configFile)
    #subjectDayDir = getSubjectDayDir(cfg.session.subjectNum, cfg.session.subjectDay)
    subjectDayDir = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(
        cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
    matDataDir = subjectDayDir  #os.path.join(cfg.session.dataDir, subjectDayDir)
    pyDataDir = matDataDir
    all_vals = np.zeros((100, 2, cfg.session.Runs[-1] - 1))
    for runId in np.arange(1, cfg.session.Runs[-1]):
        runDir = 'run' + str(runId) + '/'
        matModelFn = utils.findNewestFile(
            matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
        matModel_train = utils.loadMatFile(matModelFn)
        # to find what matModel includes use matModel.keys() --> trainedModel, trainPats, trainLabels
        # for each model we have W [ nVoxel x 2 classes], biases [ 1 x 2 classes]
        # we can't apply this model to any of the examples in this run, but let's apply it to the first 4 blocks of the next run
        # now load testing data from the next run to test it on
        pyModel_train = utils.loadMatFile(pyModelFn)
        # now load the models to test on
        matModelFn = utils.findNewestFile(
            matDataDir, 'run' + str(runId + 1) + '/' + 'trainedModel_' +
            str(runId + 1) + '*.mat')
        pyModelFn = utils.findNewestFile(
            pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
        matModel_test = utils.loadMatFile(matModelFn)
        pyModel_test = utils.loadMatFile(pyModelFn)
        nTRTest = 100
        mat_test_data = matModel_test.trainPats[nTRTest:, :]
        py_test_data = pyModel_test.trainPats[nTRTest:, :]
        test_labels = matModel_test.trainLabels[nTRTest:, :]
        mat_cs = np.zeros((nTRTest, 1))
        py_cs = np.zeros((nTRTest, 1))
        for t in np.arange(nTRTest):
            categ = np.flatnonzero(test_labels[t, :])
            otherCateg = (categ + 1) % 2
            _, _, _, activations_mat = Test_L2_RLR_realtime(
                matModel_train, mat_test_data[t, :], test_labels[t, :])
            mat_cs[t] = activations_mat[categ] - activations_mat[otherCateg]
            _, _, _, activations_py = Test_L2_RLR_realtime(
                pyModel_train, py_test_data[t, :], test_labels[t, :])
            py_cs[t] = activations_py[categ] - activations_py[otherCateg]
        all_vals[:, 0, runId - 1] = mat_cs[:, 0]
        all_vals[:, 1, runId - 1] = py_cs[:, 0]
    all_mat_ev = np.reshape(all_vals[:, 0, :],
                            ((cfg.session.Runs[-1] - 1) * 100, 1))
    all_py_ev = np.reshape(all_vals[:, 1, :],
                           ((cfg.session.Runs[-1] - 1) * 100, 1))
    #fix,ax = plt.subplots(figsize=(12,7))
    #plt.plot(all_mat_ev,all_py_ev, '.')
    #plt.plot([-5,5],[-5,5], '--k')
    #plt.title('S%i MAT x PY CORR = %4.4f' % (cfg.session.subjectNum, scipy.stats.pearsonr(all_mat_ev,all_py_ev)[0][0]))
    #plt.xlabel('MATLAB')
    #plt.ylabel('PYTHON')
    #plt.xlim([-1.5,1.5])
    #plt.ylim([-1.5,1.5])
    #plt.show()

    plt.figure()
    plt.hist(all_mat_ev, alpha=0.6, label='matlab')
    plt.hist(all_py_ev, alpha=0.6, label='python')
    plt.xlabel('Correct - Incorrect Activation')
    plt.ylabel('Frequency')
    plt.title('S%i MAT x PY CORR = %4.4f' %
              (cfg.session.subjectNum,
               scipy.stats.pearsonr(all_mat_ev, all_py_ev)[0][0]))
    plt.legend()
    plt.show()
Example #15
 def test_emptyPath(self):
     print("Test findNewestFile empty path:")
     filename = utils.findNewestFile('', '/tmp/testdir/file1_20170101*')
     self.assert_result_matches_filename(filename)
Example #16
 def on_message(client, message):
     fileWatcher = WebSocketFileWatcher.fileWatcher
      response = {'status': 400, 'error': 'unhandled request'}
      # initialize so the except handler and reply code below can
      # safely reference these even if parsing the message fails
      cmd = 'unknown'
      request = {}
      try:
         request = json.loads(message)
         cmd = request['cmd']
         if cmd == 'initWatch':
             dir = request['dir']
             filePattern = request['filePattern']
             minFileSize = request['minFileSize']
             demoStep = request.get('demoStep')
             logging.log(DebugLevels.L3, "initWatch: %s, %s, %d", dir,
                         filePattern, minFileSize)
             if dir is None or filePattern is None or minFileSize is None:
                 errStr = "InitWatch: Missing file information: {} {}".format(
                     dir, filePattern)
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif WebSocketFileWatcher.validateRequestedFile(dir,
                                                             None) is False:
                 errStr = 'InitWatch: Non-allowed directory {}'.format(dir)
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif not os.path.exists(dir):
                 errStr = 'InitWatch: No such directory: {}'.format(dir)
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             else:
                 WebSocketFileWatcher.fileWatchLock.acquire()
                 try:
                     fileWatcher.initFileNotifier(dir, filePattern,
                                                  minFileSize, demoStep)
                 finally:
                     WebSocketFileWatcher.fileWatchLock.release()
                 response = {'status': 200}
         elif cmd == 'watchFile':
             filename = request['filename']
             timeout = request['timeout']
             logging.log(DebugLevels.L3, "watchFile: %s", filename)
             if filename is None:
                 errStr = 'WatchFile: Missing filename'
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif WebSocketFileWatcher.validateRequestedFile(
                     None, filename) is False:
                 errStr = 'WatchFile: Non-allowed file {}'.format(filename)
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             else:
                 WebSocketFileWatcher.fileWatchLock.acquire()
                 try:
                     retVal = fileWatcher.waitForFile(filename,
                                                      timeout=timeout)
                 finally:
                     WebSocketFileWatcher.fileWatchLock.release()
                 if retVal is None:
                     errStr = "WatchFile: 408 Timeout {}s: {}".format(
                         timeout, filename)
                     response = {'status': 408, 'error': errStr}
                     logging.log(logging.WARNING, errStr)
                 else:
                     # TODO - may need some retry logic here if the file was read
                     #  before it was completely written. Maybe checking filesize
                     #  against data size.
                     with open(filename, 'rb') as fp:
                         data = fp.read()
                     b64Data = b64encode(data)
                     b64StrData = b64Data.decode('utf-8')
                     response = {
                         'status': 200,
                         'filename': filename,
                         'data': b64StrData
                     }
         elif cmd == 'getFile':
             filename = request['filename']
             if filename is not None and not os.path.isabs(filename):
                 # relative path to the watch dir
                 filename = os.path.join(fileWatcher.watchDir, filename)
             logging.log(DebugLevels.L3, "getFile: %s", filename)
             if filename is None:
                 errStr = "GetFile: Missing filename"
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif WebSocketFileWatcher.validateRequestedFile(
                     None, filename) is False:
                 errStr = 'GetFile: Non-allowed file {}'.format(filename)
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif not os.path.exists(filename):
                 errStr = "GetFile: File not found {}".format(filename)
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             else:
                 with open(filename, 'rb') as fp:
                     data = fp.read()
                 b64Data = b64encode(data)
                 b64StrData = b64Data.decode('utf-8')
                 response = {
                     'status': 200,
                     'filename': filename,
                     'data': b64StrData
                 }
         elif cmd == 'getNewestFile':
             filename = request['filename']
             logging.log(DebugLevels.L3, "getNewestFile: %s", filename)
             if filename is None:
                 errStr = "GetNewestFile: Missing filename"
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif WebSocketFileWatcher.validateRequestedFile(
                     None, filename) is False:
                 errStr = 'GetNewestFile: Non-allowed file {}'.format(
                     filename)
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             else:
                 baseDir, filePattern = os.path.split(filename)
                 if not os.path.isabs(baseDir):
                     # relative path to the watch dir
                     baseDir = os.path.join(fileWatcher.watchDir, baseDir)
                 filename = findNewestFile(baseDir, filePattern)
                 if filename is None or not os.path.exists(filename):
                     errStr = 'GetNewestFile: file not found: {}'.format(
                         os.path.join(baseDir, filePattern))
                     response = {'status': 400, 'error': errStr}
                     logging.log(logging.WARNING, errStr)
                 else:
                     with open(filename, 'rb') as fp:
                         data = fp.read()
                     b64Data = b64encode(data)
                     b64StrData = b64Data.decode('utf-8')
                     response = {
                         'status': 200,
                         'filename': filename,
                         'data': b64StrData
                     }
         elif cmd == 'ping':
             response = {'status': 200}
         elif cmd == 'putTextFile':
             filename = request['filename']
             text = request['text']
             logging.log(DebugLevels.L3, "putTextFile: %s", filename)
             if filename is None:
                 errStr = 'PutTextFile: Missing filename field'
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif text is None:
                 errStr = 'PutTextFile: Missing text field'
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif WebSocketFileWatcher.validateRequestedFile(
                     None, filename, textFileTypeOnly=True) is False:
                 errStr = 'PutTextFile: Non-allowed file {}'.format(
                     filename)
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif type(text) is not str:
                 errStr = "PutTextFile: Only text allowed"
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             else:
                 outputDir = os.path.dirname(filename)
                 if not os.path.exists(outputDir):
                     os.makedirs(outputDir)
                 # print('putTextFile: write {}'.format(filename))
                 with open(filename, 'w+') as volFile:
                     volFile.write(text)
                 response = {'status': 200}
         elif cmd == 'dataLog':
             filename = request['filename']
             logging.log(DebugLevels.L3, "dataLog: %s", filename)
             logLine = request['logLine']
             if filename is None:
                 errStr = 'DataLog: Missing filename field'
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif logLine is None:
                 errStr = 'DataLog: Missing logLine field'
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             elif WebSocketFileWatcher.validateRequestedFile(
                     None, filename, textFileTypeOnly=True) is False:
                 errStr = 'DataLog: Non-allowed file {}'.format(filename)
                 response = {'status': 400, 'error': errStr}
                 logging.log(logging.WARNING, errStr)
             else:
                 with open(filename, 'a+') as logFile:
                     logFile.write(logLine + '\n')
                 response = {'status': 200}
         elif cmd == 'error':
             errorCode = request['status']
             if errorCode == 401:
                 WebSocketFileWatcher.needLogin = True
                 WebSocketFileWatcher.sessionCookie = None
              errStr = 'Error {}: {}'.format(errorCode, request['error'])
              logging.log(logging.ERROR, errStr)
             return
         else:
             errStr = 'OnMessage: Unrecognized command {}'.format(cmd)
             response = {'status': 400, 'error': errStr}
             logging.log(logging.WARNING, errStr)
     except Exception as err:
         errStr = "OnMessage Exception: {}: {}".format(cmd, err)
         logging.log(logging.WARNING, errStr)
         response = {'status': 400, 'error': errStr}
         if cmd == 'error':
             sys.exit()
     # merge response into the request dictionary
     request.update(response)
     response = request
     WebSocketFileWatcher.clientLock.acquire()
     try:
         client.send(json.dumps(response))
     finally:
         WebSocketFileWatcher.clientLock.release()
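For reference, a client drives this handler with JSON messages over the websocket; the sketch below (field names taken from the getNewestFile branch above, the pattern value is purely illustrative) would fetch the newest matching file, and the reply is the same dict updated with status, filename, and base64 data:

import json

request = json.dumps({'cmd': 'getNewestFile',
                      'filename': 'run1/patternsdata_1*.mat'})
# client.send(request) over the established websocket connection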
Example #17
def crossvalidateModels(matDataDir, pyDataDir, runId):
    runDir = 'run' + str(runId) + '/'
    matModelFn = utils.findNewestFile(
        matDataDir, runDir + 'trainedModel_' + str(runId) + '*.mat')
    pyModelFn = utils.findNewestFile(
        pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
    matModel = utils.loadMatFile(matModelFn)
    pyModel = utils.loadMatFile(pyModelFn)
    selector = np.concatenate((0 * np.ones((50)), 1 * np.ones(
        (50)), 2 * np.ones((50)), 3 * np.ones((50))),
                              axis=0)
    X = np.array([1, 2, 3, 4])
    nfold = 4
    kf = KFold(nfold)
    mat_roc = np.zeros((nfold))
    py_roc = np.zeros((nfold))
    i = 0
    for train_index, test_index in kf.split(X):
        print("TRAIN:", train_index, "TEST:", test_index)
        trTrain = np.in1d(selector, train_index)
        trTest = np.in1d(selector, test_index)
        # matlab first
        mat_lrc = LogisticRegression(solver='sag', penalty='l2', max_iter=300)
        categoryTrainLabels = np.argmax(matModel.trainLabels[trTrain, :],
                                        axis=1)
        mat_lrc.fit(matModel.trainPats[trTrain, :], categoryTrainLabels)
        mat_predict = mat_lrc.predict_proba(matModel.trainPats[trTest, :])
        categ_sep = -1 * np.diff(mat_predict, axis=1)
        C0 = np.argwhere(
            np.argmax(matModel.trainLabels[trTest, :], axis=1) == 0)
        C1 = np.argwhere(
            np.argmax(matModel.trainLabels[trTest, :], axis=1) == 1)
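        # categ_sep = p(class 0) - p(class 1); encoding the true category
        # as +1/-1 lets roc_auc_score measure how well that signed
        # separation ranks class-0 TRs above class-1 TRs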
        correctLabels = np.ones((len(categ_sep)))
        correctLabels[C1] = -1
        mat_roc[i] = roc_auc_score(correctLabels, categ_sep)
        print("MAT AUC for iteration %i is: %.2f" % (i, mat_roc[i]))
        # python second
        py_lrc = LogisticRegression(solver='sag', penalty='l2', max_iter=300)
        categoryTrainLabels = np.argmax(pyModel.trainLabels[trTrain, :],
                                        axis=1)
        py_lrc.fit(pyModel.trainPats[trTrain, :], categoryTrainLabels)
        py_predict = py_lrc.predict_proba(pyModel.trainPats[trTest, :])
        categ_sep = -1 * np.diff(py_predict, axis=1)
        C0 = np.argwhere(
            np.argmax(pyModel.trainLabels[trTest, :], axis=1) == 0)
        C1 = np.argwhere(
            np.argmax(pyModel.trainLabels[trTest, :], axis=1) == 1)
        correctLabels = np.ones((len(categ_sep)))
        correctLabels[C1] = -1
        py_roc[i] = roc_auc_score(correctLabels, categ_sep)
        print("PY AUC for iteration %i is: %.2f\n" % (i, py_roc[i]))
        i += 1
    print("AVG AUC MAT,PY is: %.2f,%.2f\n" %
          (np.mean(mat_roc), np.mean(py_roc)))
    #mat_mean = np.mean(mat_roc)
    #py_mean = np.mean(py_roc)
    #all_ROC = np.concatenate((mat_roc[:,np.newaxis],py_roc[:,np.newaxis]),axis=1)
    #fullfilename = matDataDir + '/' + 'xvalresults.npy'
    #print("saving to %s\n" % fullfilename)
    #np.save(fullfilename,all_ROC)
    return mat_roc, py_roc
Example #18
def train_test_python_classifier(subjectNum):
    ndays = 3
    auc_score = np.zeros((8, ndays))  # save larger to fit all days in
    RT_cs = np.zeros((8, ndays))
    dataPath = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/'
    subjectDir = dataPath + '/' + 'subject' + str(subjectNum)
    print(subjectNum)
    all_python_evidence = np.zeros(
        (9, 100, 3))  # time course of classifier evidence
    for d in np.arange(ndays):
        print(d)
        subjectDay = d + 1
        configFile = dataPath + 'subject' + str(
            subjectNum) + '/usedscripts/PennCfg_Day' + str(
                subjectDay) + '.toml'
        cfg = loadConfigFile(configFile)
        subjectDayDir = '/data/jag/cnds/amennen/rtAttenPenn/fmridata/behavdata/gonogo/subject' + str(
            cfg.session.subjectNum) + '/day' + str(cfg.session.subjectDay)
        pyDataDir = subjectDayDir
        if subjectDay == 1:
            nRuns = 7
            if subjectNum == 106:
                nRuns = 6
        elif subjectDay == 2:
            nRuns = 9
        elif subjectDay == 3:
            nRuns = 8
        print('total number of runs: %i' % nRuns)
        #nruns = len(cfg.session.Runs) - 1
        for r in np.arange(0, nRuns - 1):
            runId = r + 1  # now it goes from 0 : n Runs - 1
            print(runId)
            runDir = 'run' + str(runId) + '/'
            pyModelFn = utils.findNewestFile(
                pyDataDir, 'trainedModel_r' + str(runId) + '*_py.mat')
            # to find what matModel includes use matModel.keys() --> trainedModel, trainPats, trainLabels
            # for each model we have W [ nVoxel x 2 classes], biases [ 1 x 2 classes]
            # we can't apply this model to any of the examples in this run, but let's apply it to the first 4 blocks of the next run
            # now load testing data from the next run to test it on
            pyModel_train = utils.loadMatFile(pyModelFn)
            # INSTEAD MAKE NEW MODEL
            lrc1 = LogisticRegression(penalty='l2',
                                      solver='saga',
                                      max_iter=300)
            lrc2 = LogisticRegression(penalty='l2',
                                      solver='saga',
                                      max_iter=300)

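            # one one-vs-rest classifier per label column; the coefficients
            # and intercepts are stacked below to mimic the MATLAB
            # trainedModel weights/biases layout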
            lrc1.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 0])
            lrc2.fit(pyModel_train.trainPats, pyModel_train.trainLabels[:, 1])
            newTrainedModel = utils.MatlabStructDict({}, 'trainedModel')
            newTrainedModel.trainedModel = StructDict({})
            newTrainedModel.trainedModel.weights = np.concatenate(
                (lrc1.coef_.T, lrc2.coef_.T), axis=1)
            newTrainedModel.trainedModel.biases = np.concatenate(
                (lrc1.intercept_, lrc2.intercept_)).reshape(1, 2)
            newTrainedModel.trainPats = pyModel_train.trainPats
            newTrainedModel.trainLabels = pyModel_train.trainLabels

            # now load testing data for CV
            pyModelFn = utils.findNewestFile(
                pyDataDir, 'trainedModel_r' + str(runId + 1) + '*_py.mat')
            pyModel_test = utils.loadMatFile(pyModelFn)
            nTRTest = 100
            py_test_data = pyModel_test.trainPats[nTRTest:, :]
            test_labels = pyModel_test.trainLabels[nTRTest:, :]
            py_cs = np.zeros((nTRTest, 1))
            activations = np.zeros((nTRTest, 2))
            for t in np.arange(nTRTest):
                _, _, _, activations_py = Test_L2_RLR_realtime(
                    newTrainedModel, py_test_data[t, :], test_labels[t, :])
                activations[t, :] = activations_py

            fpr2, tpr2, thresholds2 = metrics.roc_curve(test_labels[:, 1],
                                                        activations[:, 1] -
                                                        activations[:, 0],
                                                        pos_label=1)
            auc_score[r, d] = metrics.auc(
                fpr2, tpr2
            )  # auc of this data applied to the first half of the next run
            # now apply to block data-- realtime values
            pyDataFn = utils.findNewestFile(
                pyDataDir, 'blkGroup_r' + str(runId + 1) + '_p2_*_py.mat')
            pyData_test = utils.loadMatFile(pyDataFn)
            regressor = pyData_test.regressor
            TRs_to_test = np.argwhere(np.sum(regressor, axis=0))
            RT_data = pyData_test.raw_sm_filt_z[TRs_to_test, :]
            RT_regressor = regressor[:, TRs_to_test].T.reshape(nTRTest, 2)
            # now do the same thing and test for every TR --> get category separation
            cs = np.zeros((nTRTest, 1))
            for t in np.arange(nTRTest):
                categ = np.flatnonzero(RT_regressor[t, :])
                otherCateg = (categ + 1) % 2
                _, _, _, activations_py = Test_L2_RLR_realtime(
                    newTrainedModel, RT_data[t, :].flatten(),
                    RT_regressor[t, :])
                cs[t] = activations_py[categ] - activations_py[otherCateg]

            # take average for this run
            RT_cs[r, d] = np.mean(cs)
            all_python_evidence[r, :, d] = cs[:, 0]
    outfile = subjectDir + '/offlineAUC_RTCS'
    np.savez(outfile, auc=auc_score, cs=RT_cs, all_ev=all_python_evidence)
Example #19
 def test_normalCase(self):
     print("Test findNewestFile normal case:")
     filename = utils.findNewestFile('/tmp/testdir', 'file1_20170101*')
     self.assert_result_matches_filename(filename)