Example #1
0
def analysis(ds):
    """Performs main analysis.

    :Parameter:
      ds: Dataset

    :Returns:
      Per measure sensitivities as returned from doSensitivityAnalysis()
    """
    # Replicate the published results.  RFE plus initial feature
    # selection could do slightly better, but for the purpose of the
    # paper we only reproduce the reported numbers.
    svm = sg.SVM(kernel_type='linear')
    base_c = -2.0  # our default scaling is too soft
    # Scale C per class according to the number of samples per label
    per_label = ds.samplesperlabel
    class_ratio = N.sqrt(float(per_label[0]) / per_label[1])
    svm.C = (base_c / class_ratio, base_c * class_ratio)

    # NOTE: plain classification alone would only need something like
    #   cv2A = CrossValidatedTransferError(
    #             TransferError(svm), NFoldSplitter(),
    #             enable_states=['confusion', 'training_confusion', 'splits'])
    #   error2A = cv2A(ds)
    # but doSensitivityAnalysis is reused instead.

    classifiers = {
        # explicitly instruct SMLR to fit just a single set of weights
        # for our binary task
        'SMLR': SMLR(lm=1.0, fit_all_weights=False),
        'lCSVM': svm,
    }

    # pure sensitivities (or related measures)
    measures = {'ANOVA': OneWayAnova()}

    # perform the analysis and collect all sensitivities
    senses = doSensitivityAnalysis(ds, classifiers, measures, NFoldSplitter())

    # restore the original single C for the Figure 2B run
    svm.C = base_c
    # Figure 2B: resample so every label contributes equally per split
    cv2B = CrossValidatedTransferError(
        TransferError(svm),
        NFoldSplitter(nperlabel='equal', nrunspersplit=100),
        enable_states=['confusion', 'training_confusion'])

    # run cross-validation; populates cv2B.confusion as a side effect
    cv2B(ds)

    # print full testing confusion table
    verbose(2, "Figure 2B LOO performance:\n%s" % cv2B.confusion)

    return senses
def analysis(ds):
    """Performs the main sensitivity analysis.

    :Parameter:
      ds: Dataset

    :Returns:
      Per measure sensitivities as returned from doSensitivityAnalysis()
    """
    # Lets first replicate the obtained results.  We can do slightly
    # better using RFEs and initial feature selection, but lets just
    # replicate
    #
    clf = sg.SVM(kernel_type='linear')
    # C=-2.0 gives 84% when properly scaled and 5% ANOVA voxels
    # C=-1.0 and RFE gives up to 85% correct
    # clf = sg.SVM(kernel_type='linear')
    C = -2.0

    # Scale C according to the number of samples per class
    spl = ds.samplesperlabel
    ratio = N.sqrt(float(spl[0])/spl[1])
    clf.C = (C/ratio, C*ratio)

    # If we were only to do classification, the following snippet would
    # be sufficient.  But lets reuse doSensitivityAnalysis
    #
    # cv2A = CrossValidatedTransferError(
    #           TransferError(clf),
    #           NFoldSplitter(),
    #           enable_states=['confusion', 'training_confusion', 'splits'])
    #
    # verbose(1, "Running cross-validation on %s" % clf.descr)
    # error2A = cv2A(ds)
    # verbose(2, "Figure 2A LOO performance:\n%s" % cv2A.confusion)

    # Used in the RFE implementations -- NOTE(review): only the
    # commented-out entries of `clfs` below reference these, so they are
    # currently constructed but never trained
    rfesvm = sg.SVM(kernel_type='linear')
    rfesvm2 = sg.SVM(kernel_type='linear')
    rfesvm.C = clf.C
    rfesvm2.C = clf.C
    rfesvm_split = SplitClassifier(rfesvm2)
    clfs = {
        # explicitly instruct SMLR just to fit a single set of weights for our
        # binary task
        'SMLR': SMLR(lm=1.0, fit_all_weights=False),
        'lCSVM': clf,
        # Alternative classifiers kept for reference; re-enable to compare
        #'lGPR': GPR(kernel=KernelLinear()),
        #'lCSVM+RFE(farm)': SplitClassifier( # which does splitting internally
        #   FeatureSelectionClassifier(
        #    clf = clf,
        #    feature_selection = RFE(             # on features selected via RFE
        #        sensitivity_analyzer=\
        #            rfesvm.getSensitivityAnalyzer(transformer=Absolute),
        #        transfer_error=TransferError(rfesvm),
        #        stopping_criterion=FixedErrorThresholdStopCrit(0.05),
        #        feature_selector=FractionTailSelector(
        #                           0.2, mode='discard', tail='lower'),
        #                           # remove 20% of features at each step
        #        update_sensitivity=True)),
        #        # update sensitivity at each step
        #    descr='LinSVM+RFE(farm,N-Fold)'),
        #'lCSVM+RFE(mean)': FeatureSelectionClassifier(
        #  clf = clf,
        #  feature_selection = RFE(             # on features selected via RFE
        #    # based on sensitivity of a clf which does splitting internally
        #    sensitivity_analyzer=rfesvm_split.getSensitivityAnalyzer(
        #        transformer=Absolute),
        #    transfer_error=ConfusionBasedError(
        #       rfesvm_split, confusion_state="confusion"),
        #       # and whose internal error we use
        #    feature_selector=FractionTailSelector(
        #                       0.2, mode='discard', tail='lower'),
        #                       # remove 20% of features at each step
        #    update_sensitivity=True),
        #    # update sensitivity at each step
        #  descr='LinSVM+RFE(avg,N-Fold)' )
        }

    # define some pure sensitivities (or related measures)
    sensanas={
        'ANOVA': OneWayAnova(),
        # Crashes for Yarik -- I guess openopt issue
        #'GPR_Model': GPRWeights(GPR(kernel=KernelLinear()), combiner=None),
        #
        # no I-RELIEF for now -- takes too long
        #'I-RELIEF': IterativeReliefOnline(),
        # gimme more !!
        }

    # perform the analysis and get all sensitivities
    senses = doSensitivityAnalysis(ds, clfs, sensanas, NFoldSplitter())

    # assign original single C
    clf.C = C
    # get results from Figure2B with resampling of the samples to
    # balance the number of samples per label
    cv2B = CrossValidatedTransferError(
              TransferError(clf),
              NFoldSplitter(nperlabel='equal',
                            # increase to reasonable number
                            nrunspersplit=4),
              enable_states=['confusion', 'training_confusion'])

    # run cross-validation; result itself is unused, cv2B.confusion
    # carries the accumulated testing confusion
    error2B = cv2B(ds)

    verbose(2, "Figure 2B LOO performance:\n%s" % cv2B.confusion)

    # Sure we repeat ourselves here but for the sake of clarity
    return senses
def analysis(ds):
    """Performs the main sensitivity analysis.

    :Parameter:
      ds: Dataset

    :Returns:
      Per measure sensitivities as returned from doSensitivityAnalysis()
    """
    # Lets first replicate the obtained results.  We can do slightly
    # better using RFEs and initial feature selection, but lets just
    # replicate
    #
    clf = sg.SVM(kernel_type='linear')
    # C=-2.0 gives 84% when properly scaled and 5% ANOVA voxels
    # C=-1.0 and RFE gives up to 85% correct
    # clf = sg.SVM(kernel_type='linear')
    C = -2.0

    # Scale C according to the number of samples per class
    spl = ds.samplesperlabel
    ratio = N.sqrt(float(spl[0]) / spl[1])
    clf.C = (C / ratio, C * ratio)

    # If we were only to do classification, the following snippet would
    # be sufficient.  But lets reuse doSensitivityAnalysis
    #
    # cv2A = CrossValidatedTransferError(
    #           TransferError(clf),
    #           NFoldSplitter(),
    #           enable_states=['confusion', 'training_confusion', 'splits'])
    #
    # verbose(1, "Running cross-validation on %s" % clf.descr)
    # error2A = cv2A(ds)
    # verbose(2, "Figure 2A LOO performance:\n%s" % cv2A.confusion)

    # Used in the RFE implementations -- NOTE(review): only the
    # commented-out entries of `clfs` below reference these, so they are
    # currently constructed but never trained
    rfesvm = sg.SVM(kernel_type='linear')
    rfesvm2 = sg.SVM(kernel_type='linear')
    rfesvm.C = clf.C
    rfesvm2.C = clf.C
    rfesvm_split = SplitClassifier(rfesvm2)
    clfs = {
        # explicitly instruct SMLR just to fit a single set of weights for our
        # binary task
        'SMLR': SMLR(lm=1.0, fit_all_weights=False),
        'lCSVM': clf,
        # Alternative classifiers kept for reference; re-enable to compare
        #'lGPR': GPR(kernel=KernelLinear()),
        #'lCSVM+RFE(farm)': SplitClassifier( # which does splitting internally
        #   FeatureSelectionClassifier(
        #    clf = clf,
        #    feature_selection = RFE(             # on features selected via RFE
        #        sensitivity_analyzer=\
        #            rfesvm.getSensitivityAnalyzer(transformer=Absolute),
        #        transfer_error=TransferError(rfesvm),
        #        stopping_criterion=FixedErrorThresholdStopCrit(0.05),
        #        feature_selector=FractionTailSelector(
        #                           0.2, mode='discard', tail='lower'),
        #                           # remove 20% of features at each step
        #        update_sensitivity=True)),
        #        # update sensitivity at each step
        #    descr='LinSVM+RFE(farm,N-Fold)'),
        #'lCSVM+RFE(mean)': FeatureSelectionClassifier(
        #  clf = clf,
        #  feature_selection = RFE(             # on features selected via RFE
        #    # based on sensitivity of a clf which does splitting internally
        #    sensitivity_analyzer=rfesvm_split.getSensitivityAnalyzer(
        #        transformer=Absolute),
        #    transfer_error=ConfusionBasedError(
        #       rfesvm_split, confusion_state="confusion"),
        #       # and whose internal error we use
        #    feature_selector=FractionTailSelector(
        #                       0.2, mode='discard', tail='lower'),
        #                       # remove 20% of features at each step
        #    update_sensitivity=True),
        #    # update sensitivity at each step
        #  descr='LinSVM+RFE(avg,N-Fold)' )
    }

    # define some pure sensitivities (or related measures)
    sensanas = {
        'ANOVA': OneWayAnova(),
        # Crashes for Yarik -- I guess openopt issue
        #'GPR_Model': GPRWeights(GPR(kernel=KernelLinear()), combiner=None),
        #
        # no I-RELIEF for now -- takes too long
        #'I-RELIEF': IterativeReliefOnline(),
        # gimme more !!
    }

    # perform the analysis and get all sensitivities
    senses = doSensitivityAnalysis(ds, clfs, sensanas, NFoldSplitter())

    # assign original single C
    clf.C = C
    # get results from Figure2B with resampling of the samples to
    # balance the number of samples per label
    cv2B = CrossValidatedTransferError(
        TransferError(clf),
        NFoldSplitter(
            nperlabel='equal',
            # increase to reasonable number
            nrunspersplit=4),
        enable_states=['confusion', 'training_confusion'])

    # run cross-validation; result itself is unused, cv2B.confusion
    # carries the accumulated testing confusion
    error2B = cv2B(ds)

    verbose(2, "Figure 2B LOO performance:\n%s" % cv2B.confusion)

    # Sure we repeat ourselves here but for the sake of clarity
    return senses
Example #4
0
def analysis(ds):
    """Performs main analysis.

    :Parameter:
      ds: Dataset

    :Returns:
      Per measure sensitivities as returned from doSensitivityAnalysis()
    """
    # Reproduce the published results; fancier pipelines (RFEs, initial
    # feature selection) can do slightly better, but plain replication
    # is the goal for the paper.
    classifier = sg.SVM(kernel_type='linear')
    c_value = -2.0  # our default scaling is too soft
    # Scale C asymmetrically by the per-class sample counts
    counts = ds.samplesperlabel
    imbalance = N.sqrt(float(counts[0]) / counts[1])
    classifier.C = (c_value / imbalance, c_value * imbalance)

    # For classification only, a plain cross-validation such as
    #   cv2A = CrossValidatedTransferError(
    #             TransferError(classifier), NFoldSplitter(),
    #             enable_states=['confusion', 'training_confusion', 'splits'])
    #   error2A = cv2A(ds)
    # would suffice; doSensitivityAnalysis is reused instead.

    clfs = {}
    # explicitly instruct SMLR just to fit a single set of weights for
    # our binary task
    clfs['SMLR'] = SMLR(lm=1.0, fit_all_weights=False)
    clfs['lCSVM'] = classifier

    # some pure sensitivities (or related measures)
    sensanas = {'ANOVA': OneWayAnova()}

    # perform the analysis and collect all sensitivities
    senses = doSensitivityAnalysis(ds, clfs, sensanas, NFoldSplitter())

    # back to the original single C before the balanced-resampling run
    classifier.C = c_value
    # Figure 2B: resample the samples so that every label contributes an
    # equal number per split
    balanced_splitter = NFoldSplitter(nperlabel='equal', nrunspersplit=100)
    cv2B = CrossValidatedTransferError(
        TransferError(classifier),
        balanced_splitter,
        enable_states=['confusion', 'training_confusion'])

    # compute; the returned error itself is unused
    error2B = cv2B(ds)

    # print full testing confusion table
    verbose(2, "Figure 2B LOO performance:\n%s" % cv2B.confusion)

    return senses