# NOTE(review): this fragment starts mid-loop -- the enclosing loop/function
# header is outside this view, and this first line sits at column 0 while the
# following lines are indented, so the file looks truncated/mangled here;
# confirm the control flow against the original multicrab script.
print("inputList line is not properly formatted:", line)
            # Abort the whole submission on a malformed input-list line.
            exit(-3)
        # One fresh crab configuration per dataset in the input list.
        conf = GetInitializedCrabConfig()  # create fresh crab config
        dataset = split[0]
        nUnits = int(split[1])  # also used for total lumis for data
        nUnitsPerJob = int(
            split[2])  # used for files/dataset for MC and LS per data

        # Derive naming/bookkeeping info from the dataset path; isData
        # distinguishes collision data from MC and drives the job-splitting
        # choice below.
        (
            datasetTag,
            datasetName,
            primaryDatasetName,
            secondaryDatasetName,
            isData,
        ) = utils.GetOutputDatasetTagAndModifiedDatasetName(dataset)
        outputFile = utils.GetOutputFilename(dataset, not isData)
        conf.Data.outputDatasetTag = datasetTag
        conf.Data.inputDataset = dataset
        # NOTE(review): bare 'print' is a no-op expression under Python 3
        # (it printed a blank line under Python 2); harmless but stale.
        print
        print("Consider dataset {0}".format(dataset))

        # MC is split by number of files; data by luminosity sections.
        if not isData:
            conf.Data.splitting = "FileBased"
        else:
            conf.Data.splitting = "LumiBased"

        # get era
        # see, for example: https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmVAnalysisSummaryTable
        # secondaryDatasetName looks like 'Run2015D-PromptReco-v3'
        if "Summer16" in secondaryDatasetName or "Run2016" in secondaryDatasetName:
            year = 2016
    keepAndDrop = "keepAndDrop.txt"
## LQ2
elif analysis == "LQ2" :
    preselection = "Muon_pt[0] > 40"
    keepAndDrop = "keepAndDrop.txt"
## HH
elif analysis == "HH" :
    preselection="(Muon_pt[0]>16 && Muon_pt[1]>7 && nJet>2 && Jet_pt[0]>17 && Jet_pt[1]>17) || (Electron_pt[0]>22 && Electron_pt[1]>11 && nJet>2 && Jet_pt[0]>17 && Jet_pt[1]>17)"
    keepAndDrop = "keepAndDrop_hh.txt"
else :
    print "ERROR: Did not understand the analysis to run!  Should be one of LQ1, LQ2, HH. Quitting."
    exit(-1)


# for crab: name of the merged output file hadd will produce.
haddFileName = utils.GetOutputFilename(dataset, isMC)

# Gather the post-processor options in one place, then run over the
# crab-provided input files with the analysis preselection applied.
postproc_options = {
    "cut": preselection,
    "outputbranchsel": keepAndDrop,
    "modules": modulesToRun,
    "provenance": True,
    "fwkJobReport": True,
    "jsonInput": runsAndLumis(),
    "haddFileName": haddFileName,
}
p = PostProcessor(".", inputFiles(), **postproc_options)
# interactive testing
# p=PostProcessor(".",utils.GetFileList(''),cut=preselection,outputbranchsel=keepAndDrop,modules=modulesToRun,provenance=True,fwkJobReport=True,jsonInput=runsAndLumis(),haddFileName=haddFileName)
p.run()
#config.Data.outLFNDirBase = '/store/group/phys_exotica/leptonsPlusJets/RootNtuple/RunII/%s/' % (getUsernameFromSiteDB())
# EOS destination directory for the processed outputs.
config.Data.outLFNDirBase = '/store/group/phys_exotica/leptonsPlusJets/LQ/scooper/2016nanoPostProc/'
# Do not publish the resulting dataset in DBS.
config.Data.publication = False
config.Data.outputDatasetTag = 'NanoPostDYJIncAMCNLO'

# JobType section: run a user wrapper script (crab_script.sh) around a
# dummy PSet instead of a plain cmsRun job.
config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'PSet.py'
#config.JobType.pyCfgParams = ['dataset='+config.Data.inputDataset]
config.JobType.scriptExe = 'crab_script.sh'
# The dataset name is forwarded to the wrapper script as an argument.
config.JobType.scriptArgs = ['dataset=' + config.Data.inputDataset]
# Extra files shipped in the job sandbox alongside the wrapper script.
config.JobType.inputFiles = [
    'keepAndDrop.txt', 'utils.py', 'doSkim_stockNanoV5.py',
    cmsswBaseDir + '/src/PhysicsTools/NanoAODTools/scripts/haddnano.py'
]  #hadd nano will not be needed once nano tools are in cmssw
# Second argument is the isMC flag (cf. GetOutputFilename(dataset, isMC)
# elsewhere in this file) -- hard-coded True here, so this config assumes MC.
config.JobType.outputFiles = [
    utils.GetOutputFilename(config.Data.inputDataset, True)
]
config.JobType.sendPythonFolder = True

# Site section: stage-out destination for the outputs.
config.section_("Site")
config.Site.storageSite = "T2_CH_CERN"

# this will make sure jobs only run on sites which host the data.
# See: https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3FAQ#What_is_glideinWms_Overflow_and
config.section_("Debug")
config.Debug.extraJDL = ['+CMS_ALLOW_OVERFLOW=False']

#config.section_("User")
#config.User.voGroup = 'dcms'