def merge_files_by_process(root_files):
    """Merge ROOT files into one output file per physics-process group.

    Every path in ``root_files`` is mapped to a process name with
    ``get_process_from_file``.  Files whose process belongs to one of the
    predefined groups (QCD electron-enriched, single top, W+jets, V+jets,
    diboson, signal) are merged with ``merge_ROOT_files`` into a single
    file named after the group.  The output path is derived from the first
    input path seen, by swapping its process name for a template token and
    the global ``input_folder`` for ``output_folder``.

    @param root_files: list of input ROOT file paths
    """
    global input_folder, output_folder
    electron_qcd_samples = ['QCD_Pt-20to30_BCtoE',
                            'QCD_Pt-30to80_BCtoE',
                            'QCD_Pt-80to170_BCtoE',
                            'QCD_Pt-20to30_EMEnriched',
                            'QCD_Pt-30to80_EMEnriched',
                            'QCD_Pt-80to170_EMEnriched',
                            'GJets_HT-40To100',
                            'GJets_HT-100To200',
                            'GJets_HT-200']
    singleTop_samples = ['T_tW-channel',
                         'T_t-channel',
                         'T_s-channel',
                         'Tbar_tW-channel',
                         'Tbar_t-channel',
                         'Tbar_s-channel']
    wplusjets_samples = ['W1Jet', 'W2Jets', 'W3Jets', 'W4Jets']
    # BUGFIX: build a *new* list for V+jets instead of aliasing the W+jets
    # list.  The original did ``vplusjets_samples = wplusjets_samples``
    # followed by ``.append('DYJetsToLL')``, which also injected Drell-Yan
    # into the 'WPlusJets' group through the shared list object.
    vplusjets_samples = wplusjets_samples + ['DYJetsToLL']
    diboson_samples = ['WWtoAnything', 'WZtoAnything', 'ZZtoAnything']
    signal_samples = ['TTJet', 'SingleTop']

    # group name -> list of sample (process) names belonging to that group
    summations = {
                  'QCD_Electron': electron_qcd_samples,
                  'SingleTop': singleTop_samples,
                  'WPlusJets': wplusjets_samples,
                  'VPlusJets': vplusjets_samples,
                  'DiBoson': diboson_samples,
                  'Signal': signal_samples
                  }

    summation_files = {}
    file_template = ''
    template_token = '<temp>'
    for summation, samples in summations.iteritems():
        summation_files[summation] = []
        for file_in_path in root_files:
            process_name = get_process_from_file(file_in_path)
            # Derive the output-path template once, from the first file seen.
            if not file_template:
                file_template = file_in_path.replace(process_name, template_token)
                file_template = file_template.replace(input_folder, output_folder)
            if process_name in samples:
                summation_files[summation].append(file_in_path)

    for summation, files in summation_files.iteritems():
        output_file = file_template.replace(template_token, summation)
        merge_ROOT_files(files, output_file)
    # NOTE(review): orphaned fragment — 'sample', 'input_samples' and
    # 'new_files' are undefined at this point and 'continue' has no enclosing
    # loop; this looks like the body of a
    # "for sample, input_samples in sample_summations.iteritems()" loop that
    # was pasted without its header.  Confirm against the original 8 TeV
    # merge script.
    if not sample in ['WJets', 'DYJets', 'VJets-matchingup',
                      'VJets-matchingdown', 'VJets-scaleup',
                      'VJets-scaledown']: #
        continue
    print "Merging"
    current_working_directory = os.getcwd()  #find current working directory
    output_file = config_8TeV.central_general_template % sample
    # Write the merged file locally rather than straight into the hdfs area.
    output_file = output_file.replace("/hdfs/TopQuarkGroup/results/histogramfiles", current_working_directory)
    input_files = [config_8TeV.central_general_template % input_sample for input_sample in input_samples]

    print output_file
    for input_file in input_files:
        print input_file

    # Merge only when the output does not already exist (run is resumable).
    if not os.path.exists( output_file ):
        merge_ROOT_files( input_files, output_file, compression = 7 )
        print "merging ", sample
        new_files.append( output_file )
    print '=' * 120

    # if 8 concurrent processes, wait until they are finished before starting the next set to avoid overloading the machine
    # (the "- 2" presumably discounts the grep/shell spawned by the pipeline
    # itself — confirm)
    while ( int( subprocess.check_output( "ps ax | grep 'hadd' | wc -l", shell = True ) ) - 2 ) >= 8:
        time.sleep( 30 )  # sleep for 30 seconds
# merge all other histogram files
# NOTE(review): this loop body appears truncated — right after the
# 'print "Merging"' the indentation drops back to module level, so the
# per-sample merge logic that should follow is missing from this view.
for category in config_8TeV.categories_and_prefixes.keys():
    for sample, input_samples in sample_summations.iteritems():
        # Only the fixed whitelist of summed samples is merged here.
        if not sample in ['QCD_Electron', 'QCD_Muon', 'VJets',
                          'SingleTop']: #
            continue
        print "Merging"
# Make folder
# NOTE(review): the following top-level statements use 'category', 'sample'
# and 'input_samples', which are not defined at module scope here — this
# looks like a loop/worker body that lost its enclosing scope.  Confirm
# against the original script before running.
make_folder_if_not_exists(path_to_AN_folder + "/" + category)

current_working_directory = os.getcwd()  #find current working directory
# Destination of the merged histogram file on hdfs for this category/sample.
output_file_hdfs = config.general_category_templates[category] % sample
# Merge into the local working directory first; pushed back to hdfs below.
output_file = output_file_hdfs.replace(
    "/hdfs/TopQuarkGroup/results/histogramfiles", current_working_directory)
input_files = [
    config.general_category_templates[category] % input_sample
    for input_sample in input_samples
]

# Skip the (expensive) merge if a previous run already produced the output.
if not os.path.exists(output_file):
    merge_ROOT_files(input_files,
                     output_file,
                     compression=7,
                     waitToFinish=True)
    print "merging ", sample
else:
    print 'Not merging ', sample, 'as', output_file, 'already exists'

# Now move output file to hdfs
# Check if file already exists on hdfs
if os.path.exists(output_file_hdfs):
    print "Output file on hdfs already exists.  Removing and replacing with new version."
    # HACK: string-built shell command with shell=True; acceptable only
    # because the path comes from local config, not untrusted input.
    command = 'hadoop fs -rm -skipTrash ' + output_file_hdfs.split('/hdfs')[-1]
    p = subprocess.Popen(command, shell=True)
    p.wait()

print '\nStarting rsync'
output_log_file = output_file.replace(".root", ".log")
    # NOTE(review): orphaned fragment — the indentation suggests this is the
    # body of a "for sample, input_samples in ..." loop (7 TeV variant of the
    # 8 TeV block earlier in the file); 'sample', 'input_samples' and
    # 'new_files' are undefined here.  Confirm against the original script.
    current_working_directory = os.getcwd()  #find current working directory
    output_file = config_7TeV.central_general_template % sample
    # Write locally instead of directly into the hdfs results area.
    output_file = output_file.replace(
        "/hdfs/TopQuarkGroup/results/histogramfiles",
        current_working_directory)
    input_files = [
        config_7TeV.central_general_template % input_sample
        for input_sample in input_samples
    ]

    print output_file
    for input_file in input_files:
        print input_file

    # Merge only when the output does not already exist (run is resumable).
    if not os.path.exists(output_file):
        merge_ROOT_files(input_files, output_file, compression=7)
        print "merging ", sample
        new_files.append(output_file)
    print '=' * 120

    # if 8 concurrent processes, wait until they are finished before starting the next set to avoid overloading the machine
    # (the "- 2" presumably discounts the grep/shell spawned by the pipeline
    # itself — confirm)
    while (int(
            subprocess.check_output("ps ax | grep 'hadd' | wc -l", shell=True))
           - 2) >= 8:
        time.sleep(30)  # sleep for 30 seconds

# merge all other histogram files
# NOTE(review): this 7 TeV loop is truncated — after 'continue' the next
# line returns to module level, so the merge body is missing from this view.
for category in config_7TeV.categories_and_prefixes.keys():
    for sample, input_samples in sample_summations.iteritems():
        # Only the fixed whitelist of summed samples is merged here.
        if not sample in ['QCD_Electron', 'QCD_Muon', 'VJets', 'SingleTop']:  #
            continue
# NOTE(review): worker-style fragment — 'job' is undefined in this view;
# presumably a (category, sample, input_samples) tuple handed to a pool
# worker.  Confirm against the caller.
category = job[0]
sample = job[1]
input_samples = job[2]

# print 'Test with :',sample, category, input_samples

# Make folder
make_folder_if_not_exists( path_to_AN_folder + "/" + category)

current_working_directory = os.getcwd()  #find current working directory
# Destination of the merged histogram file on hdfs for this category/sample.
output_file_hdfs = config.general_category_templates[category] % sample
# Merge into the local working directory first; pushed back to hdfs below.
output_file = output_file_hdfs.replace("/hdfs/TopQuarkGroup/results/histogramfiles", current_working_directory)
input_files = [config.general_category_templates[category] % input_sample for input_sample in input_samples]

# Skip the merge when a previous run already produced the local output.
if not os.path.exists( output_file ):
    merge_ROOT_files( input_files, output_file, compression = 7, waitToFinish=True )
    print "merging ", sample
else :
    print 'Not merging ',sample,'as',output_file,'already exists'

# Now move output file to hdfs
# Check if file already exists on hdfs
if os.path.exists( output_file_hdfs ):
  print "Output file on hdfs already exists.  Removing and replacing with new version."
  # HACK: string-built shell command with shell=True; acceptable only because
  # the path comes from local config, not untrusted input.
  command = 'hadoop fs -rm -skipTrash ' + output_file_hdfs.split('/hdfs')[-1]
  p = subprocess.Popen(command, shell=True)
  p.wait()

print '\nStarting rsync'
output_log_file = output_file.replace(".root", ".log")
# Build the rsync command that copies the merged file back to hdfs, logging
# rsync output; it is (presumably) executed past the end of this view.
command = 'rsync --verbose  --progress --stats --compress --recursive --times --update %s %s >> %s' % (output_file,output_file_hdfs,output_log_file)