def eval_bc_subdirs(tuple_list):
    """If possible, call mrptbc for the subdirs in tuple_list; return
    output_directory if the group was not skipped from evaluation.

    tuple_list : [(bc, subdir), ...].  Return the output directory if
    mrptbc has been run for those subdirectories.
    """
    print tuple_list,
    if len(tuple_list) != 4:
        print "=> faulty group, skip"
        return ""

    four_subdirs = [subdir for (bc, subdir) in tuple_list]
    # already done earlier: removed directories not containing any time series files

    output_directory = tuple_list[0][1].replace("bc" + tuple_list[0][0],
                                                "bc" + "averaged")

    if evalOnlyNew and glob(output_directory + "/mrpt-*.values"):
        print "already evaluated: skip, but still take into account:", output_directory
        return output_directory

    mkdir_p(output_directory)
    print "=> evaluate"

    # generate info.dat file with common metadata
    bc_infodat_files = ["%s/info.dat" % subdir for subdir in four_subdirs]
    bc_infodat_meta = [parseHeader(getHeader(f)) for f in bc_infodat_files]
    combined_metadata = getCommonDictionary(bc_infodat_meta)
    combined_metadata["bc"] = "averaged"
    writeMetadictToFile("%s/info.dat" % output_directory, combined_metadata)

    # run mrptbc: pass one --info-<bc> option per boundary condition,
    # then the time series files of all four subdirectories
    commandline = "mrptbc " + options + " --outputDirectory " + output_directory
    for bc, sd in tuple_list:
        commandline += " --info-" + bc + " " + sd + "/info.dat"
    for bc, sd in tuple_list:
        commandline += " %s/p*/associatedEnergy.series %s/p*/normMeanPhi.series" % (sd, sd)
    commandline += " ; exit 0"

    print commandline
    print output_directory
    stdout_and_stderr = subprocess.check_output(commandline, shell=True,
                                                stderr=subprocess.STDOUT)
    print stdout_and_stderr

    return output_directory
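# getCommonDictionary is provided by the surrounding scripts and is not
# shown in this section.  A minimal sketch of its assumed behavior --
# keep only the key/value pairs on which all metadata dictionaries agree:
def getCommonDictionary(dictionaries):
    common_items = set.intersection(*(set(d.iteritems())
                                      for d in dictionaries))
    return dict(common_items)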
def collect_mrpt_file(filename, mrpt_prefix):
    # get observable name
    observable_name = parseHeader(getHeader(filename))["observable"]

    # prefix, variable, multivalString, multivalKeys, tup and commonHeader
    # are module-level variables set before this function is called
    output_filename = prefix + mrpt_prefix + variable + "-" + observable_name + "_" + \
                      multivalString + ".values"

    with open(output_filename, 'w') as output_file:
        # prepend common metadata, add key = variable
        output_file.write(commonHeader)
        for k, v in zip(multivalKeys, tup):
            output_file.write("# %s = %s" % (k, v) + "\n")
        output_file.write("# key = " + variable + "\n")
        # copy the rest of the file contents
        with open(filename, "r") as input_file:
            for line in input_file:
                output_file.write(line)
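# writeMetadictToFile (used when generating the combined info.dat files)
# is another helper whose definition is not part of this section.  A
# plausible minimal version, assuming metadata is stored as commented
# "key = value" lines like the ones written by collect_mrpt_file:
def writeMetadictToFile(filename, meta_dict):
    with open(filename, 'w') as f:
        for k, v in sorted(meta_dict.iteritems()):
            f.write("# %s = %s\n" % (k, v))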
def find_intersection_for_subdirs(tuple_list):
    """If possible, call mrpt-binderratio-intersect for the subdirs in
    tuple_list; return output_directory if the group was not skipped
    from evaluation.

    tuple_list : [(L, subdir), ...].  Return the output directory if
    successful.
    """
    print tuple_list

    if len(tuple_list) < 2:
        print "=> too few subdirectories, skip"
        return ""

    L_to_subdirs = {int(L): subdir for (L, subdir) in tuple_list}
    my_subdirs = L_to_subdirs.values()
    # already done earlier: removed directories not containing any time series files

    # Find the control file for our group of subdirectories, if we have one.
    # Otherwise, skip this group.
    my_cf = None
    my_cf_meta = None
    for cf, meta in controlFilesMetaData.items():
        if dictContainedInDict(meta, infodata[my_subdirs[0]]):
            my_cf = cf
            my_cf_meta = meta
            break
    if my_cf is None:
        return ""
    print "control file:", my_cf

    for sd in my_subdirs[1:]:
        if not dictContainedInDict(my_cf_meta, infodata[sd]):
            print "Error: control file does not match subdirectory:", sd
            return ""

    # get information (4 columns) about where to look for L-pair
    # Binder cumulant intersections
    L1_L2_cpMin_cpMax = np.loadtxt(my_cf, ndmin=2)

    output_directory = prefix + "mrpt-binderratio-intersect"
    for key, value in my_cf_meta.items():
        output_directory += "_" + key + value
    mkdir_p(output_directory)
    print output_directory

    print "=> evaluate"

    # generate info.dat file with common metadata
    L_infodat_files = ["%s/info.dat" % subdir for subdir in my_subdirs]
    L_infodat_meta = [parseHeader(getHeader(f)) for f in L_infodat_files]
    combined_metadata = getCommonDictionary(L_infodat_meta)
    writeMetadictToFile("%s/info.dat" % output_directory, combined_metadata)

    # make mrpt-binderratio-intersect calls
    for L1, L2, cpMin, cpMax in L1_L2_cpMin_cpMax:
        L1 = int(L1)
        L2 = int(L2)

        if evalOnlyNew and glob(output_directory + "/mrpt-binder-intersect-l%dl%d.dat" % (L1, L2)):
            print "already evaluated: skip, but still take into account: (%d, %d)" % (L1, L2)
            continue

        sd1 = L_to_subdirs[L1]
        sd2 = L_to_subdirs[L2]
        info1 = sd1 + "/info.dat"
        info2 = sd2 + "/info.dat"

        print "Finding Binder-ratio intersection for L1=", L1, ", L2=", L2

        commandline = "mrpt-binderratio-intersect " + options \
                      + " --outputDirectory " + output_directory \
                      + " --info1 " + info1 + " --info2 " + info2 \
                      + " --cp-range %f %f" % (cpMin, cpMax)
        for sd in [sd1, sd2]:
            commandline += " %s/p*/associatedEnergy.series %s/p*/normMeanPhi.series" % (sd, sd)
        commandline += " ; exit 0"

        print "commandline:", commandline
        stdout_and_stderr = subprocess.check_output(commandline, shell=True,
                                                    stderr=subprocess.STDOUT)
        print stdout_and_stderr

    return output_directory
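# dictContainedInDict is used above to match control files to groups of
# subdirectories; its definition is not shown in this section.  Assumed
# semantics -- every (key, value) pair of the first dictionary also
# occurs in the second:
def dictContainedInDict(small, big):
    return all(big.get(k) == v for k, v in small.iteritems())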
# print subdir
sys.stdout.flush()

# potentially append "_" to the output prefix
if prefix != "":
    if prefix[-1:] == "-":
        prefix = prefix[:-1] + "_"  # replace "-" at the tail by "_"
    elif prefix[-1:] != "_":
        # append a "_" if it is not there already
        prefix = prefix + "_"

# collect info.dat contents (only common entries), potentially prune non-simindexjoined
########################################################################################
infodata = {}
for sd in subdirs:
    header = parseHeader(getHeader(sd + "/info.dat"))
    if header is not None:
        if only_simindexjoined and "simindex" in header and header["simindex"] != "joined":
            continue
        else:
            infodata[sd] = header
subdirs = infodata.keys()
# for subdir in infodata.keys():
#     print subdir

# collect controlFiles metadata
controlFilesMetaData = {}
for cf in controlFiles:
    header = parseHeader(getHeader(cf))
        subdir_candidates.append(root)

subdirs = [
    f for f in subdir_candidates
    if (f.startswith(prefix) or f.startswith("./" + prefix))
    and glob(f + "/*.series")  # exclude subdirs without time series -- e.g. pt top directories
    and ("simindex" in f and "simindexjoined" not in f)  # we want the simindex written into the directory name
]

# collect info.dat contents, maybe add simindex = 0
####################################################
infodata = {}
for sd in subdirs:
    header = parseHeader(getHeader(sd + "/info.dat"))
    if header is not None:
        infodata[sd] = header
        if 'simindex' not in infodata[sd]:
            infodata[sd]['simindex'] = '0'  # default simindex: 0
infodata = {k: v for k, v in infodata.iteritems() if v is not None}

# We need to group the subdirectories into groups that differ only by the simindex
##################################################################################
groupable_list = []

# keys in the following list are removed from the metadata before sorting by the metadata
exclude_from_metadata = [
    "simindex",  # in one group we want to have all simindex values
    # compilation related keys:
    "ARMA_VERSION",
    "BOOST_LIB_VERSION",
#!/usr/bin/env python
import sys
import numpy as np
from scripthelpers import getHeader, parseHeader

# Meta files for the replica exchange process contain one column with
# the control parameter index ("cpi") and one with the relevant data.
# This script exchanges the cpi with the actual control parameter values.

if __name__ == "__main__":
    assert len(sys.argv) == 3
    filename = sys.argv[1]
    outputfilename = sys.argv[2]

    header = getHeader(filename)
    data = np.loadtxt(filename)
    cpi = data[:, 0]
    cpv = np.array([float(s) for s in
                    parseHeader(header)["controlParameterValues"].split(" ")])
    assert len(cpv) == len(cpi)
    data[:, 0] = cpv

    np.savetxt(outputfilename, data,
               header="".join(header).replace("control parameter index",
                                              "control parameter value"))
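# For illustration, the header rewrite in the np.savetxt call above
# behaves as follows on a toy header (hypothetical content; getHeader
# presumably returns the header as a list of lines):
toy_header = ["# control parameter index | data\n"]
print "".join(toy_header).replace("control parameter index",
                                  "control parameter value")
# prints: "# control parameter value | data"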
print("pruned subdir_candidates for prefix") sys.stdout.flush() # potentially append "_" for the output prefix if prefix != "": if prefix[-1:] == "-": prefix = prefix[:-1] + "_" # replace "-" in tail by "_" elif prefix[-1:] != "_": # append a "_" if it is not there already prefix = prefix + "_" #collect info.dat contents (only common entries), potentially prune non simindexjoined ###################################################################################### infodata = {} for sd in subdirs: header = parseHeader(getHeader(sd + "/info.dat")) if not header is None: if only_simindexjoined: if not "simindex" in header or header["simindex"] != "joined": # ignore individual simindex data continue infodata[sd] = header else: if "simindex" in header and header["simindex"] == "joined": # only take into account individual simindex data continue infodata[sd] = header subdirs = infodata.keys() # print subdirs print("collected subdir info.dat contents")
def is_simindexjoined_subdir(sd):
    header = parseHeader(getHeader(sd + "/info.dat"))
    if header is not None:
        if "simindex" in header and header["simindex"] == "joined":
            return True
    return False
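# A typical use of this predicate would be filtering a list of
# subdirectories (hypothetical usage, not part of the original script):
joined_subdirs = [sd for sd in subdirs if is_simindexjoined_subdir(sd)]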
    if sd != '':
        subdirs.remove(sd)

# collect mrpt results and metadata
###################################

# helper:
def addControlParameterCount(meta_dict):
    # guard against missing metadata (parseHeader may return None)
    if meta_dict is not None and "controlParameterValues" in meta_dict:
        meta_dict["controlParameterCount"] = str(len(meta_dict["controlParameterValues"].split()))
    return meta_dict

# map: subdirectory -> metadata dictionary
# [for replica exchange simulations: count controlParameterValues]
metadata = {sd: addControlParameterCount(parseHeader(getHeader(sd + "/info.dat")))
            for sd in subdirs}
# prune subdirectories with empty metadata
metadata = {sd: meta for sd, meta in metadata.iteritems() if meta is not None}

# go over all the metadata dictionaries, each time take the keys of those
# dictionaries, then find all the common ones (set intersection;
# the * unpacks the generator into separate arguments)
commonkeys = set.intersection(*(set(d.iterkeys()) for d in metadata.itervalues()))
# map commonkeys -> metadata ; keep a pair only if its value is also equal everywhere
commonmetadata = dict(set.intersection(*(set(d.iteritems()) for d in metadata.itervalues())))
try:
    del commonmetadata['jkBlocks']  # remove metadata that is no longer valid for the eval-results
except KeyError:
    pass

variable = commonmetadata["controlParameterName"]
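# Worked example of the common-metadata extraction above, with two
# hypothetical metadata dictionaries -- only pairs that are identical
# in every dictionary survive the intersection:
example = {"a": {"L": "10", "model": "sdw", "rngSeed": "1"},
           "b": {"L": "12", "model": "sdw", "rngSeed": "2"}}
common = dict(set.intersection(*(set(d.iteritems())
                                 for d in example.itervalues())))
# common == {"model": "sdw"} -- "L" and "rngSeed" differ and are dropped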
def find_intersection_for_subdirs(tuple_list):
    """If possible, call mrptbc-binderratio-intersect for the subdirs in
    tuple_list; return output_directory if the group was not skipped
    from evaluation.

    tuple_list : [(L, bc, subdir), ...].  Return the output directory
    if successful.
    """
    if len(tuple_list) % 4 != 0:
        print "=> number of subdirectories should be a multiple of 4, skipping"
        return ""

    # map: L -> bc -> subdir
    map_L_bc_subdir = {}
    for L, bc, subdir in tuple_list:
        map_L_bc_subdir.setdefault(int(L), {})[bc] = subdir
    print map_L_bc_subdir

    for L in map_L_bc_subdir:
        if len(map_L_bc_subdir[L].keys()) != 4:
            print "Wrong number of bc subdirs for L =", L, "=> skipping"
            return ""

    my_subdirs = [subdir for (L, bc, subdir) in tuple_list]
    # already done earlier: removed directories not containing any time series files

    # Find the control file for our group of subdirectories, if we have one.
    # Otherwise, skip this group.
    my_cf = None
    my_cf_meta = None
    for cf, meta in controlFilesMetaData.items():
        if dictContainedInDict(meta, infodata[my_subdirs[0]]):
            my_cf = cf
            my_cf_meta = meta
            break
    if my_cf is None:
        return ""
    print "control file:", my_cf

    for sd in my_subdirs[1:]:
        if not dictContainedInDict(my_cf_meta, infodata[sd]):
            print "Error: control file does not match subdirectory:", sd
            return ""

    # get information (4 columns) about where to look for L-pair
    # Binder cumulant intersections
    L1_L2_cpMin_cpMax = np.loadtxt(my_cf, ndmin=2)

    output_directory = prefix + "mrptbc-binderratio-intersect"
    for key, value in my_cf_meta.items():
        output_directory += "_" + key + value
    output_directory += "_bcaveraged"
    mkdir_p(output_directory)
    print output_directory

    print "=> evaluate"

    # generate info.dat file with common metadata
    L_infodat_files = ["%s/info.dat" % subdir for subdir in my_subdirs]
    L_infodat_meta = [parseHeader(getHeader(f)) for f in L_infodat_files]
    combined_metadata = getCommonDictionary(L_infodat_meta)
    combined_metadata["bc"] = "averaged"
    writeMetadictToFile("%s/info.dat" % output_directory, combined_metadata)

    # make mrptbc-binderratio-intersect calls
    for L1, L2, cpMin, cpMax in L1_L2_cpMin_cpMax:
        L1 = int(L1)
        L2 = int(L2)

        if evalOnlyNew and glob(output_directory + "/mrptbc-binder-intersect-l%dl%d.dat" % (L1, L2)):
            print "already evaluated: skip, but still take into account: (%d, %d)" % (L1, L2)
            continue

        sd1_bc = {}
        sd2_bc = {}
        info1_bc = {}
        info2_bc = {}
        for bc in all_bc:
            sd1_bc[bc] = map_L_bc_subdir[L1][bc]
            sd2_bc[bc] = map_L_bc_subdir[L2][bc]
            info1_bc[bc] = sd1_bc[bc] + "/info.dat"
            info2_bc[bc] = sd2_bc[bc] + "/info.dat"

        print "Finding Binder-ratio intersection for L1=", L1, ", L2=", L2

        commandline = "mrptbc-binderratio-intersect " + options \
                      + " --outputDirectory " + output_directory \
                      + " --cp-range %f %f" % (cpMin, cpMax)
        for bc in all_bc:
            commandline += " --info1-%s %s --info2-%s %s" % (bc, info1_bc[bc], bc, info2_bc[bc])
        for bc in all_bc:
            for sd in [sd1_bc[bc], sd2_bc[bc]]:
                commandline += " %s/p*/associatedEnergy.series %s/p*/normMeanPhi.series" % (sd, sd)
        commandline += " ; exit 0"

        print "commandline:", commandline
        stdout_and_stderr = subprocess.check_output(commandline, shell=True,
                                                    stderr=subprocess.STDOUT)
        print stdout_and_stderr

    return output_directory
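# mkdir_p mirrors "mkdir -p" and is presumably defined in the shared
# script helpers; a conventional Python 2 version for reference (an
# assumption, not necessarily the repository's implementation):
import errno
import os

def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc:
        # ignore the error only if the directory already exists
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise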
# potentially append "_" for the output prefix if prefix != "": if prefix[-1:] == "-": prefix = prefix[:-1] + "_" # replace "-" in tail by "_" elif prefix[-1:] != "_": # append a "_" if it is not there already prefix = prefix + "_" #collect info.dat contents ########################## infodata = {} for sd in subdirs: header = parseHeader(getHeader(sd + "/info.dat")) if not header is None: infodata[sd] = header # collect controlFiles metadata controlFilesMetaData = {} for cf in controlFiles: header = parseHeader(getHeader(cf)) if not header is None: controlFilesMetaData[cf] = header else: print "control file", cf, "does not contain metadata"
print "pruned subdir_candidates for prefix" sys.stdout.flush() # potentially append "_" for the output prefix if prefix != "": if prefix[-1:] == "-": prefix = prefix[:-1] + "_" # replace "-" in tail by "_" elif prefix[-1:] != "_": # append a "_" if it is not there already prefix = prefix + "_" #collect info.dat contents (only common entries), potentially prune non simindexjoined ###################################################################################### infodata = {} for sd in subdirs: header = parseHeader(getHeader(sd + "/info.dat")) if not header is None: if only_simindexjoined: if not "simindex" in header or header["simindex"] != "joined": # ignore individual simindex data continue infodata[sd] = header else: if "simindex" in header and header["simindex"] == "joined": # only take into account individual simindex data continue infodata[sd] = header subdirs = infodata.keys() # print subdirs print "collected subdir info.dat contents"
# potentially append "_" for the output prefix if prefix != "": if prefix[-1:] == "-": prefix = prefix[:-1] + "_" # replace "-" in tail by "_" elif prefix[-1:] != "_": # append a "_" if it is not there already prefix = prefix + "_" #collect info.dat contents ########################## infodata = {} for sd in subdirs: header = parseHeader(getHeader(sd + "/info.dat")) if not header is None: infodata[sd] = header infodata = {k:v for k,v in infodata.iteritems() if v is not None} # We need to group the subdirectories in groups that differ only by the boundary conditions ########################################################################################### groupable_list = [] # keys in the following links are removed from the metadata before sorting by the metadata exclude_from_metadata = ["bc", # in one group we want to have all 4 possible b.c.'s # compilation related keys: "ARMA_VERSION", "BOOST_LIB_VERSION", "buildDate", "buildHost", "buildTime", "cppflags", "cxxflags", "gitBranch", "gitRevisionHash", # rng seed will be different: "rngSeed",