def String2VariableExpr(s, reject_internal):
    s = sstrip(s)

    if (String2ConstantExpr(s) is not None):
        return None

    gid = -1
    tokens = tft_utils.String2Tokens(s, "$")
    assert(len(tokens) in [1, 2, 3])

    # get label
    label = tokens[0]
    eid = None
    label_eid = tft_utils.String2Tokens(label, "_eid_")
    assert(len(label_eid) in [1, 2])
    if (len(label_eid) == 2):
        label = label_eid[0]
        eid = int(label_eid[1])

    # check label validity
    if (label[0] in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]):
        return None

    # get gid
    if (len(tokens) in [2, 3]):
        gid = int(tokens[1])

    # get vtype
    vtype = Fraction
    if (len(tokens) == 3):
        if (tokens[2] == "Int"):
            vtype = int
        elif (tokens[2] == "Real"):
            vtype = Fraction
        else:
            assert(False)

    # create expression
    if (reject_internal):
        assert(0 <= gid)

    if (eid is not None):
        for ve in EXPR.ALL_VariableExprs:
            if (ve.index == eid):
                return ve
        print("ERROR: variable labeled with [" + label + "] " +
              "with expression id [" + str(eid) + "] was not defined...")
        assert(False)
    else:
        return EXPR.VariableExpr(label, vtype, gid, reject_internal)
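# A minimal usage sketch (not part of the module). The token format below is
# inferred from the parser above: "<label>[_eid_<eid>][$<gid>[$Int|Real]]".
# The variable name "navg" is hypothetical.
#
#   String2VariableExpr("navg$3$Real", True)   # variable "navg", gid 3, Fraction-typed
#   String2VariableExpr("navg_eid_7", False)   # looks up the existing expr with id 7
#   String2VariableExpr("3x", False)           # None: labels must not start with a digit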
def loadFromShortString(self, ss):
    tokens = tft_utils.String2Tokens(ss, " ")

    for gid_eps in tokens:
        ts = tft_utils.String2Tokens(gid_eps, ">")
        assert(len(ts) == 2)

        gid = int(ts[0])
        eps = Fraction(ts[1])

        assert(gid not in self.gid2eps.keys())
        self.gid2eps[gid] = eps
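# A minimal sketch of the "short string" format consumed above, inferred from
# the parser: space-separated "gid>eps" pairs. The epsilon values here are
# illustrative only (unit roundoffs of binary32 and binary64).
#
#   alloc = Alloc()
#   alloc.loadFromShortString("0>1/16777216 1>1/9007199254740992")
#   # alloc.gid2eps == {0: Fraction(1, 16777216), 1: Fraction(1, 9007199254740992)}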
def scikitDecisionTreeTraining(tdata):
    global DT_MODEL

    assert(os.path.isfile(tdata))

    # -- load training data --
    ftrain = open(tdata, "r")

    l_feat = None
    feats = []
    labels = []

    for aline in ftrain:
        aline = aline.strip()
        if (aline == ""):
            continue

        tokens = tft_utils.String2Tokens(aline, " ")
        assert(len(tokens) >= 2)

        if (l_feat is None):
            l_feat = len(tokens) - 1
        else:
            assert(len(tokens) == l_feat + 1)

        this_feat = []

        for i in range(0, len(tokens)):
            if (i == 0):
                # get the label
                labels.append(int(tokens[0]))
            else:
                # get a feature (stored as "<index>:<value>")
                assert(tokens[i].startswith(str(i) + ":"))
                this_feat.append(float(tokens[i][len(str(i) + ":"):]))

        assert(len(this_feat) == l_feat)
        feats.append(this_feat)

    ftrain.close()

    assert(len(labels) == len(feats))

    # -- train the model --
    DT_MODEL = tree.DecisionTreeClassifier()
    DT_MODEL.max_depth = MAX_DT_DEPTH
    DT_MODEL = DT_MODEL.fit(feats, labels)

    # -- export --
    scikitExportDecisionTree2Dot(FNAME_DT_DOT)
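# A short sketch of the training-data line format read above (LIBSVM-style,
# inferred from the parser): "<label> 1:<f1> 2:<f2> ...". Values are
# illustrative.
#
#   2 1:0.25 2:0.75 3:1.0
#   0 1:0.10 2:0.60 3:0.0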
def String2BoundedVariableExpr(s):
    tokens = tft_utils.String2Tokens(s, "in")
    assert(len(tokens) == 2)
    assert(tokens[1].startswith("[") and tokens[1].endswith("]"))

    var = String2Expr(tokens[0], True)
    assert(isinstance(var, EXPR.VariableExpr))

    ran = tft_utils.String2Tokens(tokens[1][1:len(tokens[1]) - 1], ",")
    assert(len(ran) == 2)

    vlb = EXPR.ConstantExpr(var.type()(ran[0]))
    vub = EXPR.ConstantExpr(var.type()(ran[1]))
    assert(isinstance(vlb, EXPR.ConstantExpr))
    assert(isinstance(vub, EXPR.ConstantExpr))

    if (not var.hasBounds()):
        var.setBounds(vlb, vub)
    else:
        # the variable was bounded before: the new bounds must agree with
        # the old ones (exact equality relaxed to a 1e-07 tolerance)
        assert(abs(var.lb().value() - vlb.value()) <= float(1e-07))
        assert(abs(var.ub().value() - vub.value()) <= float(1e-07))

    return var
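# A minimal sketch of the bounded-variable syntax parsed above, inferred from
# the parser: "<var> in [<lb>, <ub>]". The variable name "x" is illustrative.
#
#   v = String2BoundedVariableExpr("x in [0.0, 100.0]")
#   # v is a VariableExpr with lb() == 0 and ub() == 100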
def String2Op(s, op_class, op_labels=[]):
    s = sstrip(s)

    gid = -1
    tokens = tft_utils.String2Tokens(s, "$")
    if (len(tokens) not in [1, 2]):
        return None

    label = tokens[0]

    if (label in op_labels):
        if (len(tokens) == 2):
            gid = int(tokens[1])
        return op_class(gid, label)
    else:
        return None
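# A usage sketch (hypothetical call, assuming EXPR.BinaryOp is the operator
# class used elsewhere in this parser): "<op-label>[$<gid>]" yields an
# operator tagged with the group id, or None if the label is not recognized.
#
#   op = String2Op("+$2", EXPR.BinaryOp, ["+", "-", "*", "/"])
#   # op is EXPR.BinaryOp(2, "+"); String2Op("%", EXPR.BinaryOp, ["+"]) is None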
def SolveExprs(fname_exprs, optimizers={}):
    global EFORMS
    global E_UPPER_BOUND
    global M2
    global GID_EPSS
    global GID_COUNTS
    global GID_WEIGHT
    global CASTING_MAP
    global EQ_GIDS
    global CONSTRAINT_EXPRS
    global OPT_ERROR_FORM
    global TARGET_EXPRS

    EFORMS = None
    E_UPPER_BOUND = None
    M2 = None

    time_parsing = time.time()

    tft_utils.VerboseMessage("parsing input expression...")
    tft_utils.DebugMessage("reading .exprs file...")

    assert(os.path.isfile(fname_exprs))
    assert(ERROR_TYPE in ["abs", "rel"])

    # variables
    input_vars = []
    E_UPPER_BOUND = None

    # read the non-blank, non-comment lines
    ilines = []
    efile = open(fname_exprs, "r")
    for aline in efile:
        aline = aline.strip()
        if (aline == ""):
            continue
        if (aline.startswith("#")):
            continue
        ilines.append(aline)
    efile.close()

    # options
    assert(ilines[0] == "options:")
    ilines = ilines[1:]

    while True:
        if (ilines[0] == "upper-bound:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], ":")
        assert(len(tokens) == 2)

        if (tokens[0] == "opt-error-form"):
            OPT_ERROR_FORM = tft_utils.String2Bool(tokens[1])
        else:
            sys.exit("ERROR: unknown option setting: " + ilines[0])

        ilines = ilines[1:]

    # read error bound
    assert(ilines[0] == "upper-bound:")
    ilines = ilines[1:]

    E_UPPER_BOUND = tft_expr.ConstantExpr(float(ilines[0]))
    ilines = ilines[1:]

    # M2
    M2 = tft_expr.ConstantExpr(0.0)

    # read variable ranges
    assert(ilines[0] == "var-ranges:")
    ilines = ilines[1:]

    while True:
        if (ilines[0] == "group-epsilons:"):
            break

        var = tft_parser.String2BoundedVariableExpr(ilines[0])
        assert(var not in input_vars)
        input_vars.append(var)

        ilines = ilines[1:]

    # get groups' epsilons
    assert(ilines[0] == "group-epsilons:")
    ilines = ilines[1:]

    while True:
        if (ilines[0] == "eq-gids:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], ":")
        assert(len(tokens) == 2)

        gid = int(tokens[0])
        str_epss = tokens[1]
        assert(str_epss.startswith("[") and str_epss.endswith("]"))
        str_epss = str_epss[1:len(str_epss) - 1]

        str_eps_list = tft_utils.String2Tokens(str_epss, ",")

        eps_list = []
        for i in range(0, len(str_eps_list)):
            try:
                # a known epsilon label
                eps_idx = tft_alloc.EpsLabels_String().index(str_eps_list[i])
                eps_list.append(
                    tft_expr.ConstantExpr(tft_alloc.EPSILONS[eps_idx].value))
            except ValueError:
                # otherwise, it must parse to a constant expression
                expr_eps = tft_parser.String2Expr(str_eps_list[i], True)
                assert(isinstance(expr_eps, tft_expr.ConstantExpr))
                eps_list.append(expr_eps)

        assert(gid not in GID_EPSS.keys())
        GID_EPSS[gid] = eps_list

        ilines = ilines[1:]

    # get equal bit-width groups
    assert(len(ilines) > 0)
    assert(ilines[0] == "eq-gids:")
    ilines = ilines[1:]

    while True:
        assert(len(ilines) > 0)
        if (ilines[0] == "gid-counts:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], "=")
        assert(len(tokens) == 2)

        gid_1 = int(tokens[0])
        gid_2 = int(tokens[1])

        if (gid_1 == gid_2):
            ilines = ilines[1:]
            continue

        assert(gid_1 in GID_EPSS.keys())
        assert(gid_2 in GID_EPSS.keys())
        assert(GID_EPSS[gid_1] == GID_EPSS[gid_2])

        gp12 = (gid_1, gid_2)
        gp21 = (gid_2, gid_1)
        if ((gp12 not in EQ_GIDS) and (gp21 not in EQ_GIDS)):
            EQ_GIDS.append(gp12)

        ilines = ilines[1:]

    # get GID_COUNTS
    assert(len(ilines) > 0)
    assert(ilines[0] == "gid-counts:")
    ilines = ilines[1:]

    while True:
        assert(len(ilines) > 0)
        if (ilines[0] == "casting-counts:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], ":")
        assert(len(tokens) == 2)

        gid = int(tokens[0])
        c = int(tokens[1])
        assert(gid >= 0)
        assert(c >= 0)
        assert(gid not in GID_COUNTS.keys())

        if (c > 0):
            GID_COUNTS[gid] = c

        ilines = ilines[1:]

    # get CASTING_MAP
    assert(len(ilines) > 0)
    assert(ilines[0] == "casting-counts:")
    ilines = ilines[1:]

    while True:
        assert(len(ilines) > 0)
        if (ilines[0] == "gid-weight:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], ":")
        assert(len(tokens) == 2)

        p = tokens[0]
        c = int(tokens[1])
        assert(c > 0)
        assert(p.startswith("(") and p.endswith(")"))

        p = p[1:len(p) - 1]
        tokens = tft_utils.String2Tokens(p, ",")
        assert(len(tokens) == 2)

        gid_from = int(tokens[0])
        gid_to = int(tokens[1])

        p = (gid_from, gid_to)
        assert(p not in CASTING_MAP.keys())
        CASTING_MAP[p] = c

        ilines = ilines[1:]

    # get gid-weight mapping
    assert(len(ilines) > 0)
    assert(ilines[0] == "gid-weight:")
    ilines = ilines[1:]

    while True:
        assert(len(ilines) > 0)
        if (ilines[0] == "exprs:"):
            break

        tokens = tft_utils.String2Tokens(ilines[0], ":")
        assert(len(tokens) == 2)

        gid = int(tokens[0])
        weight = float(tokens[1])
        assert(0 <= gid)
        assert(0 <= weight)

        GID_WEIGHT[gid] = weight

        ilines = ilines[1:]

    # get expressions
    assert(len(ilines) > 0)
    assert(ilines[0] == "exprs:")
    ilines = ilines[1:]

    while True:
        assert(len(ilines) > 0)
        if (ilines[0] == "constraints:"):
            break

        target_expr = tft_parser.String2Expr(ilines[0], False)
        assert(isinstance(target_expr, tft_expr.ArithmeticExpr))
        TARGET_EXPRS.append(target_expr)

        ilines = ilines[1:]

    assert(len(TARGET_EXPRS) > 0)
    assert(all([isinstance(te, tft_expr.Expr) for te in TARGET_EXPRS]))

    # get constraints
    assert(len(ilines) > 0)
    assert(ilines[0] == "constraints:")
    ilines = ilines[1:]

    while True:
        if (len(ilines) == 0):
            break

        pred_expr = tft_parser.String2Expr(ilines[0], False)
        assert(isinstance(pred_expr, tft_expr.Predicate))
        CONSTRAINT_EXPRS.append(pred_expr)

        ilines = ilines[1:]

    # ---- generate the Error Forms ----
    tft_utils.DebugMessage(".exprs file read")
    tft_utils.DebugMessage("generating ErrorForms...")

    target_alloc = None
    irstrings = None

    EFORMS = []
    for te in TARGET_EXPRS:
        ef = GenerateErrorFormFromExpr(te, ERROR_TYPE, E_UPPER_BOUND, M2,
                                       EQ_GIDS, CONSTRAINT_EXPRS)
        EFORMS.append(ef)
    assert(len(EFORMS) == len(TARGET_EXPRS))

    # ---- solve from the ErrorForms ----
    tft_utils.DebugMessage("ErrorForms generated")

    tft_utils.TIME_PARSING = tft_utils.TIME_PARSING + (time.time() - time_parsing)

    EFORMS, target_alloc = SolveErrorForms(EFORMS, optimizers)

    if (VERBOSE):
        print("---- Error Forms after solving ----")
        for ef in EFORMS:
            print(str(ef))
            print("------------")

    if (target_alloc is None):
        print("TFT: no available allocation for the main expr...")
        return EFORMS, None

    # ---- some finalization before returning ----
    if (VERBOSE):
        stat = {}
        for te in TARGET_EXPRS:
            tft_expr.ExprStatistics(te, stat)

        assert("# constants" in stat.keys())
        assert("# variables" in stat.keys())
        assert("# operations" in stat.keys())
        assert("groups" in stat.keys())

        stat["groups"].sort()

        print("---- # constants: " + str(stat["# constants"]) + " (# of appearances)")
        print("---- # variables: " + str(stat["# variables"]) + " (# of appearances)")
        print("---- # operations: " + str(stat["# operations"]))
        print("---- groups: " + str(stat["groups"]))

        # n_opts, n_insts = tft_error_form.countOptsInsts(EFORMS)
        # print("---- # of (static) operations: " + str(n_opts) + " ----")
        # print("---- # of (dynamic) instances: " + str(n_insts) + " ----")

    # ---- return ----
    return EFORMS, target_alloc
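# A sketch of the .exprs section layout consumed by SolveExprs, reconstructed
# from the parser above (the section headers are exact; the bracketed
# placeholders stand for the per-line payloads):
#
#   options:
#   opt-error-form : <bool>
#   upper-bound:
#   <float error bound>
#   var-ranges:
#   <var> in [<lb>, <ub>]              (one per line)
#   group-epsilons:
#   <gid> : [<eps>, <eps>, ...]
#   eq-gids:
#   <gid> = <gid>
#   gid-counts:
#   <gid> : <count>
#   casting-counts:
#   (<gid-from>, <gid-to>) : <count>
#   gid-weight:
#   <gid> : <weight>
#   exprs:
#   <arithmetic expression>            (one per line)
#   constraints:
#   <predicate>                        (one per line, possibly none)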
def Cluster():
    assert((DEF.N_CTT_Samples is None) or
           ((type(DEF.N_CTT_Samples) is int) and (DEF.N_CTT_Samples > 0)))

    print("==== clustering sub-domains ====")

    # ---- load feature --> allocation ----
    Features = []
    Allocations = []
    FID_AID = []

    assert(os.path.isfile(DEF.FNAME_Feature_Allocation))
    file_fa = open(DEF.FNAME_Feature_Allocation, "r")

    for aline in file_fa:
        aline = aline.strip()
        if (aline == ""):
            continue

        if (DEF.N_CTT_Samples is not None):
            assert(len(Features) <= DEF.N_CTT_Samples)
            if (len(Features) == DEF.N_CTT_Samples):
                break

        feat_alloc = tft_utils.String2Tokens(aline, ":")
        assert(len(feat_alloc) == 2)
        assert(feat_alloc[0].startswith("[") and feat_alloc[0].endswith("]"))

        str_feat = feat_alloc[0][1:len(feat_alloc[0]) - 1]
        feats = tft_utils.String2Tokens(str_feat, ",")
        this_feat = [float(feats[i]) for i in range(0, len(feats))]

        this_alloc = tft_alloc.Alloc()
        this_alloc.loadFromShortString(feat_alloc[1])

        Features.append(this_feat)
        this_fid = len(Features) - 1

        # map this allocation to an existing (or a fresh) allocation id
        this_aid = -1
        for aid in range(0, len(Allocations)):
            if (this_alloc == Allocations[aid]):
                this_aid = aid
                break
        if (this_aid == -1):
            Allocations.append(this_alloc)
            this_aid = len(Allocations) - 1

        FID_AID.append(this_aid)

    file_fa.close()

    if (DEF.N_CTT_Samples is None):
        DEF.N_CTT_Samples = len(Features)

    # check the validity of the parameters
    assert(0 < len(Features))
    assert(len(Features) == len(FID_AID))
    assert(len(Features) == DEF.N_CTT_Samples)
    assert(DEF.N_Clusters <= len(Features))

    # build FID -> HDLabel mapping for clustering
    fid_hdlabel = []
    for fid in range(0, len(Features)):
        assert(fid < len(FID_AID))
        aid = FID_AID[fid]
        assert(aid < len(Allocations))
        alloc = Allocations[aid]

        hdlabel = DEF.Alloc2HDLabel(alloc)
        assert(DEF.isTypedList(int, hdlabel))
        fid_hdlabel.append(hdlabel)
    assert(len(fid_hdlabel) == len(Features))

    # -- export HDLID to GID --
    # NOTE: this mapping is built by function DEF.Alloc2HDLabel
    assert(DEF.HDLID_GID is not None)
    file_hdlid_gid = open(DEF.FNAME_HDLID_GID, "w")
    for hdlid, gid in DEF.HDLID_GID.items():
        assert((type(hdlid) is int) and (type(gid) is int))
        file_hdlid_gid.write(str(hdlid) + " " + str(gid) + "\n")
    file_hdlid_gid.close()

    # clustering
    rel_tree = treecluster(fid_hdlabel, None, None, 0, 'm', 'b', None)
    fid_cid = rel_tree.cut(DEF.N_Clusters)

    FID_CID = {}
    for fid in range(0, len(fid_cid)):
        cid = fid_cid[fid]
        assert(fid not in FID_CID.keys())
        FID_CID[fid] = cid

    # -- merge the allocs classified into the same class --
    print("==== merging tuning results based on clustering ====")

    assert(len(Features) == len(FID_AID))
    assert(len(Features) == len(FID_CID))

    DEF.CID_HDLabel = {}  # [None for c in range(0, DEF.N_Clusters)]
    for fid in range(0, len(FID_CID)):
        cid = FID_CID[fid]
        if (cid not in DEF.CID_HDLabel.keys()):
            DEF.CID_HDLabel[cid] = None
        DEF.CID_HDLabel[cid] = DEF.MergeHDLsTowardTop(DEF.CID_HDLabel[cid],
                                                      fid_hdlabel[fid])

    # -- fix the repeated HDLabels (which are allocations) --
    EQ_CID = {}
    for cid in range(0, DEF.N_Clusters):
        same_cid = cid
        for later_cid in range(0, DEF.N_Clusters):
            if (DEF.CID_HDLabel[cid] == DEF.CID_HDLabel[later_cid]):
                same_cid = later_cid
        if (same_cid > cid):
            assert(cid not in EQ_CID.keys())
            EQ_CID[cid] = same_cid

    for fid, cid in FID_CID.items():
        if (cid in EQ_CID.keys()):
            FID_CID[fid] = EQ_CID[cid]
            if (cid in DEF.CID_HDLabel.keys()):
                del DEF.CID_HDLabel[cid]

    for cid1 in DEF.CID_HDLabel.keys():
        for cid2 in DEF.CID_HDLabel.keys():
            if (cid1 == cid2):
                continue
            assert(DEF.CID_HDLabel[cid1] != DEF.CID_HDLabel[cid2])

    # -- print out the allocations --
    for cid in range(0, DEF.N_Clusters):
        if (cid in DEF.CID_HDLabel.keys()):
            print("---- class alloc (" + str(cid) + ") ----")
            DEF.PrintHDLabel(DEF.CID_HDLabel[cid])

    # -- export CID_HDLabel --
    file_cid_hdlabel = open(DEF.FNAME_CID_HDLabel, "w")
    for cid in range(0, DEF.N_Clusters):
        if (cid in DEF.CID_HDLabel.keys()):
            file_cid_hdlabel.write(
                str(cid) + " : " + str(DEF.CID_HDLabel[cid]) + "\n")
    file_cid_hdlabel.close()

    # -- build DEF.CID_Training_Counts --
    n_features = len(Features)

    DEF.CID_Training_Counts = {}  # [0 for i in range(0, DEF.N_Clusters)]
    for fid in range(0, n_features):
        if (not DEF.isTrainingID(fid, n_features)):
            continue

        cid = FID_CID[fid]
        assert((0 <= cid) and (cid < DEF.N_Clusters))

        if (cid not in DEF.CID_Training_Counts.keys()):
            DEF.CID_Training_Counts[cid] = 0
        DEF.CID_Training_Counts[cid] = DEF.CID_Training_Counts[cid] + 1

    assert(sum(DEF.CID_Training_Counts.values()) ==
           (int(float(n_features) * float(DEF.RATE_Trains_Samples))))

    if (VERBOSE):
        print("---- cid to # of training partitions ----")
        for cid in range(0, DEF.N_Clusters):
            if (cid in DEF.CID_Training_Counts.keys()):
                print("CID: " + str(cid) + " : " +
                      str(DEF.CID_Training_Counts[cid]))

    # -- export CID_Training_Counts --
    file_ctc = open(DEF.FNAME_CID_Training_Counts, "w")
    for cid, tcounts in DEF.CID_Training_Counts.items():
        file_ctc.write(str(cid) + " " + str(tcounts) + "\n")
    file_ctc.close()

    # -- export training and testing data --
    # ( export feature -> cluster )
    assert((0 < DEF.RATE_Trains_Samples) and (DEF.RATE_Trains_Samples < 1))

    file_train_f2c = open(DEF.FNAME_SVM_TRAIN_FEATURE_CLUSTER, "w")
    file_test_f2c = open(DEF.FNAME_SVM_TEST_FEATURE_CLUSTER, "w")

    file_train_f2c_csv = open(DEF.FNAME_CSV_TRAIN_FEATURE_CLUSTER, "w")
    file_test_f2c_csv = open(DEF.FNAME_CSV_TEST_FEATURE_CLUSTER, "w")

    assert(n_features > 0)
    l_feat = len(Features[0])

    # write the CSV headers
    file_train_f2c_csv.write(",cid")
    file_test_f2c_csv.write(",cid")
    for f in range(0, l_feat):
        file_train_f2c_csv.write(",f" + str(f))
        file_test_f2c_csv.write(",f" + str(f))
    file_train_f2c_csv.write("\n")
    file_test_f2c_csv.write("\n")

    for fid in range(0, n_features):
        assert(len(Features[fid]) == l_feat)

        cid = FID_CID[fid]

        if (DEF.isTrainingID(fid, n_features)):
            file_train_f2c.write(str(cid))
            file_train_f2c_csv.write(str(fid) + "," + str(cid))
            for findex in range(0, l_feat):
                file_train_f2c.write(" " + str((findex + 1)) + ":" +
                                     str(Features[fid][findex]))
                file_train_f2c_csv.write("," + str(Features[fid][findex]))
            file_train_f2c.write("\n")
            file_train_f2c_csv.write("\n")
        else:
            file_test_f2c.write(str(cid))
            file_test_f2c_csv.write(str(fid) + "," + str(cid))
            for findex in range(0, l_feat):
                file_test_f2c.write(" " + str((findex + 1)) + ":" +
                                    str(Features[fid][findex]))
                file_test_f2c_csv.write("," + str(Features[fid][findex]))
            file_test_f2c.write("\n")
            file_test_f2c_csv.write("\n")

    file_train_f2c.close()
    file_test_f2c.close()
    # close the CSV files as well (left open in the original code)
    file_train_f2c_csv.close()
    file_test_f2c_csv.close()
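# A sketch of the feature -> allocation file format read by Cluster above,
# inferred from the parser: one line per sampled sub-domain, written as
# "[<f0>, <f1>, ...] : <alloc short string>", i.e. the feature vector in
# brackets, a colon, then an Alloc short string. Values are illustrative.
#
#   [0.0, 0.5, 1.0] : 0>1/16777216 1>1/9007199254740992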
def main():
    # ==== get parameters ====
    parser = argparse.ArgumentParser()
    parser.add_argument("expr_spec",
                        help="Expression Specification")
    parser.add_argument("-v", "--verbose",
                        action="store_true", default=False,
                        help="Verbose mode")
    parser.add_argument("-d", "--debug",
                        action="store_true", default=False,
                        help="Debug mode")
    parser.add_argument("-n", "--no-m2-check",
                        action="store_true", default=False,
                        help="Skip m2 check")
    parser.add_argument("-m", "--maxc",
                        type=int,
                        help="Maximum number of type casts")
    parser.add_argument("--linear-tc",
                        action="store_true", default=False,
                        help="Use linear type casting constraints")
    parser.add_argument("--aopt",
                        type=str, default="gurobi",
                        help="Allocation optimization solver: \"gurobi\" for Gurobi and \"glpk\" for GLPK (must work with --linear-tc)")
    parser.add_argument("--gopt-timeout",
                        type=int, default=120,
                        help="Timeout of the global optimization")
    parser.add_argument("--gopt-tolerance",
                        type=float, default=5e-02,
                        help="Tolerance of the global optimization")
    parser.add_argument("--optm",
                        type=str, default="max-benefit",
                        choices=tft_utils.OPT_METHODS,
                        help="Optimization method")
    parser.add_argument("-e", "--error-bounds",
                        type=str, required=True,
                        help="Error bounds")
    parser.add_argument("-b", "--bitwidths",
                        type=str, default="32 64",
                        help="Bit-width candidates")
    parser.add_argument("--fix-const-type",
                        action="store_true", default=False,
                        help="Fix the constant type to the highest bit-width")

    args = parser.parse_args()

    INPUT_FILE = args.expr_spec
    if not os.path.isfile(INPUT_FILE):
        error("Input expression file doesn't exist: {}".format(INPUT_FILE))

    tft_utils.FPTUNER_VERBOSE = args.verbose or args.debug
    tft_utils.FPTUNER_DEBUG = args.debug
    tft_utils.NO_M2_CHECK = args.no_m2_check

    if args.maxc is not None:
        if args.maxc < 0:
            error("maxc must be >= 0")
        tft_utils.N_MAX_CASTINGS = args.maxc

    if args.gopt_timeout <= 0:
        error("gopt-timeout must be > 0")
    tft_utils.GOPT_TIMEOUT = args.gopt_timeout

    if args.gopt_tolerance < 0.0:
        error("gopt-tolerance must be >= 0.0")
    tft_utils.GOPT_TOLERANCE = args.gopt_tolerance

    tft_utils.OPT_METHOD = args.optm

    err_list = args.error_bounds.split()
    try:
        err_list = [float(e) for e in err_list]
        if len(err_list) == 0 or not all([e > 0.0 for e in err_list]):
            raise ValueError
    except ValueError:
        error("The error bounds must all be positive floats.")
    tft_tuning.ERROR_BOUNDS = err_list

    bit_widths = args.bitwidths.replace(',', ' ').split()
    try:
        bit_widths = [int(b) for b in bit_widths]
        bit_widths = list(set(bit_widths))
        bit_widths.sort()
        if bit_widths not in ([32, 64, 128], [32, 64], [64, 128]):
            raise ValueError
    except ValueError:
        error("Accepted bitwidth candidates are: '32 64', '64 128', '32 64 128'")
    IR.PREC_CANDIDATES = ["e{}".format(b) for b in bit_widths]

    tft_utils.FIX_CONST_TYPE = args.fix_const_type

    tft_utils.LINEAR_TYPE_CASTING_CONSTRAINTS = args.linear_tc

    tft_tuning.OPTIMIZERS["alloc"] = args.aopt
    if (tft_tuning.OPTIMIZERS['alloc'] == 'glpk'):
        assert tft_utils.LINEAR_TYPE_CASTING_CONSTRAINTS, \
            "Solver GLPK must work with --linear-tc"

    # ==== load the input file as a module ====
    if INPUT_FILE.endswith(".py"):
        tokens = tft_utils.String2Tokens(INPUT_FILE, "/")
        assert(len(tokens) >= 1)

        module_name = tokens[-1]
        assert(module_name.endswith(".py"))
        module_name = module_name[0:len(module_name) - 3]

        IR.LOAD_CPP_INSTS = True
        module_in = imp.load_source(module_name, INPUT_FILE)
        if (IR.TARGET_EXPR is None):
            error("No tuning target expression was specified.")
        IR.LOAD_CPP_INSTS = False

    else:
        # new frontend
        with open(INPUT_FILE, 'r') as f:
            data = f.read()
        processed_data = get_runmain_input(data)
        py_source = translate(processed_data)
        code_obj = compile(py_source, '<string>', 'exec')
        IR.LOAD_CPP_INSTS = True
        exec(code_obj)
        if (IR.TARGET_EXPR is None):
            error("No tuning target expression was specified.")
        IR.LOAD_CPP_INSTS = False

    # ==== tune the targeted expression ====
    # reset the timers
    tft_utils.TIME_PARSING = 0
    tft_utils.TIME_FIRST_DERIVATIVES = 0
    tft_utils.TIME_GLOBAL_OPT = 0
    tft_utils.TIME_ALLOCATION = 0
    tft_utils.TIME_CHECK_M2 = 0

    # possibly remove the .exprs file
    EXPRS_NAME = INPUT_FILE + ".exprs"
    if (os.path.isfile(EXPRS_NAME)):
        tft_utils.VerboseMessage("Warning: overwriting existing file: " + EXPRS_NAME)
        os.remove(EXPRS_NAME)

    # go tuning
    for i in range(0, len(tft_tuning.ERROR_BOUNDS)):
        eforms = None
        alloc = None

        # Tune for the first error bound.
        # Need to generate the .exprs file first.
        if (i == 0):
            tft_ir_backend.ExportExpr2ExprsFile(IR.TARGET_EXPR,
                                                tft_tuning.ERROR_BOUNDS[0],
                                                EXPRS_NAME)

            # tune!
            eforms, alloc = tft_tuning.TFTRun(EXPRS_NAME)

        # Otherwise, reuse the ErrorForms: reset, install the new error
        # bound, and re-solve.
        else:
            tft_sol_exprs.ReadyToTune()

            new_eup = tft_expr.ConstantExpr(tft_tuning.ERROR_BOUNDS[i])
            for ef in tft_sol_exprs.EFORMS:
                ef.upper_bound = new_eup

            # solve the error form
            eforms, alloc = tft_sol_exprs.SolveErrorForms(tft_sol_exprs.EFORMS,
                                                          tft_tuning.OPTIMIZERS)

        # show the allocation
        print("==== error bound : " + str(tft_tuning.ERROR_BOUNDS[i]) + " ====")
        tft_tuning.PrintAlloc(alloc, eforms)
        print("")
        tft_ir_backend.ExportColorInsts(alloc)
        print("")

        # -- synthesize the mixed-precision .cpp file --
        # (base is computed up front since the timer file below needs it
        # even when no allocation was found)
        base = os.path.basename(INPUT_FILE)
        base = os.path.splitext(base)[0]

        if (alloc is None):
            print("Warning: no allocation was generated, so no .cpp file will be generated...")
        else:
            assert(isinstance(alloc, tft_alloc.Alloc))
            assert(eforms is not None)

            str_error_bound = str(float(tft_tuning.ERROR_BOUNDS[i]))
            fname_cpp = base + "." + str_error_bound + ".cpp"
            if (os.path.isfile(fname_cpp)):
                tft_utils.VerboseMessage("Warning: overwriting existing .cpp file: " + fname_cpp)
            tft_ir_backend.ExportCppInsts(alloc, fname_cpp)

        # show the timers
        timer_fname = base + ".timers.csv"
        write_header = (not os.path.isfile(timer_fname))

        if (write_header):
            timer_file = open(timer_fname, "w")
            timer_file.write("Total Parsing Time,First Derivatives,Global Optimization,QCQP,Check Higher-order Errors\n")
        else:
            timer_file = open(timer_fname, "a")

        timer_file.write(str(float(tft_utils.TIME_PARSING)) + "," +
                         str(float(tft_utils.TIME_FIRST_DERIVATIVES)) + "," +
                         str(float(tft_utils.TIME_GLOBAL_OPT)) + "," +
                         str(float(tft_utils.TIME_ALLOCATION)) + "," +
                         str(float(tft_utils.TIME_CHECK_M2)) + "\n")

        tft_utils.VerboseMessage("Total Parsing time          : " +
                                 str(float(tft_utils.TIME_PARSING)))
        tft_utils.VerboseMessage("    First Dev.              : " +
                                 str(float(tft_utils.TIME_FIRST_DERIVATIVES)))
        tft_utils.VerboseMessage("Time for global optimization: " +
                                 str(float(tft_utils.TIME_GLOBAL_OPT)))
        tft_utils.VerboseMessage("Time for solving QCQP       : " +
                                 str(float(tft_utils.TIME_ALLOCATION)))
        tft_utils.VerboseMessage("Time for checking M2        : " +
                                 str(float(tft_utils.TIME_CHECK_M2)))

        timer_file.close()
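# A minimal invocation sketch. The flag names come from the argparse setup
# above; the script and input file names are hypothetical:
#
#   python fptuner.py -e "1e-6 1e-8" -b "32 64" --maxc 4 ex1.py
#
# This tunes ex1.py for two error bounds with 32/64-bit candidates and at
# most four type casts, writing ex1.<bound>.cpp and ex1.timers.csv per bound.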
def Testing():
    print("==== testing in small-scale ====")

    # turn off tft_solver.LIMIT_N_CASTINGS
    tft_solver.LIMIT_N_CASTINGS = False

    # load CID_Training_Counts
    if (DEF.CID_Training_Counts is None):
        DEF.CID_Training_Counts = {}

        assert(os.path.isfile(DEF.FNAME_CID_Training_Counts))
        file_ctc = open(DEF.FNAME_CID_Training_Counts, "r")

        for aline in file_ctc:
            aline = aline.strip()
            if (aline == ""):
                continue

            tokens = tft_utils.String2Tokens(aline, " ")
            assert(len(tokens) == 2)

            cid = int(tokens[0])
            tcounts = int(tokens[1])

            print("CID Counts : " + str(cid) + " : " + str(tcounts))

            assert(cid not in DEF.CID_Training_Counts.keys())
            DEF.CID_Training_Counts[cid] = tcounts

        # assert(sum(DEF.CID_Training_Counts.values()) == DEF.N_Samples)

        file_ctc.close()

    # load HDLID_GID
    if (DEF.HDLID_GID is None):
        DEF.HDLID_GID = {}

        assert(os.path.isfile(DEF.FNAME_HDLID_GID))
        file_hdlid_gid = open(DEF.FNAME_HDLID_GID, "r")

        for aline in file_hdlid_gid:
            aline = aline.strip()
            if (aline == ""):
                continue

            tokens = tft_utils.String2Tokens(aline, " ")
            assert(len(tokens) == 2)

            hdlid = int(tokens[0])
            gid = int(tokens[1])

            print("HDLID: " + str(hdlid) + " : GID: " + str(gid))

            assert(hdlid not in DEF.HDLID_GID.keys())
            DEF.HDLID_GID[hdlid] = gid

        file_hdlid_gid.close()

    # load CID_HDLabel
    if (DEF.CID_HDLabel is None):
        DEF.CID_HDLabel = {}

        assert(DEF.DIM_HDL == 0)
        DEF.DIM_HDL = None

        assert(os.path.isfile(DEF.FNAME_CID_HDLabel))
        file_cid_hdlabel = open(DEF.FNAME_CID_HDLabel, "r")

        for aline in file_cid_hdlabel:
            aline = aline.strip()
            if (aline == ""):
                continue

            tokens = tft_utils.String2Tokens(aline, ":")
            assert(len(tokens) == 2)

            cid = int(tokens[0])
            str_hdlabel = tokens[1]

            assert((0 <= cid) and (cid < DEF.N_Clusters))
            assert(cid not in DEF.CID_HDLabel.keys())
            assert(str_hdlabel.startswith("[") and str_hdlabel.endswith("]"))

            str_hdlabel = str_hdlabel[1:len(str_hdlabel) - 1]
            tokens = tft_utils.String2Tokens(str_hdlabel, ",")
            hdlabel = [int(tokens[i]) for i in range(0, len(tokens))]

            print("CID: " + str(cid) + " : " + str(hdlabel))

            if (DEF.DIM_HDL is None):
                DEF.DIM_HDL = len(hdlabel)
            else:
                assert(DEF.DIM_HDL == len(hdlabel))

            DEF.CID_HDLabel[cid] = hdlabel[:]

        file_cid_hdlabel.close()

    # count DEF.N_CTT_Samples
    if (DEF.N_CTT_Samples is None):
        DEF.N_CTT_Samples = 0

        file_fc = open(DEF.FNAME_SVM_TRAIN_FEATURE_CLUSTER, "r")
        for aline in file_fc:
            aline = aline.strip()
            if (aline == ""):
                continue
            DEF.N_CTT_Samples = DEF.N_CTT_Samples + 1
        file_fc.close()

        file_fc = open(DEF.FNAME_SVM_TEST_FEATURE_CLUSTER, "r")
        for aline in file_fc:
            aline = aline.strip()
            if (aline == ""):
                continue
            DEF.N_CTT_Samples = DEF.N_CTT_Samples + 1
        file_fc.close()

    # load testing partitions
    assert((type(DEF.N_CTT_Samples) is int) and (DEF.N_CTT_Samples > 0))

    file_parts = open(DEF.FNAME_Partitions, "r")
    String_Partitions = []

    for aline in file_parts:
        aline = aline.strip()
        if (aline == ""):
            continue

        assert(len(String_Partitions) <= DEF.N_CTT_Samples)
        if (len(String_Partitions) == DEF.N_CTT_Samples):
            break

        String_Partitions.append(aline)

    file_parts.close()
    assert(len(String_Partitions) == DEF.N_CTT_Samples)

    Testing_Partitions = []
    pid = -1
    for i in range(0, DEF.N_CTT_Samples):
        aline = String_Partitions[i]

        pid = pid + 1
        if (DEF.isTrainingID(pid, DEF.N_CTT_Samples)):
            continue

        tokens = tft_utils.String2Tokens(aline, " ")

        this_part = []
        for j in range(0, len(tokens)):
            bs = tft_utils.String2Tokens(tokens[j], "~")
            assert(len(bs) == 2)

            lb = float(bs[0])
            ub = float(bs[1])

            this_part.append((lb, ub))

        Testing_Partitions.append(this_part)

    String_Partitions = []  # release the space ...

    # load testing feature -> CID
    Testing_Features = []
    Testing_CIDs = []

    assert(os.path.isfile(DEF.FNAME_SVM_TEST_FEATURE_CLUSTER))
    file_fc = open(DEF.FNAME_SVM_TEST_FEATURE_CLUSTER, "r")

    for aline in file_fc:
        aline = aline.strip()
        if (aline == ""):
            continue

        tokens = tft_utils.String2Tokens(aline, " ")

        cid = int(tokens[0])
        assert(cid in DEF.CID_HDLabel.keys())

        this_feature = []
        for i in range(1, len(tokens)):
            fv = tft_utils.String2Tokens(tokens[i], ":")
            assert(len(fv) == 2)
            assert(i == int(fv[0]))
            this_feature.append(float(fv[1]))

        Testing_CIDs.append(cid)
        Testing_Features.append(this_feature)

    file_fc.close()

    # check the validity of the data
    n_tests = len(Testing_Partitions)
    assert(n_tests == len(Testing_Features))
    assert(n_tests == len(Testing_CIDs))

    # go testing
    n_test_success = 0
    n_exact_allocs = 0
    CID_Testing_Counts = {}  # [0 for i in range(0, DEF.N_Clusters)]

    while (len(Testing_Partitions) > 0):
        assert(len(Testing_Partitions) == len(Testing_Features))
        assert(len(Testing_Features) == len(Testing_CIDs))

        # print out the testing progress
        this_tid = n_tests - len(Testing_Partitions)
        sys.stdout.write("\rTest [" + str(this_tid) + "] : ")

        # get this partition, feature, and the cid
        this_part = Testing_Partitions[0]
        this_dvec = DEF.InverseSampleInputPartitionFromVec(this_part)
        this_feature = Testing_Features[0]
        exact_cid = Testing_CIDs[0]

        # sanity check: the stored feature must match the recomputed one
        this_feature_2 = DEF.InputPartition2Feature([DEF.Feature_Option, []],
                                                    this_dvec)
        assert(len(this_feature) == len(this_feature_2))
        for i in range(0, len(this_feature)):
            f = this_feature[i]
            f2 = this_feature_2[i]
            assert(abs(f - f2) < 0.00000001)

        # predict the alloc
        this_cid = CIDPredict(this_feature)
        assert(this_cid in DEF.CID_HDLabel.keys())

        if (this_cid not in CID_Testing_Counts.keys()):
            CID_Testing_Counts[this_cid] = 0

        sys.stdout.write("predicted/exact CID : [" + str(this_cid) +
                         " / " + str(exact_cid) + "] ")
        sys.stdout.flush()

        CID_Testing_Counts[this_cid] = CID_Testing_Counts[this_cid] + 1

        predicted_alloc = DEF.HDLabel2Alloc(DEF.CID_HDLabel[this_cid])
        assert(predicted_alloc is not None)

        # count exact prediction
        if (this_cid == exact_cid):
            n_exact_allocs = n_exact_allocs + 1

        # solve the alloc
        this_eforms = None
        this_alloc = None

        if (DEF.REUSE_EFORMS):
            assert(DEF.BASE_EFORMS is not None)

            original_gid_epss = None
            new_gid_epss = {}

            # record and overwrite epsilons
            for ef in DEF.BASE_EFORMS:
                if (original_gid_epss is None):
                    original_gid_epss = ef.gid2epsilons.copy()
                else:
                    assert(original_gid_epss.keys() == ef.gid2epsilons.keys())
                    for gid, epss in original_gid_epss.items():
                        assert(ef.gid2epsilons[gid] == epss)

                for et in ef.terms:
                    et.stored_overapprox_expr = None

                    etgid = et.getGid()
                    assert(etgid >= 0)
                    assert(predicted_alloc.isAssigned(etgid))
                    assert(etgid in original_gid_epss.keys())

                    ow_epss = [tft_expr.ConstantExpr(predicted_alloc[etgid])]
                    if (etgid in new_gid_epss.keys()):
                        assert(new_gid_epss[etgid] == ow_epss)
                    else:
                        new_gid_epss[etgid] = ow_epss

            for gid in original_gid_epss.keys():
                assert(predicted_alloc.isAssigned(gid))
                new_gid_epss[gid] = [tft_expr.ConstantExpr(predicted_alloc[gid])]
            assert(new_gid_epss.keys() == original_gid_epss.keys())

            for ef in DEF.BASE_EFORMS:
                ef.gid2epsilons = new_gid_epss.copy()

            # solve alloc.
            tft_tuning.TFTSystemReset()
            DEF.RewriteVarBounds(this_part)
            this_eforms, this_alloc = tft_sol_exprs.SolveErrorForms(
                DEF.BASE_EFORMS, tft_tuning.OPTIMIZERS)

            # restore the original epsilons
            for ef in DEF.BASE_EFORMS:
                ef.gid2epsilons = original_gid_epss.copy()

        else:
            # create the exprs file
            # NOTE: the original code passed the undefined name "id_feat"
            # here; using the current test index is an assumption about the
            # intended behavior.
            fname_part = tft_dat_sampling.FNameExprs(tft_dat_sampling.FNAME_EXPRS,
                                                     this_tid)

            # solve alloc.
            tft_dat_sampling.WriteExprsFile(fname_part, this_part, predicted_alloc)
            tft_tuning.TFTSystemReset()
            this_eforms, this_alloc = tft_sol_exprs.SolveExprs(
                fname_part, tft_tuning.OPTIMIZERS)
            os.system("rm " + fname_part)

        # count the correct prediction
        if (this_alloc is not None):
            assert(this_alloc == predicted_alloc)
            n_test_success = n_test_success + 1
            if (VERBOSE):
                sys.stdout.write(" ---- prediction succeeded!!")
                sys.stdout.flush()
        else:
            if (VERBOSE):
                print(" ---- prediction failed...")

        # finalizing
        del Testing_Partitions[0]
        del Testing_Features[0]
        del Testing_CIDs[0]

    print("")
    print("Small-scale Testing Result: " + str(n_test_success) + " / " +
          str(n_tests) + " (" + str(float(n_test_success) / float(n_tests)) + ")")
    print("    Exact Result          : " + str(n_exact_allocs) + " / " +
          str(n_tests) + " (" + str(float(n_exact_allocs) / float(n_tests)) + ")")

    # show CID_Testing_Counts
    assert(sum(CID_Testing_Counts.values()) == n_tests)
    if (VERBOSE):
        print("---- cid to # of testing partitions ----")
        for cid in range(0, DEF.N_Clusters):
            if (cid in CID_Testing_Counts.keys()):
                print("CID: " + str(cid) + " : " + str(CID_Testing_Counts[cid]))
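# A sketch of the partitions file format consumed above, inferred from the
# parser: one partition per line, space-separated "lb~ub" ranges (one range
# per input variable). Values are illustrative.
#
#   0.0~0.5 -1.0~0.0
#   0.5~1.0 -1.0~0.0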
def LoadExprsFile(fname):
    global FNAME_EXPRS
    global FILE_PREFIX
    global FILE_GID2EPSS
    global FILE_POSTFIX

    assert(fname.endswith(".exprs"))
    FNAME_EXPRS = fname

    f_stage = "prefix"

    efile = open(fname, "r")

    for aline in efile:
        # decide the loading stage
        if (aline.strip() == "var-ranges:"):
            assert(f_stage == "prefix")
            f_stage = "var-ranges"
            continue

        elif (aline.strip() == "group-epsilons:"):
            assert(f_stage == "var-ranges")
            f_stage = "group-epsilons"
            continue

        elif (aline.strip() == "eq-gids:"):
            assert(f_stage == "group-epsilons")
            f_stage = "postfix"
            FILE_POSTFIX.append("\n")
            # NOTE: no "continue" here -- the "eq-gids:" line itself is
            # appended to FILE_POSTFIX by the stage handling below

        else:
            pass

        # handle the line based on the stage
        if (f_stage == "prefix"):
            FILE_PREFIX.append(aline)

        elif (f_stage == "var-ranges"):
            if (aline.strip() != ""):
                ve = tft_parser.String2BoundedVariableExpr(aline.strip())
                assert(ve.label() not in DEF.VNames)

                if (not tft_expr.isConstVar(ve)):
                    DEF.VarExprs.append(ve)
                    DEF.VNames.append(ve.label())
                    DEF.VRanges.append((float(ve.lb().value()),
                                        float(ve.ub().value())))

        elif (f_stage == "group-epsilons"):
            if (aline.strip() != ""):
                tokens = tft_utils.String2Tokens(aline.strip(), ":")
                assert(len(tokens) == 2)

                gid = int(tokens[0])
                epss = tokens[1]

                assert(gid not in FILE_GID2EPSS.keys())
                FILE_GID2EPSS[gid] = epss

        elif (f_stage == "postfix"):
            FILE_POSTFIX.append(aline)

        else:
            assert(False)

    efile.close()

    # generate N_Var_Intervals
    DEF.assert_VNames()
    DEF.assert_VRanges()
    DEF.N_Var_Intervals = [DEF.N_Partitions for i in range(0, len(DEF.VNames))]
def LoadConfig(fname_config):
    global OPTIMIZERS

    # default settings
    tft_ask_gurobi.VERBOSE = False
    tft_ask_markian.VERBOSE = False
    tft_solver.VERBOSE = False
    tft_parser.VERBOSE = False
    tft_get_first_derivations.VERBOSE = False

    # -- beginning of loading config. file --
    cfile = open(fname_config, "r")

    for aline in cfile:
        aline = aline.strip()
        if (aline == ""):
            continue
        if (aline.startswith("#")):
            continue

        tokens = tft_utils.String2Tokens(aline, "=")
        assert(len(tokens) == 2)

        opt = tokens[0]
        val = tokens[1]

        if (opt == "OPT_VRANGE"):
            assert(val in tft_solver.ALL_OPTIMIZERS)
            OPTIMIZERS["vrange"] = val

        elif (opt == "OPT_ALLOC"):
            assert(val in tft_solver.ALL_OPTIMIZERS)
            OPTIMIZERS["alloc"] = val

        elif (opt == "VERBOSE_GUROBI"):
            tft_ask_gurobi.VERBOSE = tft_utils.String2Bool(val)

        elif (opt == "VERBOSE_SAMPLER"):
            tft_ask_sampler.VERBOSE = tft_utils.String2Bool(val)

        elif (opt == "VERBOSE_SAMPLERS"):
            tft_ask_samplers.VERBOSE = tft_utils.String2Bool(val)

        elif (opt == "VERBOSE_SOLVER"):
            tft_solver.VERBOSE = tft_utils.String2Bool(val)

        elif (opt == "VERBOSE_PARSER"):
            tft_parser.VERBOSE = tft_utils.String2Bool(val)

        elif (opt == "VERBOSE_GET_FIRST_DERIVATIONS"):
            tft_get_first_derivations.VERBOSE = tft_utils.String2Bool(val)

        elif (opt == "ERROR_TYPE"):
            tft_sol_exprs.ERROR_TYPE = val

        elif (opt == "VERBOSE_SOL_EXPRS"):
            tft_sol_exprs.VERBOSE = tft_utils.String2Bool(val)

        elif (opt == "GELPIA_TIMEOUT"):
            tft_ask_gelpia.TIMEOUT = tft_utils.String2Int(val)
            assert(tft_ask_gelpia.TIMEOUT > 0)

        elif (opt == "GELPIA_TOLERANCE"):
            tft_ask_gelpia.DEFAULT_TOLERANCE = tft_utils.String2Float(val)
            assert(tft_ask_gelpia.DEFAULT_TOLERANCE >= 1e-07)

        elif (opt == "N_GELPIAS"):
            tft_ask_gelpias.N_GELPIAS = tft_utils.String2Int(val)
            assert(tft_ask_gelpias.N_GELPIAS > 0)

        elif (opt == "SOLVER_N_SAMPLES"):
            tft_solver.N_SAMPLES = tft_utils.String2Int(val)
            assert(tft_solver.N_SAMPLES > 0)

        elif (opt == "SOLVER_ADDRESS_CASTINGS"):
            tft_solver.ADDRESS_CASTINGS = tft_utils.String2Bool(val)

        elif (opt == "SOLVER_LIMIT_N_CASTINGS"):
            tft_solver.LIMIT_N_CASTINGS = tft_utils.String2Bool(val)

        elif (opt == "SOLVER_N_MAX_CASTINGS"):
            tft_solver.N_MAX_CASTINGS = tft_utils.String2Int(val)
            assert(0 <= tft_solver.N_MAX_CASTINGS)

        elif (opt == "SAMPLERS_N_SAMPLERS"):
            tft_ask_samplers.N_SAMPLERS = tft_utils.String2Int(val)
            assert(tft_ask_samplers.N_SAMPLERS > 0)

        elif (opt.startswith("DAT_")):
            # DAT_ options are handled by LoadDATConfig
            continue

        else:
            sys.exit("ERROR: invalid option: " + opt)

    cfile.close()
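# A minimal config-file sketch. The option names come from the parser above;
# the values are illustrative only (OPT_* values must be members of
# tft_solver.ALL_OPTIMIZERS). "#"-prefixed lines and blank lines are skipped;
# everything else is "OPTION=value".
#
#   # solver setup
#   OPT_VRANGE=gelpia
#   OPT_ALLOC=gurobi
#   VERBOSE_SOLVER=true
#   GELPIA_TIMEOUT=120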
def LoadDATConfig(fname_config):
    global N_Samples
    global N_CTT_Samples
    global N_Clusters
    global N_Partitions
    global VERBOSE
    global Feature_Option
    global TRAIN_MODE
    global TEST_MODE
    global STEP_SAMPLE
    global STEP_CLUSTER
    global STEP_TRAIN
    global STEP_TEST
    global Sampling_Method

    fconf = open(fname_config, "r")

    for aline in fconf:
        aline = aline.strip()
        if (aline == ""):
            continue
        if (aline.startswith("#")):
            continue

        tokens = tft_utils.String2Tokens(aline, "=")
        assert(len(tokens) == 2)

        opt = tokens[0]
        val = tokens[1]

        if (opt.startswith("DAT_")):
            if (opt == "DAT_VERBOSE"):
                VERBOSE = tft_utils.String2Bool(val)

            elif (opt == "DAT_N_SAMPLES"):
                N_Samples = tft_utils.String2Int(val)

            elif (opt == "DAT_N_CTT_SAMPLES"):
                N_CTT_Samples = tft_utils.String2Int(val)

            elif (opt == "DAT_N_CLUSTERS"):
                N_Clusters = tft_utils.String2Int(val)

            elif (opt == "DAT_N_PARTITIONS"):
                N_Partitions = tft_utils.String2Int(val)

            elif (opt == "DAT_FEATURE_OPT"):
                Feature_Option = val

            elif (opt == "DAT_SAMPLING_METHOD"):
                Sampling_Method = val

            elif (opt == "DAT_STEP_SAMPLE"):
                STEP_SAMPLE = tft_utils.String2Bool(val)

            elif (opt == "DAT_STEP_CLUSTER"):
                STEP_CLUSTER = tft_utils.String2Bool(val)

            elif (opt == "DAT_STEP_TRAIN"):
                STEP_TRAIN = tft_utils.String2Bool(val)

            elif (opt == "DAT_STEP_TEST"):
                STEP_TEST = tft_utils.String2Bool(val)

            elif (opt == "DAT_TRAIN_MODE"):
                TRAIN_MODE = val

            elif (opt == "DAT_TEST_MODE"):
                TEST_MODE = val

            else:
                sys.exit("ERROR: invalid DAT option: " + opt)

        else:
            pass

    fconf.close()

    # check the validity of the settings
    assert(0 < N_Samples)
    assert(0 < N_Partitions)
    assert(Feature_Option in Available_Feature_Options)
    assert(Sampling_Method in Available_Sampling_Methods)
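# A minimal DAT config sketch. The option names come from the parser above;
# values are illustrative. It uses the same "OPTION=value" format as
# LoadConfig, restricted to DAT_-prefixed options (non-DAT options are
# silently skipped here).
#
#   DAT_VERBOSE=true
#   DAT_N_SAMPLES=100
#   DAT_N_CLUSTERS=4
#   DAT_N_PARTITIONS=10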
def DecisionTreeTraining(tdata):
    assert(type(tdata) is str)
    assert(os.path.isfile(tdata))
    assert(tdata.endswith(".csv"))

    # calculate the # of features
    tfile = open(tdata, "r")

    n_feats = None
    for aline in tfile:
        aline = aline.strip()
        if (aline == ""):
            continue

        tokens = tft_utils.String2Tokens(aline, ",")

        if (n_feats is None):
            # the header row: "cid,f0,f1,..."
            assert(DEF.DT_FLABELS is None)
            assert(len(tokens) > 1)
            assert(tokens[0] == "cid")
            n_feats = len(tokens) - 1
            DEF.DT_FLABELS = tokens[1:]
        else:
            # a data row: "fid,cid,f0,f1,..."
            assert((n_feats + 2) == len(tokens))

    tfile.close()
    assert((n_feats is not None) and (n_feats > 0))

    # go training
    print("==== DecisionTree training model ====")
    DEF.DT_MODEL = DT.DecisionTree(
        training_datafile=tdata,
        csv_class_column_index=1,
        csv_columns_for_features=range(2, (2 + n_feats)),
        entropy_threshold=0.01,
        max_depth_desired=3,
        symbolic_to_numeric_cardinality_threshold=0.00001,
    )

    # etd = DT.EvalTrainingData(training_datafile=tdata,
    #                           csv_class_column_index=1,
    #                           csv_columns_for_features=range(2, (2 + n_feats)),
    #                           entropy_threshold=0.01,
    #                           max_depth_desired=3,
    #                           symbolic_to_numeric_cardinality_threshold=0.00001,)
    # etd.get_training_data()
    # score_train = etd.evaluate_training_data()
    # print("ETD training score: " + str(score_train))

    DEF.DT_MODEL.get_training_data()
    DEF.DT_MODEL.calculate_first_order_probabilities()
    DEF.DT_MODEL.calculate_class_priors()
    DEF.DT_MODEL.show_training_data()

    DEF.DT_ROOT = DEF.DT_MODEL.construct_decision_tree_classifier()

    if (VERBOSE):
        print("-- dump the trained decision tree --")
        DEF.DT_ROOT.display_decision_tree(" ")
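# A sketch of the .csv training-data layout expected above, matching the files
# written by Cluster: the header row carries the class column "cid" and one
# label per feature, and data rows are "<fid>,<cid>,<f0>,...". Values are
# illustrative.
#
#   ,cid,f0,f1
#   0,2,0.25,0.75
#   1,0,0.10,0.60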