Exemplo n.º 1
0
 def test_regress(self):
     """Checks that regression recovers a known function from noisy data.

     For each trial, coefficients A, B and C are drawn at random, outputs are
     generated from FUNCTION_TO_REGRESS_TEMPLATE with sr.random() added to
     each one, and the regressed function must land within 1.0 of the
     noise-free template at a random test point and reach an r-squared value
     above .8 on the generated data."""
     input_count = 2
     # drawn once, so every trial uses the same number of data points:
     point_count = sr.randint(5, 1000)
     max_error = 1.0
     trial_count = 2
     for _trial in xrange(trial_count):
         coeff_a = float(sr.randint(1, 100))
         coeff_b = float(sr.randint(1, 5))
         coeff_c = float(sr.randint(1, 100))
         # inputs[i][j] is the value of input variable i at data point j:
         inputs = []
         for _variable in xrange(input_count):
             inputs.append(
                 [sr.randint(1, 200) for _point in xrange(point_count)])
         # each output is the template value at one data point, plus noise:
         outputs = []
         for point in xrange(point_count):
             sample = [column[point] for column in inputs]
             exact = FUNCTION_TO_REGRESS_TEMPLATE(sample, coeff_a, coeff_b,
                                                  coeff_c)
             outputs.append(exact + sr.random())
         function_guess = regression.regress(
             function_to_regress=FUNCTION_TO_REGRESS,
             outputs=outputs,
             inputs=inputs)
         # compare the guess with the noise-free template at a fresh point:
         test_inputs = [sr.randint(1, 100) for _variable in xrange(input_count)]
         predicted = function_guess.function(test_inputs)
         expected = FUNCTION_TO_REGRESS_TEMPLATE(test_inputs, coeff_a, coeff_b,
                                                 coeff_c)
         self.assertTrue(abs(predicted - expected) < max_error)
         rsquared = function_guess.get_rsquared(inputs, outputs)
         self.assertTrue(rsquared > .8)
 def _store_3d_latency_graph(self, selection_cols):
     """Stores the latency as a function of number of records returned and
     one other parameter, and its auxiliary information like rsquared value
     and function.

     Args:
         selection_cols: the selection columns string; must be "id" or "*".

     The best-fit function string, its rsquared value and the LaTeX string
     of the graph are stored on the output object under the keys
     "<cols>_functionstr", "<cols>_rsquared" and "<cols>_graph", where
     <cols> is "id" or "star". A value that cannot be computed is stored as
     None."""
     assert selection_cols in ["id", "*"], ("invalid selection cols %s" %
                                            selection_cols)
     parameters = self._get_parameters(selection_cols)
     # because python cannot handle attribute names containing "*", we must
     # map * to something else:
     selection_cols_attr_string = selection_cols
     if selection_cols == "*":
         selection_cols_attr_string = "star"
     # find all of the naming and reference strings:
     cat_name = self._inp[t1s.DBF_CAT]
     if self._inp.get(t1s.DBF_SUBCAT) is not None:
         cat_name = self._inp[t1s.DBF_CAT] + "-" + self._inp[t1s.DBF_SUBCAT]
     caption = "Type %s SELECT %s Queries (%s)" % (
         cat_name, selection_cols,
         self._inp.test_db.get_short_database_name())
     tag = self.get_tag(inp=self._inp, aux=selection_cols_attr_string)
     graph_path = self.get_img_path(inp=self._inp,
                                    aux=selection_cols_attr_string)
     # find the data and create the graphs, etc:
     # NOTE(review): y_label names the query latency, yet z_values is what is
     # regressed as the output below -- confirm that the label order matches
     # the order of parameters["values"].
     (x_label, y_label,
      z_label) = (self._config.var_nummatches + " = # new matching records",
                  self._config.var_ql + " = query latency (s)",
                  parameters["z_label"])
     [x_values, y_values, z_values] = parameters["values"]
     try:
         function = regression.regress(
             function_to_regress=parameters["ftr"],
             outputs=z_values,
             inputs=[x_values, y_values])
         functionstr = function.string
         rsquared = function.get_rsquared(inputs=[x_values, y_values],
                                          outputs=z_values)
     except regression.BadRegressionInputError:
         # no best-fit function is available; the graph is still attempted
         # below, just without a best-fit surface:
         function = None
         functionstr = None
         rsquared = None
     try:
         graph = graphing.graph3d("",
                                  x_values,
                                  y_values,
                                  z_values,
                                  x_label,
                                  y_label,
                                  z_label,
                                  best_fit_function=function)
         self.write(graph_path, graph)
         graphimage = latex_classes.LatexImage(caption, tag, graph_path)
         graphstr = graphimage.get_string()
     except Exception:
         # graphing is best effort: any ordinary failure leaves the graph
         # entry empty. Catching Exception (rather than using a bare except)
         # lets KeyboardInterrupt and SystemExit propagate.
         graphstr = None
     for (suffix, value) in [("functionstr", functionstr),
                             ("rsquared", rsquared), ("graph", graphstr)]:
         self._outp[selection_cols_attr_string + "_" + suffix] = value
 def _store_latency_by_fieldtype_graph(self):
     """Stores a graph of query latency against number of new matching
     records on the output object, with one dataset for every
     (selection columns, field type) pair found in the results database."""
     # naming and reference strings:
     query_cat = self._inp[t1s.DBF_CAT]
     db_name = self._inp.test_db.get_short_database_name()
     caption = "Type %s Queries (%s)" % (query_cat, db_name)
     tag = self.get_tag(self._inp)
     graph_path = self.get_img_path(self._inp, "byfieldtype")
     # constraints shared by every query made below:
     base_constraints = (self._config.get_constraint_list() +
                         self._inp.get_constraint_list())
     categories = self._config.results_db.get_unique_query_values(
         simple_fields=[(t1s.DBP_TABLENAME, t1s.DBP_SELECTIONCOLS)],
         constraint_list=base_constraints,
         atomic_fields_and_functions=[
             (t1s.DBA_FIELDTYPE,
              t1s.Ta1ResultsSchema().get_complex_function(
                  t1s.DBA_TABLENAME, t1s.DBA_FIELDTYPE))
         ])
     x_label = self._config.var_nummatches + " = # new matching records"
     y_label = self._config.var_ql + " = query latency (s)"
     datasets = []
     for (selection_cols, field_type) in categories:
         # narrow the constraints down to this category only:
         category_constraints = base_constraints + [
             (t1s.DBP_TABLENAME, t1s.DBP_SELECTIONCOLS, selection_cols),
             (t1s.DBA_TABLENAME, t1s.DBA_FIELDTYPE, field_type)
         ]
         [x_values, y_values] = self._config.results_db.get_query_values(
             [(t1s.DBP_TABLENAME, t1s.DBP_NUMNEWRETURNEDRECORDS),
              (t1s.DBP_TABLENAME, t1s.DBP_QUERYLATENCY)],
             constraint_list=category_constraints)
         try:
             best_fit = regression.regress(
                 function_to_regress=self._config.ql_all_ftr,
                 outputs=y_values,
                 inputs=[x_values])
         except regression.BadRegressionInputError:
             # the dataset is still plotted, just without a best-fit curve:
             best_fit = None
         data_label = "SELECT %s on %ss" % (selection_cols, field_type)
         datasets.append((x_values, y_values, data_label, best_fit))
     graph = graphing.graph2d(plot_name="",
                              datasets=datasets,
                              x_label=x_label,
                              y_label=y_label)
     self.write(graph_path, graph)
     graph_image = latex_classes.LatexImage(caption, tag, graph_path)
     self._outp["latency_by_fieldtype_graph"] = graph_image.get_string()
Exemplo n.º 4
0
 def _store_query_latency_table(self):
     """Builds the query latency table and stores its LaTeX string on the
     output object under "query_latency_table".

     The table gets one row per unique combination of database number of
     records, database record size, selection columns and query category.
     Each row lists the best-fit function of latency against number of new
     returned records and its r-squared value, or "-" for both when the
     regression input is rejected."""
     base_constraints = self._config.get_constraint_list(
         require_correct=True)
     categories = self._config.results_db.get_unique_query_values(
         simple_fields=[(t1s.DBF_TABLENAME, t1s.DBF_NUMRECORDS),
                        (t1s.DBF_TABLENAME, t1s.DBF_RECORDSIZE),
                        (t1s.DBP_TABLENAME, t1s.DBP_SELECTIONCOLS),
                        (t1s.DBF_TABLENAME, t1s.DBF_CAT)],
         constraint_list=base_constraints)
     # create the (still empty) latency table:
     header = [
         "DBNR", "DBRS", "Select", "Query Type", "Best-Fit Func",
         "R-Squared"
     ]
     table = latex_classes.LatexTable(
         "Query Latency vs. Number of Records Returned Best Fit Functions",
         "lat_main", header)
     # fit a latency function for every query category:
     for (num_records, record_size, selection_cols, query_cat) in categories:
         inp = t1ai.Input()
         inp[t1s.DBF_CAT] = query_cat
         inp[t1s.DBF_NUMRECORDS] = num_records
         inp[t1s.DBF_RECORDSIZE] = record_size
         inp[t1s.DBP_SELECTIONCOLS] = selection_cols
         category_constraints = base_constraints + inp.get_constraint_list()
         [x_values, y_values] = self._config.results_db.get_query_values(
             simple_fields=[(t1s.DBP_TABLENAME,
                             t1s.DBP_NUMNEWRETURNEDRECORDS),
                            (t1s.DBP_TABLENAME, t1s.DBP_QUERYLATENCY)],
             constraint_list=category_constraints)
         regress_inputs = [x_values]
         try:
             best_fit = regression.regress(
                 function_to_regress=self._config.ql_all_ftr,
                 outputs=y_values,
                 inputs=regress_inputs)
             fit_cells = [
                 best_fit.string,
                 best_fit.get_rsquared(regress_inputs, y_values)
             ]
         except regression.BadRegressionInputError:
             # placeholders for categories that cannot be regressed:
             fit_cells = ["-", "-"]
         row = [
             inp.test_db.get_db_num_records_str(),
             inp.test_db.get_db_record_size_str(), selection_cols,
             query_cat
         ]
         table.add_content(row + fit_cells)
     self._outp["query_latency_table"] = table.get_string()
Exemplo n.º 5
0
 def _store_complex_evaluation_latency_graph(self, secparam):
     """Stores the best-fit evaluation latency function for dependence on
     more than two variables.

     Evaluation latency is regressed against the D, W and L values of the
     "RANDOM" test type circuits whose K parameter equals secparam. The
     function string and its r-squared value are stored on the output
     object under "evaluation_complexfunctionstr<secparam>" and
     "evaluation_complexrsquared<secparam>"; both are None when the
     regression raises BadRegressionInputError or TypeError."""
     fields = [(t2s.PARAM_TABLENAME, t2s.PARAM_D),
               (t2s.CIRCUIT_TABLENAME, t2s.CIRCUIT_W),
               (t2s.PARAM_TABLENAME, t2s.PARAM_L),
               (t2s.PEREVALUATION_TABLENAME,
                t2s.PEREVALUATION_EVALUATIONLATENCY)]
     secparam_constraints = [
         (t2s.PARAM_TABLENAME, t2s.PARAM_K, secparam),
         (t2s.CIRCUIT_TABLENAME, t2s.CIRCUIT_TESTTYPE, "RANDOM")
     ]
     config_constraints = self._config.get_constraint_list(
         fields=fields, require_correct=True, usebaseline=False)
     constraints = config_constraints + secparam_constraints
     # this weeds out tests with no D defined (single gate type tests):
     non_standard_constraints = [(t2s.PARAM_TABLENAME, t2s.PARAM_D,
                                  "%s.%s IS NOT 'None'")]
     [d_values, w_values, l_values,
      latencies] = self._config.results_db.get_values(
          fields,
          constraint_list=constraints,
          non_standard_constraint_list=non_standard_constraints)
     ftr = self._config.complexevallatency_ftr
     regress_inputs = [d_values, w_values, l_values]
     try:
         best_fit = regression.regress(function_to_regress=ftr,
                                       outputs=latencies,
                                       inputs=regress_inputs)
         (functionstr, rsquared) = (best_fit.string,
                                    best_fit.get_rsquared(
                                        inputs=regress_inputs,
                                        outputs=latencies))
     except (regression.BadRegressionInputError, TypeError):
         (functionstr, rsquared) = (None, None)
     key_suffix = str(secparam)
     self._outp["evaluation_complexfunctionstr" + key_suffix] = functionstr
     self._outp["evaluation_complexrsquared" + key_suffix] = rsquared
 def _store_complex_function(self, selection_cols):
     """Stores the higher-order best-fit curve for the dnf.

     Query latency is regressed against the number of new returned records,
     the number of clauses, the number of terms per clause and the summed
     number of records matching the first term. The function string and its
     r-squared value are stored on the output object under
     "<cols>_higher_order_functionstr" and "<cols>_higher_order_rsquared",
     where <cols> is selection_cols with "*" spelled "star"; both are None
     when the regression input is rejected."""
     # because python cannot handle attribute names containing "*", we must
     # map * to something else:
     attr_prefix = "star" if selection_cols == "*" else selection_cols
     constraints = (
         self._config.get_constraint_list() +
         self._inp.get_constraint_list() +
         [(t1s.DBP_TABLENAME, t1s.DBP_SELECTIONCOLS, selection_cols)])
     # the full field comes back after the four simple fields:
     [num_returned, latencies, num_clauses, num_terms,
      first_term_matches] = self._config.results_db.get_query_values(
          simple_fields=[(t1s.DBP_TABLENAME, t1s.DBP_NUMNEWRETURNEDRECORDS),
                         (t1s.DBP_TABLENAME, t1s.DBP_QUERYLATENCY),
                         (t1s.DBF_TABLENAME, t1s.DBF_P1NUMCLAUSES),
                         (t1s.DBF_TABLENAME, t1s.DBF_P1NUMTERMSPERCLAUSE)],
          full_fields_and_functions=[
              (t1s.DBF_P1ANDNUMRECORDSMATCHINGFIRSTTERM, sum)
          ],
          constraint_list=constraints)
     function_to_regress = self._config.ql_p1dnfcomplex_ftr
     regress_inputs = [
         num_returned, num_clauses, num_terms, first_term_matches
     ]
     try:
         best_fit = regression.regress(
             function_to_regress=function_to_regress,
             outputs=latencies,
             inputs=regress_inputs)
         results = [("higher_order_functionstr", best_fit.string),
                    ("higher_order_rsquared",
                     best_fit.get_rsquared(inputs=regress_inputs,
                                           outputs=latencies))]
     except regression.BadRegressionInputError:
         results = [("higher_order_functionstr", None),
                    ("higher_order_rsquared", None)]
     for (suffix, value) in results:
         self._outp[attr_prefix + "_" + suffix] = value
Exemplo n.º 7
0
    def _make_3d_info(self,
                      labels,
                      fields,
                      caption,
                      tag,
                      graph_path,
                      function_to_regress,
                      additional_constraint_list=None):
        """Regresses the third field on the first two and graphs all three.

        Args:
            labels: a list of the form (x_label, y_label, z_label)
            fields: a list of the form [(table1, field1), (table2, field2),
                (table3, field3)].
            caption: a caption string
            tag: a tag string
            graph_path: a path indicating the location where the graph is to be
                stored.
            function_to_regress: a FunctionToRegress object
            additional_constraint_list: an optional list of constraints that
                is appended to the constraint list obtained from the
                configuration object.

        Returns:
            A tuple of the form (functionstr, rsquared, graphstr). functionstr
            and rsquared are None if the regression raises
            BadRegressionInputError or TypeError; graphstr is None if graphing
            raises BadGraphingInputs.
        """
        assert len(labels) == 3
        (x_label, y_label, z_label) = labels
        assert len(fields) == 3
        extra_constraints = additional_constraint_list or []
        constraints = self._config.get_constraint_list(
            fields=fields, require_correct=True,
            usebaseline=False) + extra_constraints
        # this weeds out tests with no D defined (single gate type tests):
        non_standard_constraints = [(t2s.PARAM_TABLENAME, t2s.PARAM_D,
                                     "%s.%s IS NOT 'None'")]
        [x_values, y_values, z_values] = self._config.results_db.get_values(
            fields,
            constraint_list=constraints,
            non_standard_constraint_list=non_standard_constraints)
        # fit the function first, so the graph can show the best-fit surface:
        try:
            best_fit = regression.regress(
                function_to_regress=function_to_regress,
                outputs=z_values,
                inputs=[x_values, y_values])
            functionstr = best_fit.string
            rsquared = best_fit.get_rsquared(inputs=[x_values, y_values],
                                             outputs=z_values)
        except (regression.BadRegressionInputError, TypeError):
            best_fit = functionstr = rsquared = None
        # the graph is attempted even when no best-fit function exists:
        try:
            graph = graphing.graph3d("",
                                     x_values,
                                     y_values,
                                     z_values,
                                     x_label,
                                     y_label,
                                     z_label,
                                     best_fit_function=best_fit)
            self.write(graph_path, graph)
            graphstr = latex_classes.LatexImage(caption, tag, graph_path,
                                                .8).get_string()
        except graphing.BadGraphingInputs:
            graphstr = None
        return (functionstr, rsquared, graphstr)
    def _store_common_latency_graph(self,
                                    simple_fields=None,
                                    label_template=None):
        """Stores a graph of latency against number of records returned on the
        output object, with one dataset per category.

        Args:
            simple_fields: a list of (table, field) pairs whose unique value
                combinations define the categories. When empty or omitted, the
                selection columns, category and sub-category fields are used.
            label_template: a format string applied to each category with the
                % operator to produce its data label; only used when
                simple_fields is given.

        Nothing is stored when no categories are found.
        """
        if simple_fields:

            def get_label(category):
                return label_template % category
        else:
            # fall back to the default fields, labelled as
            # "select <cols> <cat>" plus "-<subcat>" when there is a subcat:
            simple_fields = [(t1s.DBP_TABLENAME, t1s.DBP_SELECTIONCOLS),
                             (t1s.DBF_TABLENAME, t1s.DBF_CAT),
                             (t1s.DBF_TABLENAME, t1s.DBF_SUBCAT)]

            def get_label(category):
                (selectioncols, cat, subcat) = category
                base_label = "select %s %s" % (selectioncols, cat)
                if not subcat:
                    return base_label
                return base_label + "-%s" % subcat

        # naming and reference strings:
        query_cat = self._inp[t1s.DBF_CAT]
        db_name = self._inp.test_db.get_short_database_name()
        caption = "Type %s Queries (%s)" % (query_cat, db_name)
        tag = self.get_tag(self._inp)
        graph_path = self.get_img_path(self._inp)
        # constraints shared by every query made below:
        base_constraints = (self._config.get_constraint_list() +
                            self._inp.get_constraint_list())
        categories = self._config.results_db.get_unique_query_values(
            simple_fields=simple_fields, constraint_list=base_constraints)
        x_label = self._config.var_nummatches + " = # new matching records"
        y_label = self._config.var_ql + " = query latency (s)"
        datasets = []
        for category in categories:
            data_label = get_label(category)
            # pin every category field to this category's value:
            category_constraints = list(base_constraints)
            for ((table, field), val) in zip(simple_fields, category):
                category_constraints.append((table, field, val))
            [x_values, y_values] = self._config.results_db.get_query_values(
                simple_fields=[(t1s.DBP_TABLENAME,
                                t1s.DBP_NUMNEWRETURNEDRECORDS),
                               (t1s.DBP_TABLENAME, t1s.DBP_QUERYLATENCY)],
                constraint_list=category_constraints)
            try:
                best_fit = regression.regress(
                    function_to_regress=self._config.ql_all_ftr,
                    outputs=y_values,
                    inputs=[x_values])
            except regression.BadRegressionInputError:
                # the dataset is still plotted, just without a best-fit curve:
                best_fit = None
            datasets.append((x_values, y_values, data_label, best_fit))
        if not categories:
            return
        graph = graphing.graph2d(plot_name="",
                                 datasets=datasets,
                                 x_label=x_label,
                                 y_label=y_label)
        self.write(graph_path, graph)
        graph_image = latex_classes.LatexImage(caption, tag, graph_path)
        self._outp["common_latency_graph"] = graph_image.get_string()