Example #1
    def test_nullables(self):
        schema = test_schema + "nullables"
        pdf = PanDatFactory(table_with_stuffs=[["field one"], ["field two"]])
        pdf.set_data_type("table_with_stuffs", "field one")
        pdf.set_data_type("table_with_stuffs",
                          "field two",
                          number_allowed=False,
                          strings_allowed='*',
                          nullable=True)
        tdf = TicDatFactory.create_from_full_schema(
            pdf.schema(include_ancillary_info=True))
        tic_dat = tdf.TicDat(
            table_with_stuffs=[[101, "022"], [202, None], [303, "111"]])
        dat = tdf.copy_to_pandas(tic_dat, drop_pk_columns=False)
        self.assertFalse(tdf.find_data_type_failures(tic_dat))
        self.assertFalse(pdf.find_data_type_failures(dat))

        pdf.pgsql.write_schema(self.engine, schema)
        pdf.pgsql.write_data(dat, self.engine, schema)
        dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
        self.assertTrue(
            pdf._same_data(dat, dat_1, nans_are_same_for_data_rows=True))
        tic_dat_1 = tdf.pgsql.create_tic_dat(self.engine, schema)
        self.assertTrue(
            tdf._same_data(tic_dat,
                           tic_dat_1,
                           nans_are_same_for_data_rows=True))
Example #2
    def __init__(self):
        self.config_schema = PanDatFactory(action_settings=[
            [], ['Table', 'Column', 'Method', 'Value', 'Flag Column']
        ])

        self.config_defaults = self.config_schema.PanDat(
            action_settings=[{
                'Table': 'data',
                'Column': 'column1',
                'Method': 'zScore',
                'Value': '3',
                'Flag Column': 'flag'
            }])
Example #3
 def test_missing_tables(self):
     schema = test_schema + "_missing_tables"
     tdf_1 = TicDatFactory(this=[["Something"], ["Another"]])
     pdf_1 = PanDatFactory(**tdf_1.schema())
     tdf_2 = TicDatFactory(
         **dict(tdf_1.schema(), that=[["What", "Ever"], []]))
     pdf_2 = PanDatFactory(**tdf_2.schema())
     dat = tdf_1.TicDat(this=[["a", 2], ["b", 3], ["c", 5]])
     pan_dat = tdf_1.copy_to_pandas(dat, drop_pk_columns=False)
     tdf_1.pgsql.write_schema(self.engine, schema)
     tdf_1.pgsql.write_data(dat, self.engine, schema)
     pg_dat = tdf_2.pgsql.create_tic_dat(self.engine, schema)
     self.assertTrue(tdf_1._same_data(dat, pg_dat))
     pg_pan_dat = pdf_2.pgsql.create_pan_dat(self.engine, schema)
     self.assertTrue(pdf_1._same_data(pan_dat, pg_pan_dat))
Example #4
 def test_issue_68_pd(self):
     # kind of a dumb test since the numpy types tend to be the ones pandas creates naturally, but no harm
     # in being rigorous
     if not self.can_run:
         return
     tdf = diet_schema.clone()
     pdf = PanDatFactory.create_from_full_schema(
         tdf.schema(include_ancillary_info=True))
     pgtf = pdf.pgsql
     pgtf.write_schema(self.engine,
                       test_schema,
                       include_ancillary_info=False)
     dat = tdf.copy_tic_dat(diet_dat)
     import numpy
     dat.categories["protein"]["Max Nutrition"] = numpy.int64(200)
     dat.categories["fat"]["Max Nutrition"] = numpy.float64(65)
     pan_dat = pdf.copy_pan_dat(
         tdf.copy_to_pandas(dat, drop_pk_columns=False))
     pgtf.write_data(pan_dat, self.engine, test_schema)
     pg_pan_dat = pgtf.create_pan_dat(self.engine, test_schema)
     self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
     from ticdat.pandatfactory import _faster_df_apply
     pan_dat.categories["Max Nutrition"] = _faster_df_apply(
         pan_dat.categories, lambda row: numpy.int64(row["Max Nutrition"]))
     pan_dat.foods["Cost"] = _faster_df_apply(
         pan_dat.foods, lambda row: numpy.float64(row["Cost"]))
     pgtf.write_data(pan_dat, self.engine, test_schema)
     pg_pan_dat = pgtf.create_pan_dat(self.engine, test_schema)
     self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
Example #5
def _add_inflow_table(full_schema_dict):
    # As per the clone docstring, this function takes a full_schema_dict argument and
    # returns the PanDatFactory we want to make. In this case, all we need to do is add inflow.
    full_schema_dict["tables_fields"]["inflow"] = [["Commodity", "Node"],
                                                   ["Quantity"]]
    rtn = PanDatFactory.create_from_full_schema(full_schema_dict)
    return rtn
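
A minimal usage sketch (the input_schema name is a hypothetical stand-in for an existing PanDatFactory): per the clone docstring referenced above, a helper like this can be passed as clone_factory.

# hypothetical usage: clone an existing factory while adding the inflow table
bigger_schema = input_schema.clone(clone_factory=_add_inflow_table)
assert "inflow" in bigger_schema.all_tables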
Example #6
    def find_duplicates(self, json_file_path, from_pandas=False):
        """
        Find the row counts for duplicated rows.

        :param json_file_path: A json file path. It should encode a dictionary
                               with table names as keys.

        :param from_pandas: boolean.  If truthy, then use pandas json readers. See
                            PanDatFactory json readers for more details.

        :return: A dictionary whose keys are the names of the tables that have primary keys.
                 Each value of the return dictionary is itself a dictionary.
                 The inner dictionary is keyed by the primary key values encountered in the table,
                 and its values are the counts of records in the json entry with that primary key.
                 Row counts smaller than 2 are pruned off, as they aren't duplicates.
        """
        _standard_verify(self.tic_dat_factory)
        if from_pandas:
            from ticdat import PanDatFactory
            pdf = PanDatFactory.create_from_full_schema(
                self.tic_dat_factory.schema(include_ancillary_info=True))
            _rtn = pdf.json.create_pan_dat(json_file_path)
            jdict = {t: [tuple(_) for _ in getattr(_rtn, t).itertuples(index=False)]
                     for t in pdf.all_tables}
        else:
            jdict = self._create_jdict(json_file_path)
        rtn = find_duplicates_from_dict_ticdat(self.tic_dat_factory, jdict)
        return rtn or {}
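
A brief usage sketch, with a hypothetical schema and file name; the shape of the return value follows the docstring above.

tdf = TicDatFactory(foods=[["Name"], ["Cost"]])
# if "dup_foods.json" encoded the foods table with "cheese" appearing twice,
# we'd expect something like {"foods": {"cheese": 2}}
dups = tdf.json.find_duplicates("dup_foods.json")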
Example #7
    def test_time_stamp(self):
        tdf = TicDatFactory(table=[["Blah"], ["Timed Info"]])
        tdf.set_data_type("table", "Timed Info", nullable=True)
        tdf.set_default_value("table", "Timed Info", None)
        dat = tdf.TicDat()
        dat.table[1] = dateutil.parser.parse("2014-05-01 18:47:05.069722")
        dat.table[2] = dateutil.parser.parse("2014-05-02 18:47:05.178768")
        pgtf = tdf.pgsql
        pgtf.write_schema(self.engine,
                          test_schema,
                          forced_field_types={
                              ('table', 'Blah'): "integer",
                              ('table', 'Timed Info'): "timestamp"
                          })
        pgtf.write_data(dat,
                        self.engine,
                        test_schema,
                        dsn=self.postgresql.dsn())
        dat_2 = pgtf.create_tic_dat(self.engine, test_schema)
        self.assertTrue(tdf._same_data(dat, dat_2))
        self.assertTrue(
            all(
                isinstance(row["Timed Info"], datetime.datetime)
                for row in dat_2.table.values()))
        self.assertFalse(
            any(isinstance(k, datetime.datetime) for k in dat_2.table))

        pdf = PanDatFactory.create_from_full_schema(
            tdf.schema(include_ancillary_info=True))

        def same_data(pan_dat, pan_dat_2):
            df1, df2 = pan_dat.table, pan_dat_2.table
            if list(df1["Blah"]) != list(df2["Blah"]):
                return False
            for dt1, dt2 in zip(df1["Timed Info"], df2["Timed Info"]):
                delta = dt1 - dt2
                if abs(delta.total_seconds()) > 1e-6:
                    return False
            return True

        pan_dat = pdf.pgsql.create_pan_dat(self.engine, test_schema)
        pan_dat_2 = pan_dat_maker(tdf.schema(), dat_2)
        self.assertTrue(same_data(pan_dat, pan_dat_2))
        for df in [_.table for _ in [pan_dat, pan_dat_2]]:
            for i in range(len(df)):
                self.assertFalse(
                    isinstance(df.loc[i, "Blah"], datetime.datetime))
                self.assertTrue(
                    isinstance(df.loc[i, "Timed Info"], datetime.datetime))

        pan_dat.table.loc[1, "Timed Info"] = dateutil.parser.parse(
            "2014-05-02 18:48:05.178768")
        self.assertFalse(same_data(pan_dat, pan_dat_2))
        pdf.pgsql.write_data(pan_dat, self.engine, test_schema)
        pan_dat_2 = pdf.pgsql.create_pan_dat(self.engine, test_schema)
        self.assertTrue(same_data(pan_dat, pan_dat_2))

        dat.table[2] = dateutil.parser.parse("2014-05-02 18:48:05.178768")
        self.assertFalse(tdf._same_data(dat, dat_2))
Example #8
 def test_pdf_2(self):
     pdf = PanDatFactory.create_from_full_schema(kehaar.input_schema.schema(include_ancillary_info=True))
     pdf.set_infinity_io_flag("N/A") # this speeds thing up, since less munging
     dat = _timeit(pdf.csv.create_pan_dat, 5)(os.path.join(_codeDir(), "bernardo_slowby"))
     pdf.pgsql.write_schema(self.engine, test_schemas[2], include_ancillary_info=False,
                            forced_field_types=_forced_field_types())
     _timeit(pdf.pgsql.write_data, 90)(dat, self.engine, test_schemas[2])
     _timeit(pdf.pgsql.create_pan_dat, 5)(self.engine, test_schemas[2])
Example #9
 def test_pdf(self):
     pdf = PanDatFactory.create_from_full_schema(kehaar.input_schema.schema(include_ancillary_info=True))
     dat = _timeit(pdf.csv.create_pan_dat, 90)(os.path.join(_codeDir(), "bernardo_slowby"))
     pdf.pgsql.write_schema(self.engine, test_schemas[1], include_ancillary_info=False,
                            forced_field_types=_forced_field_types())
     # it takes a bit longer because there might be infinities to manage on the way into PG
     _timeit(pdf.pgsql.write_data, 180)(dat, self.engine, test_schemas[1])
     _timeit(pdf.pgsql.create_pan_dat, 50)(self.engine, test_schemas[1])
Example #10
 def test_big_diet_pd(self):
     if not self.can_run:
         return
     tdf = diet_schema
     pdf = PanDatFactory(**tdf.schema())
     pgpf = PostgresPanFactory(pdf)
     big_dat = diet_schema.copy_tic_dat(diet_dat)
     for k in range(int(1e5)):
         big_dat.categories[str(k)] = [0, 100]
     pan_dat = pan_dat_maker(tdf.schema(), big_dat)
     schema = "test_pg_big_diet"
     now = time.time()
     pgpf.write_schema(self.engine, schema)
     pgpf.write_data(pan_dat, self.engine, schema)
     print(f"**&&*{time.time()-now}**&&**")
     now = time.time()
     pg_pan_dat = pgpf.create_pan_dat(self.engine, schema)
     print(f"*&&*{time.time()-now}**&&**")
     self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
Example #11
 def test_parameters_pd(self):
     schema = test_schema + "_parameters_pd"
     pdf = PanDatFactory(parameters=[["Key"], ["Value"]])
     pdf.add_parameter("Something", 100)
     pdf.add_parameter("Different",
                       'boo',
                       strings_allowed='*',
                       number_allowed=False)
     dat = TicDatFactory(**pdf.schema()).TicDat(
         parameters=[["Something", float("inf")], ["Different", "inf"]])
     dat = TicDatFactory(**pdf.schema()).copy_to_pandas(
         dat, drop_pk_columns=False)
     pdf.pgsql.write_schema(self.engine, schema)
     pdf.pgsql.write_data(dat, self.engine, schema)
     dat_ = pdf.pgsql.create_pan_dat(self.engine, schema)
     self.assertTrue(pdf._same_data(dat, dat_))
Example #12
 def testDietWithInfFlaggingPd(self):
     pdf = PanDatFactory.create_from_full_schema(
         diet_schema.schema(include_ancillary_info=True))
     dat = diet_schema.copy_to_pandas(diet_dat, drop_pk_columns=False)
     pdf.set_infinity_io_flag(999999999)
     schema = test_schema + "_diet_inf_flagging_pd"
     pdf.pgsql.write_schema(self.engine, schema)
     pdf.pgsql.write_data(dat, self.engine, schema)
     dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
     self.assertTrue(pdf._same_data(dat, dat_1))
     pdf = pdf.clone()
     dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
     self.assertTrue(pdf._same_data(dat, dat_1))
     pdf_no_flag = PanDatFactory(**diet_schema.schema())
     dat_1 = pdf_no_flag.pgsql.create_pan_dat(self.engine, schema)
     self.assertFalse(pdf_no_flag._same_data(dat, dat_1))
     protein = dat_1.categories["Name"] == "protein"
     self.assertTrue(
         list(dat_1.categories[protein]["Max Nutrition"])[0] == 999999999)
     dat_1.categories.loc[protein, "Max Nutrition"] = float("inf")
     self.assertTrue(pdf_no_flag._same_data(dat, dat_1))
Example #13
    def testIssue45(self):
        schema = test_schema + "issue45"
        pdf = PanDatFactory(data=[["a"], ["b"]])
        pdf.set_data_type("data",
                          "b",
                          number_allowed=False,
                          strings_allowed='*')
        tdf = TicDatFactory.create_from_full_schema(
            pdf.schema(include_ancillary_info=True))
        tic_dat = tdf.TicDat(data=[[2, "1"], [4, "3"], [44, "022"]])
        dat = tdf.copy_to_pandas(tic_dat, drop_pk_columns=False)
        self.assertFalse(tdf.find_data_type_failures(tic_dat))
        self.assertFalse(pdf.find_data_type_failures(dat))
        pdf.pgsql.write_schema(self.engine,
                               schema,
                               forced_field_types={("data", "a"): "integer"})
        pdf.pgsql.write_data(dat, self.engine, schema)

        def two_checks():
            dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
            self.assertTrue(pdf._same_data(dat, dat_1))
            tic_dat_1 = tdf.pgsql.create_tic_dat(self.engine, schema)
            self.assertTrue(tdf._same_data(tic_dat, tic_dat_1))

        two_checks()
        tdf.pgsql.write_data(tic_dat, self.engine, schema)
        two_checks()
Example #14
 def test_extra_fields_pd(self):
     pdf = PanDatFactory(boger=[["a"], ["b", "c"]])
     dat = pdf.PanDat(boger=pd.DataFrame({
         "a": [1, 2, 3],
         "b": [4, 5, 6],
         "c": ['a', 'b', 'c']
     }))
     schema = "test_pd_extra_fields"
     pdf.pgsql.write_schema(self.engine,
                            schema,
                            forced_field_types={
                                ("boger", "c"): "text",
                                ("boger", "a"): "float"
                            })
     pdf.pgsql.write_data(dat, self.engine, schema)
     pdf2 = PanDatFactory(boger=[["a"], ["b"]])
     dat2 = pdf2.pgsql.create_pan_dat(self.engine, schema)
     self.assertTrue(
         list(dat2.boger["a"]) == [1.0, 2.0, 3.0]
         and list(dat2.boger["b"]) == [4.0, 5.0, 6.0])
     dat2_2 = pdf2.PanDat(boger=pd.DataFrame({
         "a": [10, 300],
         "b": [40, 60]
     }))
     pdf2.pgsql.write_data(dat2_2, self.engine, schema)
     dat = pdf.pgsql.create_pan_dat(self.engine, schema)
     self.assertTrue(
         list(dat.boger["a"]) == [10, 300]
         and list(dat.boger["b"]) == [40, 60])
     self.assertTrue(len(set(dat.boger["c"])) == 1)
Example #15
    def write_file(self,
                   tic_dat,
                   json_file_path,
                   allow_overwrite=False,
                   verbose=False,
                   to_pandas=False):
        """
        Write the TicDat data to a json file (or return a json string)

        :param tic_dat: the data object to write (typically a TicDat)

        :param json_file_path: The file path of the json file to create. If empty string, then return a JSON string.

        :param allow_overwrite: boolean - are we allowed to overwrite an
                                existing file?

        :param verbose: boolean. Verbose mode writes the data rows as dicts
                        keyed by field name. Otherwise, they are lists.

        :param to_pandas: boolean. If truthy, then use the PanDatFactory method of writing to json.

        :return:
        """
        _standard_verify(self.tic_dat_factory)
        verify(not (to_pandas and verbose),
               "verbose argument is inconsistent with to_pandas")
        verify(
            not (json_file_path and os.path.exists(json_file_path)
                 and not allow_overwrite),
            "%s exists and allow_overwrite is not enabled" % json_file_path)
        if to_pandas:
            from ticdat import PanDatFactory
            pdf = PanDatFactory.create_from_full_schema(
                self.tic_dat_factory.schema(include_ancillary_info=True))
            return pdf.json.write_file(
                self.tic_dat_factory.copy_to_pandas(tic_dat,
                                                    drop_pk_columns=False),
                json_file_path)
        msg = []
        if not self.tic_dat_factory.good_tic_dat_object(
                tic_dat, lambda m: msg.append(m)):
            raise TicDatError("Not a valid TicDat object for this schema : " +
                              " : ".join(msg))
        jdict = make_json_dict(self.tic_dat_factory,
                               tic_dat,
                               verbose,
                               use_infinity_io_flag_if_provided=True)
        if not json_file_path:
            return json.dumps(jdict, sort_keys=True, indent=2)
        with open(json_file_path, "w") as fp:
            json.dump(jdict, fp, sort_keys=True, indent=2)
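
A short round-trip sketch (the schema and file path are assumptions); per the docstring, an empty json_file_path returns a JSON string instead of writing a file.

tdf = TicDatFactory(foods=[["Name"], ["Cost"]])
dat = tdf.TicDat(foods=[["cheese", 1.0]])
tdf.json.write_file(dat, "foods.json", allow_overwrite=True)
as_a_string = tdf.json.write_file(dat, "")  # empty path -> JSON string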
Example #16
    def test_pgtd_active(self):
        if not self.can_run:
            return
        schema = test_schema + "_active"
        tdf = TicDatFactory(
            **{
                k: [pks, (["active_fld"] if k == "categories" else []) + dfs]
                for k, (pks, dfs) in diet_schema.schema().items()
            })
        tdf.pgsql.write_schema(self.engine,
                               schema,
                               include_ancillary_info=False,
                               forced_field_types={
                                   ('categories', 'active_fld'): 'boolean'
                               })
        tdf = diet_schema.clone()
        dat = tdf.copy_tic_dat(diet_dat)
        dat.categories["junk"] = {}
        tdf.pgsql.write_data(dat, self.engine, schema, active_fld="active_fld")
        self.assertTrue(
            set(_[0] for _ in self.engine.execute(
                f"Select active_fld from {schema}.categories")) == {True})
        self.engine.execute(
            f"Update {schema}.categories set active_fld = False where name = 'junk'"
        )
        dat_2 = tdf.pgsql.create_tic_dat(self.engine,
                                         schema,
                                         active_fld="active_fld")
        self.assertTrue(tdf._same_data(dat_2, diet_dat, epsilon=1e-10))

        pdf = PanDatFactory.create_from_full_schema(
            diet_schema.schema(include_ancillary_info=True))
        pan_dat = tdf.copy_to_pandas(diet_dat, drop_pk_columns=False)
        pan_dat_2 = pdf.pgsql.create_pan_dat(self.engine,
                                             schema,
                                             active_fld="active_fld")
        self.assertTrue(pdf._same_data(pan_dat, pan_dat_2, epsilon=1e-10))
        self.assertTrue(
            set(_[0] for _ in self.engine.execute(
                f"Select active_fld from {schema}.categories")) ==
            {True, False})
        pdf.pgsql.write_data(pan_dat,
                             self.engine,
                             schema,
                             active_fld="active_fld")
        self.assertTrue(
            set(_[0] for _ in self.engine.execute(
                f"Select active_fld from {schema}.categories")) == {True})
Example #17
 def test_diet_no_inf_pd_flagging(self):
     pdf = PanDatFactory.create_from_full_schema(
         diet_schema.schema(include_ancillary_info=True))
     pan_dat = diet_schema.copy_to_pandas(diet_dat, drop_pk_columns=False)
     pgpf = pdf.pgsql
     pgpf.write_schema(self.engine,
                       test_schema,
                       include_ancillary_info=False)
     pgpf.write_data(pan_dat, self.engine, test_schema)
     self.assertTrue(
         sorted(_ for _ in self.engine.execute(
             f"Select * from {test_schema}.categories")) ==
         [('calories', 1800.0, 2200.0), ('fat', 0.0, 65.0),
          ('protein', 91.0, float("inf")), ('sodium', 0.0, 1779.0)])
     pg_pan_dat = pgpf.create_pan_dat(self.engine, test_schema)
     self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
Example #18
    def create_tic_dat(self,
                       json_file_path,
                       freeze_it=False,
                       from_pandas=False):
        """
        Create a TicDat object from a json file

        :param json_file_path: A json file path. It should encode a dictionary
                               with table names as keys. Could also be an actual JSON string

        :param freeze_it: boolean. should the returned object be frozen?

        :param from_pandas: boolean.  If truthy, then use pandas json readers. See
                            PanDatFactory json readers for more details.

        :return: a TicDat object populated by the matching tables.

        caveats: Table name matches are case insensitive and also
                 underscore-space insensitive.
                 Tables that don't find a match are interpreted as an empty table.
                 Dictionary keys that don't match any table are ignored.
        """
        _standard_verify(self.tic_dat_factory)
        if from_pandas:
            from ticdat import PanDatFactory
            pdf = PanDatFactory.create_from_full_schema(
                self.tic_dat_factory.schema(include_ancillary_info=True))
            _rtn = pdf.json.create_pan_dat(json_file_path)
            return pdf.copy_to_tic_dat(_rtn)
        jdict = self._create_jdict(json_file_path)
        tic_dat_dict = self._create_tic_dat_dict(jdict)
        missing_tables = set(
            self.tic_dat_factory.all_tables).difference(tic_dat_dict)
        if missing_tables:
            print(
                "The following table names could not be found in the json file/string\n%s\n"
                % "\n".join(missing_tables))
        rtn = self.tic_dat_factory.TicDat(**tic_dat_dict)
        rtn = self.tic_dat_factory._parameter_table_post_read_adjustment(rtn)
        if freeze_it:
            return self.tic_dat_factory.freeze_me(rtn)
        return rtn
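
A brief usage sketch (schema and file name assumed); from_pandas=True routes the read through the PanDatFactory json readers as described above.

tdf = TicDatFactory(foods=[["Name"], ["Cost"]])
dat = tdf.json.create_tic_dat("foods.json", freeze_it=True)
dat_pd = tdf.json.create_tic_dat("foods.json", from_pandas=True)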
Example #19
#
# Perform KMeans-clustering on the Iris data set. Number of clusters can be controlled via an optional
# parameters table.
#
# The command line interface works like this:
#    python iris.py -i sample_data -o solution_directory
#
from ticdat import PanDatFactory, standard_main
from sklearn.preprocessing import scale
from sklearn.cluster import KMeans

# ------------------------ define the input schema --------------------------------
_core_numeric_fields = [
    'Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'
]
input_schema = PanDatFactory(parameters=[['Name'], ['Value']],
                             iris=[[], _core_numeric_fields + ['Species']])

# the core data fields should be positive, non-infinite numbers
for fld in _core_numeric_fields:
    input_schema.set_data_type("iris",
                               fld,
                               inclusive_min=False,
                               inclusive_max=False,
                               min=0,
                               max=float("inf"))
input_schema.set_data_type("iris",
                           'Species',
                           number_allowed=False,
                           strings_allowed='*')

# the number of clusters is our only parameter, but using a parameters table makes it easy to add more as needed
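
A hedged sketch of how that parameter might be registered; the parameter name and defaults below are assumptions, not taken from the original script.

input_schema.add_parameter("Number of Clusters", default_value=8,
                           must_be_int=True, min=1, max=float("inf"),
                           inclusive_min=True, inclusive_max=False)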
Example #20
# particular day. We use lexicographic optimization to solve the model:
# first, we minimize the linear sum of the slacks. Then, we constrain
# the sum of the slacks, and minimize the total payment to workers.
# Finally, we minimize a quadratic objective that
# tries to balance the workload among the workers.
#

from ticdat import PanDatFactory, standard_main
try: # if you don't have amplpy installed, the code will still load and then fail on solve
    from amplpy import AMPL
except ImportError:
    AMPL = None
# ------------------------ define the input schema --------------------------------
input_schema = PanDatFactory(
    workers=[["Name"], ["Payment"]],
    shifts=[["Name"], ["Requirement"]],
    availability=[["Worker", "Shift"], []]
)
# Define the foreign key relationships
input_schema.add_foreign_key("availability", "workers", ['Worker', 'Name'])
input_schema.add_foreign_key("availability", "shifts", ['Shift', 'Name'])

# Define the data types
input_schema.set_data_type("workers", "Payment", min=0, max=float("inf"),
                           inclusive_min=True, inclusive_max=True)
input_schema.set_data_type("shifts", "Requirement", min=0, max=float("inf"),
                           inclusive_min=True, inclusive_max=True)
# ---------------------------------------------------------------------------------

# ------------------------ define the output schema -------------------------------
solution_schema = PanDatFactory(
Example #21
def pan_dat_maker(schema, tic_dat):
    tdf = TicDatFactory(**schema)
    pdf = PanDatFactory(**schema)
    return pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, tic_dat))
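
For example, reusing the diet fixtures that appear in the tests above:

pan_dat = pan_dat_maker(diet_schema.schema(), diet_dat)
assert PanDatFactory(**diet_schema.schema()).good_pan_dat_object(pan_dat)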
Example #22
from ticdat import PanDatFactory

input_schema = PanDatFactory(
    plants=[["Name"], []],
    warehouses=[["Name"], ["Max Assignment Capacity", "Fixed Cost"]],
    customers=[["Name"], []],
    products=[["Name"], ["Warehouse Volume"]],
    demand=[["Customer", "Product"], ["Demand"]],
    supply=[["Plant", "Product"], ["Supply"]],
    plant_to_warehouse_costs=[["Plant", "Warehouse", "Product"], ["Cost"]],
    warehouse_to_customer_costs=[["Warehouse", "Customer", "Product"],
                                 ["Cost"]],
    warehouse_to_customer_distances=[["Warehouse", "Customer"], ["Distance"]],
    parameters=[["Parameter"], ["Value"]])

input_schema.add_parameter("Number of Warehouses",
                           default_value=4,
                           inclusive_min=False,
                           inclusive_max=False,
                           min=0,
                           max=float("inf"),
                           must_be_int=True)
input_schema.add_parameter("High Service Distance",
                           default_value=0,
                           inclusive_min=True,
                           inclusive_max=True,
                           min=0,
                           max=float("inf"),
                           must_be_int=False)
input_schema.add_parameter("Maximum Average Service Distance",
                           default_value=float("inf"),
Example #23
#   python netflow.py -i netflow_sample_data.json -o netflow_solution.json
# will read from the model stored in netflow_sample_data.json
# and write the solution to netflow_solution.json
#
# This version of the netflow example takes extra precautions to avoid generating
# unneeded constraints. See the simplest_examples directory for a simpler version of this model.

from ticdat import PanDatFactory, standard_main
try:  # if you don't have amplpy installed, the code will still load and then fail on solve
    from amplpy import AMPL
except ImportError:
    AMPL = None
# ------------------------ define the input schema --------------------------------
input_schema = PanDatFactory(commodities=[["Name"], ["Volume"]],
                             nodes=[["Name"], []],
                             arcs=[["Source", "Destination"], ["Capacity"]],
                             cost=[["Commodity", "Source", "Destination"],
                                   ["Cost"]],
                             inflow=[["Commodity", "Node"], ["Quantity"]])

# Define the foreign key relationships
input_schema.add_foreign_key("arcs", "nodes", ['Source', 'Name'])
input_schema.add_foreign_key("arcs", "nodes", ['Destination', 'Name'])
input_schema.add_foreign_key("cost", "nodes", ['Source', 'Name'])
input_schema.add_foreign_key("cost", "nodes", ['Destination', 'Name'])
input_schema.add_foreign_key("cost", "commodities", ['Commodity', 'Name'])
input_schema.add_foreign_key("inflow", "commodities", ['Commodity', 'Name'])
input_schema.add_foreign_key("inflow", "nodes", ['Node', 'Name'])

# Define the data types
input_schema.set_data_type("commodities",
                           "Volume",
Example #24
#
# Provides command line interface via ticdat.standard_main
# For example, typing
#   python diet.py -i input_data.xlsx -o solution_data.xlsx
# will read from a model stored in the file input_data.xlsx and write the solution
# to solution_data.xlsx.
#

from amplpy import AMPL
from ticdat import PanDatFactory, standard_main

# ------------------------ define the input schema --------------------------------
# There are three input tables, with 4 primary key fields and 4 data fields.
input_schema = PanDatFactory(categories=[["Name"],
                                         ["Min Nutrition", "Max Nutrition"]],
                             foods=[["Name"], ["Cost"]],
                             nutrition_quantities=[["Food", "Category"],
                                                   ["Quantity"]])

# Define the foreign key relationships
input_schema.add_foreign_key("nutrition_quantities", "foods", ["Food", "Name"])
input_schema.add_foreign_key("nutrition_quantities", "categories",
                             ["Category", "Name"])

# Define the data types
input_schema.set_data_type("categories",
                           "Min Nutrition",
                           min=0,
                           max=float("inf"),
                           inclusive_min=True,
                           inclusive_max=False)
Example #25
from ticdat import PanDatFactory
input_schema = PanDatFactory(cities=[["Name"], ["Demand"]],
                             distances=[["Source", "Destination"],
                                        ["Distance"]],
                             parameters=[["Parameter"], ["Value"]])

input_schema.add_parameter("Number of Centroids",
                           default_value=4,
                           inclusive_min=False,
                           inclusive_max=False,
                           min=0,
                           max=float("inf"),
                           must_be_int=True)
input_schema.set_data_type("cities",
                           "Demand",
                           min=0,
                           max=float("inf"),
                           inclusive_min=True,
                           inclusive_max=False)
input_schema.set_data_type("distances",
                           "Distance",
                           min=0,
                           max=float("inf"),
                           inclusive_min=True,
                           inclusive_max=False)
input_schema.add_foreign_key("distances", "cities", ['Source', 'Name'])
input_schema.add_foreign_key("distances", "cities", ['Destination', 'Name'])

solution_schema = PanDatFactory(openings=[['City'], []],
                                assignments=[['City', 'Assigned To'], []],
                                parameters=[["Parameter"], ["Value"]])
Example #26
#  python fantop.py -i fantop_sample_data -o fantop_solution_data

from ticdat import PanDatFactory, standard_main
try:  # if you don't have amplpy installed, the code will still load and then fail on solve
    from amplpy import AMPL
except ImportError:
    AMPL = None
# ------------------------ define the input schema --------------------------------
input_schema = PanDatFactory(parameters=[["Parameter"], ["Value"]],
                             players=[['Player Name'],
                                      [
                                          'Position', 'Average Draft Position',
                                          'Expected Points', 'Draft Status'
                                      ]],
                             roster_requirements=[['Position'],
                                                  [
                                                      'Min Num Starters',
                                                      'Max Num Starters',
                                                      'Min Num Reserve',
                                                      'Max Num Reserve',
                                                      'Flex Status'
                                                  ]],
                             my_draft_positions=[['Draft Position'], []])

# add foreign key constraints (optional, but helps with preventing garbage-in, garbage-out)
input_schema.add_foreign_key("players", "roster_requirements",
                             ['Position', 'Position'])

# set data types (optional, but helps with preventing garbage-in, garbage-out)
input_schema.set_data_type("parameters",
                           "Parameter",
Example #27
#
# Core engine file for tts_netflow_b
#
try:
    import gurobipy as gp
except ImportError:
    gp = None
from ticdat import PanDatFactory, Slicer

# ------------------------ define the input schema --------------------------------
input_schema = PanDatFactory(commodities=[["Name"], ["Volume"]],
                             nodes=[["Name"], []],
                             arcs=[["Source", "Destination"], ["Capacity"]],
                             cost=[["Commodity", "Source", "Destination"],
                                   ["Cost"]],
                             supply=[["Commodity", "Node"], ["Quantity"]],
                             demand=[["Commodity", "Node"], ["Quantity"]])

# Define the foreign key relationships
input_schema.add_foreign_key("arcs", "nodes", ['Source', 'Name'])
input_schema.add_foreign_key("arcs", "nodes", ['Destination', 'Name'])
input_schema.add_foreign_key(
    "cost", "arcs", [['Source', 'Source'], ['Destination', 'Destination']])
input_schema.add_foreign_key("cost", "commodities", ['Commodity', 'Name'])
input_schema.add_foreign_key("demand", "commodities", ['Commodity', 'Name'])
input_schema.add_foreign_key("demand", "nodes", ['Node', 'Name'])
input_schema.add_foreign_key("supply", "commodities", ['Commodity', 'Name'])
input_schema.add_foreign_key("supply", "nodes", ['Node', 'Name'])

# Define the data types
input_schema.set_data_type("commodities",
Example #28
# Provides command line interface via ticdat.standard_main
# For example, typing
#   python metrorail.py -i metrorail_sample_data.json -o metrorail_solution_data.json
# will read from a model stored in the file metrorail_sample_data.json and write the
# solution to metrorail_solution_data.json.

# this version of the file uses amplpy and Gurobi
from amplpy import AMPL
from ticdat import PanDatFactory, standard_main
from itertools import product
from pandas import DataFrame

# ------------------------ define the input schema --------------------------------
input_schema = PanDatFactory(
    parameters=[["Parameter"], ["Value"]],
    load_amounts=[["Amount"], []],
    number_of_one_way_trips=[["Number"], []],
    amount_leftover=[["Amount"], []])

input_schema.set_data_type("load_amounts", "Amount", min=0, max=float("inf"),
                           inclusive_min=False, inclusive_max=False)

input_schema.set_data_type("number_of_one_way_trips", "Number", min=0, max=float("inf"),
                           inclusive_min=False, inclusive_max=False, must_be_int=True)

input_schema.set_data_type("amount_leftover", "Amount", min=0, max=float("inf"),
                           inclusive_min=True, inclusive_max=False)


default_parameters = {"One Way Price": 2.25, "Amount Leftover Constraint": "Upper Bound"}
def _good_parameter_key_value(key, value):
Example #29
from ticdat import PanDatFactory
input_schema = PanDatFactory(cities=[["Name"], ["Demand"]],
                             distances=[["Source", "Destination"], ["Distance"]],
                             parameters=[["Parameter"], ["Value"]])

input_schema.add_parameter("Number of Centroids", default_value=4, inclusive_min=False, inclusive_max=False, min=0,
                            max=float("inf"), must_be_int=True)
input_schema.set_data_type("cities", "Demand", min=0, max=float("inf"), inclusive_min=True, inclusive_max=False)
input_schema.set_data_type("distances", "Distance", min=0, max=float("inf"), inclusive_min=True, inclusive_max=False)
input_schema.add_foreign_key("distances", "cities", ['Source', 'Name'])
input_schema.add_foreign_key("distances", "cities", ['Destination', 'Name'])

# The distance matrix must be bi-directionally safe. I.e., if both the source/dest and dest/source rows exist,
# then their distances must match. If only one is present, the code can fall back to the other.
def _distance_matrix(dat):
    return {"distance_matrix": {tuple(row[:2]): row[2] for row in dat.distances.itertuples(index=False)}}
input_schema.add_data_row_predicate("distances", predicate_name="Check Bi-Directionally Safe",
    predicate=lambda row, distance_matrix: ((row["Destination"], row["Source"]) not in distance_matrix) or
                                            (row["Distance"] == distance_matrix[row["Destination"], row["Source"]]),
    predicate_kwargs_maker=_distance_matrix)

solution_schema = PanDatFactory(openings=[['City'],[]], assignments=[['City', 'Assigned To'],[]],
                                parameters=[["Parameter"], ["Value"]])

def solve(dat):
    assert input_schema.good_pan_dat_object(dat), "bad dat check"
    assert not input_schema.find_duplicates(dat), "duplicate row check"
    assert not input_schema.find_foreign_key_failures(dat), "foreign key check"
    assert not input_schema.find_data_type_failures(dat), "data type value check"
    assert not input_schema.find_data_row_failures(dat), "data row check"
Example #30
# Simplest diet example using amplpy and ticdat

from amplpy import AMPL
from ticdat import PanDatFactory, standard_main

input_schema = PanDatFactory(
    categories=[["Name"], ["Min Nutrition", "Max Nutrition"]],
    foods=[["Name"], ["Cost"]],
    nutrition_quantities=[["Food", "Category"], ["Quantity"]])

# There are three solution tables, with 3 primary key fields and 3 data fields.
solution_schema = PanDatFactory(
    parameters=[["Parameter"], ["Value"]],
    buy_food=[["Food"], ["Quantity"]],
    consume_nutrition=[["Category"], ["Quantity"]])

def solve(dat):
    # build the AMPL math model
    ampl = AMPL()
    ampl.setOption('solver', 'gurobi')
    ampl.eval("""
    set CAT;
    set FOOD;

    param cost {FOOD} > 0, < Infinity;

    param n_min {CAT} >= 0, < Infinity;
    param n_max {i in CAT} >= n_min[i];

    param amt {FOOD, CAT} >= 0, < Infinity;