Пример #1
0
 def get_records(self,table,number):
     self.db = DBConnection(self.datasource)
     print("running get records {}".format(number))
     generator,header = self.db.stream_table(table)
     def format_record(record):
         return {str(h.name):str(v) for h,v in zip(header,record)}
     try:
         out = []
         for i , x in enumerate(generator):
             out.append(format_record(x))
             if i == number:
                 reason = "break"
                 break
     except Exception as e:
         reason = e
     finally:
         resp = input("stopped on {} write to file? (y/n) : ".format(reason))
         write = resp == "y"
         print("\n chosen write {} ".format(write))
         if write:
             path = JSON_RECORDS
             print("out len {}".format(len(out)))
             with open(path,"w+") as f:
                 f.write(json.dumps(out))
             print("wrote {} records to \n{}".format(len(out),path))
         else:
             print("not writing")
Пример #2
0
class IngestionProducer(KafkaWriter):
    def __init__(self,bootstrap_servers,datasource,outfile = JSON_RECORDS):
        super().__init__(bootstrap_servers)
        self.datasource = datasource

    def get_records(self,table,number):
        self.db = DBConnection(self.datasource)
        print("running get records {}".format(number))
        generator,header = self.db.stream_table(table)
        def format_record(record):
            return {str(h.name):str(v) for h,v in zip(header,record)}
        try:
            out = []
            for i , x in enumerate(generator):
                out.append(format_record(x))
                if i == number:
                    reason = "break"
                    break
        except Exception as e:
            reason = e
        finally:
            resp = input("stopped on {} write to file? (y/n) : ".format(reason))
            write = resp == "y"
            print("\n chosen write {} ".format(write))
            if write:
                path = JSON_RECORDS
                print("out len {}".format(len(out)))
                with open(path,"w+") as f:
                    f.write(json.dumps(out))
                print("wrote {} records to \n{}".format(len(out),path))
            else:
                print("not writing")

    def get_records_json(self):
        path = JSON_RECORDS
        with open(path,"r") as f:
            data = json.loads(f.read())
        return data

    def ingest_data(self,table,number = False):
        print("in ingest data method max {}".format(number))
        records = self.get_records_json()

        print(" got {} records to stream".format(len(records)))
        topic = get_topic(self.datasource,table)

        print("streaming data from table {} to topic {}".format(table,topic))
        input("press enter to start producing")
        print("producing...")
        for i,record in enumerate(records):
            self.produce(record, topic)
            if number:
                if i == number:
                    break
        self.produce_debug("completed producing {}".format(table))
Пример #3
0
 def __init__(self, database="test_database"):
     self.config = {\
                    "sites": (10,random_site),\
                    "parts":(50,random_part),\
                    "customers":(15,random_customer),\
                    "suppliers":(5,random_supplier),\
                    "sales_orders":(50,random_sales_order),\
                    "purchase_orders":(50,random_purchase_order)}
     self.commands = [
         "sites", "parts", "customers", "sales_orders", "purchase_orders",
         "suppliers"
     ]
     print("mockdata got database name {}".format(database))
     self.dbc = DBConnection(database)
     self.current_table = None
     self.insert_statements = []
     global fake_factory
     global product_data
     product_data = read_product_data()
     fake_factory = Faker()
Пример #4
0
def run_statistics(datasource):
    print("running stats... ")
    dbc = DBConnection(datasource)
    dbc.get_base_table_descriptions()
    query = "select topic_name, min(timestamp) start_time,max(timestamp) end_time, max(quantity) amount from tracked_topics group by topic_name;"
    records = get_records(query=query, dbc=dbc)
    runid = get_records("select max(run_id) + 1 newid  from run_stats;",
                        dbc)[0]["newid"]
    insert_kwargs = {"table": "run_stats"}
    for record in records:
        record["elapsed_time"] = (record["end_time"] -
                                  record["start_time"]).total_seconds()
        insert_kwargs.update({"topic_name":record["topic_name"],\
                              "run_id":runid,\
                              "start_time":record["start_time"],\
                              "end_time":record["end_time"],\
                              "elapsed_time":record["elapsed_time"],\
                              "amount":record["amount"]})
        insert_db(dbc, **insert_kwargs)
    dbc.execute_cursor("delete from tracked_topics where 1 = 1;", commit=True)
Пример #5
0
def consume_stats(bootstrap_servers, datasource):
    dbc = DBConnection(datasource)
    dbc.get_base_table_descriptions()
    consumer = KafkaConsumer("stats",\
            group_id  = "test",\
            bootstrap_servers=bootstrap_servers,\
            auto_offset_reset ="smallest",\
            value_deserializer =lambda m: json.loads(m.decode('utf-8')))
    insert_kwargs = {}
    insert_kwargs.update({"table": "tracked_topics"})
    for i, message in enumerate(consumer):
        jsonmsg = json.loads(message.value)
        stat_topic = jsonmsg["topic"]
        stat_count = jsonmsg["count"]
        stat_time = jsonmsg["timestamp"]
        insert_kwargs.update({"topic_name":str(stat_topic),\
                          "quantity":int(stat_count),\
                          "timestamp":stat_time})
        insert_db(dbc, **insert_kwargs)
        if i % 100 == 0:
            print(" inserted i {} tracked records".format(i))
        if i == 1000000:
            break
    dbc.commit_connection()
Пример #6
0
class mockData(object):
    def __init__(self, database="test_database"):
        self.config = {\
                       "sites": (10,random_site),\
                       "parts":(50,random_part),\
                       "customers":(15,random_customer),\
                       "suppliers":(5,random_supplier),\
                       "sales_orders":(50,random_sales_order),\
                       "purchase_orders":(50,random_purchase_order)}
        self.commands = [
            "sites", "parts", "customers", "sales_orders", "purchase_orders",
            "suppliers"
        ]
        print("mockdata got database name {}".format(database))
        self.dbc = DBConnection(database)
        self.current_table = None
        self.insert_statements = []
        global fake_factory
        global product_data
        product_data = read_product_data()
        fake_factory = Faker()

    def set_quantity(self, configitem, amount):
        current_config = self.config.get(configitem)
        new_config = (amount, current_config[1])
        print("updating {}  from {} to {} ".format(configitem, current_config,
                                                   new_config))
        self.config[configitem] = new_config

    def generate_insert_statement(self, **kwargs):
        base_stmt = "insert into {} ({}) values ({});"
        column_definitions = self.dbc.descriptions.get(self.current_table)
        cols = []
        vals = []
        for column, data in kwargs.items():
            column_type = column_definitions.get(column, False)
            if not column_type:
                print(
                    "column type for column {} not found for table {}".format(
                        column, self.current_table))
            else:
                cols.append(column)
                if "CHAR" in column_type.upper():
                    val = "'{}'".format(data)
                elif "INT" in column_type.upper():
                    #                    print ("making int {} for column {}".format(data,column))
                    val = "{}".format(str(int(data)))
                elif "TIME" in column_type.upper():
                    #                    print("got time col {} of type{} ".format(data,type(data)))
                    val = "TIMESTAMP '{}'".format(data.isoformat())
                else:
                    val = data
                vals.append(val)
        columns = ", ".join(cols)
        values = ", ".join(vals)
        return base_stmt.format(self.current_table, columns, values)

    def part_customer_data(self):
        self.current_table = "part_customers"
        part_list = self.dbc.get_column_from_table("parts", "id")
        customer_list = self.dbc.get_column_from_table("customers", "id")
        print(
            "making part_customers data for {} customers and {} parts".format(
                len(customer_list), len(part_list)))

        for customer in customer_list:
            rd.shuffle(part_list)
            parts = rd.randint(0, len(part_list))
            for i in range(parts):
                record = {}
                record["part_id"] = part_list[i]
                record["customer_id"] = customer
                record["delivery_lead_time"] = rd.randint(2, 5)
                insert_stmt = self.generate_insert_statement(**record)
                self.insert_statements.append(insert_stmt)
        self.run_insert_statements()

    def part_supplier_data(self):
        print("making part_supplier data")
        self.current_table = "part_suppliers"
        part_list = self.dbc.get_column_from_table("parts", "id")
        supplier_list = self.dbc.get_column_from_table("suppliers", "id")
        for customer in supplier_list:
            rd.shuffle(part_list)
            parts = rd.randint(0, len(part_list))
            for i in range(parts):
                record = {}
                record["part_id"] = part_list[i]
                record["supplier_id"] = customer
                record["supply_lead_time"] = rd.randint(1, 30)
                insert_stmt = self.generate_insert_statement(**record)
                self.insert_statements.append(insert_stmt)
        self.run_insert_statements()

    def generate_starting_inventory(self):
        result = {}
        possible_status = ["available", "expired", "qa_hold"]
        result["site_id"] = get_random_value_from_column("sites", "id")[0]
        result["part_id"] = part_id
        result["status"] = rd.choice(possible_status)
        result["quantity"] = rd.randint(0, 20000)
        insert_stmt = self.generate_insert_statement(**result)
        self.insert_statements.append(insert_stmt)

    def data_generator(self):
        n, func = self.config.get(self.current_table)
        for i in range(n):
            row = func()
            yield self.generate_insert_statement(**row)

    def mock_data_from_config(self):
        self.insert_statements = []
        for command in self.commands:
            self.current_table = command
            table_data_generator = self.data_generator()
            self.insert_statements += [stmt for stmt in table_data_generator]

    def run_insert_statements(self):
        if self.insert_statements:
            for stmt in self.insert_statements:
                if isinstance(stmt, dict):
                    print(stmt)
                    stmt = self.generate_insert_statement(**stmt)
                self.dbc.execute_cursor(stmt, commit=True)
        self.insert_statements = []
Пример #7
0
class mockData(object):
    """ class for managing the generation of mocked data using the fakefactory """
    def __init__(self, database="test_database"):
        self.config = {\
                       "sites": (10,random_site),\
                       "parts":(50,random_part),\
                       "customers":(15,random_customer),\
                       "suppliers":(5,random_supplier),\
                       "sales_orders":(50,random_sales_order),\
                       "purchase_orders":(50,random_purchase_order)}
        self.commands = [
            "sites", "parts", "customers", "sales_orders", "purchase_orders",
            "suppliers"
        ]
        print("mockdata got database name {}".format(database))
        self.dbc = DBConnection(database)
        self.current_table = None
        self.insert_statements = []
        global fake_factory
        global product_data
        product_data = read_product_data()
        fake_factory = Faker()

    def set_quantity(self, configitem, amount):
        """update config for mocking any particular tables data"""
        current_config = self.config.get(configitem)
        new_config = (amount, current_config[1])
        print("updating {}  from {} to {} ".format(configitem, current_config,
                                                   new_config))
        self.config[configitem] = new_config

    def generate_insert_statement(self, **kwargs):
        """ take any set of input kwargs and turn them into a formatted insert statement"""
        base_stmt = "insert into {} ({}) values ({});"
        column_definitions = self.dbc.descriptions.get(self.current_table)
        cols = []
        vals = []
        for column, data in kwargs.items():
            column_type = column_definitions.get(column, False)
            if not column_type:
                print(
                    "column type for column {} not found for table {}".format(
                        column, self.current_table))
            else:
                cols.append(column)
                if "CHAR" in column_type.upper():
                    val = "'{}'".format(data)
                elif "INT" in column_type.upper():
                    #                    print ("making int {} for column {}".format(data,column))
                    val = "{}".format(str(int(data)))
                elif "TIME" in column_type.upper():
                    #                    print("got time col {} of type{} ".format(data,type(data)))
                    val = "TIMESTAMP '{}'".format(data.isoformat())
                else:
                    val = data
                vals.append(val)
        columns = ", ".join(cols)
        values = ", ".join(vals)
        return base_stmt.format(self.current_table, columns, values)

    def part_customer_data(self):
        """ special method for making customer data """
        self.current_table = "part_customers"
        part_list = self.dbc.get_column_from_table("parts", "id")
        customer_list = self.dbc.get_column_from_table("customers", "id")
        print(
            "making part_customers data for {} customers and {} parts".format(
                len(customer_list), len(part_list)))

        for customer in customer_list:
            rd.shuffle(part_list)
            parts = rd.randint(0, len(part_list))
            for i in range(parts):
                record = {}
                record["part_id"] = part_list[i]
                record["customer_id"] = customer
                record["delivery_lead_time"] = rd.randint(2, 5)
                insert_stmt = self.generate_insert_statement(**record)
                self.insert_statements.append(insert_stmt)
        self.run_insert_statements()

    def part_supplier_data(self):
        """ special method for making supplier data """
        print("making part_supplier data")
        self.current_table = "part_suppliers"
        part_list = self.dbc.get_column_from_table("parts", "id")
        supplier_list = self.dbc.get_column_from_table("suppliers", "id")
        for customer in supplier_list:
            rd.shuffle(part_list)
            parts = rd.randint(0, len(part_list))
            for i in range(parts):
                record = {}
                record["part_id"] = part_list[i]
                record["supplier_id"] = customer
                record["supply_lead_time"] = rd.randint(1, 30)
                insert_stmt = self.generate_insert_statement(**record)
                self.insert_statements.append(insert_stmt)
        self.run_insert_statements()

    def data_generator(self):
        """for turning a data fetch method into a generator as per config """
        n, func = self.config.get(self.current_table)
        for i in range(n):
            row = func()
            yield self.generate_insert_statement(**row)

    def mock_data_from_config(self):
        """ run all configured commands """
        self.insert_statements = []
        for command in self.commands:
            self.current_table = command
            table_data_generator = self.data_generator()
            self.insert_statements += [stmt for stmt in table_data_generator]

    def run_insert_statements(self):
        """ run all staged insert statements against the database """
        if self.insert_statements:
            for stmt in self.insert_statements:
                if isinstance(stmt, dict):
                    print(stmt)
                    stmt = self.generate_insert_statement(**stmt)
                self.dbc.execute_cursor(stmt, commit=True)
        self.insert_statements = []
Пример #8
0
def generate_reporting_tables(**kwargs):
    print("generate master data kwargs {}".format(kwargs))
    db = kwargs["db"]
    print("db outta kwargs = {}".format(db))
    dbc = DBConnection(db)
    create_tables(dbc)