Exemplo n.º 1
0
    def export_rel_supplier_partsupp(self):
        conn = None
        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()
            file_name = "rel_supplier_partsupp"

            rel_supplier_partsupp = ExportDataCsv.path + file_name

            command = '''COPY 
                                (SELECT PS_PARTKEY, PS_SUPPKEY 
                                FROM supplier AS s INNER JOIN partsupp AS p 
                                ON s.S_SUPPKEY = p.PS_SUPPKEY) 
                                TO '{0}{1}.csv' 
                                DELIMITER '|' CSV HEADER;'''.format(
                ExportDataCsv.path, file_name)
            cur.execute(command)
            cur.close()
            conn.commit()
        except (Exception, psycopg2.DatabaseError) as error:
            print(error)
        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 2
0
    def insert_REGION(self):
        conn = None
        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()

            data_size = 5
            region_names = [
                "AFRICA", "AMERICA", "ASIA", "EUROPE", "MIDDLE EAST"
            ]

            for i in range(data_size):
                R_REGIONKEY = i
                R_NAME = region_names[i]
                R_COMMENT = InsertData.generate_random_string_data(
                    random.randint(31, 115))
                cur.execute(
                    "INSERT INTO REGION(R_REGIONKEY,R_NAME,R_COMMENT) VALUES (%s, %s, %s)",
                    (str(R_REGIONKEY), R_NAME, R_COMMENT))
            cur.close()
            conn.commit()

        except (Exception, psycopg2.DatabaseError) as error:
            print(error)

        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 3
0
def connect():
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # read connection parameters
        params = config()

        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)

        # create a cursor
        cur = conn.cursor()

        # execute a statement
        print('PostgreSQL database version:')
        cur.execute('SELECT version()')

        # display the PostgreSQL database server version
        db_version = cur.fetchone()
        print(db_version)

        # close the communication with the PostgreSQL
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
            print('Database connection closed.')
Exemplo n.º 4
0
    def run_queries(self):
        conn = None
        try:
            params = config()
            conn = psycopg2.connect(**params)

            q1 = Query1(conn)
            q1.execute()

            q2 = Query2(conn)
            q2.execute()

            q3 = Query3(conn)
            q3.execute()

            q4 = Query4(conn)
            q4.execute()

            q5 = Query5(conn)
            q5.execute()

        except (Exception, psycopg2.DatabaseError) as error:
            print(error)

        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 5
0
    def insert_CUSTOMER(self):
        conn = None
        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()

            data_size = int(self.scale_factor * 150000)

            keys = list(range(data_size))
            random.shuffle(keys)

            segments = [
                "AUTOMOBILE", "BUILDING", "FURNITURE", "MACHINERY", "HOUSEHOLD"
            ]

            for i in range(data_size):
                C_CUSTKEY = keys[i]
                C_NAME = "Customer#" + '{:09d}'.format(C_CUSTKEY)
                C_ADDRESS = InsertData.generate_random_string_data(
                    random.randint(10, 40))
                C_NATIONKEY = random.randint(0, 24)

                country_code = C_NATIONKEY + 10
                local_number1 = random.randint(100, 999)
                local_number2 = random.randint(100, 999)
                local_number3 = random.randint(1000, 9999)
                C_PHONE = str(country_code) + "-" + str(
                    local_number1) + "-" + str(local_number2) + "-" + str(
                        local_number3)
                C_ACCTBAL = random.uniform(-999.99, 9999.99)
                C_MKTSEGMENT = segments[random.randint(0, 4)]
                C_COMMENT = InsertData.generate_random_string_data(
                    random.randint(29, 116))

                cur.execute(
                    "INSERT INTO CUSTOMER(C_CUSTKEY, C_NAME, C_ADDRESS, C_NATIONKEY,C_PHONE,C_ACCTBAL, C_MKTSEGMENT,C_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
                    (str(C_CUSTKEY), C_NAME, C_ADDRESS, str(C_NATIONKEY),
                     C_PHONE, str(C_ACCTBAL), C_MKTSEGMENT, C_COMMENT))

            cur.close()
            conn.commit()

        except (Exception, psycopg2.DatabaseError) as error:
            print(error)

        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 6
0
    def insert_PARTSUPP(self):
        conn = None
        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()

            data_size = int(self.scale_factor * 200000)

            #Repeating Keys generated in part table
            keys = list(range(data_size))

            S = self.scale_factor * 10000

            for ind_part in range(data_size):
                for ind_partSupp in range(4):
                    PS_PARTKEY = keys[ind_part]
                    PS_SUPPKEY = int(((PS_PARTKEY +
                                       (ind_partSupp *
                                        ((S / 4) +
                                         ((int(PS_PARTKEY - 1)) / S)))) % S) +
                                     1)

                    PS_AVAILQTY = random.randint(1, 9999)
                    PS_SUPPLYCOST = random.uniform(1.0, 1000.0)
                    PS_COMMENT = InsertData.generate_random_string_data(
                        random.randint(49, 198))

                    cur.execute(
                        "INSERT INTO PARTSUPP(PS_PARTKEY, PS_SUPPKEY, PS_AVAILQTY, PS_SUPPLYCOST, PS_COMMENT) VALUES (%s, %s, %s, %s, %s)",
                        (str(PS_PARTKEY), str(PS_SUPPKEY), str(PS_AVAILQTY),
                         str(PS_SUPPLYCOST), PS_COMMENT))

            cur.close()
            conn.commit()

        except (Exception, psycopg2.DatabaseError) as error:
            print(error)

        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 7
0
 def insert_REGION(self):
     conn = None
     try:
         params = config()
         conn = psycopg2.connect(**params)
         cur = conn.cursor()
         with open(InsertDataCsv.csv_path + 'region.csv', 'r') as f:
             reader = csv.reader(f, delimiter='|')
             next(reader)  # Skip the header row.
             for row in reader:
                 cur.execute(
                     "INSERT INTO REGION(R_REGIONKEY,R_NAME,R_COMMENT) VALUES (%s, %s, %s)",
                     row)
         cur.close()
         conn.commit()
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
         if conn is not None:
             conn.close()
Exemplo n.º 8
0
 def insert_ORDERS(self):
     conn = None
     try:
         params = config()
         conn = psycopg2.connect(**params)
         cur = conn.cursor()
         with open(InsertDataCsv.csv_path + 'orders.csv', 'r') as f:
             reader = csv.reader(f, delimiter='|')
             next(reader)  # Skip the header row.
             for row in reader:
                 cur.execute(
                     "INSERT INTO ORDERS(O_ORDERKEY, O_CUSTKEY, O_ORDERSTATUS, O_TOTALPRICE, O_ORDERDATE, O_ORDERPRIORITY, O_CLERK, O_SHIPPRIORITY, O_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                     row)
         cur.close()
         conn.commit()
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
         if conn is not None:
             conn.close()
Exemplo n.º 9
0
 def insert_PARTSUPP(self):
     conn = None
     try:
         params = config()
         conn = psycopg2.connect(**params)
         cur = conn.cursor()
         with open(InsertDataCsv.csv_path + 'partsupp.csv', 'r') as f:
             reader = csv.reader(f, delimiter='|')
             next(reader)  # Skip the header row.
             for row in reader:
                 cur.execute(
                     "INSERT INTO PARTSUPP(PS_PARTKEY, PS_SUPPKEY, PS_AVAILQTY, PS_SUPPLYCOST, PS_COMMENT) VALUES (%s, %s, %s, %s, %s)",
                     row)
         cur.close()
         conn.commit()
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
         if conn is not None:
             conn.close()
Exemplo n.º 10
0
 def insert_CUSTOMER(self):
     conn = None
     try:
         params = config()
         conn = psycopg2.connect(**params)
         cur = conn.cursor()
         with open(InsertDataCsv.csv_path + 'customer.csv', 'r') as f:
             reader = csv.reader(f, delimiter='|')
             next(reader)  # Skip the header row.
             for row in reader:
                 cur.execute(
                     "INSERT INTO CUSTOMER (C_CUSTKEY, C_NAME, C_ADDRESS, C_NATIONKEY,C_PHONE,C_ACCTBAL, C_MKTSEGMENT,C_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
                     row)
         cur.close()
         conn.commit()
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
         if conn is not None:
             conn.close()
Exemplo n.º 11
0
 def insert_PART(self):
     conn = None
     try:
         params = config()
         conn = psycopg2.connect(**params)
         cur = conn.cursor()
         with open(InsertDataCsv.csv_path + 'part.csv', 'r') as f:
             reader = csv.reader(f, delimiter='|')
             next(reader)  # Skip the header row.
             for row in reader:
                 cur.execute(
                     "INSERT INTO PART(P_PARTKEY, P_NAME, P_MFGR, P_BRAND,  P_TYPE,  P_SIZE,  P_CONTAINER,  P_RETAILPRICE,  P_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                     row)
         cur.close()
         conn.commit()
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
         if conn is not None:
             conn.close()
Exemplo n.º 12
0
    def insert_LINEITEM(self):
        conn = None
        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()

            with open(InsertDataCsv.csv_path + 'lineitem.csv', 'r') as f:
                reader = csv.reader(f, delimiter='|')
                next(reader)  # Skip the header row.
                for row in reader:
                    cur.execute(
                        "INSERT INTO LINEITEM(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                        row)
            cur.close()
            conn.commit()
        except (Exception, psycopg2.DatabaseError) as error:
            print(error)
        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 13
0
    def export_node_orders(self):
        conn = None
        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()
            file_name = "orders_neo4j"
            command = '''COPY (
            SELECT *, split_part(o_orderdate::TEXT,'-',1) AS o_year, split_part(o_orderdate::TEXT,'-',2) AS 
            o_month, split_part(o_orderdate::TEXT,'-',3) AS o_day FROM orders)
            TO '{0}{1}.csv'
            DELIMITER '|' CSV HEADER;'''.format(ExportDataCsv.path, file_name)
            cur.execute(command)
            cur.close()
            conn.commit()
        except (Exception, psycopg2.DatabaseError) as error:
            print(error)

        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 14
0
 def export_rel_nation_supplier(self):
     conn = None
     try:
         params = config()
         conn = psycopg2.connect(**params)
         cur = conn.cursor()
         file_name = "rel_nation_supplier"
         command = '''COPY 
                 (SELECT S_SUPPKEY, S_NATIONKEY 
                 FROM nation AS n INNER JOIN supplier AS s 
                 ON n.N_NATIONKEY = s.S_NATIONKEY) 
                 TO '{0}{1}.csv' 
                 DELIMITER '|' CSV HEADER;'''.format(
             ExportDataCsv.path, file_name)
         cur.execute(command)
         cur.close()
         conn.commit()
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
         if conn is not None:
             conn.close()
Exemplo n.º 15
0
    def export_rel_customer_nation(self):
        conn = None
        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()
            file_name = "rel_customer_nation"

            command = '''COPY 
            (SELECT C_CUSTKEY, C_NATIONKEY 
            FROM customer AS c INNER JOIN nation AS n 
            ON c.C_NATIONKEY = n.N_NATIONKEY) 
            TO '{0}{1}.csv' 
            DELIMITER '|' CSV HEADER;'''.format(ExportDataCsv.path, file_name)
            cur.execute(command)
            cur.close()
            conn.commit()
        except (Exception, psycopg2.DatabaseError) as error:
            print(error)
        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 16
0
 def export_rel_orders_customer(self):
     conn = None
     try:
         params = config()
         conn = psycopg2.connect(**params)
         cur = conn.cursor()
         file_name = "rel_orders_customer"
         command = '''COPY 
                     (SELECT O_ORDERKEY, O_CUSTKEY 
                     FROM orders AS o INNER JOIN customer AS c 
                     ON o.O_CUSTKEY = c.C_CUSTKEY) 
                     TO '{0}{1}.csv' 
                     DELIMITER '|' CSV HEADER;'''.format(
             ExportDataCsv.path, file_name)
         cur.execute(command)
         cur.close()
         conn.commit()
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
         if conn is not None:
             conn.close()
Exemplo n.º 17
0
 def export_rel_lineitem_supplier(self):
     conn = None
     try:
         params = config()
         conn = psycopg2.connect(**params)
         cur = conn.cursor()
         file_name = "rel_lineitem_supplier"
         command = '''COPY 
            (SELECT L_ORDERKEY,L_PARTKEY,L_SUPPKEY,L_LINENUMBER,L_SUPPKEY  
            FROM lineitem AS l INNER JOIN supplier AS s 
            ON l.L_SUPPKEY = s.S_SUPPKEY) 
            TO '{0}{1}.csv' 
            DELIMITER '|' CSV HEADER;'''.format(ExportDataCsv.path,
                                                file_name)
         cur.execute(command)
         cur.close()
         conn.commit()
     except (Exception, psycopg2.DatabaseError) as error:
         print(error)
     finally:
         if conn is not None:
             conn.close()
Exemplo n.º 18
0
    def insert_NATION(self):
        conn = None
        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()

            #Dict of nation_name["N_NATIONKEY"] = "N_NAME"
            nation_name = {
                0: "ALGERIA",
                1: "ARGENTINA",
                2: "BRAZIL",
                3: "CANADA",
                4: "EGYPT",
                5: "ETHIOPIA",
                6: "FRANCE",
                7: "GERMANY",
                8: "INDIA",
                9: "INDONESIA",
                10: "IRAN",
                11: "IRAQ",
                12: "JAPAN",
                13: "JORDAN",
                14: "KENYA",
                15: "MOROCCO",
                16: "MOZAMBIQUE",
                17: "PERU",
                18: "CHINA",
                19: "ROMANIA",
                20: "SAUDI ARABIA",
                21: "VIETNAM",
                22: "RUSSIA",
                23: "UNITED KINGDOM",
                24: "UNITED STATES"
            }

            # Dict of region_key["R_REGIONKEY"] = "R_NAME"
            region_key = {
                "ALGERIA": 0,
                "ARGENTINA": 1,
                "BRAZIL": 1,
                "CANADA": 1,
                "EGYPT": 4,
                "ETHIOPIA": 0,
                "FRANCE": 3,
                "GERMANY": 3,
                "INDIA": 2,
                "INDONESIA": 2,
                "IRAN": 4,
                "IRAQ": 4,
                "JAPAN": 2,
                "JORDAN": 4,
                "KENYA": 0,
                "MOROCCO": 0,
                "MOZAMBIQUE": 0,
                "PERU": 1,
                "CHINA": 2,
                "ROMANIA": 3,
                "SAUDI ARABIA": 4,
                "VIETNAM": 2,
                "RUSSIA": 3,
                "UNITED KINGDOM": 3,
                "UNITED STATES": 1
            }

            data_size = 25
            for N_NATIONKEY in range(data_size):
                N_NAME = nation_name[N_NATIONKEY]
                N_REGIONKEY = region_key[N_NAME]
                N_COMMENT = InsertData.generate_random_string_data(
                    random.randint(31, 114))
                cur.execute(
                    "INSERT INTO NATION(N_NATIONKEY,N_NAME,N_REGIONKEY,N_COMMENT) VALUES (%s, %s, %s, %s)",
                    (str(N_NATIONKEY), N_NAME, str(N_REGIONKEY), N_COMMENT))
            cur.close()
            conn.commit()

        except (Exception, psycopg2.DatabaseError) as error:
            print(error)

        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 19
0
    def create_tables(self):
        conn = None
        table_names = ("PART", "SUPPLIER", "PARTSUPP", "CUSTOMER", "ORDERS",
                       "LINEITEM", "NATION", "REGION")

        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()

            # Empty database by dropping tables
            for table_name in table_names:
                cur.execute("DROP TABLE IF EXISTS {};".format(table_name))

            # Create tables...
            """ create tables in the PostgreSQL database"""
            commands = ("""
                CREATE TABLE PART(
                    P_PARTKEY BIGINT NOT NULL PRIMARY KEY,
                    P_NAME VARCHAR(55),
                    P_MFGR CHAR(25),
                    P_BRAND CHAR(10),
                    P_TYPE VARCHAR(25),
                    P_SIZE INTEGER,
                    P_CONTAINER CHAR(10),
                    P_RETAILPRICE DECIMAL(12,2),
                    P_COMMENT VARCHAR(23)
                )
                """, """
                CREATE TABLE SUPPLIER(
                    S_SUPPKEY BIGINT NOT NULL PRIMARY KEY,
                    S_NAME CHAR(25),
                    S_ADDRESS VARCHAR(40),
                    S_NATIONKEY INTEGER,
                    S_PHONE CHAR(15),
                    S_ACCTBAL DECIMAL(12,2),
                    S_COMMENT VARCHAR(101)
                )
                """, """
                CREATE TABLE PARTSUPP(
                    PS_PARTKEY BIGINT NOT NULL,
                    PS_SUPPKEY BIGINT NOT NULL,
                    PS_AVAILQTY INTEGER,
                    PS_SUPPLYCOST DECIMAL(12,2),
                    PS_COMMENT VARCHAR(199),
                    PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY)
                )
                """, """
                CREATE TABLE CUSTOMER(
                    C_CUSTKEY BIGINT NOT NULL PRIMARY KEY,
                    C_NAME VARCHAR(25),
                    C_ADDRESS VARCHAR(40),
                    C_NATIONKEY BIGINT,
                    C_PHONE CHAR(15),
                    C_ACCTBAL DECIMAL(12,2),
                    C_MKTSEGMENT CHAR(10),
                    C_COMMENT VARCHAR(117)
                )
                """, """
                CREATE TABLE ORDERS(
                    O_ORDERKEY BIGINT NOT NULL PRIMARY KEY,
                    O_CUSTKEY BIGINT,
                    O_ORDERSTATUS CHAR(1),
                    O_TOTALPRICE DECIMAL(12,2),
                    O_ORDERDATE DATE,
                    O_ORDERPRIORITY CHAR(15),
                    O_CLERK CHAR(15),
                    O_SHIPPRIORITY INTEGER,
                    O_COMMENT VARCHAR(79)
                )
                """, """
                CREATE TABLE LINEITEM(
                    L_ORDERKEY BIGINT NOT NULL,
                    L_PARTKEY BIGINT,
                    L_SUPPKEY BIGINT,
                    L_LINENUMBER INTEGER NOT NULL,
                    L_QUANTITY DECIMAL(12,2),
                    L_EXTENDEDPRICE DECIMAL(12,2),
                    L_DISCOUNT DECIMAL(12,2),
                    L_TAX DECIMAL(12,2),
                    L_RETURNFLAG CHAR(1),
                    L_LINESTATUS CHAR(1),
                    L_SHIPDATE DATE,
                    L_COMMITDATE DATE,
                    L_RECEIPTDATE DATE,
                    L_SHIPINSTRUCT CHAR(25),
                    L_SHIPMODE CHAR(10),
                    L_COMMENT VARCHAR(44),
                    PRIMARY KEY (L_ORDERKEY,L_LINENUMBER)
                )
                """, """
                CREATE TABLE NATION(
                    N_NATIONKEY BIGINT NOT NULL PRIMARY KEY,
                    N_NAME CHAR(25),
                    N_REGIONKEY BIGINT,
                    N_COMMENT VARCHAR(152)
                )
                """, """
                CREATE TABLE REGION(
                    R_REGIONKEY BIGINT NOT NULL PRIMARY KEY,
                    R_NAME CHAR(25),
                    R_COMMENT VARCHAR(152)
                )
                """)
            for command in commands:
                cur.execute(command)
            cur.close()
            conn.commit()

        except (Exception, psycopg2.DatabaseError) as error:
            print(error)

        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 20
0
    def insert_PART(self):
        conn = None
        try:

            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()

            data_size = int(self.scale_factor * 200000)

            keys = list(range(data_size))
            random.shuffle(keys)

            part_names = [
                "almond", "antique", "aquamarine", "azure", "beige", "bisque",
                "black", "blanched", "blue", "blush", "brown", "burlywood",
                "burnished", "chartreuse", "chiffon", "chocolate", "coral",
                "cornflower", "cornsilk", "cream", "cyan", "dark", "deep",
                "dim", "dodger", "drab", "firebrick", "floral", "forest",
                "frosted", "gainsboro", "ghost", "goldenrod", "green", "grey",
                "honeydew", "hot", "indian", "ivory", "khaki", "lace",
                "lavender", "lawn", "lemon", "light", "lime", "linen",
                "magenta", "maroon", "medium", "metallic", "midnight", "mint",
                "misty", "moccasin", "navajo", "navy", "olive", "orange",
                "orchid", "pale", "papaya", "peach", "peru", "pink", "plum",
                "powder", "puff", "purple", "red", "rose", "rosy", "royal",
                "saddle", "salmon", "sandy", "seashell", "sienna", "sky",
                "slate", "smoke", "snow", "spring", "steel", "tan", "thistle",
                "tomato", "turquoise", "violet", "wheat", "white", "yellow"
            ]

            len_part_names = len(part_names)

            types_syllable_1 = [
                "STANDARD", "SMALL", "MEDIUM", "LARGE", "ECONOMY", "PROMO"
            ]
            types_syllable_2 = [
                "ANODIZED", "BURNISHED", "PLATED", "POLISHED", "BRUSHED"
            ]
            types_syllable_3 = ["TIN", "NICKEL", "BRASS", "STEEL", "COPPER"]

            containers_syllable_1 = ["SM", "LG", "MED", "JUMBO", "WRAP"]
            containers_syllable_2 = [
                "CASE", "BOX", "BAG", "JAR", "PKG", "PACK", "CAN", "DRUM"
            ]

            for i in range(data_size):

                P_PARTKEY = keys[i]

                random_names = []
                while (len(random_names) < 5):
                    random_index = random.randint(0, (len_part_names - 1))
                    if (not random_names.__contains__(
                            part_names[random_index])):
                        random_names.append(part_names[random_index])
                space_char = " "
                P_NAME = space_char.join(random_names)

                M = random.randint(1, 5)
                P_MFGR = "Manufacturer#{0}".format(M)

                N = random.randint(1, 5)
                P_BRAND = "Brand#" + str(M) + str(N)

                P_TYPE =types_syllable_1[random.randint(0, 5)] + " " + \
                        types_syllable_2[random.randint(0, 4)] + " " + \
                        types_syllable_3[random.randint(0, 4)]

                P_SIZE = str(random.randint(1, 50))

                P_CONTAINER =containers_syllable_1[random.randint(0, 4)] + " " +\
                             containers_syllable_2[random.randint(0, 7)]

                temp_P_RETAILPRICE = (90000 +
                                      ((P_PARTKEY / 10) % 20001) + 100 *
                                      (P_PARTKEY % 1000)) / 100.0
                P_RETAILPRICE = '{:.2f}'.format(temp_P_RETAILPRICE)
                self.retail_price_part_table[P_PARTKEY] = temp_P_RETAILPRICE

                P_COMMENT = InsertData.generate_random_string_data(
                    random.randint(5, 22))

                cur.execute(
                    "INSERT INTO PART(P_PARTKEY, P_NAME, P_MFGR, P_BRAND,  P_TYPE,  P_SIZE,  P_CONTAINER,  P_RETAILPRICE,  P_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                    (str(P_PARTKEY), P_NAME, P_MFGR, P_BRAND, P_TYPE,
                     str(P_SIZE), P_CONTAINER, str(P_RETAILPRICE), P_COMMENT))

            cur.close()
            conn.commit()
        except (Exception, psycopg2.DatabaseError) as error:
            print(error)

        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 21
0
    def insert_SUPPLIER(self):
        conn = None
        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()

            data_size = int(self.scale_factor * 10000)

            keys = list(range(data_size))
            random.shuffle(keys)

            comment_ratio = int(self.scale_factor * 5)

            for i in range(data_size):
                S_SUPPKEY = keys[i]
                S_NAME = "Supplier#r" + "{:09d}".format(S_SUPPKEY)

                S_ADDRESS = InsertData.generate_random_string_data(
                    random.randint(10, 40))

                S_NATIONKEY = random.randint(0, 24)

                country_code = S_NATIONKEY + 10
                local_number1 = random.randint(100, 999)
                local_number2 = random.randint(100, 999)
                local_number3 = random.randint(1000, 9999)
                S_PHONE = str(country_code) + "-" + str(
                    local_number1) + "-" + str(local_number2) + "-" + str(
                        local_number3)

                S_ACCTBAL = '{:.2f}'.format(random.uniform(-999.99, 9999.99))

                comment = InsertData.generate_random_string_data(
                    random.randint(25, 100))
                if S_SUPPKEY < comment_ratio:
                    while (comment < 25 or comment > 100):
                        comment = InsertData.generate_random_string_data(
                            random.randint(0, 20)
                        ) + "Customer " + InsertData.generate_random_string_data(
                            random.randint(0, 20)
                        ) + "Complaints" + InsertData.generate_random_string_data(
                            random.randint(0, 20))
                if S_SUPPKEY > data_size - comment_ratio:
                    while (comment < 25 or comment > 100):
                        comment = InsertData.generate_random_string_data(
                            random.randint(0, 20)
                        ) + "Customer " + InsertData.generate_random_string_data(
                            random.randint(0, 20)
                        ) + "Recommends" + InsertData.generate_random_string_data(
                            random.randint(0, 20))
                S_COMMENT = comment

                cur.execute(
                    "INSERT INTO SUPPLIER(S_SUPPKEY, S_NAME, S_ADDRESS, S_NATIONKEY,  S_PHONE,  S_ACCTBAL,  S_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s)",
                    (str(S_SUPPKEY), S_NAME, S_ADDRESS, str(S_NATIONKEY),
                     S_PHONE, str(S_ACCTBAL), S_COMMENT))

            cur.close()
            conn.commit()

        except (Exception, psycopg2.DatabaseError) as error:
            print(error)

        finally:
            if conn is not None:
                conn.close()
Exemplo n.º 22
0
    def insert_ORDERS_LINEITEM(self):
        conn = None

        Priorities = [
            "1-URGENT", "2-HIGH", "3-MEDIUM", "4-NOT SPECIFIED", "5-LOW"
        ]
        STARTDATE = datetime.strptime("1992-1-1", "%Y-%m-%d")
        ENDDATE = datetime.strptime("1998-12-31", "%Y-%m-%d")
        CURRENTDATE = datetime.strptime("1995-6-17", "%Y-%m-%d")
        Instructions = [
            "DELIVER IN PERSON", "COLLECT COD", "NONE", "TAKE BACK RETURN"
        ]
        Modes = ["REG AIR", "AIR", "RAIL", "SHIP", "TRUCK", "MAIL", "FOB"]

        try:
            params = config()
            conn = psycopg2.connect(**params)
            cur = conn.cursor()

            customer_data_size = int(self.scale_factor * 150000)
            order_data_size = int(customer_data_size * 10)  #1500000
            order_key_size = int(order_data_size * 4)  #1500000 * 4
            part_data_size = int(self.scale_factor * 200000)

            subtracted_date = ENDDATE - timedelta(151)
            time_between_dates = subtracted_date - STARTDATE
            days_between_dates = time_between_dates.days

            keys = list(range(order_key_size))
            random.shuffle(keys)

            for id_order in range(order_data_size):

                O_ORDERKEY = keys[id_order]

                if O_ORDERKEY % 4 == 0:  #Only 25% of range key is populated

                    customer_key = random.randint(1, customer_data_size)
                    # not all customers have order. Every third customer is not assigned any order
                    while (customer_key % 3 == 0):
                        customer_key = random.randint(1, customer_data_size)
                    O_CUSTKEY = customer_key

                    random_number_of_days = random.randrange(
                        days_between_dates)
                    O_ORDERDATE = STARTDATE + timedelta(random_number_of_days)

                    O_ORDERPRIORITY = Priorities[random.randint(0, 4)]
                    O_CLERK = "Clerk#" + '{:09d}'.format(
                        random.randint(1, int(self.scale_factor * 1000)))
                    O_SHIPPRIORITY = 0
                    O_COMMENT = InsertData.generate_random_string_data(
                        random.randint(19, 78))

                    #-----------LINEITEM table
                    data_size_lineitem = random.randint(1, 7)
                    O_TOTALPRICE = 0
                    O_ORDERSTATUS = "P"

                    part_key_temp = []
                    temp_key = random.randint(0, (part_data_size - 1))
                    part_key_temp.append(temp_key)
                    for id_lineitem in range(data_size_lineitem):
                        L_ORDERKEY = O_ORDERKEY
                        L_PARTKEY = temp_key
                        while (temp_key in part_key_temp):
                            temp_key = random.randint(0, (part_data_size - 1))
                        part_key_temp.append(temp_key)

                        i_sup = random.randint(0, 3)
                        S = int(self.scale_factor * 10000)
                        L_SUPPKEY = (L_PARTKEY + int((i_sup * ((S / 4) + int(
                            (L_PARTKEY - 1) / S))) % S) + 1)

                        L_LINENUMBER = id_lineitem

                        L_QUANTITY = random.randint(1, 50)

                        L_EXTENDEDPRICE = L_QUANTITY * self.retail_price_part_table[
                            L_PARTKEY]
                        L_DISCOUNT = random.uniform(0.0, 0.10)
                        L_TAX = random.uniform(0.0, 0.08)

                        L_SHIPDATE = O_ORDERDATE + timedelta(
                            random.randint(1, 121))
                        if L_SHIPDATE > CURRENTDATE:
                            L_LINESTATUS = "O"
                        else:
                            L_LINESTATUS = "F"

                        if (L_LINESTATUS == "F"):
                            O_ORDERSTATUS = "F"
                        elif (L_LINESTATUS == "O"):
                            O_ORDERSTATUS = "O"

                        L_COMMITDATE = O_ORDERDATE + timedelta(
                            random.randint(30, 90))
                        L_RECEIPTDATE = L_SHIPDATE + timedelta(
                            random.randint(1, 30))
                        if L_RECEIPTDATE <= CURRENTDATE:
                            if random.random() < 0.5:
                                L_RETURNFLAG = "R"
                            else:
                                L_RETURNFLAG = "A"
                        else:
                            L_RETURNFLAG = "N"
                        L_SHIPINSTRUCT = Instructions[random.randint(0, 3)]
                        L_SHIPMODE = Modes[random.randint(0, 6)]
                        L_COMMENT = InsertData.generate_random_string_data(
                            random.randint(10, 43))

                        O_TOTALPRICE += L_EXTENDEDPRICE * (1 + L_TAX) * (
                            1 - L_DISCOUNT)
                        cur.execute(
                            "INSERT INTO LINEITEM(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                            (str(L_ORDERKEY), str(L_PARTKEY), str(L_SUPPKEY),
                             str(L_LINENUMBER), str(L_QUANTITY),
                             str(L_EXTENDEDPRICE), str(L_DISCOUNT), str(L_TAX),
                             str(L_RETURNFLAG), str(L_LINESTATUS),
                             str(L_SHIPDATE), str(L_COMMITDATE),
                             str(L_RECEIPTDATE), str(L_SHIPINSTRUCT),
                             str(L_SHIPMODE), str(L_COMMENT)))

                    cur.execute(
                        "INSERT INTO ORDERS(O_ORDERKEY, O_CUSTKEY, O_ORDERSTATUS, O_TOTALPRICE, O_ORDERDATE, O_ORDERPRIORITY, O_CLERK, O_SHIPPRIORITY, O_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                        (str(O_ORDERKEY), str(O_CUSTKEY), O_ORDERSTATUS,
                         str(O_TOTALPRICE), str(O_ORDERDATE),
                         str(O_ORDERPRIORITY), str(O_CLERK),
                         str(O_SHIPPRIORITY), O_COMMENT))

            cur.close()
            conn.commit()

        except (Exception, psycopg2.DatabaseError) as error:
            print(error)

        finally:
            if conn is not None:
                conn.close()