def __init__(self): random.seed(7400) self.is_done = False self.cardinality = {} #self.schema = {"aka_name": ["id", "person_id", "name", "imdb_index", "name_pcode_cf", "name_pcode_nf", "surname_pcode", "md5sum"], "aka_title": ["id", "movie_id", "title", "imdb_index", "kind_id", "production_year", "phonetic_code", "episode_of_id", "season_nr", "episode_nr", "note", "md5sum"], "cast_info": ["id", "person_id", "movie_id", "person_role_id", "note", "nr_order", "role_id"], "char_name": ["id", "name", "imdb_index", "imdb_id", "name_pcode_nf", "surname_pcode", "md5sum"], "comp_cast_type": ["id", "kind"], "company_name": ["id", "name", "country_code", "imdb_id", "name_pcode_nf", "name_pcode_sf", "md5sum"], "company_type": ["id", "kind"], "complete_cast": ["id", "movie_id", "subject_id", "status_id"], "info_type": ["id", "info"], "keyword": ["id", "keyword", "phonetic_code"], "kind_type": ["id", "kind"], "link_type": ["id", "link"], "movie_companies": ["id", "movie_id", "company_id", "company_type_id", "note"], "movie_info": ["id", "movie_id", "info_type_id", "info", "note"], "movie_info_idx": ["id", "movie_id", "info_type_id", "info", "note"], "movie_keyword": ["id", "movie_id", "keyword_id"], "movie_link": ["id", "movie_id", "linked_movie_id", "link_type_id"], "name": ["id", "name", "imdb_index", "imdb_id", "gender", "name_pcode_cf", "name_pcode_nf", "surname_pcode", "md5sum"], "person_info": ["id", "person_id", "info_type_id", "info", "note"], "role_type": ["id", "role"], "title": ["id", "title", "imdb_index", "kind_id", "production_year", "imdb_id", "phonetic_code", "episode_of_id", "season_nr", "episode_nr", "series_years", "md5sum"]} self.schema = { "aka_name": [ "id", "person_id", "name", "imdb_index", "name_pcode_cf", "name_pcode_nf", "surname_pcode", "md5sum" ], "aka_title": [ "id", "movie_id", "title", "imdb_index", "kind_id", "production_year", "phonetic_code", "episode_of_id", "season_nr", "episode_nr", "note", "md5sum" ], "cast_info": [ "id", "person_id", "movie_id", "person_role_id", "note", "nr_order", "role_id" ], "char_name": [ "id", "name", "imdb_index", "imdb_id", "name_pcode_nf", "surname_pcode", "md5sum" ], "comp_cast_type": ["id", "kind"], "company_name": [ "id", "name", "country_code", "imdb_id", "name_pcode_nf", "name_pcode_sf", "md5sum" ], "company_type": ["id", "kind"], "complete_cast": ["id", "movie_id", "subject_id", "status_id"], "info_type": ["id", "info"], "keyword": ["id", "keyword", "phonetic_code"], "kind_type": ["id", "kind"], "link_type": ["id", "link"], "movie_companies": ["id", "movie_id", "company_id", "company_type_id", "note"], "movie_info": ["id", "movie_id", "info_type_id", "info", "note"], "movie_info_idx": ["id", "movie_id", "info_type_id", "info", "note"], "movie_keyword": ["id", "movie_id", "keyword_id"], "movie_link": ["id", "movie_id", "linked_movie_id", "link_type_id"], "name": [ "id", "name", "imdb_index", "imdb_id", "gender", "name_pcode_cf", "name_pcode_nf", "surname_pcode", "md5sum" ], "person_info": ["id", "person_id", "info_type_id", "info", "note"], "role_type": ["id", "role"], "title": [ "id", "title", "imdb_index", "kind_id", "production_year", "imdb_id", "phonetic_code", "episode_of_id", "season_nr", "episode_nr", "series_years", "md5sum" ], "comp_cast_type2": [], "company_name2": [], "info_type2": [], "kind_type2": [], "movie_companies2": [], "movie_info_idx2": [], "title2": [] #"comp_cast_type2": ["id", "kind"], #"company_name2": ["id", "name", "country_code", "imdb_id", "name_pcode_nf", "name_pcode_sf", "md5sum"], #"info_type2": ["id", "info"], #"kind_type2": ["id", "kind"], #"movie_companies2": ["id", "movie_id", "company_id", "company_type_id", "note"], #"movie_info_idx2": ["id", "movie_id", "info_type_id", "info", "note"], #"title2": ["id", "title", "imdb_index", "kind_id", "production_year", "imdb_id", "phonetic_code", # "episode_of_id", "season_nr", "episode_nr", "series_years", "md5sum"] } #self.primary = [] self.primary = [ 'akaname.md5sum', 'akaname2.md5sum', 'akaname.name', 'akaname2.name', 'akaname.surname_pcode', 'akaname2.surname_pcode', 'akaname.name_pcode_cf', 'akaname2.name_pcode_cf', 'akaname.name_pcode_nf', 'akaname2.name_pcode_nf', 'akaname.person_id', 'akaname2.person_id', 'akaname.id', 'akaname2.id', 'akatitle.episode_of_id', 'akatitle2.episode_of_id', 'akatitle.kind_id', 'akatitle2.kind_id', 'akatitle.md5sum', 'akatitle2.md5sum', 'akatitle.movie_id', 'akatitle2.movie_id', 'akatitle.phonetic_code', 'akatitle2.phonetic_code', 'akatitle.title', 'akatitle2.title', 'akatitle.production_year', 'akatitle2.production_year', 'akatitle.id', 'akatitle2.id', 'castinfo.person_role_id', 'castinfo2.person_role_id', 'castinfo.movie_id', 'castinfo2.movie_id', 'castinfo.person_id', 'castinfo2.person_id', 'castinfo.role_id', 'castinfo2.role_id', 'castinfo.id', 'castinfo2.id', 'charname.imdb_id', 'charname2.imdb_id', 'charname.md5sum', 'charname2.md5sum', 'charname.name', 'charname2.name', 'charname.surname_pcode', 'charname2.surname_pcode', 'charname.name_pcode_nf', 'charname2.name_pcode_nf', 'charname.id', 'charname2.id', 'compcasttype.kind', 'compcasttype2.kind', 'compcasttype.id', 'compcasttype2.id', 'companyname.country_code', 'companyname2.country_code', 'companyname.imdb_id', 'companyname2.imdb_id', 'companyname.md5sum', 'companyname2.md5sum', 'companyname.name', 'companyname2.name', 'companyname.name_pcode_nf', 'companyname2.name_pcode_nf', 'companyname.name_pcode_sf', 'companyname2.name_pcode_sf', 'companyname.id', 'companyname2.id', 'companytype.kind', 'companytype2.kind', 'companytype.id', 'companytype2.id', 'completecast.movie_id', 'completecast2.movie_id', 'completecast.subject_id', 'completecast2.subject_id', 'completecast.id', 'completecast2.id', 'infotype.info', 'infotype2.info', 'infotype.id', 'infotype2.id', 'keyword.keyword', 'keyword2.keyword', 'keyword.phonetic_code', 'keyword2.phonetic_code', 'keyword.id', 'keyword2.id', 'kindtype.kind', 'kindtype2.kind', 'kindtype.id', 'kindtype2.id', 'linktype.link', 'linktype2.link', 'linktype.id', 'linktype2.id', 'moviecompanies.company_id', 'moviecompanies2.company_id', 'moviecompanies.company_type_id', 'moviecompanies2.company_type_id', 'moviecompanies.movie_id', 'moviecompanies2.movie_id', 'moviecompanies.id', 'moviecompanies2.id', 'movieinfo.info_type_id', 'movieinfo2.info_type_id', 'movieinfo.movie_id', 'movieinfo2.movie_id', 'movieinfo.id', 'movieinfo2.id', 'movieinfoidx.id', 'movieinfoidx2.id', 'moviekeyword.keyword_id', 'moviekeyword2.keyword_id', 'moviekeyword.movie_id', 'moviekeyword2.movie_id', 'moviekeyword.id', 'moviekeyword2.id', 'movielink.linked_movie_id', 'movielink2.linked_movie_id', 'movielink.link_type_id', 'movielink2.link_type_id', 'movielink.movie_id', 'movielink2.movie_id', 'movielink.id', 'movielink2.id', 'name.gender', 'name2.gender', 'name.imdb_id', 'name2.imdb_id', 'name.md5sum', 'name2.md5sum', 'name.name', 'name2.name', 'name.surname_pcode', 'name2.surname_pcode', 'name.name_pcode_cf', 'name2.name_pcode_cf', 'name.name_pcode_nf', 'name2.name_pcode_nf', 'name.id', 'name2.id', 'personinfo.info_type_id', 'personinfo2.info_type_id', 'personinfo.person_id', 'personinfo2.person_id', 'personinfo.id', 'personinfo2.id', 'roletype.id', 'roletype2.id', 'roletype.role', 'roletype2.role', 'title.episode_nr', 'title2.episode_nr', 'title.episode_of_id', 'title2.episode_of_id', 'title.imdb_id', 'title2.imdb_id', 'title.kind_id', 'title2.kind_id', 'title.md5sum', 'title2.md5sum', 'title.phonetic_code', 'title2.phonetic_code', 'title.season_nr', 'title2.season_nr', 'title.title', 'title2.title', 'title.production_year', 'title2.production_year', 'title.id', 'title2.id' ] num_of_columns = sum(len(x) for x in self.schema.values()) num_of_relations = len(self.schema) print(num_of_columns) print(num_of_relations) #self.sql_query = list(open(get_queries('job_queries_simple_test_7400.txt'))) #self.sql_query = list(open(get_queries('job_queries_simple_rejoin_test.txt'))) self.sql_query = list( open( get_queries( 'crossval_sens/job_queries_simple_crossval_0_test.txt'))) self.sql_query_num = 0 try: conn = psycopg2.connect(**postgres_connection()) #conn = psycopg2.connect(host="localhost", database="imdbload", user="******", password="******") except: print("I am unable to connect to the database") #print(query) self.cursor = conn.cursor() # self.observation_space = spaces.Box(0,1, shape=(num_of_relations,num_of_columns,), dtype=np.float32) self.observation_space = spaces.Box(0, 1, shape=(num_of_relations * num_of_columns, ), dtype=np.float32) self.action_space = spaces.Discrete( num_of_relations * (num_of_relations - 1)) # #tables*(#tables-1) self.reward_range = [-float(10), float(0)] #[-float('inf'),float(0)] self.action_obj = []
import psycopg2 from helper.paths import get_queries from helper.database_connection import postgres_connection conn = psycopg2.connect(**postgres_connection()) cursor = conn.cursor() sql_query = list( open(get_queries('crossval_sens/job_queries_simple_crossval_0_test.txt'))) subquery = sql_query[3] subquery = "SELECT * FROM company_name, movie_companies WHERE company_name.id = movie_companies.company_id LIMIT 10" cursor.execute(subquery) cursor.fetchone() test = cursor.fetchall() for row in test: print( "Id = ", row[1], )