def get_id_and_table_from_id_entity(self, id, id_entity, label):
    """Resolve an entity to its local row: characters, then artworks, then references.

    :param id: value matched against each table's ``id_entity`` column --
        presumably the local wikidata_entidades row id (TODO confirm; it is
        distinct from ``id_entity`` below)
    :param id_entity: Wikidata identifier (e.g. ``Q123``) used to build the
        wikidata.org URL looked up in ``dbp_urls`` for the artwork path
    :param label: fallback label, returned unchanged unless a character or
        reference row provides its own
    :return: ``(id, label, tag)`` with tag one of 'id_character' /
        'id_artwork' / 'id_reference', or ``False`` when nothing matches.
    """
    # 1) Characters: direct match on the entity FK.
    query = "SELECT id,label FROM processed_characters WHERE id_entity=%s LIMIT 1"
    results = MysqlND.execute_query(query, (id, ))
    if results.rowcount > 0:
        result = results.fetchone()
        id, label = result[0], result[1]
        return id, label, 'id_character'
    # 2) Artworks: indirect match via the dbp_urls row holding the
    #    wikidata.org entity URL; falls through to references on a miss.
    query = "SELECT id FROM dbp_urls WHERE url = %s LIMIT 1"
    results = MysqlND.execute_query(
        query, ('http://www.wikidata.org/entity/' + id_entity, ))
    if results.rowcount > 0:
        result = results.fetchone()
        id_dbp = result[0]
        query = "SELECT id FROM processed_artworks WHERE id_wikidata = %s LIMIT 1"
        results = MysqlND.execute_query(query, (id_dbp, ))
        if results.rowcount > 0:
            result = results.fetchone()
            id = result[0]
            return id, label, 'id_artwork'
    # 3) References: same FK match as characters.
    query = "SELECT id,label FROM processed_references WHERE id_entity=%s LIMIT 1"
    results = MysqlND.execute_query(query, (id, ))
    if results.rowcount > 0:
        result = results.fetchone()
        id, label = result[0], result[1]
        return id, label, 'id_reference'
    # No table knows this entity.
    return False
def execute(self):
    """Tag every loaded suggestion, then run tag filtering and keyword extraction.

    For each (id_suggestion, suggestion) in ``self.data``: strip accents,
    fetch cached tags from surveys_suggestions or compute and persist them.
    The pseudo-id '-1' is the aggregate entry: its tags are rebuilt by
    concatenating every stored tag string for this file.
    """
    # for id_suggestion, suggestion in self.data.iteritems():
    for id_suggestion, suggestion in self.data.items():
        # Keep the accent-stripped text alongside the tagged form.
        text = self.elimina_tildes(suggestion)
        self.texts_not_tagged[id_suggestion] = text
        if id_suggestion != '-1':
            query = "SELECT tags FROM surveys_suggestions WHERE id_question=%s AND file=%s"
            results = MysqlND.execute_query(query, (
                id_suggestion,
                self.file_data,
            ))
            result = results.fetchone()
            if result[0] is None:
                # No cached tags yet: compute them and persist for next time.
                self.texts_tagged[id_suggestion] = self.calcula_tags(text)
                update = "UPDATE surveys_suggestions SET tags=%s WHERE id_question=%s AND file=%s"
                MysqlND.execute_query(update, (
                    self.texts_tagged[id_suggestion],
                    id_suggestion,
                    self.file_data,
                ))
            else:
                self.texts_tagged[id_suggestion] = result[0]
        else:
            # Aggregate entry: join the tags of every suggestion in the file.
            query = "SELECT tags FROM surveys_suggestions WHERE file=%s"
            results = MysqlND.execute_query(query, (self.file_data, ))
            self.texts_tagged[id_suggestion] = ''
            for res in results:
                self.texts_tagged[id_suggestion] = self.texts_tagged[
                    id_suggestion] + ' ' + res[0]
    self.filter_tags_texts()
    self.keywords_extraction()
def set_time_saved_solr_paragraph(id_entity_monument):
    """Stamp the monument's date_segmentation column with the DB server's
    current time.

    :param id_entity_monument: value matched against monuments.id_entity
    """
    sql = "UPDATE monuments SET date_segmentation = CURRENT_TIMESTAMP WHERE id_entity = %s"
    MysqlND.execute_query_tourism(sql, (id_entity_monument, ))
def save_list_of_narrative_elements(self):
    """Collect [label, id, id_entity, table] rows from every narrative-element
    table and persist the combined list as the 'narrative_elements' pickle.

    The source table name is carried in each row so consumers know the
    element's origin.
    """
    array_datos = []
    # All four tables expose the same id/id_entity/label triple, so one loop
    # over a fixed tuple of names replaces four copy-pasted query blocks.
    # The table names are hard-coded constants, so the interpolation is safe.
    tables = (
        'processed_characters',
        'wikidata_entidades',
        'wikidata_instancias_de',
        'processed_references',
    )
    for table in tables:
        query = "SELECT id,id_entity,label FROM " + table
        results = MysqlND.execute_query(query, ())
        for result in results:
            id, id_entity, label = result[0], result[1], result[2]
            array_datos.append([label, id, id_entity, table])
    save_pickle('narrative_elements', array_datos)
def update_images_data(self):
    """Refresh the stored image URL for each processed character row.

    For every character: keep the stored image if it still resolves;
    otherwise try the Wikidata P18 image, then the twitter:image meta tag
    of the Museo del Prado page, falling back to a default profile picture.
    """
    pi = PreprocessInformation()
    query = "SELECT id,image, url_dbp_urls_wikipedia, url_dbp_urls_museodelprado, id_entity FROM processed_characters"
    results = MysqlND.execute_query(query, ())
    for res in results:
        id, image, url_dbp_urls_wikipedia, url_dbp_urls_museodelprado, id_entity = res[
            0], res[1], res[2], res[3], res[4]
        exists = False
        if image:
            exists = self.url_exists(image)
        if not exists:
            image_url = '/public/img/profile_default.png'
            if len(url_dbp_urls_wikipedia) > 3:
                if id_entity != -1:
                    # Use a separate, parameterized cursor: the original
                    # reassigned `results`, clobbering the outer cursor
                    # this loop is iterating, and concatenated the id.
                    entity_query = "SELECT id_entity FROM wikidata_entidades WHERE id = %s"
                    entity_results = MysqlND.execute_query(
                        entity_query, (str(id_entity), ))
                    result = entity_results.fetchone()
                    wikidata_id_entity = result[0]
                    # P18 is Wikidata's image property.
                    image = pi.extract_property_in_wikidata_url(
                        wikidata_id_entity, 'P18')
                    if len(image) > 3:
                        image_url = image
                        exists = True
            if len(url_dbp_urls_museodelprado) > 3:
                if not exists:
                    # Browser-like headers: the site rejects default agents.
                    hdr = {
                        'User-Agent':
                        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
                        'Accept':
                        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
                        'Accept-Encoding': 'none',
                        'Accept-Language': 'en-US,en;q=0.8',
                        'Connection': 'keep-alive'
                    }
                    req = urllib2.Request(url_dbp_urls_museodelprado,
                                          headers=hdr)
                    try:
                        page = urllib2.urlopen(req)
                    except urllib2.HTTPError as e:
                        # The original fell through here with `page` stale or
                        # unbound; skip this row instead of reading garbage.
                        print(e.fp.read())
                        continue
                    content = page.read()
                    soup = BeautifulSoup(content, 'html.parser')
                    if soup.find('meta', attrs={"name": "twitter:image"}):
                        image = soup.find('meta',
                                          attrs={"name": "twitter:image"})
                        image = image['content']
                        if len(image) > 3:
                            image_url = image
                            exists = True
            # NOTE(review): rows come from processed_characters but this
            # UPDATE targets processed_artworks -- looks like a copy/paste
            # slip; confirm the intended table before changing it.
            update = "UPDATE processed_artworks SET image=%s WHERE id=%s"
            pprint(update)
            MysqlND.execute_query(update, (str(image_url), str(id)))
def add_wikidata_id(wikidata_id_entity, wikidata_label, id_instancia_de):
    """Insert the Wikidata entity if it is not stored yet; return its row id.

    :param wikidata_id_entity: Wikidata identifier (e.g. ``Q123``)
    :param wikidata_label: human-readable label for the entity
    :param id_instancia_de: FK into wikidata_instancias_de
    :return: lastrowid of the new row, or the existing row's id
    """
    # Parameterized (was string concatenation -- SQL-injection prone).
    query = "SELECT id FROM wikidata_entidades WHERE id_entity = %s"
    results = MysqlND.execute_query(query, (wikidata_id_entity, ))
    if results.rowcount == 0:
        insert = "INSERT INTO wikidata_entidades(id_entity, label, id_instancia_de) VALUES (%s,%s,%s)"
        array = (wikidata_id_entity, wikidata_label, str(id_instancia_de))
        result_insert = MysqlND.execute_query(insert, array)
        return result_insert.lastrowid
    else:
        res = results.fetchone()
        return res[0]
def save_pickle_label_characters(self):
    """Persist a {row id -> label} map covering all characters plus
    type-6 references to labels/labels_characters.pickle."""
    labels = {}
    for sql in ("SELECT id,label FROM processed_characters",
                "SELECT id,label FROM processed_references WHERE type=6"):
        for row in MysqlND.execute_query(sql, ()):
            labels[row[0]] = row[1]
    functions.save_pickle('labels/labels_characters.pickle', labels)
def update_image_and_dates_from_wp_character(self, id_entity):
    """Fill a character's birth/death dates, places and image from Wikidata.

    :param id_entity: FK into wikidata_entidades; -1 means no linked entity
    :return: True immediately when id_entity is -1, otherwise None
    """
    if id_entity == -1:
        return True
    # Parameterized (was string-concatenated).
    query = "SELECT id_entity FROM wikidata_entidades WHERE id = %s"
    results = MysqlND.execute_query(query, (str(id_entity), ))
    result = results.fetchone()
    wikidata_id_entity = result[0]
    pi = PreprocessInformation()
    # P569/P570: birth/death dates; P19/P20: birth/death places; P18: image.
    fecha_nacimiento = pi.extract_property_in_wikidata_url(
        wikidata_id_entity, 'P569')
    lugar_nacimiento, id_entity_lugar_nacimiento = pi.extract_property_in_wikidata_url(
        wikidata_id_entity, 'P19', get_id_entity=True)
    fecha_fallecimiento = pi.extract_property_in_wikidata_url(
        wikidata_id_entity, 'P570')
    lugar_fallecimiento, id_entity_lugar_fallecimiento = pi.extract_property_in_wikidata_url(
        wikidata_id_entity, 'P20', get_id_entity=True)
    # Prefer the canonical Wikidata label for a place when its entity id
    # came back with the property value.
    if id_entity_lugar_nacimiento != '':
        lugar_nacimiento = pi.get_label_from_wikidata(
            id_entity_lugar_nacimiento)
    if id_entity_lugar_fallecimiento != '':
        lugar_fallecimiento = pi.get_label_from_wikidata(
            id_entity_lugar_fallecimiento)
    image = pi.extract_property_in_wikidata_url(wikidata_id_entity, 'P18')
    # Wikidata dates may carry a "Gregorian" marker; strip it and drop the
    # midnight time portion of the resulting timestamp.
    if fecha_nacimiento != '':
        fecha_nacimiento_ = fecha_nacimiento.replace("Gregorian", "")
        fecha_nacimiento = functions.get_timestamp(
            fecha_nacimiento_).replace(" 00:00:00", "")
    if fecha_fallecimiento != '':
        fecha_fallecimiento_ = fecha_fallecimiento.replace("Gregorian", "")
        fecha_fallecimiento = functions.get_timestamp(
            fecha_fallecimiento_).replace(" 00:00:00", "")
    if fecha_nacimiento != '':
        # Full update when a birth date is known (WHERE parameterized;
        # was string-concatenated).
        update = "UPDATE processed_characters SET fecha_nacimiento=%s,fecha_fallecimiento=%s,lugar_nacimiento=%s,lugar_fallecimiento=%s,image=%s WHERE id_entity=%s"
        pprint(update)
        MysqlND.execute_query(update, (
            fecha_nacimiento,
            fecha_fallecimiento,
            lugar_nacimiento,
            lugar_fallecimiento,
            image,
            str(id_entity),
        ))
    elif image != '':
        # Only the image is known.
        update = "UPDATE processed_characters SET image=%s WHERE id_entity=%s"
        pprint(update)
        MysqlND.execute_query(update, (image, str(id_entity)))
def index_itineraries(self, dir="single_topic_itineraries"):
    """Index each itinerary HTML file under ./<dir> into index_itineraries.

    File names encode the narrative element as <TYPE>_<id_entity>.txt where
    TYPE is CH (character), REF (reference) or ARTW (artwork).

    :param dir: directory (relative to cwd) holding the itinerary files
    """
    dirs = functions.read_dir("./" + dir)
    for file in dirs:
        ruta = './' + dir + '/' + file
        # Flatten the file into one single-line HTML string.
        html = ''
        for line in open(ruta, 'r'):
            fila = line.rstrip()
            html += fila + ' '
        soup = BeautifulSoup(html, 'html.parser')
        title = soup.find('title').getText()
        paragraphs = ''
        count_p = 0
        for p in soup.find_all('p'):
            paragraphs += str(p)
            count_p += 1
        file_data = file.replace(".txt", "").split("_")
        type_ne = file_data[0]
        id_entity = file_data[1]
        label_entity = self.labels_ne_entities[file.replace(".txt", "")][0]
        # Gather every known name for the element. Lookups are now
        # parameterized (they were string-concatenated before).
        names_ne = []
        if type_ne == 'CH':
            query = "SELECT label FROM processed_characters WHERE id_entity=%s"
            data = MysqlND.execute_query(query, (str(id_entity), ))
            if data.rowcount > 0:
                for d in data:
                    names_ne.append(d[0])
        elif type_ne == 'REF':
            query = "SELECT label FROM processed_references WHERE id_entity=%s"
            data = MysqlND.execute_query(query, (str(id_entity), ))
            if data.rowcount > 0:
                for d in data:
                    names_ne.append(d[0])
        elif type_ne == 'ARTW':
            names_ne.append(self.artworks_labels[id_entity])
        names_ne = ','.join(names_ne)
        # Insert only when this narrative element is not indexed yet.
        q = "SELECT id FROM index_itineraries WHERE ne=%s"
        array_q = (label_entity, )
        results = MysqlND.execute_query(q, array_q)
        if results.rowcount == 0:
            query = "INSERT INTO index_itineraries (dir, ne, names_ne, title, count_paragraphs, html) VALUES (%s,%s,%s,%s,%s,%s); "
            MysqlND.execute_query(
                query,
                (dir, label_entity, names_ne, title, count_p, paragraphs))
def count_times_characters_appears(self):
    """Count, per character, how many Solr documents mention the character.

    Builds ``array`` ({character id: [count, extra_data]}) and
    ``array_grandes`` (ids with more than 10 hits).
    NOTE(review): neither structure is returned nor persisted -- the results
    are discarded; presumably inspected in a debugger or unfinished.
    """
    # NOTE(review): `WHERE id = 100` restricts the run to one character --
    # looks like a leftover debugging filter; confirm before widening.
    query = "SELECT id FROM processed_characters WHERE id = 100"
    results = MysqlND.execute_query_tourism(query, ())
    array = {}
    array_grandes = {}
    for result in results:
        id = result[0]
        # Full-text search for segments tagged with this character id.
        query_solr = Solrindex('http://localhost:8983/solr/Tourism')
        results_solr = query_solr.search('id_characters:' + str(id))
        documents = results_solr.docs
        count = len(documents)
        if count > 10:
            array_grandes[id] = count
        array[id] = [count]
        if count > 0:
            # Per-monument breakdown of the hits (id_entity identifies the
            # monument each Solr document belongs to).
            extra_data = {}
            docus = {}
            for doc in documents:
                id_entity = doc['id_entity']
                if id_entity not in docus:
                    docus[id_entity] = 1
                else:
                    docus[id_entity] += 1
            total_monuments = len(docus)
            extra_data['monuments'] = docus
            extra_data['total_monuments'] = total_monuments
            array[id].append(extra_data)
def add_url_to_database(type, url):
    """Insert a url into dbp_urls (normalizing Wikipedia urls) and return its id.

    :param type: numeric url category; 4-8 are Spanish-Wikipedia types
    :param url: the url, possibly a relative /wiki/... path for Wikipedia types
    :return: lastrowid of the inserted row, or the existing row's id
    """
    # Wikipedia-type urls: make absolute and strip any #fragment so
    # duplicates normalize to a single canonical form.
    if type == 4 or type == 5 or type == 6 or type == 7 or type == 8:
        if 'https://es.wikipedia.org' not in url:
            url = 'https://es.wikipedia.org' + url
        url = url.split("#", 1)[0]
    # Fully parameterized (TYPE was string-concatenated before).
    q = "SELECT count(*) as cuenta,id FROM dbp_urls WHERE TYPE=%s AND url LIKE %s"
    cuenta = MysqlND.execute_query(q, (str(type), url)).fetchone()
    if cuenta[0] == 0:
        query = "INSERT INTO dbp_urls(type, url) VALUES(%s, %s)"
        result_q = MysqlND.execute_query(query, (str(type), url))
        return result_q.lastrowid
    else:
        return cuenta[1]
def annotate_next_mp_artwork_data(self):
    """Annotate the Museo del Prado / Wikipedia descriptions of every artwork
    not yet segmented, caching the result per artwork as a pickle under
    descriptions_processed_mp/<id>.pickle."""
    query = "SELECT id,id_wikidata,id_wikipedia,id_museodelprado FROM processed_artworks WHERE segmentated=0"
    artworks = MysqlND.execute_query(query, ())
    for artwork in artworks:
        # Only id and id_museodelprado are used here; the other selected
        # columns are consumed later by the segmentation step.
        id, id_museodelprado = artwork[0], artwork[3]
        file = 'descriptions_processed_mp/' + str(id) + ".pickle"
        print(file)
        if exists_pickle(file):
            continue  # already annotated
        data_artwork = self.get_solr_artwork_data(id)
        if not data_artwork:
            print("Pasa de mi")
            continue
        # dict.get with a default replaces the original key-presence
        # if/else pairs.
        description_wikipedia = data_artwork.get('description_wp', '')
        description_mp = data_artwork.get('description_mp', '')
        # 1. Clean the raw Wikipedia HTML.
        description_wikipedia = self.clean_html_wikipedia(
            description_wikipedia, id)
        # 2. Annotate the Museo del Prado description.
        self.preprocess.id_artwork_processed = id
        description_mp = self.process_mp_description(
            description_mp, id_museodelprado=id_museodelprado)
        data_dict = {
            "description_wikipedia": description_wikipedia,
            "description_mp": description_mp
        }
        save_pickle(file, data_dict)
def save_pickle_label_events(self):
    """Persist a {event id -> label} map for all processed events to
    labels/labels_events.pickle."""
    rows = MysqlND.execute_query("SELECT id,label FROM processed_events", ())
    event_labels = {row[0]: row[1] for row in rows}
    functions.save_pickle('labels/labels_events.pickle', event_labels)
def save_pickle_label_references(self):
    """Persist a {reference id -> label} map (excluding types 2, 3, 6 and 8,
    which are handled by the other label pickles) to
    labels/labels_references.pickle."""
    sql = "SELECT id,label FROM processed_references WHERE type!=6 and type!=2 AND type!=8 AND type!=3"
    rows = MysqlND.execute_query(sql, ())
    reference_labels = {row[0]: row[1] for row in rows}
    functions.save_pickle('labels/labels_references.pickle', reference_labels)
def add_nodes_characters(self):
    """Add graph nodes for each character, its linked Wikidata entity and the
    entity's instance-of class, with edges entity -> character and
    instance_of -> entity. Node keys are "<prefix>_<id>_<label>".
    """
    print("add_nodes_characters")
    query = """ SELECT processed_characters.id,processed_characters.label,processed_characters.id_entity, processed_characters.id_entity_instancia_de, wikidata_entidades.label AS label_entity, wikidata_instancias_de.label AS label_instancia_de FROM processed_characters LEFT JOIN wikidata_entidades ON wikidata_entidades.id = processed_characters.id_entity LEFT JOIN wikidata_instancias_de ON wikidata_instancias_de.id = processed_characters.id_entity_instancia_de """
    results = MysqlND.execute_query(query, ())
    for result in results:
        id, label, id_entity, id_entity_instancia_de, label_entity, label_instancia_de = result[
            0], result[1], result[2], result[3], result[4], result[5]
        label = clean_labels_for_graph(label)
        # Character node: CH_<id>_<label with underscores>.
        key = "CH_" + str(id) + "_" + label.replace(" ", "_")
        # self.graph_ids.add_node(id, label=label)
        self.graph.add_node(key)
        if id_entity != -1:
            # NOTE(review): label_entity comes from a LEFT JOIN and could be
            # NULL even when id_entity != -1; clean_labels_for_graph(None)
            # would then fail -- confirm the data invariant.
            label_entity = clean_labels_for_graph(label_entity)
            key_entity = "E_" + str(id_entity) + "_" + label_entity
            self.graph.add_node(key_entity)
            self.graph.add_edge(key_entity, key)
        if id_entity_instancia_de != -1:
            label_instancia_de = clean_labels_for_graph(
                label_instancia_de)
            key_instance_of = "IO_" + str(
                id_entity_instancia_de) + "_" + label_instancia_de
            self.graph.add_node(key_instance_of)
            # NOTE(review): key_entity is only bound when id_entity != -1; a
            # row with id_entity == -1 but id_entity_instancia_de != -1 would
            # raise NameError (or reuse a stale key from a previous
            # iteration) here -- verify whether that combination can occur.
            self.graph.add_edge(key_instance_of, key_entity)
def segmentate_next_artwork_data(self):
    """Segment every cached artwork description, push each segment to Solr
    and mark the artwork as segmented.

    Reads the per-artwork pickle produced by annotate_next_mp_artwork_data;
    artworks without a pickle are skipped until annotated.
    """
    query = "SELECT id,id_wikidata,id_wikipedia,id_museodelprado FROM processed_artworks WHERE segmentated=0"
    artworks = MysqlND.execute_query(query, ())
    for artwork in artworks:
        id, id_wikidata, id_wikipedia, id_museodelprado = artwork[
            0], artwork[1], artwork[2], artwork[3]
        file = 'descriptions_processed_mp/' + str(id) + ".pickle"
        print(file)
        if exists_pickle(file):
            artwork_mp_description = get_pickle(file)
            description_mp = artwork_mp_description['description_mp']
            description_wikipedia = artwork_mp_description[
                'description_wikipedia']
            # Segment both sources and merge the {segment_id: text} dicts.
            description_mp_segmentated = self.segmentate(
                description_mp, id, 'mp')
            description_wikipedia_segmentated = self.segmentate(
                description_wikipedia, id, 'wp')
            segmentated = merge_two_dicts(
                description_mp_segmentated, description_wikipedia_segmentated)
            dict_solr = {
                'id_wikidata': id_wikidata,
                'id_wikipedia': id_wikipedia,
                'id_museodelprado': id_museodelprado
            }
            data_artwork = self.get_solr_artwork_data(id)
            dict_solr = self.process_metadata_to_dict(
                id, dict_solr, data_artwork)
            # .items() for Python 2/3 compatibility (was .iteritems()),
            # matching the rest of the file.
            for key, value in segmentated.items():
                print(key + ": " + value)
                dict_solr['id'] = key
                dict_solr['text'] = value
                dict_solr['list_artworks_segment'] = []
                dict_solr['list_references_segment'] = []
                dict_solr['list_characters_segment'] = []
                dict_solr['list_events_segment'] = []
                dict_solr = self.process_text_to_dict_narrative_elements(
                    value, dict_solr)
                save_solr_registry(
                    dict_solr, core_solr='http://localhost:8983/solr/TFM')
            # Parameterized (was string-concatenated).
            MysqlND.execute_query(
                "UPDATE processed_artworks SET segmentated=1 WHERE id=%s",
                (str(id), ))
def get_label_from_entity(self, id_entity, label):
    """Return the stored Wikidata label for id_entity, or `label` as fallback.

    :param id_entity: wikidata_entidades primary key
    :param label: value returned when the entity has no stored row
    """
    # Parameterized (was string-concatenated).
    query = "SELECT label FROM wikidata_entidades WHERE id=%s"
    results = MysqlND.execute_query(query, (str(id_entity), ))
    if results.rowcount == 0:
        return label
    else:
        return results.fetchone()[0]
def save_pickle_label_artworks(self):
    """Persist a {artwork id -> display label} map, taking labels from each
    artwork's Solr document plus type-2 references, to
    labels/labels_artworks.pickle."""
    labels = {}
    for row in MysqlND.execute_query("SELECT id FROM processed_artworks", ()):
        artwork_id = row[0]
        solr_data = self.get_solr_artwork_data(artwork_id, multiple=False)
        if solr_data:
            labels[artwork_id] = solr_data['name']
    type2_sql = "SELECT id,label FROM processed_references WHERE type=2"
    for row in MysqlND.execute_query(type2_sql, ()):
        labels[row[0]] = row[1]
    functions.save_pickle('labels/labels_artworks.pickle', labels)
def define_classification_types():
    """Seed classification_segments from the distinct texts found in the log
    table, most frequent first, storing both a normalized label and a
    display-friendly variant."""
    select_sql = "SELECT text,COUNT(text) AS cuenta FROM log GROUP BY text ORDER BY cuenta DESC"
    insert_sql = "INSERT INTO classification_segments(label,label_pretty,count) VALUES(%s,%s,%s)"
    for row in MysqlND.execute_query_tourism(select_sql, ()):
        raw_text, occurrences = row[0], row[1]
        # Display form keeps accents; stored label is ascii-normalized.
        pretty = raw_text.replace("_", " ").capitalize()
        normalized = raw_text.replace("ñ", 'n').strip()
        MysqlND.execute_query_tourism(insert_sql, (
            normalized,
            pretty,
            occurrences,
        ))
def get_id_entity_wikidata_from_id_entidad(id_wikidata_entidades):
    """Return (id_entity, label, id_instancia_de) for a wikidata_entidades row.

    :param id_wikidata_entidades: wikidata_entidades primary key
    :return: the triple, or (-1, '', -1) when the row does not exist
    """
    # Parameterized (was string-concatenated).
    query = "SELECT id_entity, label, id_instancia_de FROM wikidata_entidades WHERE id = %s"
    results = MysqlND.execute_query(query, (str(id_wikidata_entidades), ))
    if results.rowcount > 0:
        result = results.fetchone()
        return result[0], result[1], result[2]
    else:
        return -1, '', -1
def add_wikidata_instancia_de(self, wikidata_instancia_de_id_entity,
                              wikidata_instancia_de_label):
    """Insert the instance-of entity if not stored yet; return its row id.

    :param wikidata_instancia_de_id_entity: Wikidata id of the class entity
    :param wikidata_instancia_de_label: its label; blanked for Wikimedia
        disambiguation pages, which carry no useful meaning
    :return: lastrowid of the new row, or the existing row's id
    """
    # Parameterized (was quoted string concatenation -- SQL-injection prone).
    query = "SELECT id FROM wikidata_instancias_de WHERE id_entity = %s"
    results = MysqlND.execute_query(
        query, (str(wikidata_instancia_de_id_entity), ))
    if results.rowcount == 0:
        if wikidata_instancia_de_label == 'página de desambiguación de Wikimedia':
            wikidata_instancia_de_label = ''
        insert = "INSERT INTO wikidata_instancias_de(id_entity, label) VALUES (%s,%s)"
        array = (
            wikidata_instancia_de_id_entity,
            wikidata_instancia_de_label,
        )
        result_insert = MysqlND.execute_query(insert, array)
        return result_insert.lastrowid
    else:
        res = results.fetchone()
        return res[0]
def get_classification_id(self, label):
    """Map a segment label to its classification_segments row id.

    'description' is a reserved pseudo-label mapped to 0 without a DB hit.
    An unknown label makes fetchone() return None and raises TypeError on
    the subscript below (original behavior, preserved).
    """
    # print(label)
    if label == 'description':
        return 0
    # Normalize en-dashes to plain hyphens. The original first replaced
    # "-" with "–" and then "–" back to "-", which nets out to exactly
    # this single replacement.
    label = label.replace("–", "-")
    query = "SELECT id FROM classification_segments WHERE label = %s LIMIT 1"
    result = MysqlND.execute_query_tourism(query, (label, )).fetchone()
    return result[0]
def get_id_entity_wikidata_from_id_character(id_character):
    """Resolve a character row to its Wikidata entity triple.

    :param id_character: processed_characters primary key
    :return: (id_entity, label, id_instancia_de) via
        get_id_entity_wikidata_from_id_entidad, or (-1, '', -1) on a miss
    """
    # Parameterized (was string-concatenated).
    query = "SELECT id_entity, id_entity_instancia_de FROM processed_characters WHERE id = %s"
    results = MysqlND.execute_query(query, (str(id_character), ))
    if results.rowcount > 0:
        result = results.fetchone()
        id_entity = result[0]
        return get_id_entity_wikidata_from_id_entidad(id_entity)
    else:
        return -1, '', -1
def add_nodes_artworks(self):
    """Add one graph node per artwork, keyed ARTW_<id>_<label>, with the
    label cleaned and its spaces replaced by underscores."""
    print("add_nodes_artworks")
    query = """SELECT id FROM processed_artworks """
    results = MysqlND.execute_query(query, ())
    for result in results:
        id = result[0]
        label_artwork = self.artworks_labels[id]
        # Single space->underscore pass; the original repeated the
        # replacement redundantly when building the key.
        label_artwork = clean_labels_for_graph(label_artwork).replace(
            " ", "_")
        key = "ARTW_" + str(id) + "_" + label_artwork
        self.graph.add_node(key)
def get_document(self):
    """Load survey suggestions from cache pickles or, failing that, from the
    source Excel file (inserting any unseen suggestion into the DB).

    Populates self.data ({question id: answer}) and self.text_complete
    (key '-1' holds the concatenation of every answer).

    :raises Exception: when neither the pickle nor the .xlsx file exists
    """
    if functions_files.exists_pickle(self.pickle_file_data_text_complete):
        # Cached: restore both structures from their pickles.
        self.data = functions_files.get_pickle(self.pickle_file_data)
        self.text_complete = functions_files.get_pickle(
            self.pickle_file_data_text_complete)
    else:
        if functions_files.exists_file(self.file_data + '.xlsx'):
            # NOTE(review): `sheetname` was renamed to `sheet_name` in
            # pandas 0.21 and DataFrame.get_values() is deprecated -- this
            # code pins an old pandas version; confirm before upgrading.
            df = pd.read_excel(self.file_data + '.xlsx',
                               sheetname=self.sheet_name,
                               header=None)
            data = {}
            text_complete = ''
            for i in df.get_values():
                # Column 0: question id; column 1: the suggestion text.
                id = i[0]
                answer = i[1]
                # Insert the suggestion only if this exact
                # (question, file, suggestion) triple is unseen.
                query = "SELECT suggestion FROM surveys_suggestions WHERE id_question=%s AND file=%s AND suggestion=%s"
                results = MysqlND.execute_query(query, (
                    id,
                    self.file_data,
                    answer,
                ))
                if results.rowcount == 0:
                    query = "INSERT INTO surveys_suggestions(id_question,suggestion,file) VALUES(%s,%s,%s)"
                    MysqlND.execute_query(query, (
                        id,
                        answer,
                        self.file_data,
                    ))
                data[id] = answer
                text_complete = text_complete + " " + answer
            self.data = data
            # Assumes self.text_complete is already a dict -- TODO confirm
            # it is initialized in __init__.
            self.text_complete['-1'] = text_complete
            functions_files.save_pickle(self.pickle_file_data, self.data)
            functions_files.save_pickle(
                self.pickle_file_data_text_complete, self.text_complete)
        else:
            raise Exception("The Excel file does not exist!!")
def update_references_data(self):
    """Enrich non-artwork references (types other than 2/6/8, id > 12887)
    with a Wikipedia-derived HTML description looked up via Wikidata."""
    query = "SELECT id,url, label, id_entity FROM processed_references WHERE TYPE != 2 AND TYPE != 8 AND TYPE != 6 and id>12887"
    results = MysqlND.execute_query(query, ())
    for res in results:
        data = {}
        id, url, label, id_entity = res[0], res[1], res[2], res[3]
        pprint(id)
        data['id'] = id
        data['label'] = label
        data['url'] = url
        preprocess_information = PreprocessInformation()
        values = preprocess_information.search_label_in_wikidata_and_wikipedia(
            label)
        if values:
            # NOTE(review): wikidata_label is unpacked twice (values[3] and
            # values[7]); only id_wikipedia_dbp_urls is used below.
            id_wikipedia_dbp_urls, id_url_wikipedia_author, type, wikidata_label, wikidata_instancia_de_id_entity, wikidata_instancia_de_label, wikidata_id_entity, wikidata_label = values[
                0], values[1], values[2], values[3], values[4], values[
                    5], values[6], values[7]
            data['description'] = ''
            if id_wikipedia_dbp_urls != -1:
                # Fetch the preprocessed Wikipedia sections from Solr and
                # build a section-title + paragraph HTML description.
                query = 'id:' + str(id_wikipedia_dbp_urls)
                query_solr_final = Solrindex(
                    'http://localhost:8983/solr/ND_preprocessing')
                result = query_solr_final.search(query)
                if len(result.docs) > 0:
                    doc = result.docs[0]
                    texto = ''
                    # .items() for Python 2/3 compatibility (was
                    # .iteritems()), matching the rest of the file.
                    for key, value in doc.items():
                        if '_string_txt_es' in key:
                            texto += '<h3 class="wp_title">' + key.replace(
                                "_string_txt_es", ""
                            ).replace(
                                "description", "descripción"
                            ).replace("_", " ").capitalize(
                            ) + '</h3><p class="wp_paragraph">' + functions.cleanhtml(
                                value) + '</p>'
                    data['description'] = texto
            # Parameterized (was string-concatenated).
            update = "UPDATE processed_references SET description=%s WHERE id=%s"
            MysqlND.execute_query(update, (data['description'], str(id)))
def update_images_artworks(self):
    """Scrape each artwork's Museo del Prado page and store the url found in
    its twitter:image meta tag as the artwork image."""
    query = "SELECT processed_artworks.id,processed_artworks.image, dbp_urls.url FROM processed_artworks INNER JOIN dbp_urls ON processed_artworks.id_museodelprado = dbp_urls.id"
    artworks = MysqlND.execute_query(query, ())
    for artwork in artworks:
        id, image, url_museodelprado = artwork[0], artwork[1], artwork[2]
        # Browser-like headers: the site rejects default urllib agents.
        hdr = {
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding': 'none',
            'Accept-Language': 'en-US,en;q=0.8',
            'Connection': 'keep-alive'
        }
        req = urllib2.Request(url_museodelprado, headers=hdr)
        try:
            page = urllib2.urlopen(req)
        except urllib2.HTTPError as e:
            # The original fell through here with `page` stale or unbound,
            # crashing or reusing the previous response; skip the row.
            print(e.fp.read())
            continue
        content = page.read()
        soup = BeautifulSoup(content, 'html.parser')
        if soup.find('meta', attrs={"name": "twitter:image"}):
            image_s = soup.find('meta', attrs={"name": "twitter:image"})
            image_s = image_s['content']
            # Parameterized (was string-concatenated).
            update = "UPDATE processed_artworks SET image=%s WHERE id=%s"
            pprint(update)
            MysqlND.execute_query(update, (image_s, str(id)))
        else:
            print("NO ENCUENTRO imagen para " + url_museodelprado)
def get_id_entity_wikidata_from_id_artwork(id_artwork):
    """Resolve an artwork row to its Wikidata entity triple.

    Follows processed_artworks.id_wikidata -> dbp_urls.url ->
    wikidata_entidades; when the entity is not cached locally yet, it is
    fetched from Wikidata and inserted first.

    :param id_artwork: processed_artworks primary key
    :return: (id_entity, label, id_instancia_de), or (-1, '', -1) on any miss
    """
    # All lookups parameterized (they were string-concatenated before);
    # guard clauses replace the original deep nesting.
    query = "SELECT id_wikidata FROM processed_artworks WHERE id = %s"
    results = MysqlND.execute_query(query, (str(id_artwork), ))
    if results.rowcount == 0:
        return -1, '', -1
    id_dbp_urls_artwork = results.fetchone()[0]
    query = "SELECT url FROM dbp_urls WHERE id = %s"
    results = MysqlND.execute_query(query, (str(id_dbp_urls_artwork), ))
    if results.rowcount == 0:
        return -1, '', -1
    url = results.fetchone()[0]
    id_wikidata = url.replace("http://www.wikidata.org/entity/", "")
    query = "SELECT id_entity, label, id_instancia_de FROM wikidata_entidades WHERE id_entity = %s"
    results = MysqlND.execute_query(query, (str(id_wikidata), ))
    if results.rowcount > 0:
        result = results.fetchone()
        return result[0], result[1], result[2]
    # Not cached locally: fetch from Wikidata, insert the instance-of and
    # entity rows, then return the freshly created triple.
    wikidata_url = "http://www.wikidata.org/entity/" + str(id_wikidata)
    id_wikipedia_, id_url_wikipedia_author, type, wikidata_label, wikidata_instancia_de_id_entity, wikidata_instancia_de_label = extract_wikipedia_url(
        id_wikidata, wikidata_url)
    id_instancia_de = add_wikidata_instancia_de(
        wikidata_instancia_de_id_entity, wikidata_instancia_de_label)
    id_entity = add_wikidata_id(id_wikidata, wikidata_label,
                                id_instancia_de)
    return id_entity, wikidata_label, id_instancia_de
def get_id_event_associated_with_element(year, id_reference, type):
    """Find the processed_events row linking a year to a related element.

    :param year: event year matched against processed_events.year
    :param id_reference: id of the related element
    :param type: element type; 3/6 -> character, 2 -> artwork, else reference
    :return: the event id, or -1 when no row matches
    """
    if type == 3 or type == 6:
        column = 'id_character_related'
    elif type == 2:
        column = 'id_artwork_related'
    else:
        column = 'id_reference_related'
    # `column` comes from the closed set above, so interpolating the
    # identifier is safe; the values are parameterized (they were
    # string-concatenated before).
    query = "SELECT id FROM processed_events WHERE year = %s AND " + column + " = %s"
    results = MysqlND.execute_query(query, (str(year), str(id_reference)))
    if results.rowcount > 0:
        result = results.fetchone()
        return result[0]
    else:
        return -1
def update_artworks_basic_data(self):
    """Store each artwork's Solr metadata (minus redundant fields) as a JSON
    blob in processed_artworks.json_metadata."""
    query = "SELECT id FROM processed_artworks"
    artworks = MysqlND.execute_query(query, ())
    for artwork in artworks:
        id = artwork[0]
        data = self.get_solr_artwork_data(id, multiple=False)
        if data:
            # image/name live in their own columns and lookfor is a
            # Solr-only search field; drop them from the stored JSON.
            del data['image']
            del data['name']
            del data['lookfor']
            json_data = json.dumps(data, ensure_ascii=False)
            # Parameterized (was string-concatenated).
            update = "UPDATE processed_artworks SET json_metadata=%s WHERE id=%s"
            MysqlND.execute_query(update, (json_data, str(id)))
        else:
            print("NOPE" + str(id))