Exemplo n.º 1
0
    def get_id_and_table_from_id_entity(self, id, id_entity, label):
        """Resolve an entity to a row id, a label and the kind of column it maps to.

        Three lookups are tried in order and the first hit wins:

        1. ``processed_characters`` filtered by ``id_entity = id``.
        2. ``dbp_urls`` filtered by the Wikidata entity URL built from
           ``id_entity``, then ``processed_artworks`` filtered by
           ``id_wikidata``; the caller's ``label`` is returned unchanged.
        3. ``processed_references`` filtered by ``id_entity = id``.

        :param id: value compared against the ``id_entity`` column.
        :param id_entity: suffix appended to
            ``http://www.wikidata.org/entity/`` for the URL lookup.
        :param label: label returned as-is for artwork matches.
        :return: ``(id, label, kind)`` with kind ``'id_character'``,
            ``'id_artwork'`` or ``'id_reference'``; ``False`` when nothing
            matched.
        """
        character = MysqlND.execute_query(
            "SELECT id,label FROM processed_characters WHERE id_entity=%s LIMIT 1",
            (id, ))
        if character.rowcount > 0:
            row = character.fetchone()
            return row[0], row[1], 'id_character'

        entity_url = 'http://www.wikidata.org/entity/' + id_entity
        url_match = MysqlND.execute_query(
            "SELECT id FROM dbp_urls WHERE url = %s LIMIT 1", (entity_url, ))
        if url_match.rowcount > 0:
            dbp_id = url_match.fetchone()[0]
            artwork = MysqlND.execute_query(
                "SELECT id FROM processed_artworks WHERE id_wikidata = %s LIMIT 1",
                (dbp_id, ))
            if artwork.rowcount > 0:
                return artwork.fetchone()[0], label, 'id_artwork'

        reference = MysqlND.execute_query(
            "SELECT id,label FROM processed_references WHERE id_entity=%s LIMIT 1",
            (id, ))
        if reference.rowcount > 0:
            row = reference.fetchone()
            return row[0], row[1], 'id_reference'
        return False
Exemplo n.º 2
0
    def execute(self):
        """Fill the cleaned and tagged text maps for every suggestion, then post-process.

        Each entry of ``self.data`` is cleaned with ``elimina_tildes`` and
        stored in ``self.texts_not_tagged``. For the key ``'-1'`` the tagged
        text is the space-prefixed concatenation of every ``tags`` value
        stored for ``self.file_data``. For any other key the stored ``tags``
        value is reused, or computed with ``calcula_tags`` and written back
        when the stored value is NULL. Finally ``filter_tags_texts`` and
        ``keywords_extraction`` are run.
        """
        select_one = "SELECT tags FROM surveys_suggestions WHERE id_question=%s AND file=%s"
        select_all = "SELECT tags FROM surveys_suggestions WHERE file=%s"
        store_tags = "UPDATE surveys_suggestions SET tags=%s WHERE id_question=%s AND file=%s"

        for key, raw_text in self.data.items():
            cleaned = self.elimina_tildes(raw_text)
            self.texts_not_tagged[key] = cleaned

            if key == '-1':
                rows = MysqlND.execute_query(select_all, (self.file_data, ))
                self.texts_tagged[key] = ''
                for row in rows:
                    self.texts_tagged[key] += ' ' + row[0]
                continue

            stored = MysqlND.execute_query(
                select_one, (key, self.file_data)).fetchone()
            if stored[0] is not None:
                self.texts_tagged[key] = stored[0]
                continue

            # No tags stored yet: compute them once and persist them.
            self.texts_tagged[key] = self.calcula_tags(cleaned)
            MysqlND.execute_query(
                store_tags, (self.texts_tagged[key], key, self.file_data))

        self.filter_tags_texts()

        self.keywords_extraction()
Exemplo n.º 3
0
    def set_time_saved_solr_paragraph(id_entity_monument):
        """
        Stamp a monument row with the current time in ``date_segmentation``.

        :param id_entity_monument: value matched against ``monuments.id_entity``.
        """
        params = (id_entity_monument, )
        MysqlND.execute_query_tourism(
            "UPDATE monuments SET date_segmentation = CURRENT_TIMESTAMP WHERE id_entity = %s",
            params)
Exemplo n.º 4
0
    def save_list_of_narrative_elements(self):
        """Dump every narrative element of four tables into one pickle.

        Each stored entry is ``[label, id, id_entity, table_name]``. Tables are
        read in a fixed order and the combined list is saved under the name
        ``'narrative_elements'``.
        """
        source_tables = (
            'processed_characters',
            'wikidata_entidades',
            'wikidata_instancias_de',
            'processed_references',
        )
        elements = []
        for table in source_tables:
            rows = MysqlND.execute_query(
                "SELECT id,id_entity,label FROM " + table, ())
            # Columns arrive as (id, id_entity, label); the label goes first.
            elements.extend([row[2], row[0], row[1], table] for row in rows)

        save_pickle('narrative_elements', elements)
Exemplo n.º 5
0
    def update_images_data(self):
        """Replace missing character images with one found on Wikidata or the Prado site.

        For each row of ``processed_characters`` the current image is kept when
        ``self.url_exists`` accepts it. Otherwise a replacement is searched in
        this order:

        1. the Wikidata ``P18`` property of the linked entity (only when the
           row has a Wikipedia URL and ``id_entity`` is not ``-1``);
        2. the ``twitter:image`` meta tag of the Museo del Prado page.

        When neither yields a value longer than three characters, the default
        profile picture path is written instead.
        """
        pi = PreprocessInformation()
        # Built once: the request headers are identical for every row.
        hdr = {
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding': 'none',
            'Accept-Language': 'en-US,en;q=0.8',
            'Connection': 'keep-alive'
        }
        query = "SELECT id,image, url_dbp_urls_wikipedia, url_dbp_urls_museodelprado, id_entity FROM processed_characters"
        characters = MysqlND.execute_query(query, ())
        for res in characters:
            id, image, url_dbp_urls_wikipedia, url_dbp_urls_museodelprado, id_entity = res[
                0], res[1], res[2], res[3], res[4]
            if image and self.url_exists(image):
                continue

            image_url = '/public/img/profile_default.png'
            found = False

            # NULL columns come back as None, so test truthiness before len().
            if (url_dbp_urls_wikipedia and len(url_dbp_urls_wikipedia) > 3
                    and id_entity != -1):
                entity_row = MysqlND.execute_query(
                    "SELECT id_entity FROM wikidata_entidades WHERE id = %s",
                    (id_entity, )).fetchone()
                if entity_row is not None:
                    image = pi.extract_property_in_wikidata_url(
                        entity_row[0], 'P18')
                    if len(image) > 3:
                        image_url = image
                        found = True

            if (not found and url_dbp_urls_museodelprado
                    and len(url_dbp_urls_museodelprado) > 3):
                req = urllib2.Request(url_dbp_urls_museodelprado, headers=hdr)
                try:
                    page = urllib2.urlopen(req)
                except urllib2.HTTPError as e:
                    # Report the error body and fall through to the default
                    # image; there is no page to parse for this row.
                    print(e.fp.read())
                else:
                    soup = BeautifulSoup(page.read(), 'html.parser')
                    meta = soup.find('meta', attrs={"name": "twitter:image"})
                    if meta:
                        image = meta['content']
                        if len(image) > 3:
                            image_url = image

            # NOTE(review): rows are read from processed_characters but the
            # image is written to processed_artworks - confirm this is the
            # intended table.
            update = "UPDATE processed_artworks SET image=%s WHERE id=" + str(
                id)
            pprint(update)
            MysqlND.execute_query(update, (str(image_url), ))
Exemplo n.º 6
0
def add_wikidata_id(wikidata_id_entity, wikidata_label, id_instancia_de):
    """Return the ``wikidata_entidades`` row id for an entity, inserting it if absent.

    :param wikidata_id_entity: value stored in / matched against ``id_entity``.
    :param wikidata_label: label stored on insert.
    :param id_instancia_de: stored on insert after conversion with ``str``.
    :return: ``id`` of the existing row, or ``lastrowid`` of the new one.
    """
    # The identifier is bound as a parameter instead of being spliced into
    # the SQL text, so quotes inside it cannot alter the statement.
    query = "SELECT id FROM wikidata_entidades WHERE id_entity = %s"
    results = MysqlND.execute_query(query, (wikidata_id_entity, ))
    if results.rowcount == 0:
        insert = "INSERT INTO wikidata_entidades(id_entity, label, id_instancia_de) VALUES (%s,%s,%s)"
        array = (wikidata_id_entity, wikidata_label, str(id_instancia_de))
        result_insert = MysqlND.execute_query(insert, array)
        return result_insert.lastrowid
    return results.fetchone()[0]
    def save_pickle_label_characters(self):
        """Pickle an ``id -> label`` map of characters plus type-6 references.

        Rows from ``processed_references`` are applied second, so on an id
        collision their label replaces the character label.
        """
        label_queries = (
            "SELECT id,label FROM processed_characters",
            "SELECT id,label FROM processed_references WHERE type=6",
        )
        labels_by_id = {}
        for sql in label_queries:
            for row in MysqlND.execute_query(sql, ()):
                labels_by_id[row[0]] = row[1]

        functions.save_pickle('labels/labels_characters.pickle', labels_by_id)
Exemplo n.º 8
0
    def update_image_and_dates_from_wp_character(self, id_entity):
        """Refresh birth/death data and image of a character from Wikidata.

        The Wikidata identifier is read from ``wikidata_entidades`` and the
        properties ``P569``, ``P19``, ``P570``, ``P20`` and ``P18`` are
        extracted through ``PreprocessInformation``. When a birth date is
        available all five columns are updated; otherwise only the image is
        updated, and only when it is non-empty.

        :param id_entity: ``wikidata_entidades.id`` of the character; ``-1``
            means "no entity" and short-circuits the call.
        :return: ``True`` when ``id_entity`` is ``-1``, otherwise ``None``.
        """
        if id_entity == -1:
            return True

        def plain_date(raw):
            # Drop the calendar name, convert, then strip a midnight time part.
            without_calendar = raw.replace("Gregorian", "")
            return functions.get_timestamp(without_calendar).replace(
                " 00:00:00", "")

        lookup = "SELECT id_entity FROM wikidata_entidades WHERE id = " + str(
            id_entity)
        wikidata_id_entity = MysqlND.execute_query(lookup, ()).fetchone()[0]

        pi = PreprocessInformation()
        fecha_nacimiento = pi.extract_property_in_wikidata_url(
            wikidata_id_entity, 'P569')
        lugar_nacimiento, id_lugar_nacimiento = pi.extract_property_in_wikidata_url(
            wikidata_id_entity, 'P19', get_id_entity=True)
        fecha_fallecimiento = pi.extract_property_in_wikidata_url(
            wikidata_id_entity, 'P570')
        lugar_fallecimiento, id_lugar_fallecimiento = pi.extract_property_in_wikidata_url(
            wikidata_id_entity, 'P20', get_id_entity=True)

        # Prefer the label of the place entity when one was returned.
        if id_lugar_nacimiento != '':
            lugar_nacimiento = pi.get_label_from_wikidata(id_lugar_nacimiento)
        if id_lugar_fallecimiento != '':
            lugar_fallecimiento = pi.get_label_from_wikidata(
                id_lugar_fallecimiento)

        image = pi.extract_property_in_wikidata_url(wikidata_id_entity, 'P18')

        if fecha_nacimiento != '':
            fecha_nacimiento = plain_date(fecha_nacimiento)
        if fecha_fallecimiento != '':
            fecha_fallecimiento = plain_date(fecha_fallecimiento)

        target = " WHERE id_entity=" + str(id_entity)
        if fecha_nacimiento != '':
            update = "UPDATE processed_characters SET fecha_nacimiento=%s,fecha_fallecimiento=%s,lugar_nacimiento=%s,lugar_fallecimiento=%s,image=%s" + target
            values = (
                fecha_nacimiento,
                fecha_fallecimiento,
                lugar_nacimiento,
                lugar_fallecimiento,
                image,
            )
        elif image != '':
            update = "UPDATE processed_characters SET image=%s" + target
            values = (image, )
        else:
            return

        pprint(update)
        MysqlND.execute_query(update, values)
Exemplo n.º 9
0
    def index_itineraries(self, dir="single_topic_itineraries"):
        """Index the itinerary files of a directory into ``index_itineraries``.

        Every file returned by ``functions.read_dir`` is parsed as HTML. Its
        ``<title>`` text, the concatenated ``<p>`` elements and their count are
        stored, together with the labels of the narrative element encoded in
        the file name (``<TYPE>_<id>...``, with ``.txt`` removed). A row is only
        inserted when no row with the same ``ne`` label exists yet.

        :param dir: directory, relative to the working directory, holding the
            itinerary files.
        """
        # Tables that hold the labels for each file-name prefix.
        label_tables = {
            'CH': 'processed_characters',
            'REF': 'processed_references',
        }

        dirs = functions.read_dir("./" + dir)
        for file in dirs:
            ruta = './' + dir + '/' + file
            # The context manager closes the handle as soon as it is read.
            with open(ruta, 'r') as handle:
                html = ''.join(line.rstrip() + ' ' for line in handle)

            soup = BeautifulSoup(html, 'html.parser')
            title = soup.find('title').getText()
            found_paragraphs = soup.find_all('p')
            paragraphs = ''.join(str(p) for p in found_paragraphs)
            count_p = len(found_paragraphs)

            ne_key = file.replace(".txt", "")
            file_data = ne_key.split("_")
            type_ne = file_data[0]
            id_entity = file_data[1]

            label_entity = self.labels_ne_entities[ne_key][0]

            names_ne = []
            if type_ne in label_tables:
                # The id comes from a file name, so it is bound as a
                # parameter rather than concatenated into the statement.
                query = "SELECT label FROM " + label_tables[
                    type_ne] + " WHERE id_entity=%s"
                data = MysqlND.execute_query(query, (id_entity, ))
                if data.rowcount > 0:
                    names_ne.extend(d[0] for d in data)
            elif type_ne == 'ARTW':
                names_ne.append(self.artworks_labels[id_entity])

            names_ne = ','.join(names_ne)

            q = "SELECT id FROM index_itineraries WHERE ne=%s"
            results = MysqlND.execute_query(q, (label_entity, ))
            if results.rowcount == 0:
                query = "INSERT INTO index_itineraries (dir, ne, names_ne, title, count_paragraphs, html) VALUES (%s,%s,%s,%s,%s,%s); "
                MysqlND.execute_query(
                    query,
                    (dir, label_entity, names_ne, title, count_p, paragraphs))
Exemplo n.º 10
0
 def count_times_characters_appears(self):
     """Count Solr documents per character and group them by monument.

     Only the character with ``id = 100`` is selected. For each selected id
     the ``Tourism`` Solr core is searched on ``id_characters``; the number
     of returned documents and a per-``id_entity`` tally are collected in
     local dictionaries. Nothing is returned or stored by this method.
     """
     rows = MysqlND.execute_query_tourism(
         "SELECT id FROM processed_characters WHERE id = 100", ())
     array = {}
     array_grandes = {}
     for row in rows:
         character_id = row[0]
         solr = Solrindex('http://localhost:8983/solr/Tourism')
         documents = solr.search('id_characters:' + str(character_id)).docs
         count = len(documents)
         if count > 10:
             array_grandes[character_id] = count
         array[character_id] = [count]
         if count == 0:
             continue

         # Tally how many of the documents belong to each monument.
         per_monument = {}
         for doc in documents:
             monument = doc['id_entity']
             per_monument[monument] = per_monument.get(monument, 0) + 1
         array[character_id].append({
             'monuments': per_monument,
             'total_monuments': len(per_monument),
         })
Exemplo n.º 11
0
def add_url_to_database(type, url):
    """Return the ``dbp_urls`` id of a URL, inserting the URL when it is new.

    For types 4 to 8 the Spanish Wikipedia host is prepended when the URL does
    not already contain it. Any ``#fragment`` is dropped before the lookup.

    :param type: value stored in / matched against the ``type`` column.
    :param url: URL (or Wikipedia path for types 4-8) to register.
    :return: ``id`` of the matching row, or ``lastrowid`` of the new row.
    """
    if type in (4, 5, 6, 7, 8) and 'https://es.wikipedia.org' not in url:
        url = 'https://es.wikipedia.org' + url
    url = url.split("#", 1)[0]

    # Both values are bound as parameters; previously `type` was spliced
    # into the SQL text while `url` was already bound.
    # NOTE(review): LIKE treats `%` and `_` inside the URL as wildcards -
    # confirm that a pattern match (rather than `=`) is intended here.
    q = "SELECT count(*) as cuenta,id FROM dbp_urls WHERE TYPE=%s AND url LIKE %s"
    cuenta = MysqlND.execute_query(q, (type, url)).fetchone()
    if cuenta[0] == 0:
        query = "INSERT INTO dbp_urls(type, url) VALUES(%s, %s)"
        result_q = MysqlND.execute_query(query, (type, url))
        return result_q.lastrowid
    return cuenta[1]
Exemplo n.º 12
0
    def annotate_next_mp_artwork_data(self):
        """Pre-process the descriptions of artworks not yet segmented.

        For every artwork with ``segmentated=0`` that has no pickle under
        ``descriptions_processed_mp/`` yet, the Solr data is fetched, the
        Wikipedia description is cleaned with ``clean_html_wikipedia``, the
        Museo del Prado description is run through ``process_mp_description``
        and both results are pickled. Artworks without Solr data are skipped
        after printing a message.
        """
        pending = MysqlND.execute_query(
            "SELECT id,id_wikidata,id_wikipedia,id_museodelprado FROM processed_artworks WHERE segmentated=0",
            ())
        for row in pending:
            artwork_id, id_museodelprado = row[0], row[3]
            file = 'descriptions_processed_mp/' + str(artwork_id) + ".pickle"
            print(file)
            if exists_pickle(file):
                continue

            data_artwork = self.get_solr_artwork_data(artwork_id)
            if not data_artwork:
                print("Pasa de mi")
                continue

            # A missing description is treated as an empty string.
            raw_wp = data_artwork[
                'description_wp'] if 'description_wp' in data_artwork else ''
            raw_mp = data_artwork[
                'description_mp'] if 'description_mp' in data_artwork else ''

            cleaned_wp = self.clean_html_wikipedia(raw_wp, artwork_id)
            # The preprocessor is told which artwork it is working on before
            # the Prado description is annotated.
            self.preprocess.id_artwork_processed = artwork_id
            annotated_mp = self.process_mp_description(
                raw_mp, id_museodelprado=id_museodelprado)

            save_pickle(file, {
                "description_wikipedia": cleaned_wp,
                "description_mp": annotated_mp
            })
Exemplo n.º 13
0
 def save_pickle_label_events(self):
     """Pickle an ``id -> label`` map built from ``processed_events``."""
     rows = MysqlND.execute_query("SELECT id,label FROM processed_events", ())
     labels_by_id = {row[0]: row[1] for row in rows}
     functions.save_pickle('labels/labels_events.pickle', labels_by_id)
Exemplo n.º 14
0
 def save_pickle_label_references(self):
     """Pickle an ``id -> label`` map of references whose type is not 2, 3, 6 or 8."""
     rows = MysqlND.execute_query(
         "SELECT id,label FROM processed_references WHERE type!=6 and type!=2 AND type!=8 AND type!=3",
         ())
     labels_by_id = {row[0]: row[1] for row in rows}
     functions.save_pickle('labels/labels_references.pickle', labels_by_id)
Exemplo n.º 15
0
    def add_nodes_characters(self):
        """Add character nodes, and their entity / instance-of chain, to the graph.

        Every character becomes a ``CH_<id>_<label>`` node. When the character
        has an entity (``id_entity != -1``) an ``E_<id>_<label>`` node is added
        with an edge entity -> character; when it also has an instance-of
        (``id_entity_instancia_de != -1``) an ``IO_<id>_<label>`` node is added
        with an edge instance-of -> entity.
        """
        print("add_nodes_characters")
        query = """
            SELECT processed_characters.id,processed_characters.label,processed_characters.id_entity,
    processed_characters.id_entity_instancia_de, wikidata_entidades.label AS label_entity, wikidata_instancias_de.label AS label_instancia_de
    FROM processed_characters
    LEFT JOIN wikidata_entidades ON wikidata_entidades.id = processed_characters.id_entity
    LEFT JOIN wikidata_instancias_de ON wikidata_instancias_de.id = processed_characters.id_entity_instancia_de 
            """
        for row in MysqlND.execute_query(query, ()):
            (id, label, id_entity, id_entity_instancia_de, label_entity,
             label_instancia_de) = row

            character_label = clean_labels_for_graph(label).replace(" ", "_")
            character_key = "_".join(("CH", str(id), character_label))
            self.graph.add_node(character_key)
            if id_entity == -1:
                continue

            entity_key = "_".join(
                ("E", str(id_entity), clean_labels_for_graph(label_entity)))
            self.graph.add_node(entity_key)
            self.graph.add_edge(entity_key, character_key)
            if id_entity_instancia_de == -1:
                continue

            instance_key = "_".join(
                ("IO", str(id_entity_instancia_de),
                 clean_labels_for_graph(label_instancia_de)))
            self.graph.add_node(instance_key)
            self.graph.add_edge(instance_key, entity_key)
Exemplo n.º 16
0
    def segmentate_next_artwork_data(self):
        """Segment the pickled descriptions of pending artworks and index them in Solr.

        For every artwork with ``segmentated=0`` whose pickle exists under
        ``descriptions_processed_mp/``, both descriptions are segmented, the
        segments are merged, and each segment is saved to the ``TFM`` Solr
        core together with the artwork metadata and the narrative elements
        found in its text. The artwork is then flagged ``segmentated=1``.
        """
        query = "SELECT id,id_wikidata,id_wikipedia,id_museodelprado FROM processed_artworks WHERE segmentated=0"
        artworks = MysqlND.execute_query(query, ())
        for artwork in artworks:
            id, id_wikidata, id_wikipedia, id_museodelprado = artwork[
                0], artwork[1], artwork[2], artwork[3]
            file = 'descriptions_processed_mp/' + str(id) + ".pickle"
            print(file)
            if not exists_pickle(file):
                continue

            artwork_mp_description = get_pickle(file)
            description_mp = artwork_mp_description['description_mp']
            description_wikipedia = artwork_mp_description[
                'description_wikipedia']
            description_mp_segmentated = self.segmentate(
                description_mp, id, 'mp')
            description_wikipedia_segmentated = self.segmentate(
                description_wikipedia, id, 'wp')
            segmentated = merge_two_dicts(description_mp_segmentated,
                                          description_wikipedia_segmentated)
            dict_solr = {
                'id_wikidata': id_wikidata,
                'id_wikipedia': id_wikipedia,
                'id_museodelprado': id_museodelprado
            }

            data_artwork = self.get_solr_artwork_data(id)
            dict_solr = self.process_metadata_to_dict(id, dict_solr,
                                                      data_artwork)

            # .items() is available on Python 2 and 3; .iteritems() only
            # exists on Python 2.
            for key, value in segmentated.items():
                print(key + ": " + value)
                dict_solr['id'] = key
                dict_solr['text'] = value

                # The per-segment lists are reset because dict_solr is reused
                # for every segment of the artwork.
                dict_solr['list_artworks_segment'] = []
                dict_solr['list_references_segment'] = []
                dict_solr['list_characters_segment'] = []
                dict_solr['list_events_segment'] = []

                dict_solr = self.process_text_to_dict_narrative_elements(
                    value, dict_solr)
                save_solr_registry(
                    dict_solr, core_solr='http://localhost:8983/solr/TFM')

            # Flag the artwork once, after all of its segments were saved.
            # As before, an artwork that produced no segments stays pending.
            if segmentated:
                MysqlND.execute_query(
                    "UPDATE processed_artworks SET segmentated=1 WHERE id=%s",
                    (id, ))
Exemplo n.º 17
0
 def get_label_from_entity(self, id_entity, label):
     """Return the stored label of a ``wikidata_entidades`` row, or a fallback.

     :param id_entity: ``wikidata_entidades.id`` to look up.
     :param label: value returned when the query reports zero rows.
     :return: the ``label`` column of the first row, or ``label``.
     """
     cursor = MysqlND.execute_query(
         "SELECT label FROM wikidata_entidades WHERE id=" + str(id_entity),
         ())
     if cursor.rowcount != 0:
         return cursor.fetchone()[0]
     return label
Exemplo n.º 18
0
    def save_pickle_label_artworks(self):
        """Pickle an ``id -> label`` map of artworks plus type-2 references.

        Artwork labels come from the ``name`` field of the Solr data; artworks
        without Solr data are left out. Reference rows are applied afterwards,
        so on an id collision the reference label wins.
        """
        labels_by_id = {}
        for artwork in MysqlND.execute_query(
                "SELECT id FROM processed_artworks", ()):
            artwork_id = artwork[0]
            solr_data = self.get_solr_artwork_data(artwork_id, multiple=False)
            if not solr_data:
                continue
            labels_by_id[artwork_id] = solr_data['name']

        reference_rows = MysqlND.execute_query(
            "SELECT id,label FROM processed_references WHERE type=2", ())
        labels_by_id.update((row[0], row[1]) for row in reference_rows)

        functions.save_pickle('labels/labels_artworks.pickle', labels_by_id)
Exemplo n.º 19
0
    def define_classification_types():
        """
        Populate ``classification_segments`` from the distinct ``log.text`` values.

        Each distinct text is inserted with its occurrence count, a display
        form (underscores turned into spaces, then capitalised) and a label
        form (``ñ`` replaced by ``n``, surrounding whitespace stripped).
        """
        insert_sql = "INSERT INTO classification_segments(label,label_pretty,count) VALUES(%s,%s,%s)"
        grouped = MysqlND.execute_query_tourism(
            "SELECT text,COUNT(text) AS cuenta FROM log GROUP BY text ORDER BY cuenta DESC",
            ())
        for row in grouped:
            raw_label, occurrences = row[0], row[1]
            # The display form is derived from the untouched text.
            pretty = raw_label.replace("_", " ").capitalize()
            normalised = raw_label.replace("ñ", 'n').strip()
            MysqlND.execute_query_tourism(
                insert_sql, (normalised, pretty, occurrences))
Exemplo n.º 20
0
def get_id_entity_wikidata_from_id_entidad(id_wikidata_entidades):
    """Fetch ``(id_entity, label, id_instancia_de)`` of a ``wikidata_entidades`` row.

    :param id_wikidata_entidades: ``wikidata_entidades.id`` to look up.
    :return: the three columns of the first row, or ``(-1, '', -1)`` when the
        query reports no rows.
    """
    sql = "SELECT id_entity, label, id_instancia_de FROM wikidata_entidades WHERE id =" + str(
        id_wikidata_entidades)
    cursor = MysqlND.execute_query(sql, ())
    if cursor.rowcount <= 0:
        return -1, '', -1
    row = cursor.fetchone()
    return row[0], row[1], row[2]
Exemplo n.º 21
0
 def add_wikidata_instancia_de(self, wikidata_instancia_de_id_entity,
                               wikidata_instancia_de_label):
     """Return the ``wikidata_instancias_de`` row id, inserting the row if absent.

     The label ``'página de desambiguación de Wikimedia'`` is stored as an
     empty string.

     :param wikidata_instancia_de_id_entity: value stored in / matched
         against ``id_entity``.
     :param wikidata_instancia_de_label: label stored on insert.
     :return: ``id`` of the existing row, or ``lastrowid`` of the new one.
     """
     # The identifier is bound as a parameter (still compared as a string)
     # instead of being spliced between quotes in the SQL text.
     query = "SELECT id FROM wikidata_instancias_de WHERE id_entity = %s"
     results = MysqlND.execute_query(
         query, (str(wikidata_instancia_de_id_entity), ))
     if results.rowcount == 0:
         if wikidata_instancia_de_label == 'página de desambiguación de Wikimedia':
             wikidata_instancia_de_label = ''
         insert = "INSERT INTO wikidata_instancias_de(id_entity, label) VALUES (%s,%s)"
         array = (
             wikidata_instancia_de_id_entity,
             wikidata_instancia_de_label,
         )
         result_insert = MysqlND.execute_query(insert, array)
         return result_insert.lastrowid
     return results.fetchone()[0]
Exemplo n.º 22
0
    def get_classification_id(self, label):
        """Return the ``classification_segments`` id that belongs to a label.

        :param label: segment label; ``'description'`` maps to ``0`` without
            touching the database.
        :return: ``0`` for ``'description'``, otherwise the ``id`` of the
            first row whose ``label`` matches.
        """
        if label == 'description':
            return 0
        # Every en dash ends up as a plain hyphen before the lookup.
        normalised = label.replace("–", "-")
        row = MysqlND.execute_query_tourism(
            "SELECT id FROM classification_segments WHERE label = %s LIMIT 1",
            (normalised, )).fetchone()
        return row[0]
Exemplo n.º 23
0
def get_id_entity_wikidata_from_id_character(id_character):
    """Resolve a character id to the Wikidata data of its linked entity.

    :param id_character: ``processed_characters.id`` to look up.
    :return: whatever ``get_id_entity_wikidata_from_id_entidad`` returns for
        the character's ``id_entity``, or ``(-1, '', -1)`` when the character
        query reports no rows.
    """
    cursor = MysqlND.execute_query(
        "SELECT id_entity, id_entity_instancia_de FROM processed_characters WHERE id ="
        + str(id_character), ())
    if cursor.rowcount <= 0:
        return -1, '', -1
    return get_id_entity_wikidata_from_id_entidad(cursor.fetchone()[0])
Exemplo n.º 24
0
 def add_nodes_artworks(self):
     """Add one ``ARTW_<id>_<label>`` node per artwork to the graph.

     Labels are read from ``self.artworks_labels``, cleaned with
     ``clean_labels_for_graph`` and have their spaces turned into
     underscores.
     """
     print("add_nodes_artworks")
     for row in MysqlND.execute_query(
             "SELECT id FROM processed_artworks  ", ()):
         artwork_id = row[0]
         slug = clean_labels_for_graph(
             self.artworks_labels[artwork_id]).replace(" ", "_")
         self.graph.add_node("ARTW_" + str(artwork_id) + "_" + slug)
Exemplo n.º 25
0
    def get_document(self):
        """Load the survey answers, from the pickle cache or from the Excel file.

        When the text-complete pickle exists, both pickles are loaded into
        ``self.data`` and ``self.text_complete``. Otherwise the sheet
        ``self.sheet_name`` of ``<self.file_data>.xlsx`` is read without a
        header row; column 0 is used as the question id and column 1 as the
        answer. Answers not yet present in ``surveys_suggestions`` are
        inserted, the concatenated answers are stored under the key ``'-1'``
        of ``self.text_complete``, and both structures are pickled.

        :raises Exception: when neither the pickle nor the Excel file exists.
        """
        if functions_files.exists_pickle(self.pickle_file_data_text_complete):
            self.data = functions_files.get_pickle(self.pickle_file_data)
            self.text_complete = functions_files.get_pickle(
                self.pickle_file_data_text_complete)
            return

        if not functions_files.exists_file(self.file_data + '.xlsx'):
            raise Exception("The Excel file does not exist!!")

        # `sheet_name` is the keyword current pandas accepts; the older
        # `sheetname` spelling has been removed.
        df = pd.read_excel(self.file_data + '.xlsx',
                           sheet_name=self.sheet_name,
                           header=None)
        data = {}
        text_complete = ''
        select_sql = "SELECT suggestion FROM surveys_suggestions WHERE id_question=%s AND file=%s AND suggestion=%s"
        insert_sql = "INSERT INTO surveys_suggestions(id_question,suggestion,file) VALUES(%s,%s,%s)"
        # `.values` yields the same row arrays as the removed `get_values()`.
        for i in df.values:
            id = i[0]
            answer = i[1]
            results = MysqlND.execute_query(select_sql, (
                id,
                self.file_data,
                answer,
            ))
            if results.rowcount == 0:
                MysqlND.execute_query(insert_sql, (
                    id,
                    answer,
                    self.file_data,
                ))
            data[id] = answer
            text_complete = text_complete + " " + answer

        self.data = data
        self.text_complete['-1'] = text_complete
        functions_files.save_pickle(self.pickle_file_data, self.data)
        functions_files.save_pickle(self.pickle_file_data_text_complete,
                                    self.text_complete)
Exemplo n.º 26
0
 def update_references_data(self):
     """Rebuild the HTML description of references from their Wikipedia Solr document.

     Only references with ``id > 12887`` whose type is not 2, 6 or 8 are
     processed. The label is searched with
     ``PreprocessInformation.search_label_in_wikidata_and_wikipedia``; when
     the first returned value (the Wikipedia ``dbp_urls`` id) is not ``-1``,
     the matching document of the ``ND_preprocessing`` Solr core is rendered
     as a sequence of ``<h3>``/``<p>`` pairs, one per ``*_string_txt_es``
     field, and stored in ``processed_references.description``. If no
     document is found an empty description is stored.
     """
     query = "SELECT id,url, label, id_entity FROM processed_references WHERE TYPE != 2 AND TYPE != 8 AND TYPE != 6 and id>12887"
     results = MysqlND.execute_query(query, ())
     for res in results:
         id, label = res[0], res[2]
         pprint(id)
         preprocess_information = PreprocessInformation()
         values = preprocess_information.search_label_in_wikidata_and_wikipedia(
             label)
         if not values:
             continue
         id_wikipedia_dbp_urls = values[0]
         if id_wikipedia_dbp_urls == -1:
             continue

         description = ''
         query_solr_final = Solrindex(
             'http://localhost:8983/solr/ND_preprocessing')
         result = query_solr_final.search('id:' + str(id_wikipedia_dbp_urls))
         if len(result.docs) > 0:
             doc = result.docs[0]
             sections = []
             # .items() is available on Python 2 and 3; .iteritems() only
             # exists on Python 2.
             for key, value in doc.items():
                 if '_string_txt_es' not in key:
                     continue
                 heading = key.replace("_string_txt_es", "").replace(
                     "description", "descripción").replace(
                         "_", " ").capitalize()
                 sections.append('<h3 class="wp_title">' + heading +
                                 '</h3><p class="wp_paragraph">' +
                                 functions.cleanhtml(value) + '</p>')
             description = ''.join(sections)

         update = "UPDATE processed_references SET description=%s WHERE id=%s"
         MysqlND.execute_query(update, (description, id))
Exemplo n.º 27
0
    def update_images_artworks(self):
        """Refresh artwork images from the ``twitter:image`` tag of their Prado page.

        For every artwork joined with its Museo del Prado URL the page is
        downloaded and, when it carries a ``twitter:image`` meta tag, the tag
        content is stored as the artwork image. Pages that answer with an
        HTTP error are reported and skipped.
        """
        # Built once: the request headers are identical for every artwork.
        hdr = {
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding': 'none',
            'Accept-Language': 'en-US,en;q=0.8',
            'Connection': 'keep-alive'
        }
        query = "SELECT processed_artworks.id,processed_artworks.image, dbp_urls.url FROM processed_artworks INNER JOIN dbp_urls ON processed_artworks.id_museodelprado = dbp_urls.id"
        artworks = MysqlND.execute_query(query, ())
        for artwork in artworks:
            id, url_museodelprado = artwork[0], artwork[2]

            req = urllib2.Request(url_museodelprado, headers=hdr)
            try:
                page = urllib2.urlopen(req)
            except urllib2.HTTPError as e:
                # Without a response there is nothing to parse; carrying on
                # would reuse the previous artwork's page (or fail on the
                # first row).
                print(e.fp.read())
                continue

            soup = BeautifulSoup(page.read(), 'html.parser')
            meta = soup.find('meta', attrs={"name": "twitter:image"})
            if meta:
                update = "UPDATE processed_artworks SET image=%s WHERE id=" + str(
                    id)
                pprint(update)
                MysqlND.execute_query(update, (meta['content'], ))
            else:
                print("NO ENCUENTRO imagen para " + url_museodelprado)
Exemplo n.º 28
0
def get_id_entity_wikidata_from_id_artwork(id_artwork):
    """Resolve an artwork id to the Wikidata data of its entity, creating it if needed.

    The artwork's ``id_wikidata`` points to a ``dbp_urls`` row whose URL
    carries the Wikidata identifier. When ``wikidata_entidades`` already has
    that identifier its stored columns are returned; otherwise the entity is
    extracted with ``extract_wikipedia_url`` and registered through
    ``add_wikidata_instancia_de`` and ``add_wikidata_id``.

    :param id_artwork: ``processed_artworks.id`` to look up.
    :return: a 3-tuple ``(entity, label, id_instancia_de)``, or
        ``(-1, '', -1)`` when the artwork or its URL row is not found.
    """
    # All three lookups bind their values as parameters instead of
    # concatenating them into the SQL text.
    results = MysqlND.execute_query(
        "SELECT id_wikidata FROM processed_artworks WHERE id = %s",
        (id_artwork, ))
    if results.rowcount <= 0:
        return -1, '', -1
    id_dbp_urls_artwork = results.fetchone()[0]

    results = MysqlND.execute_query(
        "SELECT url FROM dbp_urls WHERE id = %s", (id_dbp_urls_artwork, ))
    if results.rowcount <= 0:
        return -1, '', -1
    url = results.fetchone()[0]
    id_wikidata = url.replace("http://www.wikidata.org/entity/", "")

    results = MysqlND.execute_query(
        "SELECT id_entity, label, id_instancia_de FROM wikidata_entidades WHERE id_entity = %s",
        (str(id_wikidata), ))
    if results.rowcount > 0:
        result = results.fetchone()
        return result[0], result[1], result[2]

    wikidata_url = "http://www.wikidata.org/entity/" + str(id_wikidata)
    # Only the label and the instance-of data of the six returned values are
    # needed here.
    (_id_wikipedia, _id_url_wikipedia_author, _type, wikidata_label,
     wikidata_instancia_de_id_entity,
     wikidata_instancia_de_label) = extract_wikipedia_url(
         id_wikidata, wikidata_url)

    id_instancia_de = add_wikidata_instancia_de(
        wikidata_instancia_de_id_entity, wikidata_instancia_de_label)
    id_entity = add_wikidata_id(id_wikidata, wikidata_label, id_instancia_de)

    # NOTE(review): this branch returns the value produced by
    # add_wikidata_id, while the branch above returns the stored id_entity
    # column - confirm callers expect both.
    return id_entity, wikidata_label, id_instancia_de
Exemplo n.º 29
0
def get_id_event_associated_with_element(year, id_reference, type):
    """Find the event of a given year that is related to an element.

    The related-element column depends on ``type``: 3 and 6 use
    ``id_character_related``, 2 uses ``id_artwork_related`` and anything
    else uses ``id_reference_related``.

    :param year: value compared against ``processed_events.year``.
    :param id_reference: id compared against the selected column.
    :param type: element type that selects the column.
    :return: ``id`` of the first matching event, or ``-1`` when none matched.
    """
    if type in (3, 6):
        column = 'id_character_related'
    else:
        column = 'id_artwork_related' if type == 2 else 'id_reference_related'

    where = "year =" + str(year) + " AND " + column + " = " + str(
        id_reference)
    cursor = MysqlND.execute_query(
        "SELECT id FROM processed_events WHERE " + where, ())
    if cursor.rowcount <= 0:
        return -1
    return cursor.fetchone()[0]
Exemplo n.º 30
0
    def update_artworks_basic_data(self):
        """Store the Solr metadata of every artwork as JSON in ``json_metadata``.

        The ``image``, ``name`` and ``lookfor`` entries are removed from the
        Solr data before it is serialised. Artworks without Solr data are
        reported with a ``NOPE<id>`` message and left untouched.
        """
        for row in MysqlND.execute_query("SELECT id FROM processed_artworks",
                                         ()):
            artwork_id = row[0]
            metadata = self.get_solr_artwork_data(artwork_id, multiple=False)
            if not metadata:
                print("NOPE" + str(artwork_id))
                continue

            # These entries are dropped before the rest is serialised.
            for field in ('image', 'name', 'lookfor'):
                del metadata[field]

            serialised = json.dumps(metadata, ensure_ascii=False)
            statement = "UPDATE processed_artworks SET json_metadata=%s WHERE id=" + str(
                artwork_id)
            MysqlND.execute_query(statement, (serialised, ))