Exemplo n.º 1
0
    def get_artist_id(self, artist_url):
        """Resolve ``artist_url`` to its artist id, or ``None`` when unknown.

        Lookup happens in two stages:

        1. If the url is already mapped in ``KEY_INFO`` and the mapped key has
           an id in ``ARTIST_INFO``, that id is returned straight away.
        2. Otherwise ``self.key_maker(artist_url)`` is asked for a key and the
           id is looked up in ``ARTIST_INFO`` under that key.

        When no id can be found a "FATAL ERROR" line is printed and ``None``
        is returned, so the caller can skip the rest of the data for this
        artist (the id is only needed for the artworks table).
        """
        # Stage 1: url -> key -> id, using what is already cached.
        if artist_url in KEY_INFO:
            cached_key = KEY_INFO[artist_url]
            if cached_key in ARTIST_INFO:
                return ARTIST_INFO[cached_key]

        # Stage 2: let key_maker produce the key. (Per the original author's
        # note it also writes the artist data to the db -- not visible here.)
        made_key = self.key_maker(artist_url)

        if made_key is None or artist_url is None:
            # If it ever comes to here, the page will not have an Artist.
            print("FATAL ERROR :: Artist_id not found. Artist_url broken")
            return None

        if made_key not in ARTIST_INFO:
            print("FATAL ERROR :: Artist_id not found.")
            return None

        return ARTIST_INFO[made_key]
Exemplo n.º 2
0
    def get_artist_id(self, artist_url):
        """Return the artist id for ``artist_url``.

        If the url is already mapped in ``KEY_INFO``, the id stored in
        ``ARTIST_INFO`` under the mapped key is returned (``None`` when that
        key has no entry). Urls that are not mapped yet are delegated to
        ``self.artist_id_slave``, whose result is returned unchanged.
        """
        if artist_url not in KEY_INFO:
            # Unknown url: the slave returns the artist_id (or None).
            return self.artist_id_slave(artist_url)

        # Known url: dict.get yields None when the key has no id recorded.
        return ARTIST_INFO.get(KEY_INFO.get(artist_url))
Exemplo n.º 3
0
 def read_artist_data(self):
     """Rebuild ``ARTIST_INFO`` from the rows of the ``artists`` table.

     Calls ``self.create_table_artist()`` first, then reads every row, empties
     ``ARTIST_INFO`` and stores ``int(row[4])`` (the artist id column) under
     the key that ``Artist.key_maker(row)`` produces for that row.

     ``KEY_INFO`` (artist url -> key) is not touched here: the artist url is
     not saved in the db, so that mapping can only be filled in later, when
     the data is being written.
     """
     self.create_table_artist()
     self.my_cursor.execute("""SELECT * FROM artists""")
     rows = self.my_cursor.fetchall()

     # Only drop the old cache once the fresh rows have been fetched.
     ARTIST_INFO.clear()
     ARTIST_INFO.update(
         (Artist.key_maker(row), int(row[4])) for row in rows
     )
Exemplo n.º 4
0
    def artist_id_slave(self, artist_url):
        """Fetch the artist page, process it, and return the artist id.

        The url is removed from ``visited`` and fetched via
        ``TheMiner.fetch_page``. The resulting soup is handed to
        ``self.get_artist_data``; afterwards the id is resolved through
        ``KEY_INFO`` (url -> key) and ``ARTIST_INFO`` (key -> id).

        Returns the artist id, or ``None`` (after printing a diagnostic line)
        when the page could not be fetched or either mapping has no entry.
        """
        visited.discard(artist_url)
        page = TheMiner.fetch_page(artist_url)
        if page is None:
            print("ARTIST_ID_SLAVE : Soup not returned")
            return None

        self.get_artist_data(page, artist_url)

        # Getting the key from KEY_INFO.
        if artist_url not in KEY_INFO:
            print("ARTIST_ID_SLAVE : Could not find artist_id")
            return None
        lookup_key = KEY_INFO[artist_url]

        # Getting artist_id using the key from ARTIST_INFO.
        if lookup_key not in ARTIST_INFO:
            print("ARTIST_ID_SLAVE : Artist id not in ARTIST_INFO")
            return None
        return ARTIST_INFO[lookup_key]
Exemplo n.º 5
0
    def get_artist_id(self, artist_url):
        """Return the artist id for ``artist_url``, or ``None`` if not found.

        The url is first resolved through ``KEY_INFO`` (url -> key) and
        ``ARTIST_INFO`` (key -> artist id). If the url is not mapped, or the
        mapped key has no id recorded, ``self.key_maker(artist_url)`` is used
        to obtain a key and the lookup is repeated with it.

        A "FATAL ERROR" line is printed and ``None`` is returned when no id
        can be determined, so the caller can skip the remaining data for this
        artist (the id is used in the artworks table only).
        """
        if artist_url in KEY_INFO:
            key = KEY_INFO[artist_url]
            # Guarded lookup: a url can be mapped to a key that has no id in
            # ARTIST_INFO. Indexing directly would raise KeyError instead of
            # following the documented "return None" contract, so fall
            # through to the key_maker path in that case.
            if key in ARTIST_INFO:
                return ARTIST_INFO[key]

        key = self.key_maker(artist_url)
        if key is None or artist_url is None:
            # If it ever comes to here, the page will not have an Artist.
            print("FATAL ERROR :: Artist_id not found. Artist_url broken")
            return None

        if key not in ARTIST_INFO:
            print("FATAL ERROR :: Artist_id not found.")
            return None

        return ARTIST_INFO[key]
Exemplo n.º 6
0
    def insert_data_artists(self, *args):

        # True returned by either(any) of these comparison functions, eliminates the entry.

        # Returns boolean. True if born does not match (conclusively). False otherwise.
        def born_comp(args_born, db_born):
            if args_born is None or db_born is None:
                # Inconclusive. (Can't eliminate)
                return False
            if str(args_born).strip().upper() == str(db_born).strip().upper():
                # Born matches. (Can't eliminate.)
                return False
            return True

        # Returns boolean.
        def country_comp(args_country, db_country):
            if args_country is None or db_country is None:
                # Inconclusive.
                return False
            if str(args_country).strip().upper() == str(db_country).strip().upper():
                # countries match, can't eliminate
                return False
            return True

        values = [*args]
        # args = name, born, country, about
        # Artist and Artwork are similar in respect that they are both consistent across websites, ie, the data is
        # not site specific.

        # We write look for all the entries with "name".
        # Then we filter out, if the entry and args have the same born field and country then we remove those entries.
        # Make a provision for born == "None", ie, escape it. If values[born] == None, then we don't check further.
        # (implement the same for artworks, check for all the fields together and not one by one.
        insert_query = """INSERT INTO artists(
                                 NAME, BORN, COUNTRY, ABOUT
                                  )
                                 VALUES(%s, %s, %s, %s)
                                 """

        self.my_cursor.execute("""SELECT * FROM artists
                WHERE NAME = %s""", [values[0]])
        # Check if the entry already exists. We fetch all entries with this name. Thereafter we eliminate
        results = list(self.my_cursor.fetchall())

        if len(results) > 0:
            for result in results:
                # if born not same OR country not same, eliminate the entry
                if born_comp(values[1], result[1]) or country_comp(values[2], result[2]):
                    results.remove(result)

        if len(results) == 0:
            try:
                self.my_cursor.execute(insert_query, values)
                self.mydb.commit()
                self.my_cursor.execute("""SELECT LAST_INSERT_ID()""")
                artist_id = self.my_cursor.fetchone()

                # Updating ARTIST_INFO, since a new entry has been made.
                ARTIST_INFO[Artist.key_maker(values)] = int(artist_id[0])

                # No need to return the artist_id
                # return artist_id
            except pymysql.err.IntegrityError:
                # Trying to make duplicate entries.
                print("ARTIST ENTRY EXISTS")
                self.my_cursor.execute("""SELECT * FROM artists
                                WHERE LINK = %s""", [values[0]])
                results = list(self.my_cursor.fetchall())
                if Artist.key_maker(values) not in ARTIST_INFO.keys():
                    ARTIST_INFO[Artist.key_maker(values)] = int(results[0][4])

        elif len(results) == 1:
            print("ARTIST ENTRY EXISTS")
            if values[0] not in ARTIST_INFO.keys():
                ARTIST_INFO[Artist.key_maker(values)] = int(results[0][4])
            # Return artist_id
            # return results[0][4]

        else:
            print("DUPLICATION ERROR :: Multiple Artist entries for the same name, age and location")
            print("REMOVING DUPLICATE ENTRIES.")
            while len(results) > 1:
                result = results[-1]
                self.remove_duplicates(result[4])