def get_song(self, thread_id, url, song, artist):
    """
    Fetch one song page and save its details in the database.

    If the song is already stored for ``self.start_url`` a warning is logged
    and the method returns without fetching or saving anything.

    :param thread_id: ID of the calling worker, used only in the log message
    :param url: URL of the song; it is appended to ``self.start_url``
    :param song: Title of the song
    :param artist: Artist of song
    :return: None
    """
    if db_operations.exists_song(self.start_url, url):
        print_util.print_warning(
            '{0} --> Song {1} already exists. Skipping.'.format(
                thread_id, song))
        # Actually skip: without this return the page would be downloaded
        # and the song saved a second time despite the warning.
        return

    complete_url = self.start_url + url
    raw_html = open_request(complete_url, delayed=self.delay_request)
    album, lyrics, lyricist, additional_artists = self.get_song_details(
        raw_html)

    # Note: additional_artists are artist(s) featured in the song
    # NOTE(review): `url` is passed as both the 2nd and the 4th argument;
    # presumably the 4th is the album/movie URL slot -- confirm against
    # db_operations.save.
    db_operations.save(
        song, url, album, url, self.start_url, lyrics,
        additional_artists + [artist], [artist], lyricist)
def download_song(self, thread_id, url, song, movie, movie_url):
    """
    Fetch a song page and store the song in the database.

    Nothing is fetched when the song is already stored for
    ``self.start_url``; a warning is logged instead.

    :param thread_id: ID of the calling worker, used in the log message
    :param url: URL of song, appended to ``self.start_url``
    :param song: Name of song
    :param movie: Name of movie
    :param movie_url: URL of movie
    """
    # Guard clause: the song is already in the database
    if db_operations.exists_song(self.start_url, url):
        notice = '{0} -> Song {1} already exists. Skipping.'.format(
            thread_id, song)
        print_util.print_warning(notice)
        return

    # Download the page and pull the details out of it
    page_html = open_request(self.start_url + url,
                             delayed=self.delay_request)
    details = self.get_song_details(page_html)
    lyrics, singers, music_by, lyricist = details

    # Persist the record
    record = {
        'song': song,
        'song_url': url,
        'movie': movie,
        'movie_url': movie_url,
        'start_url': self.start_url,
        'lyrics': lyrics,
        'singers': singers,
        'director': music_by,
        'lyricist': lyricist,
    }
    db_operations.save(**record)
def get_song(self, thread_id, url, song, album, album_url, artist):
    """
    Save one song in the database unless it is already stored.

    The page at ``self.start_url + url`` is fetched, its lyrics are
    extracted with ``self.get_song_details`` and the record is saved with
    ``artist`` used for the singers, director and lyricist fields.

    :param thread_id: ID of the calling worker, used in the log message
    :param url: URL of the song, appended to ``self.start_url``
    :param song: Song title
    :param album: Album name, saved as ``movie``
    :param album_url: URL of album, saved as ``movie_url``
    :param artist: Artist name
    """
    already_stored = db_operations.exists_song(self.start_url, url)
    if already_stored:
        notice = '{0} -> Song {1} already exists. Skipping'.format(
            thread_id, song)
        print_util.print_warning(notice)
        return

    page_html = open_request(self.start_url + url,
                             delayed=self.delay_request)
    lyrics = self.get_song_details(page_html)

    record = {
        'song': song,
        'song_url': url,
        'movie': album,
        'movie_url': album_url,
        'start_url': self.start_url,
        'lyrics': lyrics,
        'singers': artist,
        'director': artist,
        'lyricist': artist,
    }
    db_operations.save(**record)
def get_song(self, thread_id, url, song, artist):
    """
    Fetch one song page and save its details in the database.

    If the song is already stored for ``self.start_url`` a warning is logged
    and the method returns without fetching or saving anything.

    :param thread_id: ID of the calling worker, used only in the log message
    :param url: URL of the song; it is appended to ``self.start_url``
    :param song: Title of the song
    :param artist: Artist of song
    :return: None
    """
    if db_operations.exists_song(self.start_url, url):
        print_util.print_warning(
            '{0} --> Song {1} already exists. Skipping.'.format(
                thread_id, song
            )
        )
        # Actually skip: without this return the page would be downloaded
        # and the song saved a second time despite the warning.
        return

    complete_url = self.start_url + url
    raw_html = open_request(complete_url, delayed=self.delay_request)
    album, lyrics, lyricist, additional_artists = self.get_song_details(
        raw_html
    )

    # Note: additional_artists are artist(s) featured in the song
    # NOTE(review): `url` is passed as both the 2nd and the 4th argument;
    # presumably the 4th is the album/movie URL slot -- confirm against
    # db_operations.save.
    db_operations.save(
        song, url, album, url, self.start_url, lyrics,
        additional_artists + [artist, ], [artist, ], lyricist
    )
def download_movie(self, thread_id, url, movie):
    """
    Queue a download task for every song listed on a movie page.

    The movie page is fetched and parsed into (song URL, song name) pairs.
    When the database already holds as many songs for this movie as the
    page lists, ``db_operations.update_last_crawl`` is called, a warning is
    logged and nothing is queued.

    :param thread_id: ID of the calling worker, used in the log message
    :param url: URL of movie, appended to ``self.start_url``
    :param movie: Name of movie
    """
    page_html = open_request(self.start_url + url,
                             delayed=self.delay_request)
    listed_songs = self.get_songs_with_url(page_html)

    # Same count in the database as on the page: nothing new to fetch
    stored_count = db_operations.number_of_songs(self.start_url, url)
    if stored_count == len(listed_songs):
        db_operations.update_last_crawl(self.start_url, url)
        notice = '{0} --> Movie {1} contains no new songs. Skipping.'.format(
            thread_id, movie)
        print_util.print_warning(notice)
        return

    # One type-2 task per song, each starting with a clean error count
    for link, title in listed_songs:
        song_task = {
            'type': 2,
            'url': link,
            'song': title,
            'movie': movie,
            'movie_url': url,
            'n_errors': 0,
        }
        self.task_queue.put(song_task)
def get_song(self, thread_id, url, song, album, album_url, artist):
    """
    Save one song in the database unless it is already stored.

    The page at ``self.start_url + url`` is fetched, its lyrics are
    extracted with ``self.get_song_details`` and the record is saved with
    ``artist`` used for the singers, director and lyricist fields.

    :param thread_id: ID of the calling worker, used in the log message
    :param url: URL of the song, appended to ``self.start_url``
    :param song: Song title
    :param album: Album name, saved as ``movie``
    :param album_url: URL of album, saved as ``movie_url``
    :param artist: Artist name
    """
    already_stored = db_operations.exists_song(self.start_url, url)
    if already_stored:
        notice = '{0} -> Song {1} already exists. Skipping'.format(
            thread_id, song)
        print_util.print_warning(notice)
        return

    page_html = open_request(self.start_url + url,
                             delayed=self.delay_request)
    lyrics = self.get_song_details(page_html)

    record = {
        'song': song,
        'song_url': url,
        'movie': album,
        'movie_url': album_url,
        'start_url': self.start_url,
        'lyrics': lyrics,
        'singers': artist,
        'director': artist,
        'lyricist': artist,
    }
    db_operations.save(**record)
def threader(self, thread_id):
    """
    Worker function: take tasks off ``self.task_queue`` until it is empty.

    Each task is a dict carrying at least ``'type'``, ``'url'`` and
    ``'n_errors'``. The type selects the handler:

    * ``0`` -> ``self.get_movies``
    * ``1`` -> ``self.download_movie``
    * ``2`` -> ``self.download_song``

    A task whose handler raises is logged, gets its ``'n_errors'``
    incremented and is put back on the queue. Once ``'n_errors'`` reaches
    ``self.max_allowed_errors`` the task is dropped with a warning.

    :param thread_id: Assigned ID of thread.
    :return: None
    """
    # Local import keeps this method self-contained.
    import queue

    while True:
        try:
            # Non-blocking get. Checking empty() and then calling a blocking
            # get() lets another worker take the last task in between,
            # which would leave this worker blocked forever.
            task = self.task_queue.get_nowait()  # Get one of them
        except queue.Empty:
            break  # No tasks left

        if task['n_errors'] >= self.max_allowed_errors:  # Too many errors
            print_util.print_warning(
                '{0} --> Too many errors in task {1}. Skipping.'.format(
                    thread_id, task))
            continue

        print_util.print_info('{0} --> New task : {1}'.format(
            thread_id, task))  # Log the task

        try:
            # Call corresponding function. A task of any other type runs no
            # handler and is still logged as complete.
            if task['type'] == 0:
                self.get_movies(thread_id, task['url'])
            elif task['type'] == 1:
                self.download_movie(thread_id, task['url'], task['movie'])
            elif task['type'] == 2:
                self.download_song(thread_id, task['url'], task['song'],
                                   task['movie'], task['movie_url'])
            print_util.print_info('{0} --> Task complete : {1}'.format(
                thread_id, task), Colors.GREEN)  # Log success
        except Exception as e:  # Some error
            print_util.print_error('{0} --> Error : {1}'.format(
                thread_id, e))  # Log it
            task['n_errors'] += 1  # Increment number of errors
            self.task_queue.put(task)  # Put back in queue
def download_song(self, thread_id, url, song, movie, movie_url):
    """
    Fetch a song page and store the song in the database.

    Nothing is fetched when the song is already stored for
    ``self.start_url``; a warning is logged instead.

    :param thread_id: ID of the calling worker, used in the log message
    :param url: URL of song, appended to ``self.start_url``
    :param song: Name of song
    :param movie: Name of movie
    :param movie_url: URL of movie
    """
    # Guard clause: the song is already in the database
    if db_operations.exists_song(self.start_url, url):
        notice = '{0} -> Song {1} already exists. Skipping.'.format(
            thread_id, song)
        print_util.print_warning(notice)
        return

    # Download the page and pull the details out of it
    page_html = open_request(self.start_url + url,
                             delayed=self.delay_request)
    details = self.get_song_details(page_html)
    lyrics, singers, music_by, lyricist = details

    # Persist the record
    record = {
        'song': song,
        'song_url': url,
        'movie': movie,
        'movie_url': movie_url,
        'start_url': self.start_url,
        'lyrics': lyrics,
        'singers': singers,
        'director': music_by,
        'lyricist': lyricist,
    }
    db_operations.save(**record)
def threader(self, thread_id):
    """
    Worker function: take tasks off ``self.task_queue`` until it is empty.

    Each task is a dict carrying at least ``'type'``, ``'url'`` and
    ``'n_errors'``. The type selects the handler:

    * ``0`` -> ``self.get_artists``
    * ``1`` -> ``self.get_artist``
    * ``2`` -> ``self.get_songs_from_page``
    * ``3`` -> ``self.get_song``

    A task whose handler raises is logged, gets its ``'n_errors'``
    incremented and is put back on the queue. Once ``'n_errors'`` reaches
    ``self.max_allowed_errors`` the task is dropped with a warning.

    :param thread_id: As usual
    :return: None
    """
    # Local import keeps this method self-contained.
    import queue

    while True:
        try:
            # Non-blocking get. Checking empty() and then calling a blocking
            # get() lets another worker take the last task in between,
            # which would leave this worker blocked forever.
            task = self.task_queue.get_nowait()
        except queue.Empty:
            break

        if task['n_errors'] >= self.max_allowed_errors:
            print_util.print_warning(
                '{0} --> Too many errors in task {1}. Skipping.'.format(
                    thread_id, task))
            continue

        print_util.print_info('{0} --> New task : {1}'.format(
            thread_id, task))

        try:
            # A task of any other type runs no handler and is still logged
            # as complete.
            if task['type'] == 0:
                self.get_artists(thread_id, task['url'])
            elif task['type'] == 1:
                self.get_artist(thread_id, task['url'], task['artist'])
            elif task['type'] == 2:
                self.get_songs_from_page(thread_id, task['url'],
                                         task['artist'])
            elif task['type'] == 3:
                self.get_song(thread_id, task['url'], task['song'],
                              task['artist'])
            print_util.print_info(
                '{0} --> Task complete : {1}'.format(thread_id, task),
                Colors.GREEN)
        except Exception as e:
            # Log the failure, count it and retry the task later
            print_util.print_error('{0} --> Error : {1}'.format(
                thread_id, e))
            task['n_errors'] += 1
            self.task_queue.put(task)
def download_movie(self, thread_id, url, movie):
    """
    Queue a download task for every song listed on a movie page.

    The movie page is fetched and parsed into (song URL, song name) pairs.
    When the database already holds as many songs for this movie as the
    page lists, ``db_operations.update_last_crawl`` is called, a warning is
    logged and nothing is queued.

    :param thread_id: ID of the calling worker, used in the log message
    :param url: URL of movie, appended to ``self.start_url``
    :param movie: Name of movie
    """
    page_html = open_request(self.start_url + url,
                             delayed=self.delay_request)
    listed_songs = self.get_songs_with_url(page_html)

    # Same count in the database as on the page: nothing new to fetch
    stored_count = db_operations.number_of_songs(self.start_url, url)
    if stored_count == len(listed_songs):
        db_operations.update_last_crawl(self.start_url, url)
        notice = '{0} --> Movie {1} contains no new songs. Skipping.'.format(
            thread_id, movie)
        print_util.print_warning(notice)
        return

    # One type-2 task per song, each starting with a clean error count
    for link, title in listed_songs:
        song_task = {
            'type': 2,
            'url': link,
            'song': title,
            'movie': movie,
            'movie_url': url,
            'n_errors': 0,
        }
        self.task_queue.put(song_task)
def threader(self, thread_id):
    """
    Worker function: take tasks off ``self.task_queue`` until it is empty.

    Each task is a dict carrying at least ``'type'``, ``'url'`` and
    ``'n_errors'``. The type selects the handler:

    * ``0`` -> ``self.get_artists``
    * ``1`` -> ``self.get_artist``
    * ``2`` -> ``self.get_songs_from_page``
    * ``3`` -> ``self.get_song``

    A task whose handler raises is logged, gets its ``'n_errors'``
    incremented and is put back on the queue. Once ``'n_errors'`` reaches
    ``self.max_allowed_errors`` the task is dropped with a warning.

    :param thread_id: As usual
    :return: None
    """
    # Local import keeps this method self-contained.
    import queue

    while True:
        try:
            # Non-blocking get. Checking empty() and then calling a blocking
            # get() lets another worker take the last task in between,
            # which would leave this worker blocked forever.
            task = self.task_queue.get_nowait()
        except queue.Empty:
            break

        if task['n_errors'] >= self.max_allowed_errors:
            print_util.print_warning(
                '{0} --> Too many errors in task {1}. Skipping.'.format(
                    thread_id, task
                )
            )
            continue

        print_util.print_info(
            '{0} --> New task : {1}'.format(
                thread_id, task
            )
        )

        try:
            # A task of any other type runs no handler and is still logged
            # as complete.
            if task['type'] == 0:
                self.get_artists(
                    thread_id,
                    task['url']
                )
            elif task['type'] == 1:
                self.get_artist(
                    thread_id,
                    task['url'],
                    task['artist']
                )
            elif task['type'] == 2:
                self.get_songs_from_page(
                    thread_id,
                    task['url'],
                    task['artist']
                )
            elif task['type'] == 3:
                self.get_song(
                    thread_id,
                    task['url'],
                    task['song'],
                    task['artist']
                )
            print_util.print_info(
                '{0} --> Task complete : {1}'.format(
                    thread_id, task
                ),
                Colors.GREEN
            )
        except Exception as e:
            # Log the failure, count it and retry the task later
            print_util.print_error(
                '{0} --> Error : {1}'.format(
                    thread_id, e
                )
            )
            task['n_errors'] += 1
            self.task_queue.put(task)
def threader(self, thread_id):
    """
    Worker function: take tasks off ``self.task_queue`` until it is empty.

    Each task is a dict carrying at least ``'type'``, ``'url'`` and
    ``'n_errors'``. The type selects the handler:

    * ``0`` -> ``self.get_movies``
    * ``1`` -> ``self.download_movie``
    * ``2`` -> ``self.download_song``

    A task whose handler raises is logged, gets its ``'n_errors'``
    incremented and is put back on the queue. Once ``'n_errors'`` reaches
    ``self.max_allowed_errors`` the task is dropped with a warning.

    :param thread_id: Assigned ID of thread.
    :return: None
    """
    # Local import keeps this method self-contained.
    import queue

    while True:
        try:
            # Non-blocking get. Checking empty() and then calling a blocking
            # get() lets another worker take the last task in between,
            # which would leave this worker blocked forever.
            task = self.task_queue.get_nowait()  # Get one of them
        except queue.Empty:
            break  # No tasks left

        if task['n_errors'] >= self.max_allowed_errors:  # Too many errors
            print_util.print_warning(
                '{0} --> Too many errors in task {1}. Skipping.'.format(
                    thread_id, task
                )
            )
            continue

        print_util.print_info(
            '{0} --> New task : {1}'.format(
                thread_id, task
            )
        )  # Log the task

        try:
            # Call corresponding function. A task of any other type runs no
            # handler and is still logged as complete.
            if task['type'] == 0:
                self.get_movies(
                    thread_id,
                    task['url']
                )
            elif task['type'] == 1:
                self.download_movie(
                    thread_id,
                    task['url'],
                    task['movie']
                )
            elif task['type'] == 2:
                self.download_song(
                    thread_id,
                    task['url'],
                    task['song'],
                    task['movie'],
                    task['movie_url']
                )
            print_util.print_info(
                '{0} --> Task complete : {1}'.format(
                    thread_id, task
                ),
                Colors.GREEN
            )  # Log success
        except Exception as e:  # Some error
            print_util.print_error(
                '{0} --> Error : {1}'.format(
                    thread_id, e
                )
            )  # Log it
            task['n_errors'] += 1  # Increment number of errors
            self.task_queue.put(task)  # Put back in queue