Exemplo n.º 1
0
    def download_movie(self, thread_id, url, movie):
        """
        Fetch a movie page and queue one task per song listed on it.

        When the number of songs already stored for this movie equals the
        number found on the page, only the last-crawl record is updated and
        nothing is queued.

        :param thread_id: Label interpolated into the warning message
        :param url: URL of movie, appended to ``self.start_url``
        :param movie: Name of movie
        """
        page_html = open_request(self.start_url + url,
                                 delayed=self.delay_request)
        songs_found = self.get_songs_with_url(page_html)

        # Same count as the database means the page lists no new songs
        stored_count = db_operations.number_of_songs(self.start_url, url)
        if stored_count == len(songs_found):
            db_operations.update_last_crawl(self.start_url, url)
            message = '{0} --> Movie {1} contains no new songs. Skipping.'
            print_util.print_warning(message.format(thread_id, movie))
            return

        # Queue every song of the movie
        for song_url, song in songs_found:
            task = {
                'type': 2,
                'url': song_url,
                'song': song,
                'movie': movie,
                'movie_url': url,
                'n_errors': 0,
            }
            self.task_queue.put(task)
Exemplo n.º 2
0
    def get_song(self, thread_id, url, song, artist):
        """
        Fetch a song page and save its details to the database.

        Only prints a warning and returns when the song is already stored.

        :param thread_id: Label interpolated into the warning message
        :param url: Song URL, appended to ``self.start_url``
        :param song: Song title
        :param artist: Artist of song
        """
        if db_operations.exists_song(self.start_url, url):
            print_util.print_warning(
                '{0} --> Song {1} already exists. Skipping.'.format(
                    thread_id, song))
            # Actually skip: without this return the page was fetched again
            # and the song saved a second time despite the warning.
            return

        complete_url = self.start_url + url
        raw_html = open_request(complete_url, delayed=self.delay_request)

        # Note: additional_artists are artist(s) featured in the song
        album, lyrics, lyricist, additional_artists = self.get_song_details(
            raw_html)

        # NOTE(review): the song URL is passed as both the second and fourth
        # positional argument; presumably the fourth slot is the album URL
        # and no separate one is available here -- confirm against
        # db_operations.save.
        db_operations.save(song, url, album, url, self.start_url, lyrics,
                           additional_artists + [artist], [artist], lyricist)
Exemplo n.º 3
0
    def get_artist(self, thread_id, url, artist):
        """
        Queue the songs on an artist's first page plus the artist's
        remaining pages.

        :param thread_id: Not used by this method
        :param url: Artist URL, appended to ``self.start_url``
        :param artist: Artist name, copied into every queued task
        """
        first_page_html = open_request(self.start_url + url,
                                       delayed=self.delay_request)

        page_urls = self.get_pages_for_artist(first_page_html)

        # Songs found on the page that was just fetched
        for song_url, song in self.get_songs(first_page_html):
            song_task = dict(type=3, url=song_url, song=song, artist=artist,
                             n_errors=0)
            self.task_queue.put(song_task)

        # The first entry of the page list is skipped; the rest are queued
        for page_url in page_urls[1:]:
            page_task = dict(type=2, url=page_url, artist=artist, n_errors=0)
            self.task_queue.put(page_task)
Exemplo n.º 4
0
    def get_song(self, thread_id, url, song, album, album_url, artist):
        """
        Fetch a song's lyrics and save the song in the database.

        Songs that already exist in the database are skipped with a warning.

        :param thread_id: Label interpolated into the warning message
        :param url: Song URL, appended to ``self.start_url``
        :param song: Song title
        :param album: Album name, saved as the ``movie`` field
        :param album_url: Album URL, saved as the ``movie_url`` field
        :param artist: Saved as singers, director and lyricist alike
        """
        already_saved = db_operations.exists_song(self.start_url, url)
        if already_saved:
            warning = '{0} -> Song {1} already exists. Skipping'
            print_util.print_warning(warning.format(thread_id, song))
            return

        page_html = open_request(self.start_url + url,
                                 delayed=self.delay_request)
        record = {
            'song': song,
            'song_url': url,
            'movie': album,
            'movie_url': album_url,
            'start_url': self.start_url,
            'lyrics': self.get_song_details(page_html),
            'singers': artist,
            'director': artist,
            'lyricist': artist,
        }
        db_operations.save(**record)
Exemplo n.º 5
0
    def get_song(self, thread_id, url, song, album, album_url, artist):
        """
        Save a song with its lyrics unless it is already in the database.

        :param thread_id: Label interpolated into the warning message
        :param url: Song URL, appended to ``self.start_url``
        :param song: Song title
        :param album: Album name, saved as the ``movie`` field
        :param album_url: Album URL, saved as the ``movie_url`` field
        :param artist: Saved as singers, director and lyricist alike
        """
        site = self.start_url

        # Nothing to do for a song that was saved earlier
        if db_operations.exists_song(site, url):
            text = '{0} -> Song {1} already exists. Skipping'.format(
                thread_id, song)
            print_util.print_warning(text)
            return

        html = open_request(site + url, delayed=self.delay_request)
        song_lyrics = self.get_song_details(html)

        db_operations.save(song=song, song_url=url,
                           movie=album, movie_url=album_url,
                           start_url=self.start_url, lyrics=song_lyrics,
                           singers=artist, director=artist, lyricist=artist)
Exemplo n.º 6
0
    def download_song(self, thread_id, url, song, movie, movie_url):
        """
        Fetch a song page and save the song's details in the database.

        Songs that already exist in the database are skipped with a warning.

        :param thread_id: Label interpolated into the warning message
        :param url: URL of song, appended to ``self.start_url``
        :param song: Name of song
        :param movie: Name of movie
        :param movie_url: URL of movie
        """
        if db_operations.exists_song(self.start_url, url):
            notice = '{0} -> Song {1} already exists. Skipping.'
            print_util.print_warning(notice.format(thread_id, song))
            return

        page_html = open_request(self.start_url + url,
                                 delayed=self.delay_request)

        # get_song_details yields four values, unpacked in this order
        details = self.get_song_details(page_html)
        lyrics, singers, music_by, lyricist = details

        record = dict(
            song=song,
            song_url=url,
            movie=movie,
            movie_url=movie_url,
            start_url=self.start_url,
            lyrics=lyrics,
            singers=singers,
            director=music_by,
            lyricist=lyricist,
        )
        db_operations.save(**record)
Exemplo n.º 7
0
    def get_song(self, thread_id, url, song, artist):
        """
        Fetch a song page and save its details to the database.

        Only prints a warning and returns when the song is already stored.

        :param thread_id: Label interpolated into the warning message
        :param url: Song URL, appended to ``self.start_url``
        :param song: Song title
        :param artist: Artist of song
        """
        if db_operations.exists_song(self.start_url, url):
            print_util.print_warning(
                '{0} --> Song {1} already exists. Skipping.'.format(
                    thread_id,
                    song
                )
            )
            # Actually skip: without this return the page was fetched again
            # and the song saved a second time despite the warning.
            return

        complete_url = self.start_url + url
        raw_html = open_request(complete_url, delayed=self.delay_request)

        # Note: additional_artists are artist(s) featured in the song
        album, lyrics, lyricist, additional_artists = self.get_song_details(
            raw_html
        )

        # NOTE(review): the song URL is passed as both the second and fourth
        # positional argument; presumably the fourth slot is the album URL
        # and no separate one is available here -- confirm against
        # db_operations.save.
        db_operations.save(
            song,
            url,
            album,
            url,
            self.start_url,
            lyrics,
            additional_artists + [artist],
            [artist],
            lyricist
        )
Exemplo n.º 8
0
    def get_artist_albums(self, thread_id, url, artist):
        """
        Queue a task for every song of every album on an artist's page.

        :param thread_id: Not used by this method
        :param url: Artist URL, joined to ``self.start_url`` with a ``/``;
            also stored as ``album_url`` in each task
        :param artist: Artist name
        """
        page_url = '/'.join([self.start_url, url])
        page_html = open_request(page_url, delayed=self.delay_request)

        for album, album_songs in self.get_albums_with_songs(page_html):
            for song_url, song in album_songs:
                task = dict(type=2, song=song, url=song_url, album=album,
                            album_url=url, artist=artist, n_errors=0)
                self.task_queue.put(task)
Exemplo n.º 9
0
    def get_songs_from_page(self, thread_id, url, artist):
        """
        Queue a task for every song found on one page of an artist.

        :param thread_id: Not used by this method
        :param url: Page URL, appended to ``self.start_url``
        :param artist: Artist name, copied into every queued task
        """
        page_html = open_request(self.start_url + url,
                                 delayed=self.delay_request)
        songs_on_page = self.get_songs(page_html)

        for song_url, song in songs_on_page:
            task = dict(type=3, url=song_url, song=song, artist=artist,
                        n_errors=0)
            self.task_queue.put(task)
Exemplo n.º 10
0
    def get_artists(self, thread_id, url):
        """
        Queue a task for every artist listed on a page.

        :param thread_id: Not used by this method
        :param url: Page URL, appended to ``self.start_url``
        """
        listing_html = open_request(self.start_url + url,
                                    delayed=self.delay_request)

        for artist_url, artist in self.get_artist_with_url(listing_html):
            task = dict(type=1, url=artist_url, artist=artist, n_errors=0)
            self.task_queue.put(task)
Exemplo n.º 11
0
    def get_movies(self, thread_id, url):
        """
        Queue a task for every movie listed on a page.

        :param thread_id: Not used by this method
        :param url: Page URL, appended to ``self.start_url``, from which
            movies are to be fetched
        """
        # Get website HTML
        listing_html = open_request(self.start_url + url,
                                    delayed=self.delay_request)

        # Add movies to task queue
        for movie_url, movie in self.get_movies_with_url(listing_html):
            task = dict(type=1, url=movie_url, movie=movie, n_errors=0)
            self.task_queue.put(task)
Exemplo n.º 12
0
    def get_songs_from_page(self, thread_id, url, artist):
        """
        Fetch one page of an artist and queue each song found on it.

        :param thread_id: Not used by this method
        :param url: Page URL, appended to ``self.start_url``
        :param artist: Artist name, copied into every queued task
        """
        html = open_request(self.start_url + url, delayed=self.delay_request)

        for link, title in self.get_songs(html):
            queued = {'type': 3, 'url': link, 'song': title}
            queued['artist'] = artist
            queued['n_errors'] = 0
            self.task_queue.put(queued)
Exemplo n.º 13
0
    def get_artists(self, thread_id, url):
        """
        Fetch a web address and queue a task for each artist found there.

        :param thread_id: Not used by this method
        :param url: Page URL, appended to ``self.start_url``
        """
        listing_html = open_request(self.start_url + url,
                                    delayed=self.delay_request)
        found = self.get_artists_with_url(listing_html)

        for artist_url, artist in found:
            task = dict(type=1, url=artist_url, artist=artist, n_errors=0)
            self.task_queue.put(task)
Exemplo n.º 14
0
    def get_movies(self, thread_id, url):
        """
        Fetch a movie listing and queue a task for each movie on it.

        :param thread_id: Not used by this method
        :param url: Page URL, appended to ``self.start_url``, from which
            movies are to be fetched
        """
        # Get website HTML
        html = open_request(self.start_url + url, delayed=self.delay_request)

        # Add movies to task queue
        listed_movies = self.get_movies_with_url(html)
        for movie_url, movie in listed_movies:
            entry = {'type': 1, 'url': movie_url}
            entry['movie'] = movie
            entry['n_errors'] = 0
            self.task_queue.put(entry)
Exemplo n.º 15
0
    def get_artist_albums(self, thread_id, url, artist):
        """
        Fetch an artist's page and queue every song of every album on it.

        :param thread_id: Not used by this method
        :param url: Artist URL, joined to ``self.start_url`` with a ``/``;
            also stored as ``album_url`` in each task
        :param artist: Artist name
        """
        html = open_request('{0}/{1}'.format(self.start_url, url),
                            delayed=self.delay_request)
        albums = self.get_albums_with_songs(html)

        for album, tracks in albums:
            for track_url, track in tracks:
                task = {
                    'type': 2,
                    'song': track,
                    'url': track_url,
                    'album': album,
                    'album_url': url,
                    'artist': artist,
                    'n_errors': 0,
                }
                self.task_queue.put(task)
Exemplo n.º 16
0
    def download_song(self, thread_id, url, song, movie, movie_url):
        """
        Save a song's details unless the song is already in the database.

        :param thread_id: Label interpolated into the warning message
        :param url: URL of song, appended to ``self.start_url``
        :param song: Name of song
        :param movie: Name of movie
        :param movie_url: URL of movie
        """
        site = self.start_url

        # Song already exists
        if db_operations.exists_song(site, url):
            text = '{0} -> Song {1} already exists. Skipping.'.format(
                thread_id, song)
            print_util.print_warning(text)
            return

        # Get HTML
        html = open_request(site + url, delayed=self.delay_request)

        lyrics, singers, music_by, lyricist = self.get_song_details(html)

        # Save in database
        db_operations.save(song=song, song_url=url,
                           movie=movie, movie_url=movie_url,
                           start_url=self.start_url, lyrics=lyrics,
                           singers=singers, director=music_by,
                           lyricist=lyricist)
Exemplo n.º 17
0
    def download_movie(self, thread_id, url, movie):
        """
        Queue the songs of a movie page, unless it lists nothing new.

        "Nothing new" means the stored song count for the movie equals the
        number of songs on the page; then only the last-crawl record is
        updated.

        :param thread_id: Label interpolated into the warning message
        :param url: URL of movie, appended to ``self.start_url``
        :param movie: Name of movie
        """
        site = self.start_url
        html = open_request(site + url, delayed=self.delay_request)
        listed = self.get_songs_with_url(html)

        if db_operations.number_of_songs(site, url) != len(listed):
            # Add all songs
            for song_url, song in listed:
                task = dict(type=2, url=song_url, song=song, movie=movie,
                            movie_url=url, n_errors=0)
                self.task_queue.put(task)
        else:
            # No new songs added
            db_operations.update_last_crawl(self.start_url, url)
            text = '{0} --> Movie {1} contains no new songs. Skipping.'
            print_util.print_warning(text.format(thread_id, movie))
Exemplo n.º 18
0
    def get_artist(self, thread_id, url, artist):
        """
        Queue an artist's first-page songs, then the artist's other pages.

        :param thread_id: Not used by this method
        :param url: Artist URL, appended to ``self.start_url``
        :param artist: Artist name, copied into every queued task
        """
        html = open_request(self.start_url + url, delayed=self.delay_request)

        pages = self.get_pages_for_artist(html)

        # Add all songs from current page
        for link, title in self.get_songs(html):
            song_task = {
                'type': 3,
                'url': link,
                'song': title,
                'artist': artist,
                'n_errors': 0,
            }
            self.task_queue.put(song_task)

        # Add rest of pages in task queue; the first entry is skipped
        other_pages = pages[1:]
        for page in other_pages:
            page_task = {
                'type': 2,
                'url': page,
                'artist': artist,
                'n_errors': 0,
            }
            self.task_queue.put(page_task)