def cleanup(self):
        """
        Delete all zero byte files (left behind by youtube-dl) from the download directory.

        Only regular files located directly inside ``self.download_dir`` are considered;
        sub-directories are left untouched. A file that cannot be inspected or removed is
        logged and skipped, so one failure does not stop the remaining files from being
        cleaned up.

        :raises ValueError: If ``self.download_dir`` is not an existing directory
        """

        logger.info('Deleting zero byte files...')
        if not os.path.isdir(self.download_dir):
            raise ValueError('Parameter path is not a directory.')

        # Escape the directory part: playlist-derived directory names may contain
        # glob metacharacters such as '[' or '*', which would otherwise be treated
        # as a pattern and make the lookup silently match nothing.
        pattern = os.path.join(glob.escape(self.download_dir), '*')

        for path in glob.glob(pattern):
            try:
                if os.path.isfile(path) and os.path.getsize(path) == 0:
                    os.remove(path)
            except OSError as e:
                # Best effort: report the problem and keep going with the other files
                logger.error('{}: {}'.format(type(e), e))

        logger.info('Done.')
    def __merge_playlist(self, pl_data):
        """
        The function that merges remote (head_playlist) and local playlist (base_playlist) and generate scheduled queue indices

        Entries that are ``None`` or whose title marks them as private/deleted are removed
        from ``pl_data['entries']`` in place. When a previously saved playlist file exists,
        the saved ``status`` of each entry with a matching id is copied onto the remote
        entry; otherwise every remaining entry is marked as ready. The merged playlist is
        written to ``self.playlist_file``, stored in ``self.playlist_data``, and
        ``self.playlist_data_map`` is filled with ``entry id -> index in merged playlist``.

        :param dict pl_data: Playlist data contains downloaded songs (modified in place)

        :rtype: (dict, list)
        :return: (playlist, indices):  (Merged playlist, Queue indices to download).
                 Indices are 1-based positions in the remote playlist as received,
                 i.e. removed entries still consume an index.
        """

        unavailable_titles = ('[private video]', '[deleted video]')
        removed_message = 'The video is private or deleted. Removed from the playlist.'
        ready_status = YDLQueueStatus.ready.value
        finished_status = YDLQueueStatus.finished.value

        head_playlist_data = pl_data  # Playlist data downloaded from url
        base_playlist_data = None  # Playlist data previously saved on download directory

        def is_unavailable(entry):
            # Entries may be None; guard before touching the dict API.
            if entry is None:
                return True
            return (entry.get('title') or 'N/A').lower() in unavailable_titles

        def log_entry(log, position, entry, message):
            # ``entry`` may be None for unavailable videos, so fall back to an empty dict.
            info = entry if entry is not None else {}
            title = info.get('title', None)
            title_tag = '[title:{}]'.format(title) if title else ''
            log('[Playlist:{}/{}][ID:{}]{} {}'.format(
                position, len(head_playlist_data['entries']),
                info.get('id', 'N/A'), title_tag, message))

        def merge_saved_status(head_entry, head_index, in_range):
            # Iterate over a copy because entries are deleted from the live list while
            # scanning; ``base_index`` tracks the position inside the live list.
            base_index = 0
            for base_entry in base_playlist_data['entries'][:]:
                if is_unavailable(base_entry):
                    log_entry(logger.error, head_index + 1, base_entry, removed_message)
                    del base_playlist_data['entries'][base_index]
                    continue

                if head_entry['id'] != base_entry['id']:
                    base_index += 1
                    continue

                # Same song found: carry the saved status over to the remote entry
                base_entry_status = base_entry.get('status', ready_status)
                head_entry['status'] = base_entry_status

                if not in_range:
                    log_entry(logger.debug, head_index + 1, base_entry,
                              'This queue is out of range requested. Skipped.')
                elif base_entry_status == finished_status:
                    log_entry(logger.warning, head_index + 1, base_entry,
                              'This queue is already finished. Skipped.')
                else:
                    log_entry(logger.info, head_index + 1, base_entry,
                              'This queue is not finished yet. Added to scheduled queues.')

                # Drop the matched entry so later scans have less to walk through
                del base_playlist_data['entries'][base_index]
                return

        # Load playlist previously saved
        if os.path.exists(self.playlist_file):
            with open(self.playlist_file) as f:
                base_playlist_data = json.load(f)

        # Merge playlist
        candidate_queue_indices = []
        candidate_queue_index = 1  # 1-based position in the remote playlist as received
        head_index = 0  # 0-based position in the merged (filtered) playlist

        # Iterate over a copy to avoid index shifting when elements are removed while iterating.
        for head_entry in head_playlist_data['entries'][:]:
            if is_unavailable(head_entry):
                log_entry(logger.error, head_index + 1, head_entry, removed_message)
                del head_playlist_data['entries'][head_index]
                candidate_queue_index += 1
                continue

            in_range = self.__is_queue_in_range(head_index)

            if base_playlist_data:
                # A saved playlist exists: reuse its per-song status
                merge_saved_status(head_entry, head_index, in_range)
                is_not_finished = head_entry.get('status', ready_status) != finished_status
                if in_range and is_not_finished:
                    candidate_queue_indices.append(candidate_queue_index)
            else:
                # Nothing saved yet: every song in range has to be downloaded
                if in_range:
                    candidate_queue_indices.append(candidate_queue_index)
                    log_entry(logger.info, head_index + 1, head_entry,
                              'This queue is not finished yet. Added to scheduled queues.')
                else:
                    log_entry(logger.debug, head_index + 1, head_entry,
                              'This queue is out of range requested. Skipped.')
                head_entry['status'] = ready_status

            # Update track number
            head_entry['track_number'] = head_index + 1

            # Add element to dictionary that maps index and entry_id
            self.playlist_data_map[head_entry['id']] = head_index

            candidate_queue_index += 1
            head_index += 1

        # Save playlist
        with open(self.playlist_file, 'w') as file:
            json.dump(head_playlist_data, file, indent=4, ensure_ascii=False)

        self.playlist_data = head_playlist_data

        return head_playlist_data, candidate_queue_indices
    def __download_hook(self, data, queue_index=0, queue_total=0):
        """
        The function that get called on download progress, with a dictionary with the entries.
        More info is available at https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py

        When ``queue_index`` and ``queue_total`` are both positive, the song is looked up in
        ``self.playlist_data`` by the id taken from the downloaded filename (without its
        extension), its status is set to finished and the playlist file is rewritten.
        Otherwise only a completion message is logged. Any error is logged, never raised.
        NOTE(review): the hook marks the song finished unconditionally, so it presumably is
        only invoked for "finished" events - confirm against the caller.

        :param dict data: Progress information for each queue
        :param int queue_index: Current queue index displayed in log message
        :param int queue_total: Total number of queue displayed in log message
        :return:
        """

        try:
            # Filename
            song_filename = os.path.basename(data['filename'])
            elapsed = "{0:.2f}".format(data.get('elapsed', -1))
            elapsed_tag = '[Elapsed:{}]'.format(
                elapsed) if float(elapsed) >= 0 else ''

            if not (queue_index > 0 and queue_total > 0):
                # No queue context: the filename (without extension) is used as the title
                song_title, _ = os.path.splitext(song_filename)
                logger.info('[Title:{}][Size:{}]{} {}'.format(
                    song_title, data['_total_bytes_str'], elapsed_tag,
                    'Finished.'))
                return

            # Get index from id-index map
            song_id, _ = os.path.splitext(song_filename)
            song_index = self.playlist_data_map.get(song_id, -1)
            entry_found = None
            if song_index >= 0:
                # YouTube user playlist or SoundCloud playlist
                entry_found = self.playlist_data['entries'][song_index]
            else:
                # YouTube auto-generated playlist
                for entry in self.playlist_data['entries']:
                    if entry and entry.get('id') == song_id:
                        entry_found = entry
                        break

            if entry_found:
                # Finish queue
                entry_found['status'] = YDLQueueStatus.finished.value
                with open(self.playlist_file, 'w') as f:
                    json.dump(self.playlist_data,
                              f,
                              indent=4,
                              ensure_ascii=False)

                # Print song info. The title comes from the matched entry itself:
                # indexing the playlist with song_index would pick the LAST entry
                # whenever the song was found through the fallback scan (index -1).
                song_title = entry_found.get('title', None)
                title_tag = '[title:{}]'.format(
                    song_title) if song_title else ''
                logger.info('[Process:{}/{}][ID:{}]{}[Size:{}]{} {}'.format(
                    queue_index, queue_total, song_id, title_tag,
                    data['_total_bytes_str'], elapsed_tag, 'Finished.'))

            else:
                # Print warning. No playlist entry matches, so there is no title to show.
                logger.warning('[Process:{}/{}][ID:{}]{}[Size:{}]{} {}'.format(
                    queue_index, queue_total, song_id, '',
                    data['_total_bytes_str'], elapsed_tag,
                    'The downloaded song is different from the song on the playlist initially requested. This is caused by YouTube auto-generated playlist.'
                ))

        except Exception as e:
            # A hook failure must not abort the download; report it and carry on
            logger.error('[Process:{}/{}] {}:{}'.format(
                queue_index, queue_total, type(e), str(e)))
# Example no. 4
# 0
    def update(self, download_dir, pl_data, is_playlist):
        """
        The function that update audio metadata.

        For a playlist, every entry is expected to have been saved as
        ``<download_dir>/<entry id>.<audio codec>``; the file is tagged with the playlist
        title/uploader/extractor key and its 1-based position as track number. A failure on
        one entry is logged and the remaining entries are still processed.
        For a single song, the file ``<download_dir>/<sanitized title>.<audio codec>`` is
        tagged if it exists.

        :param str download_dir: Download directory
        :param dict pl_data: Playlist data which contains downloaded song information
        :param bool is_playlist: Flag that indicates playlist contains multiple songs
        """

        def thumbnail_file(info):
            # Artwork is optional: a missing/empty/None thumbnail list simply means no image
            try:
                return info['thumbnails'][0]['filename']
            except (KeyError, IndexError, TypeError):
                return None

        logger.info('Updating metadata...')

        if is_playlist:
            entries = pl_data.get('entries', [])
            album_title = pl_data.get('title', 'Unknown Album')
            album_artist = pl_data.get('uploader', None)
            album_composer = pl_data.get('extractor_key')

            process_total = len(entries)
            for process_index, entry in enumerate(entries, start=1):
                try:
                    # Determine filename from entry id
                    song_id = entry['id']
                    song_title = sanitize_filename(entry.get('title', song_id))
                    source_audio_file = os.path.join(download_dir, '{}.{}'.format(song_id, self.audio_codec))

                    # Update tag
                    self.__update_tag(
                        download_dir=download_dir,
                        song_title=song_title,
                        audio_file=source_audio_file,
                        image_file=thumbnail_file(entry),
                        album_title=album_title,
                        album_artist=album_artist,
                        album_composer=album_composer,
                        track_number=process_index,
                        process_index=process_index,
                        process_total=process_total,
                    )

                except Exception as e:
                    # Deliberately broad (e.g. the entry is None), but no longer a bare
                    # ``except`` so KeyboardInterrupt/SystemExit still propagate.
                    message = 'Could not update metadata because there is no data found on the playlist. The video may be private or deleted. Audio data is not saved.'
                    logger.error('[Process:{}/{}][Track:{}] {}'.format(process_index, process_total, 'N/A', message))
                    logger.debug('[Process:{}/{}] {}: {}'.format(process_index, process_total, type(e), e))

        else:
            base_filename = sanitize_filename(pl_data.get('title', 'Unknown'))
            audio_file = os.path.join(download_dir, '{}.{}'.format(base_filename, self.audio_codec))
            if os.path.exists(audio_file):
                # Pass the title explicitly: without it the tag updater would build its
                # destination filename from ``None`` and rename the song to "None.<codec>".
                self.__update_tag(
                    download_dir=download_dir,
                    song_title=base_filename,
                    audio_file=audio_file,
                    image_file=thumbnail_file(pl_data)
                )

        logger.info('Done.')
# Example no. 5
# 0
    def __update_tag(self, download_dir, audio_file, image_file,
                     song_title=None, album_title=None, album_artist=None, album_composer=None,
                     track_number=-1, process_index=-1, process_total=-1):
        """
        The function that update audio metadata for each song.

        The tag format is chosen from the guessed mime type of ``audio_file`` (MP4/M4A, MP3
        or FLAC; AAC is recognised but not tagged yet). After the tags are saved, the artwork
        file is removed and the audio file is renamed to ``<song_title>.<audio codec>`` inside
        ``download_dir``. All errors are logged and the song is skipped; nothing is raised.

        :param str download_dir: Download directory
        :param str audio_file: Path to audio file
        :param str image_file: Path to image file (may be None when there is no artwork)
        :param str song_title: Song title; when None the file keeps its current name
        :param str album_title: Album title to be saved in metadata
        :param str album_artist: Album artist to be saved in metadata
        :param str album_composer: Album composer to be saved in metadata
        :param int track_number: track number to be saved in metadata
        :param int process_index: Current process index displayed in log message
        :param int process_total: Total number of process displayed in log message
        """

        if audio_file is None:
            logger.warning('[Process:{}/{}][Track:{}] Could not update metadata because there is no data found on the playlist. The video may be private or deleted.'.format(process_index, process_total, track_number))
            return

        if process_index > 0 and process_total > 0:
            if track_number > 0:
                log_prefix = '[Process:{}/{}][Track:{}]'.format(process_index, process_total, track_number)
            else:
                log_prefix = '[Process:{}/{}]'.format(process_index, process_total)
        else:
            log_prefix = ''

        audio_filename = os.path.basename(audio_file)

        try:
            # Validate audio data
            if not os.path.isfile(audio_file):
                raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), audio_file)

            # NOTE: guess_type returns a (type, encoding) tuple, passed on as-is
            audio_mime_type = mimetypes.guess_type(audio_file)

            if contains_at_least(audio_mime_type, ['audio/x-mp4', 'audio/x-m4a', 'audio/mp4a-latm']):

                # For more info about mp4 tag is available at
                # https://github.com/quodlibet/mutagen/blob/cf399dc58940fb1356f672809d763be9e2af0033/mutagen/mp4/__init__.py
                # http://atomicparsley.sourceforge.net/mpeg-4files.html
                mp4_data = mp4.MP4(audio_file)
                # Track Number
                if not self.no_track_number and track_number > 0:
                    mp4_data['trkn'] = [(track_number, 0)]
                # Cover image
                if not self.no_artwork:
                    image_data = self.__get_tag_image(image_file, audio_mime_type)
                    if image_data:
                        mp4_data['covr'] = [image_data]
                # Album title
                if not self.no_album_title and album_title is not None:
                    mp4_data['\xa9alb'] = album_title
                # Album artist
                if not self.no_album_artist and album_artist is not None:
                    mp4_data['aART'] = album_artist
                # Composer
                if not self.no_composer and album_composer is not None:
                    mp4_data['\xa9wrt'] = album_composer
                # Part of compilation
                if not self.no_compilation:
                    mp4_data['cpil'] = True
                # Save
                mp4_data.save()

            elif contains_at_least(audio_mime_type, ['audio/x-mp3', 'audio/mpeg']):

                # For more info about ID3v2 tag is available at
                # https://github.com/quodlibet/mutagen/blob/4a5d7d17f1a611280cc52d229aa70b77ca3c55dd/mutagen/id3/_frames.py
                # https://help.mp3tag.de/main_tags.html
                mp3_data = id3.ID3(audio_file)
                # Cover image
                if not self.no_artwork:
                    image_data = self.__get_tag_image(image_file, audio_mime_type)
                    if image_data:
                        mp3_data['APIC'] = image_data
                # Track number
                if not self.no_track_number and track_number > 0:
                    mp3_data.add(id3.TRCK(encoding=3, text=['{}/{}'.format(track_number, 0)]))
                # Text frames use encoding 3 (UTF-8): encoding 0 is Latin-1, which cannot
                # represent e.g. Japanese or Cyrillic album titles and uploader names.
                # Album title
                if not self.no_album_title and album_title is not None:
                    mp3_data["TALB"] = id3.TALB(encoding=3, text=album_title)
                # Album artist
                if not self.no_album_artist and album_artist is not None:
                    mp3_data["TPE2"] = id3.TPE2(encoding=3, text=album_artist)
                # Composer
                if not self.no_composer and album_composer is not None:
                    mp3_data["TCOM"] = id3.TCOM(encoding=3, text=album_composer)
                # Part of compilation
                if not self.no_compilation:
                    mp3_data['TCMP'] = id3.TCMP(encoding=0, text=['1'])
                # Save
                mp3_data.save()

            elif contains_at_least(audio_mime_type, ['audio/x-aac']):

                # TODO: Add AAC support (file is left untagged, but still renamed below)
                pass

            elif contains_at_least(audio_mime_type, ['audio/x-flac']):

                # https://github.com/quodlibet/mutagen/blob/a1db79ece62c4e86259f15825e360d1ce0986a22/mutagen/flac.py
                # https://github.com/quodlibet/mutagen/blob/4a5d7d17f1a611280cc52d229aa70b77ca3c55dd/tests/test_flac.py

                flac_data = flac.FLAC(audio_file)
                # Artwork
                if not self.no_artwork:
                    image_data = self.__get_tag_image(image_file, audio_mime_type)
                    if image_data:
                        flac_data.add_picture(image_data)
                # Save
                flac_data.save()

                # Re-open the file to write the text tags in a second pass
                flac_data = File(audio_file)
                # Track number
                if not self.no_track_number and track_number > 0:
                    flac_data.tags['tracknumber'] = str(track_number)
                # Album title
                if not self.no_album_title and album_title is not None:
                    flac_data.tags['album'] = album_title
                # Album artist
                if not self.no_album_artist and album_artist is not None:
                    flac_data.tags['albumartist'] = album_artist
                # Composer
                if not self.no_composer and album_composer is not None:
                    flac_data.tags['composer'] = album_composer
                # Part of compilation: nothing is written for FLAC
                # Save
                flac_data.save()

            else:
                raise InvalidMimeTypeException("Invalid audio format.", audio_mime_type)

            # Remove artwork if succeeded. image_file is None when the entry has no
            # thumbnail; os.path.exists(None) would raise TypeError and abort the rename.
            if image_file and os.path.exists(image_file):
                os.remove(image_file)

            # Rename filename from id to title. Without a title there is nothing to rename
            # to (formatting None would produce a file literally called "None.<codec>").
            dest_audio_file = audio_file
            if song_title is not None:
                dest_audio_file = os.path.join(download_dir, '{}.{}'.format(song_title, self.audio_codec))
                if dest_audio_file != audio_file:
                    os.rename(audio_file, dest_audio_file)

            dest_audio_filename = os.path.basename(dest_audio_file)
            logger.info('{}[File:{}] Updated.'.format(log_prefix, dest_audio_filename))

        except FileNotFoundError:
            message = 'File not found. Skipped.'
            logger.warning('{}[File:{}] {}'.format(log_prefix, audio_filename, message))

        except (InvalidDataException, InvalidMimeTypeException) as e:
            message = e.message + ' Skipped.'
            logger.warning('{}[File:{}] {}'.format(log_prefix, audio_filename, message))

        except Exception as e:
            message = 'Error {}: {} Skipped.'.format(type(e), str(e))
            logger.error('{}[File:{}] {}'.format(log_prefix, audio_filename, message))
    def download(self):
        """
        The function that downloads songs from YouTube and SoundCloud

        Steps: set the log level, print the package version, validate the url / working
        directory / playlist configuration, preprocess the playlist, download it and clean
        up the download directory. Any validation, preprocessing or download failure is
        logged and terminates the process with a non-zero exit status.

        :return bool result: Result of process, used by unit test
        :raises SystemExit: When validation, preprocessing or download fails
        """
        # Function-scope import: the module's import block is not visible from here
        import sys

        def abort(*messages):
            # Log the reason(s) and stop. ``sys.exit(1)`` replaces the bare ``exit()``
            # helper, which is only injected by the ``site`` module and reported success
            # (status 0) to the calling shell even though the run failed.
            for message in messages:
                logger.error(message)
            logger.fatal('Aborted.')
            sys.exit(1)

        print()
        atexit.register(print)

        # Set log level
        logger.setLevel(logging.DEBUG if self.verbose else logging.INFO)

        # Print version
        logger.info(pkg_resources.require("music_dl")[0])

        # Validate parameters
        logger.info('Validating parameters...')

        try:
            # Validate download url
            url_parsed = urlparse(self.download_url)
            if not url_parsed.scheme.startswith('http'):
                raise DirectoryException(
                    'Invalid URL. URL must start with http*. Input value is {}'
                    .format(self.download_url))
            tld_parsed = tldextract.extract(self.download_url)
            if tld_parsed.domain not in ['youtube', 'soundcloud']:
                raise DirectoryException(
                    'Invalid URL. Music Downloader supports only YouTube and SoundCloud. Input value is {}'
                    .format(self.download_url))
            # Validate download directory
            if not is_path_exists_or_creatable(self.working_dir):
                raise DirectoryException(
                    'Invalid directory. Please specify valid download directory. Input value is {}'
                    .format(self.working_dir))

        except DirectoryException as e:
            abort(e.message)

        # Validate playlist configuration
        try:
            self.playlist.validate()

        except PlaylistParameterException as e:
            abort(e.message)

        logger.info('Done.')

        # Retrieve playlist
        download_dir = None
        try:
            download_dir = self.playlist.preprocess(self.download_url,
                                                    self.working_dir)

        except PlaylistPreprocessException as e:
            abort(e.message, e.data)

        # Download playlist (the return value is not used by this method)
        try:
            self.playlist.download()

        except PlaylistPreprocessException as e:
            abort(e.message, e.data)

        # Cleanup download directory
        self.playlist.cleanup()

        # Print completion message
        logger.info('All process has done.')
        logger.info('Now you can find downloaded songs at {}'.format(
            colorama.Fore.LIGHTCYAN_EX + download_dir))

        return True