Exemplo n.º 1
0
    def __init__(self):
        """Set up the session and the default download-task state.

        Creates a HentaiSession and calls its load_from_file(), then
        initialises the working flag, the delay value, the current task
        fields and the two optional callbacks.

        Args: None

        Returns: None
        """
        self.session = HentaiSession()
        self.session.load_from_file()

        # No task has been parsed yet.
        self.task_name, self.task_status_tab = None, {}

        # Optional hooks; both stay unset until assigned by the caller.
        self.task_update_callback = self.task_finish_callback = None

        self.sleep_time = 0.3
        self.IsWorking = False
Exemplo n.º 2
0
class HentaiDownloadManager:
    """Drive login, gallery parsing and background page downloads.

    Usage: call ``parse(url)`` to load a gallery's name and page URLs, then
    ``download(save_folder)`` to fetch the pages on a worker thread.  The
    per-page status ("True"/"False" strings keyed by page URL) is written to
    ``DATA_FILE_NAME`` inside the save folder after every page, and that file
    is removed once every page is marked "True".

    Attributes:
        session: HentaiSession used for every request.
        IsWorking: True while a download started by ``download`` is running.
        sleep_time: seconds to sleep after each successfully saved page.
        task_name: name of the parsed gallery, or None before ``parse``.
        task_urls: page URLs of the parsed gallery.
        task_status_tab: dict mapping page URL -> "True" / "False".
        task_update_callback: optional ``callback(done, total)`` invoked
            after each page attempt.
        task_finish_callback: optional ``callback()`` invoked when a worker
            thread has finished.
    """

    def __init__(self):
        """Create the session, call its load_from_file() and reset task state.

        Args: None

        Returns: None
        """
        self.session = HentaiSession()
        self.session.load_from_file()

        self.IsWorking = False
        self.sleep_time = 0.3

        self.task_name = None
        # Initialised here so the attribute exists before parse() is called.
        self.task_urls = []
        self.task_status_tab = {}

        self.task_update_callback = None
        self.task_finish_callback = None

    def _check_finished(self, save_folder, urls):
        """Return the URLs that still need downloading into ``save_folder``.

        Reads the status file in ``save_folder`` and drops every URL it
        records with a value other than "False".

        Args:
            save_folder: target folder of the download.
            urls: page URLs of the task; never modified.

        Returns:
            A new list.  It is a copy of ``urls`` when the folder does not
            exist.  When the folder exists but the status file cannot be read
            or parsed, the user is asked whether to overwrite: all URLs are
            returned on "yes", an empty list on "no".
        """
        urls = list(urls)

        if not os.path.isdir(save_folder):
            return urls

        conf_file_name = os.path.join(save_folder, DATA_FILE_NAME)

        try:
            # ``with`` closes the handle even when json.load() fails.
            with open(conf_file_name, "r") as file_obj:
                task_status_tab = json.load(file_obj)
            if not isinstance(task_status_tab, dict):
                raise ValueError("status file does not contain a JSON object")
        except (OSError, ValueError):
            # Missing, unreadable or malformed status file (JSONDecodeError
            # and UnicodeDecodeError are both ValueError subclasses).
            answer = messagebox.askyesno("", "文件夹(%s)已经存在。是否覆盖?" % os.path.abspath(save_folder))
            return urls if answer else []

        remaining = []
        for index, url in enumerate(urls):
            # Skip pages that were already downloaded successfully.
            if task_status_tab.get(url, "False") != "False":
                print("[HentaiManager] image %d(%s) is exists " % (index, url))
            else:
                remaining.append(url)

        return remaining

    def is_login(self):
        """Return the session's ``is_login`` attribute."""
        return self.session.is_login

    def login(self, user_name, password):
        """Return the result of session.get_cookies_from_internet(user_name, password)."""
        return self.session.get_cookies_from_internet(user_name, password)

    def logout(self):
        """Clear the session cookies and call the session's clean_file()."""
        self.session.cookies.clear()
        self.session.clean_file()

    def get_user_name(self):
        """Return the session's ``user_name`` attribute."""
        return self.session.user_name

    def get_user_info(self):
        """Return the result of session.get_user_info()."""
        return self.session.get_user_info()

    def parse(self, url):
        """Open the gallery at ``url`` and store its name and page URLs.

        The results are kept in ``self.task_name`` and ``self.task_urls`` for
        a later ``download`` call.

        Args:
            url: hentai gallery url

        Returns: None
        """
        gallery = HentaiGallery(self.session, url)
        gallery.open()

        self.task_name = gallery.get_name()
        # Normalise a None result to an empty list so download() can always
        # iterate over task_urls.
        self.task_urls = gallery.get_all_image() or []

    def download(self, save_folder):
        """Start downloading the parsed task into ``save_folder``.

        Pages that the status file in ``save_folder`` already records as done
        are skipped.  The download itself runs on a new thread.

        Args:
            save_folder: folder the pages are saved into.

        Returns:
            False when a download is already running; None otherwise (both
            when nothing has been parsed yet and when a download was started).
        """
        if not self.task_name:
            print("No task to download. Please parse url first!")
            return

        if self.IsWorking:
            return False

        self.IsWorking = True

        self.thread_lock = threading.Lock()

        task_urls = self._check_finished(save_folder, self.task_urls)

        # Pages still pending start as "False"; everything else counts as done.
        pending = set(task_urls)
        self.task_status_tab = {
            page_url: str(page_url not in pending) for page_url in self.task_urls
        }

        self.conf_file_name = os.path.join(save_folder, DATA_FILE_NAME)

        self._download_thread_table = []

        thr = threading.Thread(
            target=self._download_thread,
            args=(
                self.session,
                task_urls,
                save_folder,
                self._download_task_update_callback,
                self._download_task_finished_callback,
                self.sleep_time,
            ),
        )
        # Register the thread before starting it: a worker with nothing to do
        # can reach the finished callback before start() returns.
        self._download_thread_table.append(thr)
        thr.start()

    def _download_thread(self, session, page_list, save_folder, update_callback, finish_callback, sleep_time):
        """Worker body: fetch and save every page, reporting each outcome.

        Args:
            session: session handed to each HentaiPage.
            page_list: page URLs to download, in order.
            save_folder: folder passed to ``page.save``.
            update_callback: ``callback(page_url, is_succeed)`` called after
                each page attempt.
            finish_callback: ``callback()`` called once after the last page.
            sleep_time: seconds to sleep after each successful page.

        Returns: None
        """
        for page_url in page_list:
            try:
                print("[HentaiManager]start download page %s..." % page_url)
                page = HentaiPage(session, page_url)
                page.open()

                # ``with`` releases the lock even if save() or the callback
                # raises; a manually acquired lock would stay held and block
                # the next acquire on this non-reentrant Lock forever.
                with self.thread_lock:
                    page.save(save_folder)
                    print("[HentaiManager]download page %s succeed." % page_url)

                    update_callback(page_url, True)
            except requests.exceptions.Timeout:
                with self.thread_lock:
                    update_callback(page_url, False)
                print("[HentaiManager]download page %s failed!Time out." % page_url)
            except Exception as e:
                with self.thread_lock:
                    update_callback(page_url, False)
                print("[HentaiManager]download page %s failed!" % page_url, e)
            else:
                # Only pause after a page that was actually downloaded.
                time.sleep(sleep_time)

        with self.thread_lock:
            finish_callback()

    def _download_task_update_callback(self, url, is_succeed):
        """Record the outcome for ``url``, persist the table, report progress.

        Args:
            url: page URL that was just attempted.
            is_succeed: truthy when the page was saved.

        Returns: None
        """
        self.task_status_tab[url] = str(bool(is_succeed))

        try:
            with open(self.conf_file_name, "w") as file_obj:
                json.dump(self.task_status_tab, file_obj, indent=4)
        except OSError as e:
            # Persisting the status is best effort; a write failure must not
            # kill the worker thread (this also runs from its error path).
            print("[HentaiManager]can not save status file %s:" % self.conf_file_name, e)

        if self.task_update_callback:
            (cur, total) = self.get_progress()
            self.task_update_callback(cur, total)

    def _download_task_finished_callback(self):
        """Unregister the calling worker thread and finalise the task.

        When no worker is left, the working flag is cleared and, if no page
        is marked "False", the status file is deleted.  The user finish
        callback is invoked afterwards when set.

        Returns: None
        """
        thr = threading.current_thread()
        if thr in self._download_thread_table:
            print("task(%s) is finish" % thr.name)
            self._download_thread_table.remove(thr)

        if not self._download_thread_table:
            print("all task is finish!")

            # Allow download() to be called again.
            self.IsWorking = False

            if "False" not in self.task_status_tab.values():
                # Everything succeeded, so the status file is no longer needed.
                if os.path.isfile(self.conf_file_name):
                    os.remove(self.conf_file_name)

        if self.task_finish_callback:
            self.task_finish_callback()

    def get_progress(self):
        """Return the download progress of the current task.

        Args: None

        Returns:
            Tuple ``(done, total)``: the number of pages marked "True" and the
            number of pages in the status table.
        """
        done = sum(1 for v in self.task_status_tab.values() if v == "True")
        return (done, len(self.task_status_tab))
Exemplo n.º 3
0
            print( "Get all image urls:", len( self.img_urls ) )
            for idx, url in enumerate( self.img_urls ):
                print( "url %d: %s" % ( idx, url ) )

    def get_name(self):
        """Return ``title_gj`` when it is truthy, otherwise ``title_gn``."""
        return self.title_gj or self.title_gn

    def get_all_image(self):
        """Return ``img_urls``, or None when ``is_enabled()`` is false.

        In the disabled case the enabled state and a hint to call open()
        first are printed before None is returned.
        """
        if self.is_enabled():
            return self.img_urls

        print( self.enabled, self.is_enabled() )
        print( "Can not open! Gallery is not enabled! Try open() first" )
        return None

# Script entry: build a session, call its load_from_file(), then open one
# hard-coded gallery URL.
if __name__ == "__main__":
    pass
    # Imported only on the script path.
    from hentai_session import HentaiSession

    user_session = HentaiSession( )
    user_session.load_from_file( )

    # Hard-coded gallery URL used for this manual run.
    url = "http://exhentai.org/g/852948/91d0a78b1c/"

    gallery = HentaiGallery( user_session, url )
    gallery.open( )