def __init__(self, starting_url):
    '''Initialize to set urlContentRetrieve object with startingURL.

    Keyword arguments:
    starting_url -- URL to start crawling.

    '''
    # Logging must be configured before anything else so subsequent
    # steps can report their progress and failures.
    self.setup_log()
    # Retriever responsible for downloading/parsing pages, anchored
    # at the crawl's entry URL.
    self.url_content_retrieve = UrlContentRetrieve(starting_url)
class HttpLinksCollector:
    '''Collect and organize the hyperlinks reachable from a URL.

    Created on 27/09/2012

    @author: Ricardo García Fernández
    @mail: [email protected]

    '''

    def __init__(self, starting_url):
        '''Prepare logging and the page-content retriever.

        Keyword arguments:
        starting_url -- URL to start crawling.

        '''
        # Configure logging first so later operations can log freely.
        self.setup_log()
        # Helper that fetches and parses page content for the crawl.
        self.url_content_retrieve = UrlContentRetrieve(starting_url)
    
    def retrieve_links(self, target_url, depth=1, level=1):
        '''
        Retrieve links from url content until defined depth organized in levels.
        
        Keyword arguments:
        target_url -- URL to analyze content and retrive links.
        depth -- Depth of links to analyze.
        level -- Level in which start to analyze.
        
        '''

        # Define ScrapItem to generate json file
        # scrap_item = ScrapItem()
        
        links = {}

        if depth >= level:

            soup_code = self.url_content_retrieve.url_content(target_url)
            
            if soup_code:
                formatted_links = \
                    self.url_content_retrieve.\
                        retrieve_formatted_links(soup_code)
                for link in formatted_links :
    
                    self.logger.info(self.print_depth(level) + " " + link)

                    try:
                        sublinks = \
                            self.retrieve_links(link, depth, level + 1)
                        links[link] = sublinks
                    except ValueError, value_error:
                        # Invalid URL
                        self.logger.error("URL is not correct:\t" + link + \
                                          "\nException:\t"\
                                           + str(value_error)\
                                           + "\nStack trace:\t" + \
                                           traceback.format_exc())
                            
        return links