Exemplo n.º 1
0
def crawl(url):
    """
    Register a website and start crawling it in a background thread.

    The URL is first normalised: when it starts with neither ``http://``
    nor ``https://`` it is rewritten as ``http://<url>/``.  If
    ``newurl(url)`` reports that the URL is not new, a message is printed
    and nothing else happens.  Otherwise a ``Site`` record is created and
    saved, and ``__crawl_address(url)`` is started on a new thread; this
    function returns without waiting for that thread.

    :param url: URL of website (with or without the scheme prefix)
    :type url: str
    :returns: None
    """
    # str.startswith accepts a tuple of prefixes, so one call covers both
    # schemes.
    if not url.startswith(('http://', 'https://')):
        url = 'http://%s/' % url

    # Guard clause: bail out early for URLs that newurl() does not
    # consider new.
    if not newurl(url):
        print("URL already in DB")
        return

    # Single-argument print(...) produces the same output on Python 2 and
    # is valid syntax on Python 3.
    print("New URL")
    print(url)

    # TODO (note carried over from the original): get correct title,
    # detect language.  Until then the URL is stored as the title.
    site = Site(title=url, url=url, show=0)
    site.published = datetime.datetime.today()
    site.save()

    # Run the actual crawl on its own thread so the caller is not blocked.
    worker = threading.Thread(target=__crawl_address, args=(url,))
    worker.start()