示例#1
0
'''
Created on Oct 22, 2013

@author: hsartoris
'''
import parse_url

def normalize_url(url):
    '''
    Return url with an explicit scheme, defaulting to "http://".

    Leading and trailing whitespace is removed first.  If the result
    already starts with "http://" or "https://" (compared without regard
    to case) it is returned as is; otherwise "http://" is prepended.
    '''
    url = url.strip()
    if url.lower().startswith(("http://", "https://")):
        return url
    return "http://" + url


def main():
    '''
    Prompt for a URL and print each link parse_url.get_links returns for it.
    '''
    url = normalize_url(raw_input("Please enter a URL: "))
    for link in parse_url.get_links(url):
        # Single-argument print(...) prints the same text as the Python 2
        # print statement.
        print(link)


if __name__ == "__main__":
    main()
示例#2
0
'''
Created on Oct 22, 2013

@author: hsartoris
'''
import parse_url

# Passed as the second argument to every parse_url.get_links call.
# NOTE(review): presumably a cap on links returned per page -- confirm
# against parse_url.
max_links = 10


def normalize_url(url):
    '''
    Return url with an explicit scheme, defaulting to "http://".

    Leading and trailing whitespace is removed first.  If the result
    already starts with "http://" or "https://" (compared without regard
    to case) it is returned as is; otherwise "http://" is prepended.
    '''
    url = url.strip()
    if url.lower().startswith(("http://", "https://")):
        return url
    return "http://" + url


def main():
    '''
    Prompt for a URL, gather its links plus the links found on each of
    those pages, then print every distinct link followed by the count.
    '''
    start_url = normalize_url(raw_input("Please enter a URL: "))
    links = parse_url.get_links(start_url, max_links)

    # Start from the first page's links and merge in one further level.
    # update() grows the set in place instead of building a new set per page.
    all_links = set(links)
    for link in links:
        all_links.update(parse_url.get_links(link, max_links))

    for link in all_links:
        print(link)
    print(len(all_links))


if __name__ == "__main__":
    main()
示例#3
0
'''
Created on Oct 22, 2013

@author: hsartoris
'''
import parse_url

def normalize_url(url):
    '''
    Return url with an explicit scheme, defaulting to "http://".

    Leading and trailing whitespace is removed first.  If the result
    already starts with "http://" or "https://" (compared without regard
    to case) it is returned as is; otherwise "http://" is prepended.
    '''
    url = url.strip()
    if url.lower().startswith(("http://", "https://")):
        return url
    return "http://" + url


def main():
    '''
    Ask the user for a URL and list the links parse_url.get_links finds.
    '''
    target = normalize_url(raw_input("Please enter a URL: "))
    found = parse_url.get_links(target)
    for link in found:
        # One link per line, exactly as the Python 2 print statement did.
        print(link)


if __name__ == "__main__":
    main()
        
示例#4
0
'''
@author: hsartoris
'''
import parse_url

# NOTE(review): max_links is never read in this script (get_links is called
# with the URL only) -- confirm whether it was meant to be passed along.
max_links = 10
# The crawl loop stops once at least this many distinct links are known.
# The final total can exceed it, because a page's links are all recorded
# before the limit is checked again.
max_total_links = 100


def normalize_url(url):
    '''
    Return url with an explicit scheme, defaulting to "http://".

    Leading and trailing whitespace is removed first.  If the result
    already starts with "http://" or "https://" (compared without regard
    to case) it is returned as is; otherwise "http://" is prepended.
    '''
    url = url.strip()
    if url.lower().startswith(("http://", "https://")):
        return url
    return "http://" + url


def report(label, value):
    '''
    Print label and value on one line separated by a single space, the
    same text the Python 2 statement ``print label, value`` produces.
    '''
    print("%s %s" % (label, value))


def main():
    '''
    Crawl breadth-first from a URL entered by the user.

    Pages are taken from the front of a queue; every link not seen before
    is recorded and queued.  After each page the running counts are shown
    and the script waits for <enter>.  The loop ends when the queue is
    empty or max_total_links distinct links are known.
    '''
    # Local import keeps this block self-contained; deque gives an O(1)
    # pop from the front, unlike list.pop(0).
    from collections import deque

    start_url = normalize_url(raw_input("Please enter a URL: "))
    unvisited = deque([start_url])
    all_links = set([start_url])

    while unvisited and len(all_links) < max_total_links:
        current = unvisited.popleft()
        new_links = parse_url.get_links(current)
        for link in new_links:
            if link not in all_links:
                # Mark the link as seen at the moment it is queued, so the
                # queue never holds it twice even if get_links reports it
                # more than once.
                all_links.add(link)
                unvisited.append(link)
        report("URL: ", current)
        report("Number of links returned: ", len(new_links))
        report("Number of unvisited links: ", len(unvisited))
        report("Number of total links: ", len(all_links))
        # Pause so the user can step through the crawl one page at a time.
        raw_input('<enter>')
    print("")


if __name__ == "__main__":
    main()