Example #1
0
def update_network(accounts, depth):
    for account_id in accounts.keys():
        print '\tupdating user:'******'\t\tupdating', acc_type, 'account:', acc_username

            if acc_username == '': continue

            if acc_type == DELICIOUS:
                crawl = DeliciousCrawler.factory(acc_username, depth=depth) 
            elif acc_type == FLICKR:
                crawl = FlickrCrawler.factory(acc_username, depth=depth)
            elif acc_type == SLIDESHARE:
                crawl = SlideShareCrawler.factory(acc_username, depth=depth)
            elif acc_type == TWITTER:
                crawl = TwitterCrawler.factory(acc_username, depth=depth)
            elif acc_type == YOUTUBE:
                crawl = YouTubeCrawler.factory(acc_username, depth=depth)

            personUris.append(crawl.getUserUri(acc_username))
            crawlers.append(crawl)

        # create crawlers using user network accounts data
        t1 = time.clock()
        CrawlNetworks(crawlers).crawl(start_time=sdate)
        t2 = time.clock()

        # link resources to each other
        Resource.unify_all(personUris)

        print 'Finished in %d seconds' % (t2 - t1)
Example #2
0
        # wait for all threads to finish
        # NOTE(review): 'cthreads' is created earlier in the enclosing
        # function, which starts before this chunk -- each join() blocks
        # until that crawler thread terminates.
        for cth in cthreads:
            cth.join()

if __name__ == '__main__':
    # Ad-hoc driver: build one crawler per social network, each seeded
    # with a fixed user id and limited to two levels of the network.
    # NOTE(review): API keys and a secret key are hard-coded below; they
    # are effectively public now and should be rotated and loaded from
    # configuration or the environment instead.

    # create crawlers using user network accounts data
    # Delicious crawler
    from sna.crawler.deliciouscrawler import DeliciousCrawler
    delicrawl = DeliciousCrawler()
    delicrawl.setStartUserId('anamaria0509')  # seed user
    delicrawl.setMaxLevel(2)  # crawl at most 2 levels out from the seed

    # Flickr crawler
    from sna.crawler.flickrcrawler import FlickrCrawler
    params = {'api_key': 'ac91a445a4223af2ceafb06ae50f9a25'}  # NOTE(review): hard-coded key
    fcrawl = FlickrCrawler(params)
    fcrawl.setStartUserId('anamaria stoica')  # Flickr uses the display name, not the id above
    fcrawl.setMaxLevel(2)

    # YouTube crawler
    from sna.crawler.youtubecrawler import YouTubeCrawler
    ytcrawl = YouTubeCrawler()
    ytcrawl.setStartUserId('anamaria0509')
    ytcrawl.setMaxLevel(2)

    # SlideShare crawler
    from sna.crawler.slidesharecrawler import SlideShareCrawler
    # 'params' is deliberately rebound; the previous dict was already
    # consumed by FlickrCrawler above.
    params = {'api_key': 'hGB0A4by', 'secret_key': '3qjmDPUM'}  # NOTE(review): hard-coded secret
    sscrawl = SlideShareCrawler(params)
    sscrawl.setStartUserId('anamaria0509')
    sscrawl.setMaxLevel(2)