Exemplo n.º 1
0
def run_command(name):
    """Register the VOA Learning English archive URLs and run a crawl worker.

    Each archive URL is passed to ``update_crawl_status`` for the
    ``'voa_news'`` crawl class with a status argument of ``0``; afterwards
    the worker imported from ``easycrawl.<name>.app.worker`` is run and
    finished.

    Args:
        name: Crawl site name. It is used to look up the ``CrawlSite`` row
            and to build the dotted import path of the worker object.
    """
    # Imports are kept local to the command, as in the original layout.
    from dojang.util import import_object
    from easycrawl.models import CrawlSite
    from easycrawl.helper import create_crawl_class, update_crawl_status
    from easycrawl.database import crawldb

    # NOTE(review): the row fetched here is never used below. The query is
    # kept so the command touches the database exactly as before -- confirm
    # whether it can be dropped.
    site = CrawlSite.query.filter_by(name=name).first()

    worker_name = "easycrawl.%s.app.worker" % name
    # Parenthesised single-argument form: prints the same text on Python 2
    # and still parses on Python 3.
    print(worker_name)
    worker = import_object(worker_name)

    # NOTE(review): the crawl class is hard-coded to 'voa_news' regardless
    # of `name` -- confirm this is intended.
    clz = create_crawl_class('voa_news')
    urls = [
        'http://learningenglish.voanews.com/archive/as-it-is/latest/3521/3521.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-american-mosaic/latest/986/986.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-in-the-news/latest/978/1577.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-making-of-a-nation/latest/978/979.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-science-in-the-news/latest/978/1579.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-this-is-america/latest/978/1580.html',
        'http://learningenglish.voanews.com/archive/learningenglish-programs-radio-words-stories/latest/978/987.html',
    ]

    for url in urls:
        update_crawl_status(clz, crawldb, url, 0)

    # run() is deliberately invoked twice, exactly as before.
    # NOTE(review): presumably the second pass handles work queued by the
    # first -- confirm against the worker implementation.
    worker.run(callback=None)
    worker.run(callback=None)
    worker.finished()
Exemplo n.º 2
0
def run_command(name, second_arg):
    """Dispatch a command by name, or run the crawl worker for a site.

    Args:
        name: ``'dl_eslpod'`` calls ``download_eslmp3()``;
            ``'parse_eslpod'`` (or the legacy misspelling ``'parse_elspod'``)
            calls ``parse_eslmp3()``. Any other value is treated as a crawl
            site name and the worker at ``easycrawl.<name>.app.worker`` is
            imported and driven according to ``second_arg``.
        second_arg: ``'debug'`` calls ``worker.debug()`` then
            ``worker.finished()``; ``'parse'`` calls ``worker.parse()`` only;
            anything else calls ``worker.run()`` then ``worker.finished()``.
            Ignored for the two eslpod commands.
    """
    if name == 'dl_eslpod':
        download_eslmp3()
        return

    # The original only matched the transposed spelling "parse_elspod"; the
    # correct spelling is accepted too, and the old one keeps working.
    if name in ("parse_eslpod", "parse_elspod"):
        parse_eslmp3()
        return

    # Imports are kept local to the command, as in the original layout.
    from dojang.util import import_object
    from easycrawl.models import CrawlSite

    # NOTE(review): the row fetched here is never used below. The query is
    # kept so the command touches the database exactly as before -- confirm
    # whether it can be dropped.
    site = CrawlSite.query.filter_by(name=name).first()

    worker_name = "easycrawl.%s.app.worker" % name
    # Parenthesised single-argument prints behave the same on Python 2 and
    # still parse on Python 3.
    print(worker_name)
    worker = import_object(worker_name)

    if second_arg == 'debug':
        print("\nexec debug()\n\n")
        worker.debug()
        worker.finished()
    elif second_arg == 'parse':
        # NOTE(review): unlike the other modes, finished() is not called
        # after parse() -- preserved as-is; confirm this is intentional.
        print("\nexec parse()\n\n")
        worker.parse()
    else:
        print("\nexec run()\n\n")
        worker.run()
        worker.finished()
Exemplo n.º 3
0
def run_command(name):
    """Register the imax.im movie listing URL and run a crawl worker.

    The first listing page is passed to ``update_crawl_status`` for the
    ``'imax_im'`` crawl class with a status argument of ``0``; afterwards
    the worker imported from ``easycrawl.<name>.app.worker`` is run and
    finished.

    Args:
        name: Crawl site name. It is used to look up the ``CrawlSite`` row
            and to build the dotted import path of the worker object.
    """
    # Imports are kept local to the command, as in the original layout.
    from dojang.util import import_object
    from easycrawl.models import CrawlSite
    from easycrawl.helper import create_crawl_class, update_crawl_status
    from easycrawl.database import crawldb

    # NOTE(review): the row fetched here is never used below. The query is
    # kept so the command touches the database exactly as before -- confirm
    # whether it can be dropped.
    site = CrawlSite.query.filter_by(name=name).first()

    worker_name = "easycrawl.%s.app.worker" % name
    # Parenthesised single-argument form: prints the same text on Python 2
    # and still parses on Python 3.
    print(worker_name)
    worker = import_object(worker_name)

    # NOTE(review): the crawl class is hard-coded to 'imax_im' regardless
    # of `name` -- confirm this is intended.
    clz = create_crawl_class('imax_im')
    url = 'http://imax.im/movies?page=1'
    update_crawl_status(clz, crawldb, url, 0)

    # run() is deliberately invoked twice, exactly as before.
    # NOTE(review): presumably the second pass handles work queued by the
    # first -- confirm against the worker implementation.
    worker.run(callback=None)
    worker.run(callback=None)
    worker.finished()
Exemplo n.º 4
0
def run_command(name):
    """Register the eslpod.com episode index URL and run a crawl worker.

    The "show all" index page is passed to ``update_crawl_status`` for the
    ``'esl_pod'`` crawl class with a status argument of ``0``; afterwards
    the worker imported from ``easycrawl.<name>.app.worker`` is run and
    finished.

    Args:
        name: Crawl site name. It is used to look up the ``CrawlSite`` row
            and to build the dotted import path of the worker object.
    """
    # Imports are kept local to the command, as in the original layout.
    from dojang.util import import_object
    from easycrawl.models import CrawlSite
    from easycrawl.helper import create_crawl_class, update_crawl_status
    from easycrawl.database import crawldb

    # NOTE(review): the row fetched here is never used below. The query is
    # kept so the command touches the database exactly as before -- confirm
    # whether it can be dropped.
    site = CrawlSite.query.filter_by(name=name).first()

    worker_name = "easycrawl.%s.app.worker" % name
    # Parenthesised single-argument form: prints the same text on Python 2
    # and still parses on Python 3.
    print(worker_name)
    worker = import_object(worker_name)

    # NOTE(review): the crawl class is hard-coded to 'esl_pod' regardless
    # of `name` -- confirm this is intended.
    clz = create_crawl_class('esl_pod')
    url = 'http://www.eslpod.com/website/show_all.php?cat_id=-59456'
    update_crawl_status(clz, crawldb, url, 0)

    # run() is deliberately invoked twice, exactly as before.
    # NOTE(review): presumably the second pass handles work queued by the
    # first -- confirm against the worker implementation.
    worker.run(callback=None)
    worker.run(callback=None)
    worker.finished()