Пример #1
0
def main():
    """main function"""
    # 开始准备
    prepare()
    thread_pool = []
    while_n = 0  # 循环计数器
    imglist = []
    make_dir(Config.DIRECTORY)
    print 'Generate search url'
    url = base_url(get_keyword())
    # 下载 #############
    # 获取搜索结果数量并与_count比较取其较小值
    count = min(search_result(url), Config.COUNT)
    # 没有搜索结果时退出
    if not count:
        print "No search result at current condition."
        sys.exit(1)
    # 获得指定数量的url, 存放于list
    print 'Fetching page',
    while len(imglist) < count:
        print while_n,
        while_n += 1
        tmplist = get_image_url_list(url)
        imglist = imglist + tmplist
        url = next_page(url, len(tmplist))
    print ''  # 换行
    count = len(imglist)
    print "There're %d files to download" % count
#    # 将已有文件从imglist中去除
#    imglist = [url for url in imglist
#          if not get_filename_from_url(url) in os.listdir(Config.DIRECTORY)]
#    print "There's %d files already downloaded." % (count - len(imglist))
    # 下载该list
    print 'Fetching list of %d files' % len(imglist)
    queue = Queue()
    for url in imglist:
        queue.put(url)
    failure = []
    for _ in range(Config.THREAD_COUNT):
        thread_pool.append(start_new_thread(download_from_queue, (
                    queue, failure, Config.DIRECTORY)))
    queue.join()
    print "%d failed to fetch." % len(failure)
Пример #2
0
def main():
  # 开始准备
  prepare()
  while_n = 0 # 循环计数器
  imglist = []
  makedir(Config.directory)
  print 'Generate search url'
  URL = baseURL()
  # 下载 #############
  # 获取搜索结果数量并与_count比较取其较小值
  count = min(searchResult(URL), Config.count)
  # 没有搜索结果时退出
  if not count:
    print "No search result at current condition."
    sys.exit(1)
  # 获得指定数量的url, 存放于list  
  print 'Fetching page',
  while len(imglist) < count:
    print while_n,
    while_n += 1
    tmplist = getImageUrlList(URL)
    imglist = imglist + tmplist
    URL = nextPage(URL, len(tmplist))
  print '' # 换行
  count = len(imglist)
  print "There're %d files to download" % count
  # 将已有文件从imglist中去除
  imglist = [url for url in imglist
             if not getFilenameFromURL(url) in os.listdir(Config.directory)]
  print "There's %d files already downloaded." % (count - len(imglist))
  # 下载该list 
  print 'Fetching list of %d files' % len(imglist)
  queue = Queue()
  for url in imglist:
    queue.put(url)
  failure = []
  for i in range(Config.thread_count):
    start_new_thread(downloadFromQueue, (
                                         queue, failure, Config.directory, Config.timeout))
  queue.join()
  print "%d failed to fetch." % len(failure)