Exemplo n.º 1
0
def main():
  # 开始准备
  prepare()
  while_n = 0 # 循环计数器
  imglist = []
  makedir(Config.directory)
  print 'Generate search url'
  URL = baseURL()
  # 下载 #############
  # 获取搜索结果数量并与_count比较取其较小值
  count = min(searchResult(URL), Config.count)
  # 没有搜索结果时退出
  if not count:
    print "No search result at current condition."
    sys.exit(1)
  # 获得指定数量的url, 存放于list  
  print 'Fetching page',
  while len(imglist) < count:
    print while_n,
    while_n += 1
    tmplist = getImageUrlList(URL)
    imglist = imglist + tmplist
    URL = nextPage(URL, len(tmplist))
  print '' # 换行
  count = len(imglist)
  print "There're %d files to download" % count
  # 将已有文件从imglist中去除
  imglist = [url for url in imglist
             if not getFilenameFromURL(url) in os.listdir(Config.directory)]
  print "There's %d files already downloaded." % (count - len(imglist))
  # 下载该list 
  print 'Fetching list of %d files' % len(imglist)
  queue = Queue()
  for url in imglist:
    queue.put(url)
  failure = []
  for i in range(Config.thread_count):
    start_new_thread(downloadFromQueue, (
                                         queue, failure, Config.directory, Config.timeout))
  queue.join()
  print "%d failed to fetch." % len(failure)
Exemplo n.º 2
0
  count = 500     # 要下载的数量,自动进到20的倍数
  # 代理设置
  proxy = 'http://localhost:7001'
  use_proxy = False
  # 开始准备
  if use_proxy: # 设置代理
    proxy_install(proxy)

  while_n = 0 # 循环计数器
  imglist = []
  makedir(directory)
  print 'Generate search url'
  searchURL = search(keyword.encode('gbk'), addtional)
  # 下载 #############
  # 获取搜索结果数量并与count比较取其较小值
  count = min(searchResult(searchURL), count)
  # 没有搜索结果时退出
  if not count:
    print "No search result at current condition."
    sys.exit(1)
  # 获得指定数量的url, 存放于list  ,one page by one page
  print 'Fetching page',
  while len(imglist) < count:
    print while_n,
    #mark the times of while
    while_n += 1
    tmplist = getImageUrlList(searchURL)
    imglist = imglist + tmplist
    searchURL = nextPage(searchURL, len(tmplist))
  print '' # 换行
  count = len(imglist)