# Fragment of a month loop: `i` is the current month index and `day` bounds
# the day loop below — both defined by enclosing code outside this view.
# NOTE(review): reconstructed from a whitespace-mangled one-line paste; the
# statement order is preserved exactly.
m.append([i])
mstr = str(i)  # month as a string; presumably not zero-padded — TODO confirm URLs accept that

d = []  # collects day labels; not read in this fragment — may be used later, TODO confirm
curpath = "/tmp/"
os.chdir(curpath)

# Crawl the El Pais front page ("portada") for every day of the month.
# NOTE(review): range(1, day) excludes `day` itself — confirm that is intended.
for j in range(1, day):
    if (j < 10):
        d.append(["0" + str(j)])
        dstr = "0" + str(j)  # zero-pad single-digit days for the URL
    else:
        d.append([j])
        dstr = str(j)
    url = "https://elpais.com/hemeroteca/elpais/2017/" + mstr + "/" + dstr + "/m/portada.html"
    print(url)

    # One output directory per month-day pair, e.g. "3-07"; reused if present.
    path = mstr + "-" + dstr
    if (os.path.isdir(path)):
        print("Path is created")
    else:
        os.makedirs(path)
    os.chdir(path)

    # Gather same-host links two levels deep from the front page, then
    # download each one into the current (per-day) directory.
    c = Crawler(url)
    c.urlsLevelHost(2)
    for u in c.urls:
        caux = Crawler(u)
        faux = Formatter(u)
        # Timestamped file name keeps downloads from the same host unique.
        name = faux.hostFromUrl() + str(time.time())
        caux.downloadOneUrlThread(name)
    os.chdir(curpath)  # back to /tmp/ before the next day
# Fragment of a month loop for the El Mundo archive: `mstr`, `curpath` and
# `day` are defined earlier in the file (outside this view).
# NOTE(review): reconstructed from a whitespace-mangled one-line paste; the
# statement order is preserved exactly.
d = []  # collects day labels; not read in this fragment — TODO confirm later use
# print(os.getcwd())

# Crawl the El Mundo archive index page for every day of the month.
# NOTE(review): range(1, day) excludes `day` itself — confirm that is intended.
for j in range(1, day):
    if (j < 10):
        d.append(["0" + str(j)])
        dstr = "0" + str(j)  # zero-pad single-digit days for the URL
    else:
        d.append([j])
        dstr = str(j)
    url = "https://elmundo.es/elmundo/hemeroteca/2017/" + mstr + "/" + dstr + "/m/index.html"
    print(url)
    logging.info(url)

    # One output directory per month-day pair, e.g. "3-07"; reused if present.
    path = mstr + "-" + dstr
    if (os.path.isdir(path)):
        print("Path is created")
        logging.info('Path is created')
    else:
        os.makedirs(path)
    os.chdir(path)

    # Gather same-host links two levels deep from the index page, then
    # download each one as an .xml file into the current (per-day) directory.
    c = Crawler(url)
    c.urlsLevelHost(2)
    for u in c.urls:
        caux = Crawler(u)
        faux = Formatter(u)
        name = faux.hostFromUrl() + str(time.time()) + ".xml"
        # NOTE(review): the logged value re-calls time.time(), so it differs
        # slightly from `name` — confirm whether logging `name` was intended.
        logging.info(faux.hostFromUrl() + str(time.time()))
        caux.downloadOneUrlNewspaperThread(name)
    os.chdir(curpath)  # back to the base directory before the next day