Exemplo n.º 1
0
        def button1():
            """Download the book named in ``e1`` into a single text file.

            Reads the book name from ``e1`` and the output folder from ``e3``,
            resolves the book through ``self.url_data``, and writes every
            chapter to ``<save_path><book_name>.txt``. Status and progress
            text is shown by replacing the content of ``e4``.

            An existing output file larger than 100 KB is kept and the
            download is aborted; a smaller one is deleted and rebuilt.
            """
            # Reset the per-download state before resolving the new book.
            self.target = []
            self.names = []  # chapter titles
            self.urls = []  # chapter links
            self.nums = 0  # number of chapters

            self.book_name = str(e1.get())
            self.target = self.url_data[self.book_name]
            self.save_path = str(e3.get())
            myfunc.mkdir(model.save_path)
            model.get_download_url()

            book_save_path = model.save_path + f'{self.book_name}.txt'
            if os.path.exists(book_save_path):
                size_kb = os.path.getsize(book_save_path) / 1024
                if size_kb > 100:
                    # Files above 100 KB are never deleted: report and stop.
                    e4.delete(0, "end")
                    e4.insert(0, f'已存在{self.book_name},中止爬虫')
                    return
                # Anything smaller is discarded and downloaded again.
                os.remove(book_save_path)

            e4.delete(0, "end")
            e4.insert(0, f'《{self.book_name}》开始下载')

            for idx in range(model.nums):
                chapter_title = model.names[idx]
                target_file = model.save_path + f'{self.book_name}.txt'
                chapter_text = model.get_contents(model.urls[idx])
                model.writer(chapter_title, target_file, chapter_text)

                # Show the share of chapters written so far.
                percent = float((idx + 1) / model.nums * 100)
                e4.delete(0, "end")
                e4.insert(0, "%.2f%%" % percent)

            print(f"{self.book_name}下载完成")
Exemplo n.º 2
0
 def __init__(self, name="火影忍者"):
     """Store the comic name and prepare the request/output configuration.

     Args:
         name: Comic title; it becomes the last component of the output
             folder ``./漫画/<name>/``.

     The output folder is created through ``myfunc.mkdir`` before returning.
     """
     self.name = name
     self.index = 0
     self.save_path_word = []

     # Site addresses used by this downloader.
     self.server = 'https://manhua.fzdm.com/'
     self.web = 'http://p1.manhuapan.com/'

     # Browser-like User-Agent sent with every request.
     self.headers = {
         'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                        'AppleWebKit/537.36 (KHTML, like Gecko) '
                        'Chrome/56.0.2924.87 Safari/537.36'),
     }

     self.save_path = "".join(('./漫画/', name, "/"))
     # An existing output folder is deliberately left in place (the earlier
     # removal step was disabled), so previous downloads are kept.
     myfunc.mkdir(self.save_path)
Exemplo n.º 3
0
    def download(self):
        """Walk every chapter linked from ``self.traget_url`` and fetch its pages.

        The index page is requested with ``self.headers`` and every ``<a>``
        tag whose text form contains ``href="..." title="..."`` is taken as
        a chapter link. Chapters are processed in the reverse of their page
        order. For each chapter a folder ``self.save_path + <title> + '/'``
        is created, and the page URLs ``index_0.html`` .. ``index_199.html``
        are passed to ``self.get_info`` one by one.

        A page is skipped when ``<folder><index>.jpg`` already exists. The
        first page that raises an ordinary exception ends the current
        chapter; ``KeyboardInterrupt`` and ``SystemExit`` now propagate
        instead of being swallowed.

        Side effects: updates ``self.traget_chapter_url``, ``self.url_name``,
        ``self.save_path_word``, ``self.chapter_name`` and ``self.index``.
        """
        # The domain rule below may be unreliable, so the links are rebuilt here.
        urls = []
        url_names = []
        req = requests.get(url=self.traget_url, headers=self.headers)
        # NOTE(review): no parser is named, so BeautifulSoup chooses whichever
        # is installed; kept as-is to avoid changing how the page is parsed.
        bf = BeautifulSoup(req.text)
        for each in bf.find_all('a'):
            tag_text = str(each)
            hrefs = re.findall('''.*href="(.*)" title=.*''', tag_text)
            titles = re.findall('''.*title="(.*)">.*''', tag_text)
            # Use the first capture directly. Slicing the repr of the result
            # list produced garbage for several matches or escaped characters.
            self.traget_chapter_url = self.traget_url + (hrefs[0] if hrefs else '')
            self.url_name = titles[0] if titles else ''
            # Only anchors that actually contributed a relative path are kept.
            if len(self.traget_chapter_url) > len(self.traget_url):
                url_names.append(self.url_name)
                urls.append(self.traget_chapter_url)

        # Process chapters in the reverse of the order they appear on the page.
        urls.reverse()
        url_names.reverse()

        for chapter_url, chapter_name in zip(urls, url_names):
            self.save_path_word = self.save_path + chapter_name + '/'
            myfunc.mkdir(self.save_path_word)
            self.chapter_name = chapter_name
            self.index = 0
            for page in range(0, 200):
                url = chapter_url + 'index_{}.html'.format(page)
                try:
                    self.index = self.index + 1
                    image_path = self.save_path_word + str(self.index) + '.jpg'
                    if os.path.exists(image_path):
                        print(f"已存在{image_path} 跳过")
                        continue
                    self.get_info(url)
                except Exception:
                    # Presumably a failing page means the chapter has no more
                    # pages (TODO confirm against get_info); move on to the
                    # next chapter. Exception, not a bare except, so Ctrl+C
                    # still stops the program.
                    break
Exemplo n.º 4
0
        tkinter.Button(window, text="搜索", width=10,
                       command=button2).grid(row=4,
                                             column=0,
                                             sticky="w",
                                             padx=10,
                                             pady=5)
        tkinter.Button(window, text="退出", width=10,
                       command=window.quit).grid(row=4,
                                                 column=1,
                                                 sticky="e",
                                                 padx=10,
                                                 pady=5)
        window.mainloop()


if __name__ == "__main__":
    # Script entry point: build the downloader, make sure its output folder
    # exists, then hand control to the GUI.
    # NOTE(review): `model` stays a module-level name because GUI callbacks
    # appear to reference it directly — confirm before renaming.
    model = downloader()
    myfunc.mkdir(model.save_path)
    model.tkinter()
Exemplo n.º 5
0
import copy
import gc
import os
import pickle
import random
import re
import time
import warnings

import numpy
import pandas
import torch

from qqd_model import myfunc

# Start from an empty dataset directory: remove it, then create it again.
data_path = "../Data/dataset"
myfunc.rmdir(data_path)
myfunc.mkdir(data_path)

# Settings
# NOTE(review): units and exact meaning of these values are not visible in
# this part of the script — confirm where they are consumed.
data_interval = 120
predict_interval = 3
profit_chg = 5
dev_date = '2020-11-30'
train_split_num = 200000

# Load the data: one DataFrame per file found under the stock data folder.
stockprice = []
for root, dirs, files in os.walk('../Data/stockdata/'):
    for fname in files:
        # Second dot-separated part of the file name.
        code = fname.split('.')[1]
        # os.walk also descends into sub-directories, so the path has to be
        # built from `root`; for the top folder this is the same path as before.
        tmp = pandas.read_csv(os.path.join(root, fname))
        stockprice.append(tmp)