def getUrl():
    directory = path2.get()
    pages = int(page2.get())
    if pages < 1:
        pages = 1
    if (pages is not None and pages != "") and (directory is not None and directory != ""):
        if askokcancel('About to start fetching URLs!', 'Are you sure you want to start?\nNote: there is no GUI feedback while fetching (because I am lazy!)'):
            showinfo("Fetching", "Fetching in progress, please do not close the program!!!")
            dl.executeUrls(pages, directory)
            top = Toplevel()
            top.title('URL fetching finished!')
            # Center the window on the screen
            sw = top.winfo_screenwidth()   # screen width
            sh = top.winfo_screenheight()  # screen height
            ww = 600
            wh = 300
            x = (sw - ww) / 2
            y = (sh - wh) / 2
            # Set the window size (width x height)
            top.geometry("%dx%d+%d+%d" % (ww, wh, x, y))
            # window.geometry('600x400')  # the 'x' here is a lowercase letter x
            label = tk.Label(top, text="Done fetching!")
            label.pack()
            label2 = tk.Label(top, text="File path: " + directory)
            label2.pack()
            button = tk.Button(top, text="Open folder", command=lambda: open_dir(system, directory))
            button.pack()
    else:
        showinfo('Empty path or invalid page count!', 'Empty path or invalid page count!')
def parse(url: str, url_type: str) -> dict:
    file_path = Download.downloadFile(url)
    deserialize_raw = JDeserialize.Deserialize(file_path)
    autoconfig = parse_jd(deserialize_raw, url_type)
    Download.cleanup(file_path)
    return autoconfig
def searchQuery(q, start):
    API_HOST = choice(loadBestNIP(config.TOP_IP_FILE, 20))
    SEARCH_API = 'https://' + API_HOST + '/search?q='
    dobj = Download(SEARCH_API + q + '&start=' + start)
    if dobj.doRequest():
        return None
    else:
        return dobj.getSOURCE()
def download(self, command):
    # NOTE: check input
    # Passes a reference to the library so the download can check whether
    # the song is already in the library and stop the process early.
    currentDL = Download(command, self.library)
    if currentDL.downloadedSuccessfully():
        self.library.addSong(currentDL.getSong())
        print "[!] DONE!"
    else:
        print "[!] ERROR WHILE DOWNLOADING"
def queryGoogle(q, start):
    API_HOST = choice(loadBestNIP(config.TOP_IP_FILE, 20))
    # print "Debug: " + API_HOST
    GOOGLE_API = 'http://' + API_HOST + '/search?q='
    dobj = Download(GOOGLE_API + q + '&start=' + start)
    if dobj.doRequest():
        return None
    else:
        return dobj.getSOURCE()
def scholarQuery(q, start):
    API_HOST = choice(loadBestNIP(config.TOP_IP_FILE, 20))
    SCHOLAR_API = 'http://' + API_HOST + '/scholar?q='
    dobj = Download(SCHOLAR_API + q + '&start=' + start)
    if dobj.doRequest():
        return None
    else:
        return dobj.getSOURCE()
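# searchQuery, queryGoogle and scholarQuery above differ only in URL scheme and
# endpoint. A minimal sketch of a shared helper, reusing the same Download,
# choice, loadBestNIP and config objects; the _query name and scheme parameter
# are illustrative, not from the source.
def _query(endpoint, q, start, scheme='http'):
    api_host = choice(loadBestNIP(config.TOP_IP_FILE, 20))
    dobj = Download(scheme + '://' + api_host + endpoint + q + '&start=' + start)
    # doRequest() is truthy on failure, matching the originals.
    return None if dobj.doRequest() else dobj.getSOURCE()

# e.g. scholarQuery(q, start) becomes _query('/scholar?q=', q, start)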
def extract_articles():
    matches = []
    for root, dirnames, filenames in os.walk('./Download'):
        for filename in fnmatch.filter(filenames, '*.tar.gz'):
            matches.append(os.path.join(root, filename))
    for match in matches:
        Download.tar_gz(match)
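# Download.tar_gz is called above but not shown in this section. A minimal
# sketch of what such an extraction helper could look like, assuming it simply
# unpacks the archive into the directory that contains it; this implementation
# is an assumption, not the module's actual code.
import os
import tarfile

def tar_gz(archive_path):
    # Extract a .tar.gz archive next to itself.
    with tarfile.open(archive_path, 'r:gz') as tar:
        tar.extractall(path=os.path.dirname(archive_path))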
def run(self):
    global mutex
    sP = sfunc.create_socket_client(func.roll_the_dice(self.peer[0]), self.peer[1])
    if sP is None:
        # tfunc.error('Error: could not open socket in download')
        var = ""  # just to give it something to do
    else:
        try:
            if mutex.acquire(timeout=const.TIME_TO_UPDATE):
                dnl.update_own_memory(self.md5, self.partN, self.listPartOwned, "2")
                mutex.release()
                # tfunc.gtext("Starting download of part " + str(self.partN) + " from " + str(self.peer[0], "ascii"))
                pk = pack.request_download(self.md5, self.partN)
                sP.sendall(pk)
                ricevutoByte = sP.recv(const.LENGTH_HEADER)
                if str(ricevutoByte[0:4], "ascii") == pack.CODE_ANSWER_DOWNLOAD:
                    nChunk = int(ricevutoByte[4:10])
                    ricevutoByte = b''
                    i = 0
                    while i != nChunk:
                        ricevutoLen = sP.recv(const.LENGTH_NCHUNK)
                        while len(ricevutoLen) < const.LENGTH_NCHUNK:
                            ricevutoLen = ricevutoLen + sP.recv(const.LENGTH_NCHUNK - len(ricevutoLen))
                        buff = sP.recv(int(ricevutoLen))
                        while len(buff) < int(ricevutoLen):
                            buff = buff + sP.recv(int(ricevutoLen) - len(buff))
                        ricevutoByte = ricevutoByte + buff
                        i = i + 1
                    sP.close()
                    # Write the part just downloaded into the file; if the file does not exist, create it (e.g. b'00000')
                    dnl.create_part(ricevutoByte, self.fileName, self.partN, self.lenFile, self.lenPart)
                    if mutex.acquire(timeout=const.TIME_TO_UPDATE):
                        # Update my own memory
                        dnl.update_own_memory(self.md5, self.partN, self.listPartOwned, "1")
                        mutex.release()
                        pfunc.part_all(self.listPartOwned[self.md5][0])
                        # Send the update to the tracker
                        send_update(self.t_host, self.sessionID, self.md5, self.partN, self.listPartOwned, self.peer)
                    else:
                        raise Exception("Error Download Code")
                else:
                    raise Exception("Error Download Code")
            else:
                raise Exception("Error Download Code")
        except Exception as e:
            # tfunc.write_daemon_error(self.name, str(self.peer[0], "ascii"), "DOWNLOAD ERROR: {0}".format(e))
            dnl.update_own_memory(self.md5, self.partN, self.listPartOwned, "0")
def __init__(self):
    self.UrlAndIDContr = URLSchedul.UrlManager()
    self.downloader = Download.Downloader()
    self.parser = Html_pareser.HtmlPare()
    self.ProceClean = Pipeline.pinline()
    self.outjson = FeeDExport.FeedExp()
    self.CollectAllData = []
def classify():
    prints("Fetching category information")
    url = "http://wap.xqishuta.com/sort.html"
    for i in range(1, 4):
        try:
            response = Download.download_page(url, url)
            break
        except:
            if i != 3:
                prints("Attempt {0} to fetch page {1} failed".format(i, url))
                continue
            else:
                prints("Failed to fetch page {}, exiting".format(url))
                exit(0)
    html = etree.HTML(response.content.decode("UTF-8"))
    url_lists = html.xpath("//div[@class='menu_nav']/ul/li/a/@href")
    name_list = html.xpath("//div[@class='menu_nav']/ul/li/a/text()")
    lists = {}
    for i in range(0, len(url_lists)):
        lists[name_list[i]] = "http://wap.xqishuta.com" + url_lists[i]
    prints("Category information:")
    for string in lists:
        prints(string, " ")
    prints("\n")
    return lists
def get_novellilsts(type, url):
    prints("-----------------------------------------------------")
    prints("Fetching page {1} under category {0}".format(type, url))
    for i in range(1, 4):
        try:
            response = Download.download_page(url, url)
            break
        except:
            if i != 3:
                prints("Attempt {0} to fetch page {1} failed".format(i, url))
                continue
            else:
                prints("Failed to fetch page {}".format(url))
                error_url.append(url)
                return
    html = etree.HTML(response.content.decode("UTF-8"))
    url_list = html.xpath("//div[@class='article']//h6/a/@href")
    novel_lists = []
    for string in url_list:
        temp = "http://wap.xqishuta.com/" + string[1:]
        novel_lists.append(temp)
    prints("Fetched data successfully; this page lists {} books in total".format(len(novel_lists)))
    return novel_lists
def get_urllists(type, url):
    prints("Fetching book information under category {}".format(type))
    first_url = url + "index_1.html"
    for i in range(1, 4):
        try:
            response = Download.download_page(first_url, url)
            break
        except:
            if i != 3:
                prints("Attempt {0} to fetch page {1} failed".format(i, url))
                continue
            else:
                prints("Failed to fetch page {}".format(url))
                error_url.append(first_url)
                return
    html = etree.HTML(response.content.decode("UTF-8"))
    temp = html.xpath("//div[@class='page']/a/@href")
    temp = temp[1]
    temp = temp.split(".")
    temp = temp[0].split("_")
    last_page = int(str(temp[1]))
    url_list = []
    for i in range(1, last_page + 1):
        temp = url + "index_" + str(i) + ".html"
        url_list.append(temp)
    prints("Category {0} has {1} pages of data in total".format(type, last_page))
    return url_list
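# The three-attempt retry loop above reappears verbatim in classify,
# get_novellilsts, get_urllists and get_novel below. A minimal sketch of how it
# could be factored out, reusing the same Download.download_page and prints
# helpers; the fetch_with_retry name and attempts parameter are illustrative.
def fetch_with_retry(url, referer, attempts=3):
    for i in range(1, attempts + 1):
        try:
            return Download.download_page(url, referer)
        except Exception:
            prints("Attempt {0} to fetch page {1} failed".format(i, url))
    prints("Failed to fetch page {}".format(url))
    return None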
def on_buttondownload_clicked(self, widget):
    """Event handler for the download button click."""
    try:
        Download.Downloadmp3(self.entry1.get_text())
        Download.Downloadmp3.movefile("Download")
    except Exception:
        raise
def __main__():
    check_files_and_folders()
    socket = Socket.ServerSocket()
    connection = socket.Socket()
    sr = Server(connection,
                Cryptography.session_crypto(None),
                Registery.Registery(),
                Login.serverLogin(),
                Download.Download(),
                Upload.Upload(),
                List.List(),
                Read.Read(),
                Write.Write(),
                SessionKeyExchange.ServerSession(None),
                DACCommands.DACCommands(),
                Auditor.Auditor())
    sr.Handler()
def Update(endDate, dataframe):
    startDate = max(dataframe.index)
    delta = endDate - startDate
    if delta.days < 0:
        endDate, startDate = startDate, endDate
    elif delta.days == 0:
        endDate = startDate + datetime.timedelta(days=2)
    new_data = Download.download(startDate, endDate)
    return dataframe.append(new_data).sort_index()
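# A minimal usage sketch for Update, assuming Download.download returns a
# date-indexed pandas DataFrame, as the .append(...).sort_index() call implies.
# The sample dates and column are illustrative; note that on pandas 2.x
# DataFrame.append is gone and pd.concat([dataframe, new_data]) is the equivalent.
import datetime
import pandas as pd

df = pd.DataFrame({"close": [1.0, 1.1]},
                  index=[datetime.datetime(2019, 1, 2), datetime.datetime(2019, 1, 3)])
# Fetches everything between the last stored date and the requested end date.
df = Update(datetime.datetime(2019, 1, 10), df)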
def __init__(self):
    self.UrlAndIDContr = URLSchedul.UrlManager()
    self.downloader = Download.Downloader()
    self.parser = Html_pareser.HtmlPare()
    self.ProceClean = Pipeline.pinline()
    self.outjson = FeeDExport.FeedExp()
    self.CollectAllData = {}
    self.errGeoGet = []
    msgKeys = ["geo_code", "latitude", "longitude", "nation", "province",
               "city", "district", "street", "street_number"]
    for k in msgKeys:
        self.CollectAllData[k] = []
def run(self):
    Download.CreatePath()
    t = open("CardsDB.txt")
    ln = 0
    for ch in t:
        start = time.time()
        ln = ln + 1
        ch = ch.rstrip('\n')
        if ch != "":
            size = Download.Download(ch + ".jpg")
        end = time.time()
        elapsedtime = end - start
        linevar = 13636 - ln
        speed = 0
        if type(size) != type(None):
            try:
                speed = ((int(size) / 1024) / elapsedtime)
            except ZeroDivisionError:
                pass
        percentage = 100 - round((linevar * 50) / 6815)
        if percentage > 99 and linevar != 0:
            percentage = 99
        if ch != "":
            if speed == 0:
                progr = "Skipping " + ch + ".jpg" + "! Already Exists..."
                print(progr)
            else:
                progr = ("Downloading " + ch + ".jpg" + " @" + str(round(speed)) +
                         "kb/s " + str(linevar) + " cards left...")
                print(progr)
            time.sleep(0.001)
            self.change_perc.emit(percentage)
            self.change_lab.emit(progr)
            # self.progressBar.setValue(percentage)
    print("Task Completed! Check your pics folder.")
    t.close()
def OnDownload(self, event):
    """Event handler for the download button click."""
    self.filename = None
    self.busy = True
    if len(self.servers) == 0:
        rc = Messages.messageChoice(
            "SAP Server configuration not found. Do you want to configure?",
            "Server configuration")
        if rc == wx.ID_YES:
            ConfigWindow.showConfigWindow(self)
    else:
        Download.Show(self)
    self.busy = False
    event.Skip()
def get_novel(url):
    prints("------------------------------------------------------")
    prints("Downloading novel page {0}".format(url))
    for i in range(1, 4):
        try:
            response = Download.download_page(url, url)
            break
        except:
            if i != 3:
                prints("Attempt {0} to fetch page {1} failed".format(i, url))
                continue
            else:
                prints("Failed to fetch page {}".format(url))
                error_url.append(url)
                return
    html = etree.HTML(response.content.decode("UTF-8"))
    img = html.xpath("//div[@class='pic']/img/@src")[0]
    name = html.xpath("//div[@class='cataloginfo']/h3/text()")[0]
    temp = html.xpath("//div[@class='infotype']/p/text()")
    author = temp[0].split(":")[1]
    type = temp[1].split(":")[1]
    date = temp[2].split(":")[1].split("T")[0]
    link = html.xpath("//ul[@class='infolink']//p/script/text()")[0].split("'")[3]
    information = html.xpath("//div[@class='intro']/p/text()")[0]
    prints("Image URL: " + img)
    prints("Book title: " + name)
    prints("Author: " + author)
    prints("Type: " + type)
    prints("Last updated: " + date)
    prints("Download URL: " + link)
    prints("Synopsis: " + information)
    novel = {
        "link": url,
        "img": img,
        "author": author,
        "name": name,
        "type": type,
        "date": date,
        "text_link": link,
        "synopsis": information
    }
    return novel
def download(self):
    print "Start downloading " + self.url
    # Download the YouTube video info
    if self.get_youtube_info() == False:
        return False
    signature = self.download_stream_info["sig"]
    url = self.download_stream_info["url"]
    download_url = "%s&signature=%s" % (url, signature)
    if self.output_file_name == "FF":
        dmn = Download(download_url, self.title)
        self.output_file_name = self.title
    else:
        dmn = Download(download_url, self.output_file_name)
    dmn.download()
    print "\nConverting:\n"
    ofile = "result_" + self.output_file_name + "." + self.ofe
    print ofile
    con = Converter(self.output_file_name, ofile)
    con.ffmpeg_converter(self.audio_codec, self.video_codec)
    return True
def download_image(self, url, filename):
    """
    Download an image and save it to a location
    :param url: The url
    :param filename: The filename
    """
    if self.debug:
        print " Downloading: %s\n => %s" % \
            (url, self.image_folder + filename)
    if path.isfile(self.image_folder + filename):
        return
    data = Download.download_page(url)
    Files.write(data, filename, self.image_folder)
def download(self): """ Download HTML web pages """ if self.debug: print " Downloading: %s" % self.test_file + FILE_EXT_HTML url_list = [self.base_url + self.test_file + FILE_EXT_HTML] else: url_list = Files.get_raw_contents(self.name + ".urls", "./") for web_file in url_list: cached_file = web_file.replace(self.base_url, "") if not Files.file_exists(self.download_folder + cached_file): data = Download.download_page(web_file) Files.write(data, cached_file, self.download_folder) elif self.debug: print " File %s already exists" % (self.download_folder + cached_file)
def create_download(self, name, size, description=GithubObject.NotSet,
                    content_type=GithubObject.NotSet):
    assert isinstance(name, (str, unicode)), name
    assert isinstance(size, (int, long)), size
    assert description is GithubObject.NotSet or isinstance(
        description, (str, unicode)), description
    assert content_type is GithubObject.NotSet or isinstance(
        content_type, (str, unicode)), content_type
    post_parameters = {
        "name": name,
        "size": size,
    }
    if description is not GithubObject.NotSet:
        post_parameters["description"] = description
    if content_type is not GithubObject.NotSet:
        post_parameters["content_type"] = content_type
    headers, data = self._requester.requestAndCheck(
        "POST", self.url + "/downloads", None, post_parameters)
    return Download.Download(self._requester, data, completed=True)
def install(self, url, targetPath, key=None, ui=None):
    """Install the file located in the url given.

    Args:
        url (str): The url to download the package to install.
        targetPath (str): Where to install the package.
        key (str): If the system needs a key to access. Defaults to None.
        ui (InstallatorUI): User Interface connection. Defaults to None.
    """
    if ui:
        ui.progressText.append('Starting downloading files...')
    download = Download.Download(url, key, ui=ui)
    download.download()
    downloadedPath = download.getExtractedPath() or download.getFileDownloaded()
    if ui:
        ui.progressText.append('Installing the files...')
    install = Install.Install(downloadedPath, targetPath, ui=ui)
    install.install()
    if ui:
        ui.progressText.append('Installation successful')
        ui.progressBar.setValue(100)
def EmailHunterDetect(self):
    '''
    Use the EmailHunter JSON API to detect the email format.
    '''
    try:
        # This returns a JSON object
        dl = Download.Download(self.verbose)
        url = "https://api.hunter.io/v2/domain-search?domain=" + \
            self.domain + self.type + "&limit=100&offset=0" + "&api_key=" + self.apikeyv
        r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
        results = r.json()
        pattern = str(results['data']['pattern'])
        if pattern:
            return pattern
        else:
            if self.verbose:
                e = ' [!] No pattern detected via EmailHunter API'
                print helpers.color(e, firewall=True)
            return False
    except Exception as e:
        error = "[!] Major issue with EmailHunter Search: " + str(e)
        print helpers.color(error, warning=True)
def link_carwler(seed_url, wr_csv, max_depth):
    crawler = [seed_url]
    # seen = set(crawler)
    seen = {seed_url: 0}
    # Set a delay time to avoid getting the IP banned
    delay_time = limit_bandwidth(3)
    # Create the CSV to write into
    while crawler:
        url = crawler.pop()
        # Track the depth of the downloaded page to avoid crawler traps
        depth = seen[url]
        # Check delay time
        delay_time.wait(url)
        html = gf.download(url)
        # Write out the crawled data
        if html is not None:
            beautifulsoup(html, wr_csv)
        # If the HTML came back non-empty, keep crawling, as long as the depth does not exceed the maximum
        if html is not None and depth < max_depth:
            for link in get_links(html):
                if link not in seen:  # Skip links already recorded
                    # Every discovered link lives under the current page, so its depth is the current depth + 1
                    seen[link] = depth + 1
                    crawler.append(link)
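# get_links is called above but not defined in this section. A minimal sketch
# of such a helper, assuming it just pulls href values out of the raw HTML; the
# regex approach is an assumption, not the original implementation.
import re

def get_links(html):
    # Return every href attribute value found in the page.
    webpage_regex = re.compile(r'<a[^>]+href=["\'](.*?)["\']', re.IGNORECASE)
    return webpage_regex.findall(html)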
import Download

url = input("Enter the URL: ")
caminho = input("Enter the path to save to: ")
resultado_da_veridicacao = Download.verificando_arquivo_online(url)
if resultado_da_veridicacao[0]:
    Download.download_arquivos(caminho, resultado_da_veridicacao[1])
else:
    print("File is offline or does not exist")

nomes = ['mathias', 'da', 'silva', 'maurilio']
if 'testando' in nomes or 'mathias' in nomes:
    print("You got it")
else:
    print("That's right")

string = "Ola para você"
print(string)
lista = string.split(" ")  # Splits a string into several parts
string = lista[0] + " " + lista[2]
print(string)
string = string.replace(" ", " para ")  # Replaces part of the string
print(string)

tel = {30132: "Pericles", 30142: "Menelau", 30154: "Atreu", 31000: "Tieste"}
print(tel)
ola = tel.popitem()  # Removes and returns a (key, value) pair from the dictionary (the last one inserted, on Python 3.7+)
print(ola[1])
import numpy as np

import ACO.CityFilter
import ACO.CUDA
import Download
import time

world_cities_path = "./Download/worldcitiespop.txt.gz"
world_cities_txt_path = "./Download/worldcitiespop.txt"

# Download world_cities
#world_cities_path = Download.download("http://download.maxmind.com/download/worldcities/worldcitiespop.txt.gz", "./Download/")
# Decompress
#world_cities_txt_path = Download.gz(world_cities_path)

# Download country_codes
country_codes_path = Download.download("http://data.okfn.org/data/core/country-list/r/data.csv", "./Download/")


def load_country_codes():
    country_codes = {}
    with open(country_codes_path) as file:
        for row in file.readlines():
            row = row.replace("\n", "").replace("\"", "")
            split = row.rsplit(',', 1)
            country_codes[split[1]] = split[0]  # map country code -> country name
    return country_codes


def load_world_cities(loc=["*"], max=None):
    nodes = []
    nodes_latitude = []
    nodes_longitude = []
def download_articles():
    a_b_path = Download.download("ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/articles.txt.0-9A-B.tar.gz", "./Download/")
    c_h_path = Download.download("ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/articles.txt.C-H.tar.gz", "./Download/")
    i_n_path = Download.download("ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/articles.txt.I-N.tar.gz", "./Download/")
    o_z_path = Download.download("ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/articles.txt.O-Z.tar.gz", "./Download/")
def main():
    # prototype source of tables: csv files
    file1 = '../Test/acQuire Data/01c All Collar.csv'
    file2 = '../Test/acQuire Data/02 Survey.csv'
    file3 = '../Test/acQuire Data/03 Lithology.csv'
    file4 = '../Test/acQuire Data/04 Alteration.csv'
    file5 = '../Test/acQuire Data/05 Samples Checks.csv'

    # from Download #################################
    # create instance: session
    session = Download()
    # define source tables
    session.setTableSource("Hole", file1)
    session.setTableSource("Survey", file2)
    session.setTableSource("Interval1", file3)
    session.setTableSource("Interval2", file4)
    session.setTableSource("Samples", file5)
    # set the fields list
    session.setFieldsList("Hole")
    session.setFieldsList("Survey")
    session.setFieldsList("Interval1")
    session.setFieldsList("Interval2")
    session.setFieldsList("Samples")
    # define the Hole List
    session.defineHoleList()
    # define final contents to create offline db
    session.setAvailableContent()

    # from WebView ##################################
    # create instance: web
    web = WebView()
    # create files list
    web.createFileList(session.contentList, session.holeList)
    # create index.html file
    web.createIndexPage(session.contentList, session.holeList)
    # create Hole%.html files
    web.createHolePages(session.contentList, session.holeList, session.dictOfFields[1])
    # create Survey%.html files
    web.createSurveyPages(session.contentList, session.holeList, session.dictOfFields[2])
    # create Interval%.html files
    web.createIntervalPages(session.contentList, session.holeList, session.dictOfFields)

    # TEST ##########################################
    # Download ######################################
    print session.holeList
    print session.contentList
    #print session.tableOrder["Samples"]
    #print session.tableOrder.keys()
    #print help(session)
    #print session.getFieldNames("Interval1")
    #print session.dictOfTables
    #print session.dictOfTables[session.tableOrder["Hole"]]
    print session.dictOfFields[1]
def search(songs):
    process = 5  # maximum number of worker processes
    total = len(songs.slist)
    notfoundlist = []  # songs that could not be found
    i, j = 0, 0
    PS = {}  # maps a child pid to the song it is downloading
    path = raw_input('Path to download : ')
    if path:
        if not os.path.exists(path):
            os.mkdir(path)
        os.chdir(path)
    current_path = os.getcwd()
    print 'Now download songs to :', current_path
    songlist = filterlist(songs.namelist, songs.slist, current_path)
    print 'There are %d in list and %d need to download' % (len(songs.slist), len(songlist))
    # for (i, (song, album, singer)) in enumerate(songlist):
    #     print song + '\t' + album + '\t' + singer
    try:
        for (i, (song, album, singer)) in enumerate(songlist):
            if process <= 0:
                pid, failed = os.wait()
                process += 1
                if failed:
                    print 'Download Failed in', song
                    notfoundlist.append(PS[pid])
                del PS[pid]
            process -= 1
            pid = os.fork()
            if not pid:
                print '%3d now trying to download %s' % (i, song)
                linklist = Sougou.select(song, album, singer) + Yahoo.select(song, album, singer)  # + Top100.select(song, album, singer)
                if not linklist:
                    exit(1)
                linklist.sort()
                for distinction, downlink in linklist[:3]:
                    exname = downlink.split('.')[-1].rstrip().lower().split('?')[0]
                    filename = song + '.' + exname
                    if Download.begin_download(downlink, filename, 5):
                        print 'Download complete : ', song
                        exit()
                    else:
                        print 'Download crashed : ', song, '\tTry to download from another source'
                exit(1)
            else:
                PS[pid] = (song, album, singer)
        for key in PS.keys():
            pid, failed = os.wait()
            process += 1
            if failed:
                print 'Download Failed in', song
                notfoundlist.append(PS[pid])
                del PS[pid]
    except KeyboardInterrupt:
        for key in PS.keys():
            print key
        for key in PS.keys():
            os.kill(key, 9)
    if notfoundlist:  # print the songs that were not found
        print 'Failed to find following songs'
        for song, album, singer in notfoundlist:
            print song + '\t' + album + '\t' + singer
def signal_strategy():
    Download.url = HTTP
    Final_url = Download._final_url_contents()
    Date = Final_url[0]
    date_list = []
    closing_price_list = []
    for i in Date:
        Day = i
        date_list.insert(0, Day)
    date_list.insert(0, 'Date')
    date_list.pop()
    Title_closing_prices = Final_url[1]
    sma_list = []
    Ticker_closing_prices = Title_closing_prices[1:]
    prices = Title_closing_prices[0:]
    for i in range(len(Ticker_closing_prices)):
        Ticker_closing_prices[i] = float(Ticker_closing_prices[i])
    for i in prices:
        closing_price_list.append(i)
    response = input('What signal strategy would you like to use? (directional or sma) ')
    if response == 'sma':
        sma_list = []
        strategy_list = []
        days = signal_days()
        Indicator = Indicators.execute(Indicators.Simple_moving_average(days), Ticker_closing_prices)
        sma_list.append('Sma')
        for i in Indicator:
            myIndicator = i
            if i != None:
                myIndicator = str(i)[:6]
            sma_list.append(myIndicator)
        Strategy = Signal_strategies.execute(
            Signal_strategies.Signal_sma(
                Indicators.execute(Indicators.Simple_moving_average(days), Ticker_closing_prices),
                Ticker_closing_prices))
        print(Strategy)
        strategy_list.append('Signal_strategies')
        for i in Strategy:
            myStrategy = i
            strategy_list.append(myStrategy)
        print(len(date_list))
        print(len(closing_price_list))
        print(len(sma_list))
        print(len(strategy_list))
        for i in range(len(date_list)):
            print('{:10} {:10} {:10} {:10}'.format(date_list[i], closing_price_list[i], sma_list[i], strategy_list[i]))
    elif response == 'directional':
        indicator_list = []
        strategy_list = []
        days = signal_days()
        buy_threshold = int(input('What is the buy threshold? '))
        sell_threshold = int(input('What is the sell threshold? '))
        Indicator = Indicators.execute(Indicators.Directional_indicator(days), Ticker_closing_prices)
        indicator_list.append('Indicator')
        for i in Indicator:
            myIndicator = i
            indicator_list.append(myIndicator)
        Strategy = Signal_strategies.execute(
            Signal_strategies.Signal_Directional(
                Indicators.execute(Indicators.Directional_indicator(days), Ticker_closing_prices),
                buy_threshold, sell_threshold))
        strategy_list.append('Signal_strategies')
        for i in Strategy:
            myStrategy = i
            strategy_list.append(myStrategy)
        for i in range(len(date_list)):
            print('{:10} {:10} {:10} {:10}'.format(date_list[i], closing_price_list[i], indicator_list[i], strategy_list[i]))
    else:
        print('You did not select simple-moving-average or directional. Please try again.')
        signal_strategy()
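# Indicators.Simple_moving_average is not shown in this section. A minimal
# sketch of the usual windowed SMA over a list of closing prices, padding with
# None for days that lack a full window (matching the `if i != None` check
# above); the function name and padding convention are assumptions.
def simple_moving_average(prices, days):
    sma = [None] * (days - 1)  # no average until a full window is available
    for i in range(days - 1, len(prices)):
        window = prices[i - days + 1:i + 1]
        sma.append(sum(window) / days)
    return sma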
import csv
import random
import time
import sys

import Download

try:
    Download.fromdrive()
except Exception:
    print("Failed to update database")

try:
    db = open('db.csv')
    reader = csv.reader(db)
    teamsList = list(reader)
except FileNotFoundError:
    print("No database file found")
    teamsList = []

if not teamsList:
    print("Something went wrong: no users in database")
    time.sleep(1)
    sys.exit()

acaOnly = []
for singleList in teamsList:
    acaOnly = acaOnly + singleList[2:]
import Download

pagelist = ['http://www.economist.com/']
crawler = Download.crawler('')
crawler.crawl(pagelist)
def download(self, option):
    dl = Download.download(self.url, self.title)
    check = dl.start(option)
    return check
print "Select a peer ('c' to cancel): " for idx, file in enumerate(available_files): # Visualizzazione la lista dei peer da cui è possibile scaricarlo if selected_file == idx: for idx2, owner in enumerate(file.owners): print str(idx2) + ": " + owner.ipv4 + " | " + owner.ipv6 + " | " + owner.port selected_peer = None while selected_peer is None: try: option = raw_input() # Selezione di un peer da cui scaricare il file except SyntaxError: option = None if option is None: print 'Please select an option' elif option == 'c': return else: try: selected_peer = int(option) except ValueError: print "A number is required" for idx2, owner in enumerate(file_to_download.owners): # Download del file selezionato if selected_peer == idx2: print "Downloading file from: " + owner.ipv4 + " | " + owner.ipv6 + " " + owner.port Download.get_file(self.session_id, owner.ipv4, owner.ipv6, owner.port, file_to_download, self.directory) else: print "Unknown error, check your code!"
def update_process(self):
    if self.asset_type == 'stocks':
        startTime = time.time()
        print '\tReading in Stock List'
        temp = []
        with open('NYSE.csv', 'r') as f:
            for line in f:
                line = line.replace('"', "").strip()
                line = line.replace(' ', "").strip()
                spl = line.split(',')
                temp.append(spl[0])
        with open('NASDAQ.csv', 'r') as f:
            for line in f:
                line = line.replace('"', "").strip()
                line = line.replace(' ', "").strip()
                spl = line.split(',')
                temp.append(spl[0])
        with open('AMEX.csv', 'r') as f:
            for line in f:
                line = line.replace('"', "").strip()
                line = line.replace(' ', "").strip()
                spl = line.split(',')
                temp.append(spl[0])
        '''
        with open('ETF_STOCKS.txt', 'r') as f:
            for line in f:
                line = line.replace('"', "").strip()
                line = line.replace(' ', "").strip()
                spl = line.split(',')
                temp.append(spl[0])
        '''
        print '\t\tComplete:\t' + str(time.time() - startTime)
        symbols = set(temp)
        if os.path.isfile('exceptionFile.csv'):
            with open('exceptionFile.csv', 'rb') as exception_file:
                for line in exception_file:
                    line = line.strip()
                    if line in symbols:
                        symbols.remove(line)

        startTime = time.time()
        print '\tDownloading Data'
        try:
            shutil.rmtree('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/' + self.folder)
        except:
            print '\t\t\tUnable to delete old stock data!'
        time.sleep(1)
        try:
            os.mkdir('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/' + self.folder)
        except:
            print '\t\t\tUnable to create stocks_old folder!'
        Download.download_data(symbols)
        print '\t\tComplete:\t' + str(time.time() - startTime)

        # Update data
        startTime = time.time()
        print '\tProcessing Data'
        num_cpus = multiprocessing.cpu_count()
        threads = []
        self.update_queue = Queue()
        for k in symbols:
            self.update_queue.put(k)
        for k in xrange(num_cpus):
            try:
                p = Process(target=self.process_updates, args=(self.update_queue, ))
                threads.append(p)
            except IOError:
                pass
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        print '\t\tComplete:\t' + str(time.time() - startTime)

        startTime = time.time()
        print '\tDeriving New Data'
        num_cpus = multiprocessing.cpu_count()
        threads = []
        self.derive_queue = Queue()
        for k in symbols:
            self.derive_queue.put(k)
        for k in xrange(num_cpus):
            p = Process(target=self.derive, args=(self.derive_queue, ))
            threads.append(p)
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        print '\t\tComplete:\t' + str(time.time() - startTime)

        startTime = time.time()
        print '\tCreating Exceptions List'
        '''
        self.symbol_queue = []
        self.exceptions_queue = []
        for k in symbols:
            self.symbol_queue.append(k)
        self.find_exceptions(self.symbol_queue, self.exceptions_queue)
        suspicious_companies = []
        with open('suspicious_companies.csv', 'wb') as output:
            while len(self.exceptions_queue) > 0:
                line = str(self.exceptions_queue.pop())
                line = line.replace("'", '')
                line = line.replace("(", '')
                line = line.replace(")", '')
                suspicious_companies.append(line.split(',')[0])
                output.write(str(line) + '\n')
        for k in suspicious_companies:
            shutil.rmtree('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/' + self.folder + '/' + k)
        '''
        print '\t\tComplete:\t' + str(time.time() - startTime)

    if self.asset_type == 'etf':
        startTime = time.time()
        print '\tReading in ETF List'
        temp = []
        with open('./ETF/ETF_STOCKS.txt', 'r') as f:
            for line in f:
                temp.append(line.strip())
        print '\t\tComplete:\t' + str(time.time() - startTime)
        symbols = set(temp)

        startTime = time.time()
        print '\tDownloading Data'
        try:
            shutil.rmtree('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/ETF/ETF_old')
        except:
            print '\t\t\tUnable to delete old ETF data!'
        time.sleep(1)
        try:
            os.mkdir('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/ETF/ETF_old')
        except:
            print '\t\t\tUnable to create ETF_old folder!'
        Download.folder = 'ETF/ETF_old'
        Download.download_data(symbols)
        print '\t\tComplete:\t' + str(time.time() - startTime)

        # Update data
        startTime = time.time()
        print '\tProcessing Data'
        num_cpus = multiprocessing.cpu_count()
        threads = []
        self.update_queue = Queue()
        for k in symbols:
            self.update_queue.put(k)
        for k in xrange(num_cpus):
            try:
                p = Process(target=self.process_updates, args=(self.update_queue, ))
                threads.append(p)
            except IOError:
                pass
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        print '\t\tComplete:\t' + str(time.time() - startTime)

        startTime = time.time()
        print '\tDeriving New Data'
        num_cpus = multiprocessing.cpu_count()
        threads = []
        self.derive_queue = Queue()
        for k in symbols:
            self.derive_queue.put(k)
        for k in xrange(num_cpus):
            p = Process(target=self.derive, args=(self.derive_queue, ))
            threads.append(p)
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        print '\t\tComplete:\t' + str(time.time() - startTime)

        startTime = time.time()
        print '\tCreating Exceptions List'
        '''
        self.symbol_queue = []
        self.exceptions_queue = []
        for k in symbols:
            self.symbol_queue.append(k)
        self.find_exceptions(self.symbol_queue, self.exceptions_queue)
        suspicious_companies = []
        with open('suspicious_companies.csv', 'wb') as output:
            while len(self.exceptions_queue) > 0:
                line = str(self.exceptions_queue.pop())
                line = line.replace("'", '')
                line = line.replace("(", '')
                line = line.replace(")", '')
                suspicious_companies.append(line.split(',')[0])
                output.write(str(line) + '\n')
        for k in suspicious_companies:
            shutil.rmtree('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/' + self.folder + '/' + k)
        '''
        print '\t\tComplete:\t' + str(time.time() - startTime)
import json
import datetime as dt

import kdb_csv as kc
import Download

#---------------------------------------------
''' Load the config files '''
with open('configs/config_main.json', 'r') as f:
    config_main = json.loads(f.read())
runD = config_main['runD']
if runD:
    print(dt.datetime.now(), 'load config: config_main')

with open('configs/config_download.json', 'r') as f:
    config_download = json.loads(f.read())
if runD:
    print(dt.datetime.now(), 'load config: config_download')

#---------------------------------------------
''' Download the data '''
if config_main['option']['whether_download_data'] == 1:
    if runD:
        print(dt.datetime.now(), 'process: download data')
    Download.Download(config_download)
else:
    if runD:
        print(dt.datetime.now(), 'process: give up download data')

''' Load the data '''

''' Process the data '''
# make a COM object
iTunes = win32com.client.gencache.EnsureDispatch('iTunes.Application')

# find My Documents
objShell = win32com.client.Dispatch('WScript.Shell')
myDocs = objShell.SpecialFolders('MyDocuments') + '\\'
print('My Documents is located at: %s' % myDocs)

# default downloadFolder is My Documents\Aethyr
downloadFolder = myDocs + 'Aethyr\\'

# config file holding location of download folder
configFileLocation = myDocs + 'aethyr.ini'
storedLocation = Download.loadStoredDownloadFolder(configFileLocation)
if storedLocation is not None:
    downloadFolder = storedLocation
if not Helper.isFolderIntegrityOK(downloadFolder):
    downloadFolder = Download.resetDefaultDownloadFolder(myDocs, configFileLocation)

totalSize = 0
totalTime = 0

# dictionary for caching libraries
alreadyLoaded = {}

# delimiter separating elements of an array
DELIMITER = '&&&'
def readExcel(tickerName):
    fileVariables = Variables()
    directory = fileVariables.directory
    endings = fileVariables.ending
    fileEnding = fileVariables.returnFileEnding(tickerName)

    """Opens each excel file and puts them in the sheets array"""
    sheets = []
    for i in fileEnding:
        fileNameTemp = directory + i
        if os.path.isfile(fileNameTemp) == False:
            Download.downloadAll(tickerName)
        tempBook = xlrd.open_workbook(fileNameTemp)
        sheets.append(tempBook.sheet_by_index(0))

    """Get dates from the first sheet. These dates will be used for all other sheets"""
    i = 0
    j = 0
    dates = []
    dates.append("dates")
    firstSheet = sheets[0]
    while j < firstSheet.ncols:
        tempDate = firstSheet.cell_value(0, j)
        j += 1
        if tempDate != ' ' and tempDate != '':
            dateTuple = xlrd.xldate_as_tuple(tempDate, 0)
            dates.append(str(dateTuple[0]) + "/" + str(dateTuple[1]) + "/" + str(dateTuple[2]))

    """Now, get all other data from all sheets, and add the dates and other data to totalArray"""
    j = 0
    totalArray = []
    totalArray.append(dates)
    """Goes through each sheet"""
    for iterator in range(0, len(sheets)):
        sheet = sheets[iterator]
        ending = endings[iterator]
        i = 1
        """Goes down each row in the sheet, then across each column, to get all data"""
        while i < sheet.nrows:
            tempData = []
            j = 0
            while j < sheet.ncols:
                """Adds the file ending (-Q, -T, -QB etc.) to the variable name, which is at j = 0."""
                if j == 0:
                    tempData.append(str(sheet.cell_value(i, j)) + ending)
                else:
                    tempData.append(sheet.cell_value(i, j))
                j += 1
            i += 1
            totalArray.append(tempData)

    # for i in totalArray:
    #     print(len(i))
    #     print(i)

    """Now make sure the lengths of all arrays are the same"""
    longestArray = 0
    for i in totalArray:
        if len(i) > longestArray:
            longestArray = len(i)
    for i in totalArray:
        appendNumber = longestArray - len(i)
        for j in range(0, appendNumber):
            i.append('')

    # for i in totalArray:
    #     print(len(i))
    #     print(i)
    return Utility.invert(totalArray)
def get_download(self, id):
    assert isinstance(id, (int, long)), id
    headers, data = self._requester.requestAndCheck(
        "GET", self.url + "/downloads/" + str(id), None, None)
    return Download.Download(self._requester, data, completed=True)
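# A minimal usage sketch for the two requester-backed methods in this section
# (create_download above and get_download here), assuming they live on a
# PyGithub-style Repository object; the token and repository name are
# placeholders, and the GitHub Downloads API they wrap is long deprecated.
from github import Github

gh = Github("access-token")        # placeholder token
repo = gh.get_repo("owner/repo")   # placeholder repository
dl = repo.create_download("build.zip", 1024, description="nightly build")
same = repo.get_download(dl.id)    # round-trip by numeric id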
#from _pickle import cPickle
from datetime import timedelta, datetime
from dateutil.tz import tzutc

import Download
import dtFeatures
from train_Data import train

date_start = datetime(2019, 1, 3, 0, 0, tzinfo=tzutc())
date_end = date_start + timedelta(days=2)
data = Download.download(date_start, date_end)
dtFeatures.dateTime_features(data, date_start, date_end)
path = r'C:\Users\Iacob\Desktop\ProiectStrongBytes'
name = "WindCSV"
dtFeatures.exportCSV(data, path, name)
print(data.head(10))