Example #1
def getUrl():
    directory = path2.get()
    try:
        pages = int(page2.get())
    except ValueError:
        pages = None
    if pages is not None and pages < 1:
        pages = 1
    if pages and directory:
        if askokcancel('About to start fetching URLs!',
                       "Start fetching now?\nNote: there is no GUI progress display during the fetch (because I'm lazy!)"):
            showinfo("Fetching", "Fetching now, please do not close the program!!!")
            dl.executeUrls(pages, directory)
            top = Toplevel()
            top.title('URL fetch finished!')

            # center the window on the screen
            sw = top.winfo_screenwidth()   # screen width
            sh = top.winfo_screenheight()  # screen height
            ww = 600
            wh = 300
            x = (sw - ww) / 2
            y = (sh - wh) / 2
            # set the window size (width x height) and position
            top.geometry("%dx%d+%d+%d" % (ww, wh, x, y))
            # window.geometry('600x400')  # note the separator is a lowercase x
            label = tk.Label(top, text="Done!")
            label.pack()
            label2 = tk.Label(top, text="File path: " + directory)
            label2.pack()
            button = tk.Button(top,
                               text="Open folder",
                               command=lambda: open_dir(system, directory))
            button.pack()

    else:
        showinfo('Empty path or invalid page count!', 'Empty path or invalid page count!')
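Note: the centering arithmetic in Example #1 recurs in Tkinter code, so it can be factored into a helper. A minimal sketch of the same idea (the name center_window is mine, not from the example):

def center_window(win, width, height):
    # place a window of the given size in the middle of the screen
    x = (win.winfo_screenwidth() - width) // 2
    y = (win.winfo_screenheight() - height) // 2
    win.geometry("%dx%d+%d+%d" % (width, height, x, y))

# usage inside Example #1 would be: center_window(top, 600, 300)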
Example #2
def parse(url: str, url_type: str) -> dict:
    file_path = Download.downloadFile(url)
    deserialize_raw = JDeserialize.Deserialize(file_path)
    autoconfig = parse_jd(deserialize_raw, url_type)

    Download.cleanup(file_path)
    return autoconfig
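Note: in Example #2, Download.cleanup never runs if parse_jd raises, so the downloaded file can leak. A try/finally variant of the same function (same names as the example):

def parse(url: str, url_type: str) -> dict:
    file_path = Download.downloadFile(url)
    try:
        deserialize_raw = JDeserialize.Deserialize(file_path)
        return parse_jd(deserialize_raw, url_type)
    finally:
        Download.cleanup(file_path)  # runs on success and on error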
Example #3
def searchQuery(q, start):
    API_HOST = choice(loadBestNIP(config.TOP_IP_FILE, 20))
    SEARCH_API = 'https://' + API_HOST + '/search?q='
    dobj = Download(SEARCH_API + q + '&start=' + start)
    if dobj.doRequest():
        return None
    else:
        return dobj.getSOURCE()
Example #5
    def download(self, command):
        # NOTE: check input
        # A reference to the library is passed so Download can check whether the
        # song is already in the library and stop the process early.
        currentDL = Download(command, self.library)
        if currentDL.downloadedSuccessfully():
            self.library.addSong(currentDL.getSong())
            print "[!] DONE!"
        else:
            print "[!] ERROR WHILE DOWNLOADING"
Example #6
def queryGoogle(q, start):
  API_HOST = choice(loadBestNIP(config.TOP_IP_FILE, 20))
  # print "Debug: " + API_HOST
  GOOGLE_API = 'http://' + API_HOST + '/search?q='
  dobj = Download(GOOGLE_API + q + '&start=' + start)
  if dobj.doRequest():
    return None
  else:
    return dobj.getSOURCE()
Example #7
def scholarQuery(q, start):
    API_HOST = choice(loadBestNIP(config.TOP_IP_FILE, 20))
    SCHOLAR_API = 'http://' + API_HOST + '/scholar?q='
    dobj = Download(SCHOLAR_API + q + '&start=' + start)
    if dobj.doRequest():
        return None
    else:
        return dobj.getSOURCE()
Example #10
def extract_articles():

    matches = []
    for root, dirnames, filenames in os.walk('./Download'):
        for filename in fnmatch.filter(filenames, '*.tar.gz'):
            matches.append(os.path.join(root, filename))

    for match in matches:
        Download.tar_gz(match)
Example #11
    def run(self):

        global mutex

        sP = sfunc.create_socket_client(func.roll_the_dice(self.peer[0]), self.peer[1])
        if sP is None:
            #tfunc.error('Error: could not open socket in download')
            pass
        else:
            try:
                if mutex.acquire(timeout=const.TIME_TO_UPDATE):
                    dnl.update_own_memory(self.md5, self.partN, self.listPartOwned, "2")
                    mutex.release()

                    #tfunc.gtext("Starting download of part " + str(self.partN) + " from " + str(self.peer[0], "ascii"))

                    pk = pack.request_download(self.md5, self.partN)
                    sP.sendall(pk)
                    ricevutoByte = sP.recv(const.LENGTH_HEADER)
                    if str(ricevutoByte[0:4], "ascii") == pack.CODE_ANSWER_DOWNLOAD:
                        nChunk = int(ricevutoByte[4:10])
                        ricevutoByte = b''
                        i = 0

                        while i != nChunk:
                            # read the length prefix, then the chunk body, looping until each is complete
                            ricevutoLen = sP.recv(const.LENGTH_NCHUNK)
                            while len(ricevutoLen) < const.LENGTH_NCHUNK:
                                ricevutoLen = ricevutoLen + sP.recv(const.LENGTH_NCHUNK - len(ricevutoLen))
                            buff = sP.recv(int(ricevutoLen))
                            while len(buff) < int(ricevutoLen):
                                buff = buff + sP.recv(int(ricevutoLen) - len(buff))
                            ricevutoByte = ricevutoByte + buff
                            i = i + 1

                        sP.close()

                        # Write the downloaded part into the file; create the file if it does not exist (e.g. b'00000')
                        dnl.create_part(ricevutoByte, self.fileName, self.partN, self.lenFile, self.lenPart)

                        if mutex.acquire(timeout=const.TIME_TO_UPDATE):
                            # Update my own memory
                            dnl.update_own_memory(self.md5, self.partN, self.listPartOwned, "1")
                            mutex.release()

                            pfunc.part_all(self.listPartOwned[self.md5][0])

                            # Send the update to the tracker
                            send_update(self.t_host, self.sessionID, self.md5, self.partN, self.listPartOwned, self.peer)
                        else:
                            raise Exception("Error Download Code")
                    else:
                        raise Exception("Error Download Code")

                else:
                    raise Exception("Error Download Code")

            except Exception as e:
                #tfunc.write_daemon_error(self.name, str(self.peer[0], "ascii"), "DOWNLOAD ERROR: {0}".format(e))
                dnl.update_own_memory(self.md5, self.partN, self.listPartOwned, "0")
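Note: the two inner while loops in Example #11 both implement "read exactly N bytes from a socket". A minimal sketch of that pattern as a helper (the name recv_exact is mine; assumes a blocking socket):

def recv_exact(sock, n):
    # keep calling recv until exactly n bytes have arrived
    data = b''
    while len(data) < n:
        chunk = sock.recv(n - len(data))
        if not chunk:
            raise ConnectionError("socket closed after %d of %d bytes" % (len(data), n))
        data += chunk
    return data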
Example #12
 def __init__(self):
     self.UrlAndIDContr = URLSchedul.UrlManager()
     self.downloader = Download.Downloader()
     self.parser = Html_pareser.HtmlPare()
     self.ProceClean = Pipeline.pinline()  # instantiated, as in Example #19
     self.outjson = FeeDExport.FeedExp()
     self.CollectAllData = []
Example #13
def classify():
    prints("Fetching category information")
    url = "http://wap.xqishuta.com/sort.html"

    for i in range(1, 4):
        try:
            response = Download.download_page(url, url)
            break
        except Exception:
            if i != 3:
                prints("Attempt {0} to fetch page {1} failed".format(i, url))
                continue
            else:
                prints("Failed to fetch page {}, exiting".format(url))
                exit(1)
    html = etree.HTML(response.content.decode("UTF-8"))
    url_lists = html.xpath("//div[@class='menu_nav']/ul/li/a/@href")
    name_list = html.xpath("//div[@class='menu_nav']/ul/li/a/text()")
    lists = {}
    for i in range(0, len(url_lists)):
        lists[name_list[i]] = "http://wap.xqishuta.com" + url_lists[i]
    prints("Categories:")
    for string in lists:
        prints(string, " ")
    prints("\n")
    return lists
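Note: Examples #13, #14, #15 and #22 all repeat the same three-attempt retry loop. A sketch of that pattern factored out, reusing the examples' own Download and prints helpers (assuming download_page raises on failure, as the examples do):

def download_with_retry(url, referer, attempts=3):
    for i in range(1, attempts + 1):
        try:
            return Download.download_page(url, referer)
        except Exception:
            prints("Attempt {0} to fetch page {1} failed".format(i, url))
            if i == attempts:
                raise  # let the caller decide between exit() and error_url.append()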
Example #14
def get_novellilsts(type, url):
    prints("-----------------------------------------------------")
    prints("Fetching page {1} under category {0}".format(type, url))

    for i in range(1, 4):
        try:
            response = Download.download_page(url, url)
            break
        except Exception:
            if i != 3:
                prints("Attempt {0} to fetch page {1} failed".format(i, url))
                continue
            else:
                prints("Failed to fetch page {}".format(url))
                error_url.append(url)
                return

    html = etree.HTML(response.content.decode("UTF-8"))
    url_list = html.xpath("//div[@class='article']//h6/a/@href")
    novel_lists = []
    for string in url_list:
        temp = "http://wap.xqishuta.com/" + string[1:]
        novel_lists.append(temp)
    prints("Fetch succeeded; this page lists {} books".format(len(novel_lists)))
    return novel_lists
Example #15
def get_urllists(type, url):
    prints("Fetching book information under category {}".format(type))
    first_url = url + "index_1.html"

    for i in range(1, 4):
        try:
            response = Download.download_page(first_url, url)
            break
        except Exception:
            if i != 3:
                prints("Attempt {0} to fetch page {1} failed".format(i, url))
                continue
            else:
                prints("Failed to fetch page {}".format(url))
                error_url.append(first_url)
                return

    html = etree.HTML(response.content.decode("UTF-8"))
    temp = html.xpath("//div[@class='page']/a/@href")
    temp = temp[1]
    temp = temp.split(".")
    temp = temp[0].split("_")
    last_page = int(str(temp[1]))
    url_list = []
    for i in range(1, last_page + 1):
        temp = url + "index_" + str(i) + ".html"
        url_list.append(temp)
    prints("Category {0} has {1} pages of data".format(type, last_page))
    return url_list
Example #16
 def on_buttondownload_clicked(self, widget):
     """Handler for the download-button click."""
     try:
         Download.Downloadmp3(self.entry1.get_text())
         Download.Downloadmp3.movefile("Download")
     except Exception:
         raise
Example #17
def __main__():
    check_files_and_folders()
    socket = Socket.ServerSocket()
    connection = socket.Socket()
    sr = Server(connection, Cryptography.session_crypto(None), Registery.Registery(), Login.serverLogin(),
                Download.Download(), Upload.Upload(), List.List(), Read.Read(), Write.Write(),
                SessionKeyExchange.ServerSession(None),
                DACCommands.DACCommands(), Auditor.Auditor())
    sr.Handler()
Example #18
def Update(endDate, dataframe):
    startDate = max(dataframe.index)
    delta = endDate - startDate
    if delta.days < 0:
        endDate, startDate = startDate, endDate
    elif delta.days == 0:
        endDate = startDate + datetime.timedelta(days=2)
    new_data = Download.download(startDate, endDate)
    return dataframe.append(new_data).sort_index()
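Note: pandas removed DataFrame.append in pandas 2.0, so the last line of Example #18 only works on older versions. A sketch of the same update for current pandas (reusing the example's Download module):

import datetime
import pandas as pd

def Update(endDate, dataframe):
    # pandas >= 2.0 version of Example #18's Update
    startDate = max(dataframe.index)
    delta = endDate - startDate
    if delta.days < 0:
        endDate, startDate = startDate, endDate
    elif delta.days == 0:
        endDate = startDate + datetime.timedelta(days=2)
    new_data = Download.download(startDate, endDate)
    return pd.concat([dataframe, new_data]).sort_index()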
Example #19
 def __init__(self):
     self.UrlAndIDContr = URLSchedul.UrlManager()
     self.downloader = Download.Downloader()
     self.parser = Html_pareser.HtmlPare()
     self.ProceClean = Pipeline.pinline()
     self.outjson = FeeDExport.FeedExp()
     self.CollectAllData={}
     self.errGeoGet = []
     msgKeys = ["geo_code", "latitude", "longitude", "nation", "province",
                "city", "district", "street", "street_number"]
     for k in msgKeys:
         self.CollectAllData[k] = []
Example #20
    def run(self):
        Download.CreatePath()
        t = open("CardsDB.txt")
        ln = 0
        for ch in t:
            start = time.time()
            ln = ln + 1

            ch = ch.rstrip('\n')
            size = None  # stays None for blank lines, so the speed calculation below is skipped
            if ch != "":
                size = Download.Download(ch + ".jpg")

            end = time.time()
            elapsedtime = end - start

            linevar = 13636 - ln
            speed = 0
            if size is not None:
                try:
                    speed = ((int(size) / 1024) / elapsedtime)
                except ZeroDivisionError:
                    pass
            percentage = 100 - round((linevar * 50) / 6815)
            if percentage > 99 and linevar != 0:
                percentage = 99
            if ch != "":
                if speed == 0:
                    progr = "Skipping " + ch + ".jpg" + "! Already Exists..."
                    print(progr)
                else:
                    progr = "Downloading " + ch + ".jpg" + " @" + str(
                        round(speed)) + "kb/s " + str(
                            linevar) + " cards left..."
                    print(progr)
                time.sleep(0.001)
                self.change_perc.emit(percentage)
                self.change_lab.emit(progr)
                #self.progressBar.setValue(percentage)
        print("Task Completed ! Check you pics folder.")
        t.close()
Example #21
    def OnDownload(self, event):
        self.filename = None
        self.busy = True
        if len(self.servers) == 0:
            rc = Messages.messageChoice(
                "SAP Server configuration not found. Do you want to configure?",
                "Server configuration")
            if rc == wx.ID_YES:
                ConfigWindow.showConfigWindow(self)

        else:
            Download.Show(self)
        self.busy = False
        event.Skip()
Example #22
def get_novel(url):
    prints("------------------------------------------------------")
    prints("Downloading the novel page {0}".format(url))

    for i in range(1, 4):
        try:
            response = Download.download_page(url, url)
            break
        except Exception:
            if i != 3:
                prints("Attempt {0} to fetch page {1} failed".format(i, url))
                continue
            else:
                prints("Failed to fetch page {}".format(url))
                error_url.append(url)
                return

    html = etree.HTML(response.content.decode("UTF-8"))

    img = html.xpath("//div[@class='pic']/img/@src")[0]
    name = html.xpath("//div[@class='cataloginfo']/h3/text()")[0]

    temp = html.xpath("//div[@class='infotype']/p/text()")
    author = temp[0].split(":")[1]
    type = temp[1].split(":")[1]
    date = temp[2].split(":")[1].split("T")[0]

    link = html.xpath("//ul[@class='infolink']//p/script/text()")[0].split("'")[3]

    information = html.xpath("//div[@class='intro']/p/text()")[0]

    prints("Image URL: " + img)
    prints("Title: " + name)
    prints("Author: " + author)
    prints("Type: " + type)
    prints("Last updated: " + date)
    prints("Download link: " + link)
    prints("Summary: " + information)

    novel = {
        "link": url,
        "img": img,
        "author": author,
        "name": name,
        "type": type,
        "date": date,
        "text_link": link,
        "summary": information  # key translated from the original "小说简介"
    }
    return novel
Example #23
    def download(self):
        print "Start downloading " + self.url
        
        #download youtube info
        if self.get_youtube_info()==False:
            return False
        signature = self.download_stream_info["sig"]
        url = self.download_stream_info["url"]
        download_url = "%s&signature=%s" % (url, signature)
        if self.output_file_name=="FF":
            dmn = Download( download_url, self.title)
            self.output_file_name = self.title
        else:
            dmn = Download( download_url, self.output_file_name)
        dmn.download()

        print "\nConverting:\n"
        ofile = "result_"+self.output_file_name+"."+self.ofe
        print ofile
        con = Converter( self.output_file_name,  ofile)
        
        con.ffmpeg_converter(self.audio_codec,self.video_codec)
        
        return True
Example #24
    def download_image(self, url, filename):
        """ Download an image and save it to a location
            :param url: The url
            :param filename: The filename
        """

        if self.debug:
            print "    Downloading: %s\n     => %s" % \
                  (url, self.image_folder + filename)

        if path.isfile(self.image_folder + filename):
            return

        data = Download.download_page(url)
        Files.write(data, filename, self.image_folder)
Example #25
    def download(self):
        """ Download HTML web pages
        """

        if self.debug:
            print "  Downloading: %s" % self.test_file + FILE_EXT_HTML
            url_list = [self.base_url + self.test_file + FILE_EXT_HTML]
        else:
            url_list = Files.get_raw_contents(self.name + ".urls", "./")

        for web_file in url_list:
            cached_file = web_file.replace(self.base_url, "")
            if not Files.file_exists(self.download_folder + cached_file):
                data = Download.download_page(web_file)
                Files.write(data, cached_file, self.download_folder)
            elif self.debug:
                print "    File %s already exists" % (self.download_folder +
                                                      cached_file)
Example #26
 def create_download(self,
                     name,
                     size,
                     description=GithubObject.NotSet,
                     content_type=GithubObject.NotSet):
     assert isinstance(name, (str, unicode)), name
     assert isinstance(size, (int, long)), size
     assert description is GithubObject.NotSet or isinstance(
         description, (str, unicode)), description
     assert content_type is GithubObject.NotSet or isinstance(
         content_type, (str, unicode)), content_type
     post_parameters = {
         "name": name,
         "size": size,
     }
     if description is not GithubObject.NotSet:
         post_parameters["description"] = description
     if content_type is not GithubObject.NotSet:
         post_parameters["content_type"] = content_type
     headers, data = self._requester.requestAndCheck(
         "POST", self.url + "/downloads", None, post_parameters)
     return Download.Download(self._requester, data, completed=True)
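Note: a usage sketch for Example #26, assuming repo is a PyGithub Repository object from the same era (GitHub has since retired the Downloads API in favor of Releases):

# upload metadata for a 1 KiB artifact; description is optional
download = repo.create_download("build.tar.gz", 1024, description="nightly build")
print(download.name)  # assumes the returned Download object exposes .name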
Example #27
    def install(self, url, targetPath, key=None, ui=None):
        """Install the file located in the url given.

        Args:
            url (str):  The url to download the package to install.
            targetPath (str): Where to install the package.
            key (str): If the system needs a key to access. Defaults to None.
            ui (InstallatorUI): User Interface connection. Defaults to None.
        """
        if ui:
            ui.progressText.append('Starting downloading files...')
        download = Download.Download(url, key, ui=ui)
        download.download()
        downloadedPath = (download.getExtractedPath()
                          or download.getFileDownloaded())
        if ui:
            ui.progressText.append('Installing the files...')
        install = Install.Install(downloadedPath, targetPath, ui=ui)
        install.install()
        if ui:
            ui.progressText.append('Installation successful')
            ui.progressBar.setValue(100)
Example #28
 def EmailHunterDetect(self):
     '''
     Use the EmailHunter JSON API to detect the email format.
     '''
     try:
         # This returns a JSON object
         dl = Download.Download(self.verbose)
         url = "https://api.hunter.io/v2/domain-search?domain=" + \
             self.domain + self.type + "&limit=100&offset=0" + "&api_key=" + self.apikeyv
         r = dl.requesturl(url, useragent=self.UserAgent, raw=True)
         results = r.json()
         pattern = str(results['data']['pattern'])
         if pattern:
             return pattern
         else:
             if self.verbose:
                 e = ' [!] No pattern detected via EmailHunter API'
                 print helpers.color(e, firewall=True)
             return False
     except Exception as e:
         error = "[!] Major issue with EmailHunter Search:" + str(e)
         print helpers.color(error, warning=True)
Example #29
def link_carwler(seed_url, wr_csv, max_depth):
    crawler = [seed_url]
    # seen=set(crawler)
    seen = {seed_url: 0}
    # set a delay time to avoid getting the IP banned
    delay_time = limit_bandwidth(3)
    # wr_csv is the CSV the crawled data is written to

    while crawler:
        url = crawler.pop()
        # depth of the page being downloaded, to avoid crawler traps
        depth = seen[url]
        # check delay time
        delay_time.wait(url)
        html = gf.download(url)
        # write out the crawled data
        if html is not None:
            beautifulsoup(html, wr_csv)
        if html is not None and depth < max_depth:  # keep crawling while HTML is non-empty and depth is within the limit
            for link in get_links(html):
                if link not in seen:  # skip links that were already seen
                    # every link found on this page is one level deeper than the current page
                    seen[link] = depth + 1
                    crawler.append(link)
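Note: because Example #29 pops from the end of the crawler list, it traverses links depth-first; popping from the front turns the same loop into a breadth-first crawl. A sketch of that variant (same helpers as the example; the name link_crawler_bfs is mine):

from collections import deque

def link_crawler_bfs(seed_url, wr_csv, max_depth):
    queue = deque([seed_url])
    seen = {seed_url: 0}
    while queue:
        url = queue.popleft()  # FIFO instead of LIFO -> breadth-first
        depth = seen[url]
        html = gf.download(url)
        if html is not None:
            beautifulsoup(html, wr_csv)
            if depth < max_depth:
                for link in get_links(html):
                    if link not in seen:
                        seen[link] = depth + 1
                        queue.append(link)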
Example #30
import Download

url = input("Enter the URL: ")
caminho = input("Enter the path to save to: ")
resultado_da_veridicacao = Download.verificando_arquivo_online(url)
if resultado_da_veridicacao[0]:
    Download.download_arquivos(caminho, resultado_da_veridicacao[1])
else:
    print("File is offline or does not exist")

nomes = ['mathias', 'da', 'silva', 'maurilio']
# the original `if ('testando' or 'mathias' == nomes)` was always true;
# membership tests are what was meant here
if 'testando' in nomes or 'mathias' in nomes:
    print("Acertou Miseravi")  # "You got it!"
else:
    print("ta serto")  # "all right"

string = "Ola para você"
print(string)
lista = string.split(" ")  # splits a string into several parts
string = lista[0] + " " + lista[2]
print(string)
string = string.replace(" ", " para ")  # replaces part of the string
print(string)

tel = {30132: "Pericles", 30142: "Menelau", 30154: "Atreu", 31000: "Tieste"}
print(tel)
ola = tel.popitem()  # removes and returns a (key, value) pair from the dict (the last one since Python 3.7)
print(ola[1])
Example #31
import numpy as np
import ACO.CityFilter
import ACO.CUDA
import time

world_cities_path = "./Download/worldcitiespop.txt.gz"
world_cities_txt_path = "./Download/worldcitiespop.txt"

# Download world_cities
#world_cities_path = Download.download("http://download.maxmind.com/download/worldcities/worldcitiespop.txt.gz", "./Download/")

# Decompress
#world_cities_txt_path = Download.gz(world_cities_path)

# Download country_codes
country_codes_path = Download.download("http://data.okfn.org/data/core/country-list/r/data.csv", "./Download/")


def load_country_codes():
    country_codes = {}
    with open(country_codes_path) as file:
        for row in file.readlines():
            row = row.replace("\n", "").replace("\"", "")
            split = row.rsplit(',', 1)
            # the original fill line was commented out and indexed the raw string
            # by character; using the rsplit result populates the map as intended
            country_codes[split[1]] = split[0]
    return country_codes

def load_world_cities(loc=["*"], max=None):
    nodes = []
    nodes_latitude = []
    nodes_longitude = []
Example #32
def download_articles():
    a_b_path = Download.download("ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/articles.txt.0-9A-B.tar.gz", "./Download/")
    c_h_path = Download.download("ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/articles.txt.C-H.tar.gz", "./Download/")
    i_n_path = Download.download("ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/articles.txt.I-N.tar.gz", "./Download/")
    o_z_path = Download.download("ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/articles.txt.O-Z.tar.gz", "./Download/")
Example #33
def main():
    # prototype source of tables: csv files
    file1 = '../Test/acQuire Data/01c All Collar.csv'
    file2 = '../Test/acQuire Data/02 Survey.csv'
    file3 = '../Test/acQuire Data/03 Lithology.csv'
    file4 = '../Test/acQuire Data/04 Alteration.csv'
    file5 = '../Test/acQuire Data/05 Samples Checks.csv'

    # from Download #################################

    # create instance: session
    session = Download()
    # define source tables
    session.setTableSource("Hole", file1)
    session.setTableSource("Survey", file2)
    session.setTableSource("Interval1", file3)
    session.setTableSource("Interval2", file4)
    session.setTableSource("Samples", file5)
    # set the fields list
    session.setFieldsList("Hole")
    session.setFieldsList("Survey")
    session.setFieldsList("Interval1")
    session.setFieldsList("Interval2")
    session.setFieldsList("Samples")
    # define the Hole List
    session.defineHoleList()
    # define final contents to create offline db
    session.setAvailableContent()

    # from WebView ##################################

    # create instance: web
    web = WebView()
    # create files list
    web.createFileList(session.contentList, session.holeList)
    # create index.html file
    web.createIndexPage(session.contentList, session.holeList)
    # create Hole%.html files
    web.createHolePages(session.contentList, session.holeList,
                        session.dictOfFields[1])
    # create Survey%.html files
    web.createSurveyPages(session.contentList, session.holeList,
                          session.dictOfFields[2])
    # create Interval%.html files
    web.createIntervalPages(session.contentList, session.holeList,
                            session.dictOfFields)

    # TEST ##########################################

    # Download ######################################
    print session.holeList
    print session.contentList

    #print session.tableOrder["Samples"]
    #print session.tableOrder.keys()
    #print help(session)
    #print session.getFieldNames("Interval1")
    #print session.dictOfTables
    #print session.dictOfTables[session.tableOrder["Hole"]]
    print session.dictOfFields[1]
Example #34
def search(songs):
    process = 5                             # maximum number of concurrent child processes
    total = len(songs.slist)
    notfoundlist = []                       # songs that could not be found
    i, j = 0, 0
    PS = {}                                 # maps child pid -> (song, album, singer)
    path = raw_input('Path to download : ')
    if path:
        if not os.path.exists(path): os.mkdir(path)
        os.chdir(path)

    current_path = os.getcwd()
    print 'Now download songs to :', current_path
    songlist = filterlist(songs.namelist, songs.slist, current_path)
    print 'There are %d in list and %d need to download' % (len(songs.slist), len(songlist))
#    for (i,(song,album,singer)) in enumerate(songlist) :
#        print song+'\t'+album+'\t'+singer

    try:
        for (i, (song, album, singer)) in enumerate(songlist):
            if process <= 0:
                pid, failed = os.wait()
                process += 1
                if failed:
                    print 'Download Failed in', PS[pid][0]
                    notfoundlist.append(PS[pid])
                del PS[pid]

            process -= 1
            pid = os.fork()
            if not pid:
                print '%3d now trying to download %s' % (i, song)
                linklist = Sougou.select(song, album, singer) + Yahoo.select(song, album, singer)  # +Top100.select(song,album,singer)
                if not linklist: exit(1)
                linklist.sort()
                for distinction, downlink in linklist[:3]:
                    exname = downlink.split('.')[-1].rstrip().lower().split('?')[0]
                    filename = song + '.' + exname
                    if Download.begin_download(downlink, filename, 5):
                        print 'Download complete : ', song
                        exit()
                    else:
                        print 'Download crashed : ', song, '\tTry to download from another source'
                exit(1)
            else:
                PS[pid] = (song, album, singer)

        for key in PS.keys():
            pid, failed = os.wait()
            process += 1
            if failed:
                print 'Download Failed in', PS[pid][0]
                notfoundlist.append(PS[pid])
                del PS[pid]

    except KeyboardInterrupt:
        for key in PS.keys():
            print key
        for key in PS.keys():
            os.kill(key, 9)

    if notfoundlist:                        # print the songs that were not found
        print 'Failed to find following songs'
        for song, album, singer in notfoundlist:
            print song + '\t' + album + '\t' + singer
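Note: Example #34 bounds its parallelism by hand with os.fork/os.wait, which is Python-2 and Unix-only. The same bounded-download idea in Python 3 with concurrent.futures (a sketch; download_one is a hypothetical stand-in for the per-song body of the fork branch):

from concurrent.futures import ThreadPoolExecutor, as_completed

def download_all(songlist, workers=5):
    notfound = []
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = {pool.submit(download_one, song, album, singer): (song, album, singer)
                   for (song, album, singer) in songlist}
        for future in as_completed(futures):
            if not future.result():  # assumes download_one returns True on success
                notfound.append(futures[future])
    return notfound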
Example #36
def signal_strategy():
   Download.url=HTTP
   Final_url = Download._final_url_contents()
   Date = Final_url[0]
   date_list=[]
   closing_price_list=[]
   for i in Date:
      Day=i
      date_list.insert(0, Day)
   date_list.insert(0,'Date')
   date_list.pop()
   Title_closing_prices=Final_url[1]
   sma_list=[]
   Ticker_closing_prices=(Title_closing_prices[1:])
   prices=Title_closing_prices[0:]
   for i in range(len(Ticker_closing_prices)):
      Ticker_closing_prices[i] = float(Ticker_closing_prices[i])

   for i in prices:
      closing_price_list.append(i)
   
   response=input('What signal strategy would you like to use? (directional or sma) ')
   if response=='sma':
      sma_list=[]
      strategy_list=[]
      days=signal_days()
      Indicator=Indicators.execute(Indicators.Simple_moving_average(days),Ticker_closing_prices)
      sma_list.append('Sma')
      for i in Indicator:
         myIndicator=i
         if i is not None:
            myIndicator=str(i)[:6]
         sma_list.append(myIndicator)
      Strategy=Signal_strategies.execute(Signal_strategies.Signal_sma(Indicators.execute(Indicators.Simple_moving_average(days), Ticker_closing_prices),Ticker_closing_prices))
      print(Strategy)
      strategy_list.append('Signal_strategies')
      for i in Strategy:
         myStrategy=i
         strategy_list.append(myStrategy)
      print(len(date_list))
      print(len(closing_price_list))
      print(len(sma_list))
      print(len(strategy_list))
      for i in range(len(date_list)):
         
         print('{:10}   {:10}  {:10}   {:10}'.format(date_list[i], closing_price_list[i], sma_list[i],strategy_list[i]))
   elif response=='directional':
      indicator_list=[]
      strategy_list=[]
      days=signal_days()
      buy_threshold=int(input('What is the buy threshold? '))
      sell_threshold=int(input('What is the sell threshold? '))
      Indicator=Indicators.execute(Indicators.Directional_indicator(days),Ticker_closing_prices)
      indicator_list.append('Indicator')
      for i in Indicator:
         myIndicator=i
         indicator_list.append(myIndicator)
      Strategy=Signal_strategies.execute(Signal_strategies.Signal_Directional(Indicators.execute(Indicators.Directional_indicator(days),Ticker_closing_prices),buy_threshold,sell_threshold))
      strategy_list.append('Signal_strategies')
      for i in Strategy:
         myStrategy=i
         strategy_list.append(myStrategy)

      for i in range(len(date_list)):
         print('{:10}  {:10}  {:10}  {:10}'.format(date_list[i], closing_price_list[i], indicator_list[i], strategy_list[i]))

         
   else:
      print('You did not select simple-moving-average or directional. Please try again.')
      signal_strategy()                  
Example #37
import csv
import random
import time
import sys

import Download

try:
    Download.fromdrive()
except Exception:
    print("Failed to update database")

try:
    db = open('db.csv')
    reader = csv.reader(db)
    teamsList = list(reader)
except FileNotFoundError:
    print("No database file found")
    teamsList = []

if not teamsList:
    print("Something went wrong: no users in database")
    time.sleep(1)
    sys.exit()

acaOnly = []
for singleList in teamsList:
    acaOnly = acaOnly + singleList[2:]

Example #38
import Download

pagelist = ['http://www.economist.com/']

crawler = Download.crawler('')
crawler.crawl(pagelist)

Example #39
 def download(self, option):
     dl = Download.download(self.url, self.title)  # renamed from `object` to avoid shadowing the builtin
     check = dl.start(option)
     return check
Example #40
                                print "Select a peer ('c' to cancel): "
                                for idx, file in enumerate(available_files):            # Visualizzazione la lista dei peer da cui è possibile scaricarlo
                                    if selected_file == idx:
                                        for idx2, owner in enumerate(file.owners):
                                            print str(idx2) + ": " + owner.ipv4 + " | " + owner.ipv6 + " | " + owner.port

                                selected_peer = None
                                while selected_peer is None:
                                    try:
                                        option = raw_input()                            # the user picks a peer to download the file from
                                    except SyntaxError:
                                        option = None

                                    if option is None:
                                        print 'Please select an option'
                                    elif option == 'c':
                                        return
                                    else:
                                        try:
                                            selected_peer = int(option)
                                        except ValueError:
                                            print "A number is required"

                                for idx2, owner in enumerate(file_to_download.owners):  # download the selected file
                                    if selected_peer == idx2:
                                        print "Downloading file from: " + owner.ipv4 + " | " + owner.ipv6 + " " + owner.port
                                        Download.get_file(self.session_id, owner.ipv4, owner.ipv6, owner.port, file_to_download, self.directory)
                        else:
                            print "Unknown error, check your code!"
Example #41
    def update_process(self):
        if self.asset_type == 'stocks':
            startTime = time.time()
            print '\tReading in Stock List'
            
            temp = []

            # the three exchange lists share one format, so read them in one loop
            for listfile in ['NYSE.csv', 'NASDAQ.csv', 'AMEX.csv']:
                with open(listfile, 'r') as f:
                    for line in f:
                        line = line.replace('"', "").strip()
                        line = line.replace(' ', "").strip()
                        spl = line.split(',')
                        temp.append(spl[0])
            # ETF_STOCKS.txt used to be read the same way here but is commented out in the source
                
            print '\t\tComplete:\t' + str(time.time()-startTime)
            
            symbols = set(temp)
            
            if os.path.isfile('exceptionFile.csv'):
                with open('exceptionFile.csv','rb') as exception_file:
                    for line in exception_file:
                        line = line.strip()
                        if line in symbols:
                            symbols.remove(line)
        
            
            startTime = time.time()
            print '\tDownloading Data'
            
            try:
                shutil.rmtree('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/'+self.folder)
            except OSError:
                print '\t\t\tUnable to delete old stock data!'
            time.sleep(1)
            try:
                os.mkdir('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/'+self.folder+'')
            except OSError:
                print '\t\t\tUnable to create stocks_old folder!'
            
            Download.download_data(symbols)
           
            print '\t\tComplete:\t' + str(time.time()-startTime)
            
            
            #Update data
            startTime = time.time()
            print '\tProcessing Data'
            
            num_cpus = multiprocessing.cpu_count()
            threads = []
            self.update_queue = Queue()
            for k in symbols:
                self.update_queue.put(k)
            
            for k in xrange(num_cpus):
                try:
                    p = Process(target=self.process_updates, args=(self.update_queue, ))
                    threads.append(p)
                except IOError:
                    pass
                
            for thread in threads:
                thread.start()
                
            for thread in threads:
                thread.join()     
            
            
            print '\t\tComplete:\t' + str(time.time()-startTime)
            
            
            startTime = time.time()
            print '\tDeriving New Data'
            
            num_cpus = multiprocessing.cpu_count()
            
            threads = []
            self.derive_queue = Queue()
            for k in symbols:
                self.derive_queue.put(k)
            
            for k in xrange(num_cpus):
                p = Process(target=self.derive, args=(self.derive_queue, ))
                threads.append(p)
                    
                
            for thread in threads:
                thread.start()
                
            for thread in threads:
                thread.join()     
            
        
            print '\t\tComplete:\t' + str(time.time()-startTime)
            
            
            startTime = time.time()
            print '\tCreating Exceptions List'
            
            '''
            self.symbol_queue = []
            self.exceptions_queue = []
            for k in symbols:
                self.symbol_queue.append(k)
            
            self.find_exceptions(self.symbol_queue, self.exceptions_queue)
            
            suspicious_companies = []
            with open('suspicious_companies.csv', 'wb') as output:
                while len(self.exceptions_queue)>0:
                    line = str(self.exceptions_queue.pop())
                    line = line.replace("'",'')
                    line = line.replace("(",'')
                    line = line.replace(")",'')
                    suspicious_companies.append(line.split(',')[0])
                    output.write(str(line)+'\n')
                    
            for k in suspicious_companies:
                shutil.rmtree('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/'+self.folder+'/'+k)
            '''
        
            
            print '\t\tComplete:\t' + str(time.time()-startTime)
        
        if self.asset_type=='etf':
            startTime = time.time()
            print '\tReading in ETF List'
            
            temp = []
            with open('./ETF/ETF_STOCKS.txt', 'r') as f:
                for line in f:
                    temp.append(line.strip())
                
            print '\t\tComplete:\t' + str(time.time()-startTime)
            
            symbols = set(temp)
            
            startTime = time.time()
            print '\tDownloading Data'
            
            try:
                shutil.rmtree('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/ETF/ETF_old')
            except OSError:
                print '\t\t\tUnable to delete old ETF data!'
            time.sleep(1)
            try:
                os.mkdir('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/ETF/ETF_old')
            except OSError:
                print '\t\t\tUnable to create ETF_old folder!'
            
            Download.folder = 'ETF/ETF_old'
            Download.download_data(symbols)
           
            print '\t\tComplete:\t' + str(time.time()-startTime)
            
            
            #Update data
            startTime = time.time()
            print '\tProcessing Data'
            
            num_cpus = multiprocessing.cpu_count()
            threads = []
            self.update_queue = Queue()
            for k in symbols:
                self.update_queue.put(k)
            
            for k in xrange(num_cpus):
                try:
                    p = Process(target=self.process_updates, args=(self.update_queue, ))
                    threads.append(p)
                except IOError:
                    pass
                
            for thread in threads:
                thread.start()
                
            for thread in threads:
                thread.join()     
            
            
            print '\t\tComplete:\t' + str(time.time()-startTime)
            
            
            startTime = time.time()
            print '\tDeriving New Data'
            
            num_cpus = multiprocessing.cpu_count()
            
            threads = []
            self.derive_queue = Queue()
            for k in symbols:
                self.derive_queue.put(k)
            
            for k in xrange(num_cpus):
                p = Process(target=self.derive, args=(self.derive_queue, ))
                threads.append(p)
                    
                
            for thread in threads:
                thread.start()
                
            for thread in threads:
                thread.join()     
            
        
            print '\t\tComplete:\t' + str(time.time()-startTime)
            
            
            startTime = time.time()
            print '\tCreating Exceptions List'
            
            '''
            self.symbol_queue = []
            self.exceptions_queue = []
            for k in symbols:
                self.symbol_queue.append(k)
            
            self.find_exceptions(self.symbol_queue, self.exceptions_queue)
            
            suspicious_companies = []
            with open('suspicious_companies.csv', 'wb') as output:
                while len(self.exceptions_queue)>0:
                    line = str(self.exceptions_queue.pop())
                    line = line.replace("'",'')
                    line = line.replace("(",'')
                    line = line.replace(")",'')
                    suspicious_companies.append(line.split(',')[0])
                    output.write(str(line)+'\n')
                    
            for k in suspicious_companies:
                shutil.rmtree('C:/Users/Keith/Desktop/Workspace/Eclipse Workspace/Stock_Predictor/src/'+self.folder+'/'+k)
            '''
        
            
            print '\t\tComplete:\t' + str(time.time()-startTime)
Example #42
import json
import datetime as dt

import kdb_csv as kc

import Download
#---------------------------------------------
'''
Load the config files
'''
with open('configs/config_main.json', 'r') as f:
    config_main = json.loads(f.read())
runD = config_main['runD']
if runD: print(dt.datetime.now(), 'load config: config_main')
with open('configs/config_download.json', 'r') as f:
    config_download = json.loads(f.read())
if runD: print(dt.datetime.now(), 'load config: config_download')

#---------------------------------------------
'''
Download the data
'''
if config_main['option']['whether_download_data'] == 1:
    if runD: print(dt.datetime.now(), 'process: download data')
    Download.Download(config_download)
else:
    if runD: print(dt.datetime.now(), 'process: skip downloading data')
'''
Load the data
'''
'''
Process the data
'''
Example #43
# make a COM object
iTunes = win32com.client.gencache.EnsureDispatch('iTunes.Application')

# find My Documents
objShell = win32com.client.Dispatch('WScript.Shell')
myDocs = objShell.SpecialFolders('MyDocuments') + '\\'
print('My Documents is located at: %s' % myDocs)

# default downloadFolder is My Documents\Aethyr
downloadFolder = myDocs + 'Aethyr\\'

# config file holding location of download folder
configFileLocation = myDocs + 'aethyr.ini'

storedLocation = Download.loadStoredDownloadFolder(configFileLocation)

if (storedLocation is not None):
	downloadFolder = storedLocation

if (not Helper.isFolderIntegrityOK(downloadFolder)):
	downloadFolder = Download.resetDefaultDownloadFolder(myDocs, configFileLocation)

totalSize = 0
totalTime = 0

# dictionary for caching libraries
alreadyLoaded = {}

# separating elements of an array
DELIMITER = '&&&'
Example #44
def readExcel(tickerName):
    fileVariables = Variables()
    directory = fileVariables.directory
    endings = fileVariables.ending
    fileEnding = fileVariables.returnFileEnding(tickerName)
        
    """Opens each excel file and puts them in sheets array """
    sheets = []
    for i in fileEnding:
        fileNameTemp = directory + i
        if not os.path.isfile(fileNameTemp):
            Download.downloadAll(tickerName)
        tempBook = xlrd.open_workbook(fileNameTemp)
        sheets.append(tempBook.sheet_by_index(0))
        
    """Get dates from first sheet. These dates will be used for all other sheets"""
    i = 0
    j = 0
    dates = []
    dates.append("dates")
    
    firstSheet = sheets[0]
    while(j < firstSheet.ncols):
        tempDate = firstSheet.cell_value(0,j)
        j+=1
        if(tempDate != ' ' and tempDate != ''):
            dateTuple = xlrd.xldate_as_tuple(tempDate,0)
            dates.append(str(dateTuple[0]) + "/" + str(dateTuple[1]) + "/" + str(dateTuple[2]))
      
    """Now, get all other data from all sheets. """
    """Add dates and other data to totalArray """
    j = 0
    totalArray = []
    totalArray.append(dates)
     
    """Goes through each sheet"""
    for iterator in range(0,len(sheets)):
        sheet = sheets[iterator]
        ending = endings[iterator]
        i = 1
        """Goes down row in each sheet and then across each column to get all data"""
        while(i < sheet.nrows):
            tempData = []
            j = 0 
            while(j < sheet.ncols):
                """Will add file ending (-Q, -T, -QB etc) for variable name which is 
                j = 0.""" 
                if(j == 0):
                    tempData.append(str(sheet.cell_value(i,j)) + ending)
                else:
                    tempData.append(sheet.cell_value(i,j))
                j += 1
            i += 1
            totalArray.append(tempData)
     
#     for i in totalArray:
#         print(len(i))
#         print(i)
      
    """Now make sure length of all arrays are the same"""
    longestArray = 0
    for i in totalArray:
        if(len(i) > longestArray):
            longestArray = len(i)
          
    for i in totalArray:
        appendNumber = longestArray - len(i)
        for j in range(0,appendNumber):
            i.append('')
        
#     for i in totalArray:
# #         print(len(i))
#         print(i)
#      
    return Utility.invert(totalArray)
Example #45
 def get_download(self, id):
     assert isinstance(id, (int, long)), id
     headers, data = self._requester.requestAndCheck(
         "GET", self.url + "/downloads/" + str(id), None, None)
     return Download.Download(self._requester, data, completed=True)
Example #46
#from _pickle import cPickle
from datetime import timedelta, datetime
from dateutil.tz import tzutc
import Download
import dtFeatures
from train_Data import train

date_start = datetime(2019, 1, 3, 0, 0, tzinfo=tzutc())
date_end = date_start + timedelta(days=2)

data = Download.download(date_start, date_end)
dtFeatures.dateTime_features(data, date_start, date_end)
path = r'C:\Users\Iacob\Desktop\ProiectStrongBytes'
name = "WindCSV"
dtFeatures.exportCSV(data, path, name)

print(data.head(10))