def main():
    """Download the resource/behavior packs, extract them and post-process.

    Skips the download step entirely when SKIP_DOWNLOAD is set; extraction
    and post-processing always run against the files in `.tmp/`.
    """
    if not SKIP_DOWNLOAD:
        # Start from a clean scratch directory so stale archives never linger.
        if os.path.exists('.tmp'):
            shutil.rmtree('.tmp')
        os.makedirs('.tmp', exist_ok=True)
        print("Downloading files...")
        download_file(RP_URL, '.tmp/rp.zip')
        download_file(BP_URL, '.tmp/bp.zip')

    def _extract(archive, dest, label):
        # Re-extract into a fresh directory (rp/bp logic was duplicated before).
        if os.path.exists(dest):
            shutil.rmtree(dest)
        print('Extracting %s' % label)
        with ZipFile(archive) as zipf:
            zipf.extractall(dest)

    _extract('.tmp/rp.zip', '.tmp/rp', 'resource pack')
    _extract('.tmp/bp.zip', '.tmp/bp', 'behavior pack')

    stable = DOWNLOAD_MODE == 'stable'
    harvest('.tmp/bp', stable)
    strip_sounds('.tmp/rp/sounds/sound_definitions.json')
def deal_li(li):
    """Open the detail page linked from *li* and download its main image.

    The spawned driver is always closed, even when scraping raises.
    """
    pi_driver = get_driver(
        li.find_element_by_tag_name("a").get_attribute("href"))
    try:
        # The page title becomes the local file name; strip ':' which is
        # awkward/illegal in file names on some platforms.
        h1 = pi_driver.find_element_by_xpath(
            '//*[@class="photo-hd"]').find_element_by_tag_name('h1')
        title = h1.text.replace(":", "")
        print("title:%s" % title)
        div_pic = pi_driver.find_element_by_xpath('//*[@id="img"]')
        url = div_pic.find_element_by_tag_name('img').get_attribute("src")
        print("url:%s" % url)
        # Use the LAST dot-separated component so names like "a.b.jpg"
        # still yield the real extension (split(".")[1] did not).
        file_type = os.path.basename(url).split(".")[-1]
        file_path = ("/Users/lyf/Pictures/crawler_images/%s.%s"
                     % (title, file_type)).replace(" ", "")
        print(file_path)
        downloader.download_file(url, file_path)
        # NOTE: a VIP account is required to download the full-size original.
    finally:
        pi_driver.close()
def main():
    """Validate the CLI url, scrape the Box.com page and download the PDF."""
    style = "=+" * 20
    if url_checker(args.url) is False:  # url format check
        # Fixed message: it previously said "http:// or http://".
        raise argparse.ArgumentTypeError(
            'Value has to be in full url format http:// or https://')
    print(style)
    print("Box.com PDF Downloader by @lfasmpao")
    box_object = Scraper(args.url, args.driver_location,
                         args.use_x11, args.wait_time)
    print("Please wait for about {} seconds...".format(args.wait_time))
    box_object.load_url()
    dl_name = box_object.get_download_title()
    print(style)
    print("DATA TO BE DOWNLOADED\nTitle: {}\nBox.com URL: {}".format(
        dl_name, args.url))
    print(style)
    dl_url = box_object.get_download_url()
    print("Download URL:", dl_url)
    print(style)
    box_object.clean()  # release scraper resources

    # Ensure the destination directory exists. dirname() is "" when the
    # output location has no directory component — os.makedirs("") raises,
    # so guard it; exist_ok avoids the exists-check race.
    directory = os.path.dirname(args.output_location)
    if directory:
        os.makedirs(directory, exist_ok=True)
    out_path = str(args.output_location + dl_name + ".pdf")
    print("Downloading..\nFile will be save as:", out_path)
    download_file(url=dl_url, path=out_path)
def download():
    """Fetch the download page and grab the libmpv 7z archive matching the
    host architecture (x86_64 vs i686)."""
    r = requests.get(url)
    page = bs4.BeautifulSoup(r.text, features="html.parser")
    table = page.table
    # platform.architecture() returns e.g. ("64bit", ...); startswith is
    # clearer than slicing the first two characters.
    if platform.architecture()[0].startswith("64"):
        download_url = table.find(
            "a", href=True, title=re.compile("x86_64")).get("href")
    else:
        download_url = table.find(
            "a", href=True, title=re.compile("i686")).get("href")
    downloader.download_file(download_url, os.path.join(cd, "libmpv.7z"))
def download(version_prefix: str = "5.8") -> None:
    """Download the ttsdk archive for the newest listed version matching
    *version_prefix*.

    The prefix was previously hard-coded; the default preserves the old
    behavior (last tested version is 5.8).
    """
    r = requests.get(url)
    page = bs4.BeautifulSoup(r.text, features="html.parser")
    versions = page.find_all("li")
    # Take the last matching <li>; its anchor href ends with "/", strip it.
    version = [i for i in versions if version_prefix in i.text][-1].a.get("href")[0:-1]
    download_url = (
        url + "/" + version + "/"
        + "tt5sdk_{v}_{p}.7z".format(v=version, p=get_url_suffix_from_platform())
    )
    print("Downloading from " + download_url)
    downloader.download_file(download_url, os.path.join(cd, "ttsdk.7z"))
def deal_detail_page(_url):
    """Parse a detail page and download every image under //*[@id="img"]."""
    root_ele = etree.HTML(get_source(_url))  # type: Element
    img_ele = root_ele.xpath('''//*[@id="img"]''')  # type: Element
    for img in img_ele[0]:  # type: Element
        print(img)
        src_url = ""
        title = ""
        # items() yields (attribute-name, value) pairs; the loop variable
        # previously shadowed the builtin `map`.
        for attr, value in img.items():
            print((attr, value))
            if attr == "src":
                src_url = url + value
            if attr == "title":
                title = value
        # Last dot-separated component so multi-dot names keep the real
        # extension (split(".")[1] did not).
        file_type = os.path.basename(src_url).split(".")[-1]
        file_path = ("%s%s.%s" % (dir_path, title.replace("/", ""), file_type))
        print(file_path)
        downloader.download_file(src_url, file_path)
def iterate_over_selects(selectors, hidden_inputs=None):
    """Recursively exercise every combination of the page's drop-downs.

    Relies on module-level state: `values_in_use` (selector names fixed at
    outer recursion levels), `request_values`, `download_url`, plus the
    `iterate_over_options` and `downloader` helpers — their exact semantics
    are assumed from their names; confirm against the rest of the file.
    """
    # if there are no hidden inputs specified default it to empty array
    if not hidden_inputs:
        hidden_inputs = []
    # iterate over the selects (drop-downs) on page
    for select in selectors:
        # get name of the selector (a.k.a. province, status, years, etc.)
        opt_name = select.attrs.get('name')
        # if it is already in use go to next selector
        if opt_name in values_in_use:
            continue
        # else append it to the list of selectors in use
        values_in_use.append(opt_name)
        # get all available options and iterate over them
        options = select.findAll('option')
        # for each option of the current drop down - make a request
        iterate_over_options(options, opt_name, hidden_inputs)
        # Once we have exercised all options for a level remove it from the list of values in use.
        # We can then iterate over the same element once again by using the next option of the
        # parent selector. If by removing it we end up removing the only child (root)
        # that means we have completed.
        values_in_use.remove(opt_name)
        request_values[opt_name] = ''
        if len(values_in_use) == 0:
            exit("Completed!")

    # Proceed downloading the file
    # NOTE(review): placed at function level (runs once every selector is in
    # use, i.e. at a leaf of the recursion) — the flattened source makes the
    # original indentation ambiguous; confirm placement.
    if len(hidden_inputs) == 8:
        # process hidden fields into a request
        download_request_headers = {}
        for hidden_input in hidden_inputs:
            download_request_headers[hidden_input.attrs.get('name')] = hidden_input.attrs.get('value')
        # add 'excel' header
        download_request_headers['save'] = 'xl'
        # actually download
        downloader.download_file(download_url, download_request_headers)
def read_robots(robots_path):
    """Fetch robots.txt via the downloader and return it as text.

    Propagates the downloader's False/None sentinels unchanged; returns
    False when the body cannot be read or decoded as UTF-8.
    """
    f = downloader.download_file(robots_path)
    if f is False:
        return False
    if f is None:
        return None
    try:
        return f.read().decode('utf-8')
    except (AttributeError, UnicodeDecodeError, OSError):
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit and
        # genuine bugs are no longer swallowed; still best-effort False on
        # read/decode failures.
        return False
def main():
    """Parse CLI options, prepare the output directory, fetch the required
    resources and run the node configurator."""
    arg_parser = argparse.ArgumentParser(
        description='Node configurator generator',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    arg_parser.add_argument('--mode', help='node type',
                            choices=('api', 'peer', 'dual'), required=True)
    arg_parser.add_argument('--voting', help='node will be voting',
                            action='store_true')
    arg_parser.add_argument('--harvesting', help='node will be harvesting',
                            action='store_true')
    arg_parser.add_argument('--output', help='output directory',
                            default='../settings')
    arg_parser.add_argument('--force', help='overwrite output directory',
                            action='store_true')
    arg_parser.add_argument('--ask-pass',
                            help='ask about pass when loading pem key files',
                            action='store_true')
    cli_args = arg_parser.parse_args()

    # Create the output directory (owner-only permissions) when missing.
    if not Path(cli_args.output).is_dir():
        os.makedirs(cli_args.output, mode=0o700)

    features = {
        'voting': cli_args.voting,
        'harvesting': cli_args.harvesting,
        'ask-pass': cli_args.ask_pass,
    }
    configurator = NodeConfigurator(cli_args.output, cli_args.force,
                                    cli_args.mode, features)
    for resource in (NEMESIS_SEED, MONGO_SCRIPTS):
        download_file(configurator.dir, resource)
    configurator.run()
def download_vid(link):
    """Resolve the real video URL behind *link* with Selenium, then download.

    Raises SystemError for unknown domains or when the download fails.
    The browser is now always closed, even if scraping raises (it was
    leaked on any exception before).
    """
    browser = webdriver.Firefox(proxy=SELENIUM_PROXY)
    try:
        browser.get(link)
        if "kisscartoon" in link:
            link_text = "HERE"
        elif "kissanime" in link:
            link_text = "CLICK HERE"
        else:
            raise SystemError("Unknown domain")
        WebDriverWait(browser, TIME_LIMIT).until(
            EC.presence_of_element_located((By.LINK_TEXT, link_text)))
        assert "If the player does not work," in browser.page_source
        # Dismiss any overlay so the download link is reachable.
        browser.find_element_by_tag_name('body').send_keys(Keys.ESCAPE)
        save_link = browser.find_element_by_link_text(link_text).get_attribute('href')
    finally:
        browser.close()
    filename = link.split('/')[-1].split('?')[0] + '.mp4'
    if download_file(save_link, "%s/%s" % (SHOW_NAME, filename),
                     proxy=URLLIB_PROXY) is False:
        raise SystemError("Connection error")
def download_and_analyze(fiscal_year, agency, spending_type):
    """This is a worker function, run on a separate thread.

    Downloads the USASpending file for (fiscal_year, agency, spending_type)
    when an up-to-date analysis is missing, analyzes it and stores the
    results. Returns (True, analyses) on success, (False, error) on a
    download or CSV-parse failure, and implicitly None when no analysis
    is needed. NOTE: Python 2 code (`print >>`, `except E, e`).
    """
    dbconn = Connection()
    db = dbconn[settings.DB_NAME]
    monthly_analyses = db['monthly_analyses']
    analyses = monthly_analyses.find({'fiscal_year': fiscal_year,
                                      'agency': agency,
                                      'spending_type': spending_type})
    needed = analyses_needed(analyses)
    if needed:
        (filename, url, destpath) = usaspending.file_info(fiscal_year,
                                                          agency,
                                                          spending_type)
        dl_result = download_file(filename, url, destpath)
        # Download failures are signalled via a sentinel type, not raised.
        if isinstance(dl_result, DownloadFileFailure):
            return (False, dl_result)
        print >>sys.stdout, "Got file %s" % filename
        try:
            analyses = analyze_file(destpath,
                                    fiscal_year,
                                    settings.ANALYSIS_DATEFIELDS[spending_type],
                                    settings.ANALYSIS_FIELDS[spending_type])
            save_analyses(db, fiscal_year, agency, spending_type, analyses)
            return (True, analyses)
        except _csv.Error, e:
            # Malformed CSV: report the parse error to the caller.
            return (False, e)
def download(self, track: Track, file_path: str) -> None:
    """Persist the audio for *track* at *file_path* via the downloader module."""
    source_url = track.url
    downloader.download_file(source_url, file_path)
# Size-suffix that the site appends to thumbnail URLs; stripping it yields
# the full-size image URL.
DEFAULT_SIZE_URL = '/400_320_102400'

response = requests.get(f'{URL}/s/s46/search/artist?ima=0348')
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
groups = soup.find_all(class_='col2-wrap')
group_info = {}
for group in groups:
    group_name = group.find(class_='com-title-type0').getText()
    members = group.find_all(class_='box')
    member_list = []
    for member in members:
        member_info = {
            'name': member.find(class_='name').getText(),
            'kana': member.find(class_='kana').getText()
        }
        img_url = member.find('img')['src']
        member_info['img'] = img_url.replace(DEFAULT_SIZE_URL, '')
        member_list.append(member_info)
        # NOTE(review): download block placed inside the member loop so every
        # member's image is fetched; the flattened source makes the original
        # indentation ambiguous — confirm against the original layout.
        file_name = f'{member_info["name"]}.jpg'
        print(f'{file_name} is downloading')
        downloader.download_file(f'{URL}{member_info["img"]}', file_name)
        print(f'{file_name} download was finished')
    group_info[group_name] = member_list
return _chapters_list_el chapters_list_el = get_chapters_list_el() for chapter_el in chapters_list_el: chapters_name.append( chapter_el.find_element_by_css_selector('span.track').text) for i, chapter_name in enumerate(chapters_name): chapters_list_el = get_chapters_list_el() chapters_list_el[i].click() start_time = time() added_audio = False while time() - start_time < 120 and not added_audio: audio_file_el = WebDriverWait(driver, 60).until( EC.presence_of_element_located( (By.CSS_SELECTOR, 'audio#audioplayer'))) audio_file_url = audio_file_el.get_attribute('src') if audio_file_url not in chapters_audio_url: chapters_audio_url.append(audio_file_url) added_audio = True sleep(.1) chapters_menu_el.click() driver.get(book_url) print('collected these chapters:') for i in range(len(chapters_name)): download_file(book_name, chapters_audio_url[i], f'{i + 1}-{chapters_name[i]}') finally: driver.close()
""" The url to run this program use this url: https://raw.githubusercontent.com/rmlassesen/dataset/master/p_pladser.csv """ import downloader as download import convert_csv as convert_csv import mean as mean if __name__ == '__main__': global file_name data_income_file = download.download_file() data = convert_csv.convert_csv_to_dataframe(data_income_file) mean.mean_find(data)
return file_dict cred = conf.getUserCread() if cred[0] is not None and cred[3] is not None: sk = Skype(cred[0], cred[1], "tokenFile") file_dict = get_msg_list(cred[3]) while True: user_input = input('n/id/quit >> ') if user_input.isnumeric(): index = int(user_input) if 1 <= index <= len(file_dict): file_tuple = file_dict[index] dw.download_file( file_tuple[0], file_tuple[1], "skype_token {0}".format(sk.conn.tokens["skype"]), cred[2]) else: print('Please Enter valid Id') else: if user_input == 'quit': break elif user_input == 'n': file_dict = get_msg_list(cred[3]) else: print('Please Enter Valid Option') elif cred[0] is not None and cred[3] is None: sk = Skype(cred[0], cred[1], "tokenFile") contact = sk.contacts contact_dict = {}
# Hand-rolled sys.argv parsing. Expected layout (inferred from the index
# arithmetic below — confirm against the project's README/usage):
#   [prog] [-r] <flag value>... -f <site>... <dir>...
inputargs = {}
sites = []
ddir = []
# '-r' (when present) must come first; it toggles the boolean passed
# through to the downloader.
if '-r' in sys.argv:
    i = 2
    r = True
else:
    i = 1
    r = False
print(sys.argv)
# Everything before '-f' is numeric flag/value pairs (e.g. -n 4 -i 0.5 -nf 2).
while i < sys.argv.index('-f'):
    inputargs[sys.argv[i]] = float(sys.argv[i + 1])
    i += 2
xy = sys.argv.index('-f')
# '-nf' = number of files: the first xye args after -f are URLs, the
# following xye args are destination directories.
xye = int(inputargs['-nf'])
print(inputargs)
print(sys.argv)
name = []
for zxy in range(xye):
    sites.append(sys.argv[xy + zxy + 1])
    ddir.append(sys.argv[xy + zxy + 2 + xye])
    name.append('File' + str(zxy))
print(inputargs)
for i in range(len(sites)):
    downloader.download_file(sites[i], ddir[i], name[i], r,
                             int(inputargs['-n']), inputargs['-i'])
def main():
    """
    Parses command line options, then delegates to various other functions.
    """
    # NOTE(review): Python 2 code (print statement, raw_input, generator
    # .next()). String-literal line continuations below are reconstructed
    # from a whitespace-mangled source — confirm exact wording/wrapping.
    usage_str = """
    %prog OPTION [FILENAME | dir: DIRECTORY | repo]  | Download a file from Github
    %prog push OPTION [FILEPATH | DIRPATH] | Push a file to Github

    Examples:
    `grabrc .emacs` -- Download .emacs from Github.
    `grabrc dir:.emacs.d --outfile .irssiconfig` - Download the .emacs.d directory from Github.
    `grabrc repo --destdir=/tmp/` -- Download and untar the repository in /tmp/.
    `grabrc push /home/user/.vimrc` -- Save ~/.vimrc to Github, overwriting the existing .vimrc.
    """
    parser = OptionParser(usage=usage_str, version="r33")

    # Option groups: download-wide, repository-only, file-only, upload.
    download_group = OptionGroup(parser, "Download: All (files, directories, repositories)")
    download_group.add_option("-o", "-O", "--name", "--outfile",
                              dest="outfile", action="store", metavar="NAME",
                              help="Rename the downloaded item to NAME.")
    download_group.add_option("-d", "--destdir",
                              dest="destdir", action="store", metavar="DIR",
                              help="Place the downloaded item in DIR. \
        Default: The current directory.")
    download_group.add_option("--no-backup",
                              dest="nobackup", action="store_true",
                              help="If the file already exists, don't make a backup. \
        Default: False. If the item already exists, it will be backed up.")

    dir_group = OptionGroup(parser, "Download: Repositories")
    dir_group.add_option("-k", "--keep-tar",
                         dest="tar", action="store_true",
                         help="Download the repository as a tar.gz file. \
        Default: Untar the repository.")
    dir_group.add_option("-z", "--keep-zip",
                         dest="zip", action="store_true",
                         help="Download the repository as a .zip.")

    filegroup = OptionGroup(parser, "Download: Files")
    filegroup.add_option("-a", "--append",
                         dest="append", action="store_true",
                         help="If file already exists, append to existing file. \
        Default: Back up existing file")
    filegroup.add_option("-r", "--replace",
                         dest="replace", action="store_true",
                         help="If the file already exists, replace it")
    filegroup.add_option("-p", "--print",
                         dest="stdout", action="store_true",
                         help="Print the file to the console instead of saving it.")

    savegroup = OptionGroup(parser, "Upload")
    savegroup.add_option("-m", "--message",
                         dest="message",
                         help="Specify a commit message for saving a file to Github.")

    # Validate and parse options, set mode
    map(parser.add_option_group, [download_group, filegroup, dir_group])
    (opts, args) = parser.parse_args()
    logging.debug("Options and arguments: %s / %s" % (opts, args))

    # Simple substitute for logging
    def usage_exit(level, reason):
        parser.print_help()
        print "[%s] %s" % (level.upper(), reason)
        sys.exit(1)

    try_msg = "Try either 'grabrc FILE' to download a file from Github \
        or 'grabrc push FILEPATH' to upload a file."

    # Validate options: number of arguments
    if len(args) > 2 or len(args) == 0:
        usage_exit("error", "Invalid number of arguments. " + try_msg)

    # Validate options: either "save" or empty
    mode = "download"
    if len(args) == 1:
        arg = args[0]
        if arg == "save":
            usage_exit("error", "Please specify a file to save.")
        elif arg == "repo":
            mode = "repo"
        else:
            download_name = arg
    elif "push" in args:
        mode = "upload"
        # First positional argument that is not the literal "push".
        upload_filepath = (n for n in args if n != "push").next()
    else:
        usage_exit("error", "Invalid arguments. " + try_msg)

    # Validate options: invalid combinations
    if opts.append and opts.replace:
        util.exit_runtime_error("Both --append and --replace were selected. Please select only one.")
    if opts.zip and opts.tar:
        util.exit_runtime_error("Both --keep-zip and --keep-tar were selected. \
        Please select only one.")

    # Set defaults
    opts.destdir = opts.destdir or os.getcwd()
    opts.destdir = util.sanitize_path(opts.destdir)
    if opts.outfile:
        opts.outfile = util.sanitize_path(opts.outfile)

    # Check config file (~/.grabrc) for Github username
    configpath = "%s/.grabrc" % os.path.expanduser("~")
    if opts.__dict__.get('github'):
        github_acc = opts.github
    # Interactively prompt for username if ~/.grabrc does not exist
    if not os.path.isfile(configpath):
        print """\
========================================================
Welcome! This seems to be your first time starting %s.
Please enter your Github username.
%s will search for files in the repository named %s""" \
            % (Const.PROG_NAME, Const.PROG_NAME, Const.REPO_NAME)
        github_acc = raw_input('-- Github account: ')
        cfile = open(configpath, 'w+')
        cfile.write(github_acc)
    else:
        cfile = open(configpath, 'r+')
        github_acc = cfile.readline().strip()
    cfile.close()
    opts.github = github_acc
    logging.debug("Github account: %s" % github_acc)

    # Execute actual script
    DIR_PREFIX = "dir:"
    if mode == "upload":
        uploader.save(upload_filepath, opts)
    elif mode == "download":
        if download_name.startswith(DIR_PREFIX):
            downloader.download_subdirectory(download_name[len(DIR_PREFIX):], opts)
        else:
            downloader.download_file(download_name, opts)
    elif mode == "repo":
        downloader.download_repo_nongit(opts)
""" The url to run this program use this url: https://raw.githubusercontent.com/MikkelHansen95/dataset/master/movies_metadata.csv """ import downloader import convert_csv import library.popular_danish_movie as popular_danish_movie import library.english_action_movie as english_action_movie import library.plot_reliase_and_runtime as reliase_and_runtime import library.plot_adult_movies as plot_adult_movies import library.buzz_words as buzz_words import library.animated_movies as animated_movies import library.highest_budget as highest_budget if __name__ == '__main__': global file_name file_name = downloader.download_file() data = convert_csv.convert_csv_to_dataframe(file_name) print(plot_adult_movies.plotting_adult_and_non_adult_movies(data)) # With plot print(animated_movies.find_number_of_animated_movies(data)) print(highest_budget.find_highest_budget(data)) print(popular_danish_movie.find_most_popular_danish_movie(data)) print(english_action_movie.english_action_movie_with_biggest_revenue(data)) # Plots - Der er lavet dataframes men plot mangler. reliase_and_runtime.create_plot_realise_and_runtime(data) buzz_words.find_buzz_words(data)
import os
import config
import log_util
import mysql_util
import downloader
import time

if __name__ == "__main__":
    # Create the download directory if it does not exist yet.
    if not os.path.exists(config.DOWNLOAD_PATH):
        os.makedirs(config.DOWNLOAD_PATH)

    sql = "select * from sp_random_download_task_multiVer_copy where dl_status = 0 limit 1"
    dbhandler = mysql_util.MysqlUtil()
    # Fetch one not-yet-downloaded row at a time.
    cur_selected_item = dbhandler.fetchone(sql)
    while cur_selected_item:
        # Optimistic lock: only the worker whose UPDATE actually changes the
        # row (dl_status 0 -> 1) owns this download task; a zero-row update
        # means another worker claimed it first, so fetch the next candidate.
        res = dbhandler.update("update sp_random_download_task_multiVer_copy set dl_status = 1 where dl_id = %s and dl_status = 0"
                               , (cur_selected_item[0],))
        if not res:
            cur_selected_item = dbhandler.fetchone(sql)
            continue
        else:
            print("downloading dl_id = {}".format(cur_selected_item[0]))
            downloader.download_file(cur_selected_item, config.THREAD_NUM)
            # Loop re-runs with the same row; its UPDATE now matches zero
            # rows (dl_status is 1), so the next iteration fetches a new task.