# Fragment: locate the extracted audio file and tag it with eyed3
# (the enclosing download loop is not shown; see the full downloader below).
try:
    if not os.path.exists(path):
        path = '.'.join(path.split('.')[:-1]) + '.mp3'
    if not os.path.exists(path):
        path = '.'.join(path.split('.')[:-1]) + '.aac'  # was '.acc', presumably a typo
    if not os.path.exists(path):
        path = '.'.join(path.split('.')[:-1]) + '.wav'
    if not os.path.exists(path):
        print('Audio file was not found')
        print('{}, {}, {}, {}, {}, {}, {}, {}'.format(
            vid, track_name, artist_name, album_name, album_artist, image,
            album_artist, release_date))
        continue
    dl_dir = "/tmp/{}-{}.jpg".format(safe_filename(track_name),
                                     safe_filename(artist_name))
    download(image, dl_dir)
    audio_file = eyed3.load(path)
    audio_file.tag.artist = u"{}".format(artist_name)
    audio_file.tag.album = u"{}".format(album_name)
    audio_file.tag.album_artist = u"{}".format(album_artist)
    audio_file.tag.title = u"{}".format(track_name)
    audio_file.tag.release_date = u"{}".format(release_date)
    audio_file.tag.images.set(3, open(dl_dir, "rb").read(), "image/jpeg", u"")
    audio_file.tag.save()
except Exception as e:
    print(e)

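# Throughout these snippets, `download(url, filename)` is assumed to be the
# urllib alias that several of the later snippets declare explicitly:
from urllib.request import urlretrieve as download
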
def _download_items(self, items):
    """Download items."""
    psync_path = Path(self.sync_path)
    if not psync_path.exists():
        self.logger.error('Output path %s does not exist.', self.sync_path)
        return []
    photos = []
    for image_info in items:
        if 'image' in image_info['mimeType']:
            url = image_info['baseUrl'] + "=d"
            filename = image_info['filename']
            fullpath = psync_path.joinpath(filename)
            if fullpath.exists():
                self.logger.info('File %s already exists, skipping.', filename)
                continue
            download(url, str(fullpath))
            photos.append(str(fullpath))
            self.logger.info('Downloaded item %s', filename)
    return photos

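# A minimal sketch of the item shape _download_items expects, matching the
# Google Photos API mediaItems fields used above (the values are made up):
items = [{
    'mimeType': 'image/jpeg',
    'baseUrl': 'https://lh3.googleusercontent.com/abc123',  # hypothetical
    'filename': 'IMG_0001.jpg',
}]
# photos = syncer._download_items(items)  # `syncer` supplies sync_path/logger
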
def write_photos_to_disk(mediaItems):
    for media in mediaItems:
        if 'image' in media['mimeType']:
            download_url = f"{media['baseUrl']}=d"
            if not exists(PHOTOS_SAVE_DIRECTORY):
                makedirs(PHOTOS_SAVE_DIRECTORY)
            download(download_url,
                     f"{PHOTOS_SAVE_DIRECTORY}/{media['filename']}")

def dload(name, link, typeinfo, icao, choice):
    if choice == 1:
        ffile = typeinfo + "." + name.replace("/", "-")
        # exist_ok avoids the race the original try/except worked around
        os.makedirs(path(icao, typeinfo), exist_ok=True)
        pdf_path = path(icao, typeinfo) + "/" + name.replace("/", "-") + ".pdf"
        download(link, pdf_path)
        if os.name == 'nt':
            # On Windows, point pdf2image at the bundled Poppler build
            images = convert_from_path(
                pdf_path, poppler_path=r"./poppler-21.01.0/Library/bin")
        else:
            images = convert_from_path(pdf_path)
        for i, img in enumerate(images, start=1):
            if i != 1:
                img.save(icao + "/" + icao + '/' + ffile +
                         ' (PG-' + str(i) + ").png", 'PNG')
            else:
                img.save(icao + "/" + icao + '/' + ffile + ".png", 'PNG')
        print("8===>" + ffile)
    if choice == 2:
        ffile = name.replace("/", "-")
        fpath = "./" + path(icao, typeinfo) + "/" + name.replace("/", "-") + ".pdf"
        os.makedirs(path(icao, typeinfo), exist_ok=True)
        download(link, fpath)
        print("8===>" + ffile)

def download_images(media_items, media_num):
    for x in media_items:
        if 'image' in x['mimeType']:
            url = x['baseUrl'] + '=d'   # '=d' fetches the image bytes
        else:
            url = x['baseUrl'] + '=dv'  # '=dv' fetches the video bytes
        filename = str(media_num) + '_' + x['filename']
        print('Downloading ' + filename)
        download(url, 'Google Photos Library/' + filename)
        media_num += 1
    return media_num

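# Hypothetical usage: the returned counter lets a second page of API results
# continue the numbering where the first page left off (the page lists here
# are stand-ins for real API responses):
first_page_items, second_page_items = [], []
media_num = download_images(first_page_items, 0)
media_num = download_images(second_page_items, media_num)
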
def get_curriculum_pdf(self):
    """Finds the link to the PDF file with the bus schedule from the
    button in the Dubki VK group."""
    link_to_dubki_group = 'https://vk.com/dubki'
    soup = BeautifulSoup(urlopen(link_to_dubki_group), features='html.parser')
    for a_with_href in soup.find_all('a', href=True):
        if 'Расписание' in str(a_with_href):
            link_to_pdf = a_with_href['href']
            download(link_to_pdf, filename=self.filename)
            break

def check_player(self):
    # Fetch the helper .exe once; reuse it on later runs
    if not file_exists(play_sound_path + 'play_sound.exe', file_exists_param):
        try:
            download(
                'https://github.com/Pixelsuft/wav_win_sound/raw/main/wav_win_sound/play_sound.exe',
                play_sound_path + 'play_sound.exe')
            return True
        except Exception:
            return False
    else:
        return True

def download_series(self, series_url):
    page = requests.get(series_url, params={'voice': self.voice}).text
    # Pull the media URL out of the inline Playerjs initialiser
    src = re.findall(
        r"var\splayer\s=\snew\sPlayerjs\({id:'.*',\s*file:'(.*)',comment:",
        page)[0]
    a = urlparse(src)
    path_list = a.path.split('/')
    name = path_list[-1]
    # Mirror the last few URL path components locally
    path = os.path.join(os.getcwd(), *path_list[-4:-1])
    os.makedirs(path, exist_ok=True)
    download(src, os.path.join(path, name))

def process(chat_id, file_list, folder_name, analyze):
    # Ignore if list is empty
    if not file_list:
        return
    files = []
    for file_id in file_list:
        try:
            resp = requests.get(
                FILE_ID_URL.format(bot_token=BOT_TOKEN, file_id=file_id))
            js = resp.json()
            file_path = js['result']['file_path']
            download_url = DOWNLOAD_FILE_URL.format(bot_token=BOT_TOKEN,
                                                    file_path=file_path)
            extension = '.' + file_path.split('.')[-1]
            file_path = join(folder_name, file_id + extension)
            download(download_url, filename=file_path)
            files.append(file_path)
        except Exception:
            print(folder_name, file_id, "could not be downloaded")
    # Analyze at least one file; return True as soon as one succeeds
    if analyze:
        for file_loc in files:
            try:
                response = model.predict_by_filename(file_loc)
                for c in response['outputs'][0]['data']['concepts'][:5]:
                    bot.sendMessage(
                        chat_id,
                        text=f"{c['name'].title()} : "
                             f"Confidence {(c['value'] * 100):.3f}%")
                return True
            except Exception:
                print(folder_name, file_loc, "could not be processed")
        return False
    else:
        return True

def pull_dataset(filename):
    """Download a dataset file from Yann LeCun's website if it does not
    already exist in the data directory."""
    if not os.path.exists(params.DATA_DIR):
        os.mkdir(params.DATA_DIR)
    file_path = os.path.join(params.DATA_DIR, filename)
    if not os.path.exists(file_path):
        file_path, _ = download(params.SOURCE_URL + filename, file_path)
        with tf.gfile.GFile(file_path) as f:
            size = f.size()
        print("Successfully downloaded dataset: {} - {}".format(filename, size))
    return file_path

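# Hypothetical calls, assuming params.SOURCE_URL points at the MNIST page;
# these are the standard MNIST archive names:
train_images = pull_dataset('train-images-idx3-ubyte.gz')
train_labels = pull_dataset('train-labels-idx1-ubyte.gz')
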
def download_images(media_items):
    media_num = 0      # Total number of media files
    download_num = 0   # Number of downloaded files
    skip_num = 0       # Number of skipped files - already downloaded
    error_num = 0      # Number of download errors
    new_download = ''  # Downloaded file list
    for x in media_items:
        filename = x['filename']
        # Read picture-taken date/time from the metadata
        fd = x['mediaMetadata']['creationTime']
        # Build a datetime from the metadata; subtract 4 hours for the
        # timezone difference
        date = datetime(int(fd[0:4]), int(fd[5:7]), int(fd[8:10]),
                        int(fd[11:13]), int(fd[14:16]),
                        int(fd[17:19])) - timedelta(hours=4)
        # Tuple time format, used later for os.utime/setctime
        utime = time.mktime(date.timetuple())
        file_name_date = filename + ' ' + str(date)
        if 'image' in x['mimeType']:
            # File is an image
            url = x['baseUrl'] + '=d'
            f_download_file = True
        elif 'video' in x['mimeType']:
            # File is a video; allow download only if it is in the READY
            # state - other states cause download errors
            video_status = x['mediaMetadata']['video']['status']
            if 'READY' in video_status:
                url = x['baseUrl'] + '=dv'
                f_download_file = True
            else:
                print('*** ATTENTION: Video status is not READY for ' +
                      filename + ' ***')
                f_download_file = False
        else:
            # File is neither an image nor a video: do not download
            f_download_file = False
        if not f_download_file:
            print('*** ATTENTION: Not attempting to download ' + filename + ' ***')
            new_download = new_download + '\n\t*** Video not ready *** ' + filename
            error_num += 1
        elif file_name_date not in file_list:
            # The file was NOT downloaded previously
            try:
                print('Downloading ' + filename)
                name, ext = os.path.splitext(filename)
                if os.path.exists(path + '\\' + filename):
                    # Same file exists: add utime to the file name
                    filename = name + '_' + str(utime) + ext
                download(url, path + '\\' + filename)
            except Exception:
                new_download = new_download + '\n\t*** Download failed *** ' + filename
            else:
                new_download = new_download + '\n\tDownloaded ' + filename
                # Set modified/accessed/created times to the picture-taken time
                os.utime(path + '\\' + filename, (utime, utime))
                setctime(path + '\\' + filename, utime)
                # Add the file name to the list of downloaded files
                f.write(file_name_date + '\n')
                download_num += 1
                # The Google Photos API has no way to tell whether a Live
                # Photo is attached; try the download and see if it works
                if 'image' in x['mimeType']:
                    try:
                        filename = name + '.MOV'
                        print('Attempting to download live photo ' + filename)
                        if os.path.exists(path + '\\' + filename):
                            # Same file exists: add utime to the file name
                            filename = name + '_' + str(utime) + '.MOV'
                        # Appending 'v' turns the '=d' suffix into '=dv'
                        download(url + 'v', path + '\\' + filename)
                    except Exception:
                        # No live photo attached
                        print(' - No live photo for ' + filename)
                    else:
                        print(' - Live photo downloaded ' + filename)
                        os.utime(path + '\\' + filename, (utime, utime))
                        setctime(path + '\\' + filename, utime)
                        f.write(filename + ' ' + str(date) + '\n')
                        new_download = new_download + '\n\tDownloaded ' + filename
                        download_num += 1
            media_num += 1
        else:
            # File was previously downloaded: skip
            print('Skipping ' + filename + '. Already downloaded.')
            skip_num += 1
            media_num += 1
    return media_num, download_num, skip_num, error_num, new_download

# Assumes gazpacho for the scraping (its soup.find returns a list when
# there are several matches) plus the urlretrieve alias from above:
from pathlib import Path
from gazpacho import get, Soup

dir = 'media'
Path(dir).mkdir(exist_ok=True)
base = 'http://localhost:5000/'
url = base + '/media'
html = get(url)
soup = Soup(html)

# download images
imgs = soup.find('img')
srcs = [i.attrs['src'] for i in imgs]
for src in srcs:
    name = src.split('/')[-1]
    download(base + src, f'{dir}/{name}')

# download audio
audio = soup.find('audio').find('source').attrs['src']
name = audio.split('/')[-1]
download(base + audio, f"{dir}/{name}")

# download video
video = soup.find('video').find('source').attrs['src']
name = video.split('/')[-1]
download(base + video, f"{dir}/{name}")

# clean up

def main():
    # parse arguments
    parser = argparse.ArgumentParser(description='Parameters for XML parser.')
    parser.add_argument('URL',
                        help='URL of file to be downloaded (e.g. '
                        'http://opus.nlpl.eu/download.php?f=OpenSubtitles/ro.tar.gz)')
    parser.add_argument('--outdir', default=None,
                        help='Path where processed files will be saved')
    parser.add_argument('--ext', default=".txt",
                        help='Extension that will be given to the processed files')
    parser.add_argument(
        '--transform', default=None,
        help='Post-process each sentence with a user-specified function '
        '(to be found in detokenizer.py). E.g. --transform="default, en" '
        'will first transform each sentence using the "default" function in '
        'detokenizer.py and will then process each sentence using the "en" '
        'function.')
    parser.add_argument('--verbose', action="store_true", default=False,
                        help='Increase verbosity')
    args = parser.parse_args()
    dataset_url = args.URL
    outdir = args.outdir
    verbose = args.verbose
    suffix = args.ext
    transform = args.transform
    if outdir is None:
        outdir = os.path.join('./', _get_path(_filename_from_URL(dataset_url)))

    # set up names
    dataset_bare_filename = _get_bare_filename(_filename_from_URL(dataset_url))
    dataset_filename = _get_filename(_filename_from_URL(dataset_url))
    final_outdir = os.path.join(outdir, dataset_bare_filename)
    final_outname = os.path.join(final_outdir, dataset_filename)

    # create outdir
    if not os.path.exists(final_outdir):
        if verbose:
            print("Creating output directory {} ...".format(final_outdir))
        os.makedirs(final_outdir)

    # download XML corpus
    if verbose:
        print("Downloading corpus from {} ...".format(dataset_url))
    download(dataset_url, final_outname)
    if verbose:
        print("Saved to {} ...".format(final_outname))
    if verbose:
        print("Reading file {} ...".format(final_outname))

    # open .tar file
    ext = _get_extensions(dataset_filename)
    errors = []
    if ext == '.tar.gz':
        with tarfile.open(final_outname, 'r') as zipped_data:
            # list .gz files
            file_list = zipped_data.getnames()
            nb_files = len(file_list)
            for i_, gz_file in enumerate(file_list, start=1):
                if verbose:
                    print("[{}/{}]({} errors) Processing file {} ...".format(
                        i_, nb_files, len(errors), _get_bare_filename(gz_file)))
                # get data
                compressed_gz_file = zipped_data.extractfile(gz_file)
                gz_data = get_gz_data(compressed_gz_file)
                # get path
                path_ = os.path.join(final_outdir, _get_path(gz_file))
                xml_outdir = os.path.join(path_,
                                          _get_bare_filename(gz_file) + suffix)
                # parse XML and dump data
                e = process_gz_file(gz_data, xml_outdir, transform)
                if e is not None:
                    errors.append(e)
        if errors:
            print('Warning: could not process the following files:')
            for e_files in errors:
                print('\t{}'.format(e_files))
    elif ext == '.xml.gz':
        print('Error: wrong file format! For now, this program only '
              'supports monolingual XML files')
        exit()
    else:
        print('Error: unknown file format!')
        exit()
    if os.path.exists(final_outname):
        os.remove(final_outname)

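# Example invocation (the script name is hypothetical; the URL comes from
# the parser's own help text above):
#   python parse_opus.py "http://opus.nlpl.eu/download.php?f=OpenSubtitles/ro.tar.gz" \
#       --outdir ./out --ext .txt --verbose
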
def fetch_ip(URL, Query, List):
    # get the ips
    ips = res.resolve(URL, Query)
    ips = [str(i) for i in ips]
    # print the ips in the format 'example.com IN A [192.0.2.1, ...]'
    print(URL, 'IN', Query, ips)
    # append the ips for listing
    List += ips

# download youtubeparsed
download(
    'https://raw.githubusercontent.com/nickspaargaren/no-google/master/categories/youtubeparsed',
    'youtubeparsed')

# keep the previously collected ips
with open('ipv4_list.txt', mode='r', encoding='utf-8') as f:
    for ip in f.readlines():
        ip = ip.strip()
        try:
            ip = str(ip_address(ip))
            ipv4List.append(ip)
        except ValueError:
            if ip != '':
                print('%s is not a valid IPv4 address!' % ip)

with open('ipv6_list.txt', mode='r', encoding='utf-8') as f:
    for ip in f.readlines():

def downloader(self):
    c = self.conn.cursor()
    logger.info("Spotify Plugin : Downloader Started")
    while True:
        if not is_downloading_time():
            time.sleep(2)
            continue
        if not os.path.exists(song_download_path):
            os.makedirs(song_download_path)
        c.execute(
            "SELECT id, track_name, artist_name, album_name, album_artist, "
            "image, url, album_artist, release_date FROM spotify_queue "
            "WHERE completed_time IS NULL")
        for (vid, track_name, artist_name, album_name, album_artist, image,
             url, album_artist, release_date) in c.fetchall():
            try:
                if not is_downloading_time():
                    break
                self.current_vid = vid
                # noinspection SpellCheckingInspection
                ydl_opts = {
                    'format': 'bestaudio/best',
                    'postprocessors': [{
                        'key': 'FFmpegExtractAudio',
                        'preferredcodec': 'mp3',
                    }],
                    'outtmpl': '{}{} - {} [%(id)s].%(ext)s'.format(
                        song_download_path,
                        safe_filename(artist_name.split(",")[0]),
                        safe_filename(track_name)),
                    'continuedl': True,
                    'logger': logger,
                    'progress_hooks': [self.youtube_progress_hook],
                    'default_search': 'auto',
                    'noplaylist': True,
                    'source_address': '0.0.0.0',
                    'noprogress': True
                }
                logger.info("[Spotify] Downloading {} by {} ({})".format(
                    track_name, artist_name, url))
                with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                    data = ydl.extract_info(url)
                    path = ydl.prepare_filename(data)
                # Fall back through the extensions the postprocessor may
                # have produced
                if not os.path.exists(path):
                    path = '.'.join(path.split('.')[:-1]) + '.mp3'
                if not os.path.exists(path):
                    path = '.'.join(path.split('.')[:-1]) + '.aac'  # was '.acc'
                if not os.path.exists(path):
                    path = '.'.join(path.split('.')[:-1]) + '.wav'
                if not os.path.exists(path):
                    logger.error('Audio file was not found')
                    logger.error('{}, {}, {}, {}, {}, {}, {}, {}, {}'.format(
                        vid, track_name, artist_name, album_name,
                        album_artist, image, url, album_artist, release_date))
                    continue
                # Fetch the cover art, then tag the file with eyed3
                dl_dir = "/tmp/{}-{}.jpg".format(safe_filename(track_name),
                                                 safe_filename(artist_name))
                download(image, dl_dir)
                audio_file = eyed3.load(path)
                audio_file.tag.artist = u"{}".format(artist_name)
                audio_file.tag.album = u"{}".format(album_name)
                audio_file.tag.album_artist = u"{}".format(album_artist)
                audio_file.tag.title = u"{}".format(track_name)
                audio_file.tag.release_date = u"{}".format(release_date)
                audio_file.tag.images.set(3, open(dl_dir, "rb").read(),
                                          "image/jpeg", u"")
                audio_file.tag.save()
            except Exception as e:
                logger.error("Error while downloading a song")
                logger.error('{}, {}, {}, {}, {}, {}, {}, {}, {}'.format(
                    vid, track_name, artist_name, album_name, album_artist,
                    image, url, album_artist, release_date))
                c.execute(
                    "UPDATE `spotify_queue` SET completed_time=null WHERE id=?",
                    (self.current_vid, ))
                self.conn.commit()
                logger.exception(e)
        time.sleep(settings.downloader_time_out)

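# A hedged sketch of the spotify_queue table the SELECT above implies;
# the column names come from the query, the types are assumptions:
# CREATE TABLE spotify_queue (
#     id INTEGER PRIMARY KEY, track_name TEXT, artist_name TEXT,
#     album_name TEXT, album_artist TEXT, image TEXT, url TEXT,
#     release_date TEXT, completed_time TEXT
# );
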
sleep(1)
spec = pd.read_html(driver.page_source, index_col=0)[0].transpose()
if idx == 0:
    spec.columns = map(lambda a: a.replace(' :', ''), spec.columns)
    spec.to_excel(writer, startrow=0, sheet_name='Sheet', index=False)
else:
    spec.to_excel(writer, startrow=writer.sheets['Sheet'].max_row,
                  sheet_name='Sheet', index=False, header=False)
img = driver.find_element_by_css_selector(
    'img[src^="http://img.g2b.go.kr:7070/Resource/CataAttach/XezCatalog/XZMOK/"]'
).get_attribute('src')
download(img, os.path.join(DOWNLOAD_DIR, f'{idx}.png'))
# execute_script('toBack(); return false;')

writer.save()


def 클립보드로_이미지_복사하기(i):
    """Copy an image to the clipboard."""
    filepath = rf"C:\Users\minhwasoo\Desktop\imgs\{i}.png"
    image = Image.open(filepath)
    output = BytesIO()
    image.convert("RGB").save(output, "BMP")
    # Skip the 14-byte BMP file header to get a raw DIB for the clipboard
    data = output.getvalue()[14:]
    output.close()
    win32clipboard.OpenClipboard()
    win32clipboard.EmptyClipboard()
    win32clipboard.SetClipboardData(win32clipboard.CF_DIB, data)
    win32clipboard.CloseClipboard()

from pathlib import Path
from urllib.request import urlretrieve as download

import bs4
import pandas as pd
from selenium import webdriver

driver = webdriver.Chrome(r"C:\chromium/chromedriver.exe")

products = []  # list to store the name of the product
prices = []    # list to store the price of the product
ratings = []   # list to store the rating of the product
driver.get("https://www.goole.com")

content = driver.page_source
soup = bs4.BeautifulSoup(content, 'html.parser')
a = soup.find('img', attrs={'id': 'captcha'}).attrs['src']
img = 'https://www.goole.com' + a
print(img)
# df = pd.DataFrame({'Product Name': products, 'Price': prices, 'Rating': ratings})
# df.to_csv('products.csv', index=False, encoding='utf-8')

id = 'tt5057054'
directory = "images"
Path(directory).mkdir(exist_ok=True)
download(img, 'abc.png')

def update():
    updateCheck = messagebox.askyesno(title="Update check",
                                      message="Check for updates?")
    if updateCheck:
        try:
            download(
                "http://raw.githubusercontent.com/teamdodo/isaac-debug-helper/master/update/update.cfg",
                "update.cfg")
            config = configparser.ConfigParser()
            config.read("update.cfg")
            latestVersion = config["UPDATE"]["latestVersion"]
            latestVersionUrl = config["UPDATE"]["latestVersionUrl"]
            os.remove("update.cfg")
            if VERSION != latestVersion:
                updateMessage = ("An update is available, do you want to "
                                 "download it? This will launch your default "
                                 "browser and open a link to download the update")
                updateDetail = "Your version: {0}\nLatest version: {1}".format(
                    VERSION, latestVersion)
                updateDownload = messagebox.askyesno(title="Update available",
                                                     message=updateMessage,
                                                     detail=updateDetail)
                if updateDownload:
                    webbrowser.open(latestVersionUrl, new=1, autoraise=True)
            if VERSION == latestVersion:
                messagebox.showinfo(
                    title="No update available",
                    message="No updates available. You already have the "
                            "latest version")
        except urllib.error.HTTPError:
            messagebox.showinfo(
                title="Error", icon="error",
                message="Error while downloading update info. Try again later")
        except (KeyError, IOError):  # `except KeyError or IOError` only caught KeyError
            messagebox.showinfo(
                title="Error", icon="error",
                message="Error while reading update info. Try again later")
        except OSError:
            messagebox.showinfo(
                title="Error", icon="error",
                message="Error while deleting update info update.cfg. "
                        "Try to delete it manually")
        except webbrowser.Error:
            messagebox.showinfo(
                title="Error", icon="error",
                message="Error while opening your browser. Try again later "
                        "or visit manually {0}".format(latestVersionUrl))
        except Exception:
            messagebox.showinfo(
                title="Error", icon="error",
                message="Unhandled error while checking for updates. Try "
                        "again later.\nIf this persists contact the developers.")

import os
from subprocess import call
from urllib.request import urlretrieve as download

download(
    "http://kt.agh.edu.pl/~kamisinski/teaching/mpsis/lab/L1/content/clp_1.16_gmpl_ubuntu_16.04.tar.bz2",
    "clp.tar.bz2")
call(['cp', './clp.tar.bz2', '/'])
# `cd` and `export` run in child processes have no effect on this process,
# so use cwd= and os.environ instead of the original call(['cd', ...]) and
# call(['export', ...]) invocations
call(['tar', '-xjvpf', 'clp.tar.bz2'], cwd='/')
os.environ['LD_LIBRARY_PATH'] = '/usr/local/lib'

URL = "http://sandbox.fsi.uni-tuebingen.de/~marc/vorlesungssammler/vorlesungen.txt" ROOT = os.path.join(".") DATA = os.path.join(ROOT, "data") OUTPUT = os.path.join(ROOT, "output") FILE_PATH = os.path.join(DATA, "vorlesungen.txt") # Create directories if they don't exist yet if not os.path.isdir(DATA): os.makedirs(DATA) if not os.path.isdir(OUTPUT): os.makedirs(OUTPUT) # Download file from Sandbox download(URL, FILE_PATH) # Load file to pandas DataFrame vorlesungen = pd.read_csv(FILE_PATH, sep=";") # Extract Ids of lectures ids = [ id_ for id_ in [re.findall(r'\d+', link) for link in vorlesungen.Url if link] ] ids = [item for sublist in ids for item in sublist] vorlesungen["Ids"] = ids # Moduls and identifiers bachelor = [("Pflichtmodul Philosophie", "pphilobsc"), ("Pflichtmodul Linguistik", "plingubsc"),
from os import remove
from datetime import datetime
from json import load as parse
from urllib.request import urlretrieve as download

# download the list
download(
    'https://github.com/nextdns/metadata/raw/master/privacy/blocklists/nextdns-recommended.json',
    '.temp')

# open and parse the list
json = parse(open('.temp', mode='r'))
exclu = json['exclusions']  # exclusions from the json
hostsList = []    # an empty hosts list
domainsList = []  # an empty domains list
localhosts = [
    '0.0.0.0', '127.0.0.1', '255.255.255.255', '::1', 'broadcasthost',
    'fe80::1%lo0', 'ff00::0', 'ff02::1', 'ff02::2', 'ff02::3',
    'ip6-allhosts', 'ip6-allnodes', 'ip6-allrouters', 'ip6-localhost',
    'ip6-localnet', 'ip6-loopback', 'ip6-mcastprefix', 'local', 'localhost',
    'localhost.localdomain'
]

# in json.sources<list>
for source in json['sources']:
    # in json.sources<list entry>
    # download the source url
    download(source['url'], '.temp')