def downloadAndCheckFeedVersion(self, currentVersion, logger):
    MISSING_VALUE = (False, None, None)
    title = currentVersion['f']['t']
    logger.info(f'looking at {title}')

    # no download url, return
    if 'url' not in currentVersion:
        logger.error('no download url, skipping')
        return MISSING_VALUE
    url = currentVersion['url']
    logger.info(url)

    errors = currentVersion['err']
    if len(errors) > 0:
        logger.error(
            f'Feed {title} @ {url} had errors according to transitfeeds, not processing'
        )
        logger.error('\n'.join([f' {e}' for e in errors]))
        return MISSING_VALUE

    id = currentVersion['id']
    logger.info(id)
    feedDestDir = os.path.join(FEED_DIR, id)
    logger.info('--> downloading to %s' % feedDestDir)
    pathlib.Path(feedDestDir).mkdir(parents=True, exist_ok=True)

    DOWNLOAD_FILENAME = 'gtfs.zip'

    # download the file
    fileDest = os.path.join(feedDestDir, DOWNLOAD_FILENAME)
    expectedSize = currentVersion['size']
    shouldDownload = True
    if os.path.exists(fileDest):
        logger.info(f'{fileDest} already exists')
        currentSize = os.stat(fileDest).st_size
        if currentSize == expectedSize:
            logger.info(f'{fileDest} is the right size, not redownloading')
            shouldDownload = False
        else:
            logger.info(
                f'{fileDest} is the wrong size exp: {expectedSize} vs on-disk: {currentSize} - redownloading'
            )

    if shouldDownload:
        try:
            downloadFile(url, fileDest)
            logger.info(f'downloaded {fileDest}')
        except:
            traceback.print_exc()
            print(colored(f'could not download {url}', 'red'))
            return MISSING_VALUE

    # verify feed
    if not ValidateGtfs.validate(fileDest, logger):
        return MISSING_VALUE

    return (True, fileDest, id)

def get_table(name, format='fits', folder='.'):
    cmd = 'java -jar casjobs.jar extract -table {} -force -type {} -url {}'
    cmd = cmd.format(name, format, folder)

    output = sp.check_output(cmd, shell=True)

    for line in output.decode().split('\n'):
        if line.startswith('http'):
            utils.downloadFile(line, folder,
                               filename='{}.{}'.format(name, format))

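# Hypothetical usage of get_table, shown only as a sketch: the table name and
# output folder below are placeholders, and it assumes casjobs.jar plus its
# CasJobs configuration are already present in the working directory. The CLI
# prints the extraction URL on stdout, which the loop above hands to
# utils.downloadFile under a '<table>.<format>' filename.
if __name__ == '__main__':
    get_table('mySources', format='csv', folder='downloads')
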
def loadEntity(self, uri):
    """Download the data for an entity to the current working directory,
    TODO synapse cache support to ensure we don't overwrite files
    """
    entity = self.getEntity(self.repoEndpoint, uri)
    locations = self.getEntity(self.repoEndpoint, entity['locations'])
    if (0 == len(locations['results'])):
        raise Exception("entity has no locations")
    location = locations['results'][0]
    url = location['path']
    parseResult = urlparse.urlparse(url)
    pathComponents = string.split(parseResult.path, '/')
    filename = pathComponents[len(pathComponents) - 1]
    utils.downloadFile(url, filename)
    return filename

def getPPS(obsid):
    """Get PPS sources list for OBS_ID obsid."""
    url = 'http://nxsa.esac.esa.int/nxsa-sl/servlet/data-action-aio?'
    url += 'obsno={}&name=OBSMLI&level=PPS&extension=FTZ'.format(obsid)

    utils.downloadFile(url, '/tmp', 'temp.tar')
    try:
        utils.untarFile(os.path.join('/tmp', 'temp.tar'), '/tmp')
        xmm_srclist = glob.glob('/tmp/{0}/pps/P{0}EP*.FTZ'.format(obsid))
    except:
        # If untar fails, there is just one downloaded file
        # and is probably the EPIC source list
        xmm_srclist = ['/tmp/temp.tar']

    return xmm_srclist[0]

def post(self, type):
    res = 0
    if type == 'category':
        name = self.request.get('name')
        slug = self.request.get('slug')
        if db.addCategory(name, slug):
            res = 1
        self.redirect('/admin?res=%d' % res)
    elif type == 'author':
        img_url = self.request.get('img_url')
        if img_url:
            img_width, img_height = utils.getImageDimensions(utils.downloadFile(img_url))
        else:
            img_width = None
            img_height = None
        if db.addAuthor(name=self.request.get('name'),
                        slug=self.request.get('slug'),
                        description=self.request.get('description'),
                        date_birth=self.request.get('date_birth'),
                        date_death=self.request.get('date_death'),
                        img_url=img_url,
                        img_width=img_width,
                        img_height=img_height):
            res = 1
        self.redirect('/admin?res=%d' % res)
    elif type == 'quote':
        img_url = self.request.get('img_url')
        if img_url:
            img_width, img_height = utils.getImageDimensions(utils.downloadFile(img_url))
        else:
            img_width = None
            img_height = None
        if db.addQuote(author=db.getAuthor(self.request.get('author')),
                       categories=self.request.get_all('category'),
                       name=self.request.get('name'),
                       description=self.request.get('description'),
                       text=self.request.get('text'),
                       html=self.request.get('html'),
                       img_url=img_url,
                       img_width=img_width,
                       img_height=img_height,
                       quote_id=self.request.get('quote_id')):
            res = 1
        self.redirect('/admin?res=%d' % res)

def checkAndDownloadMostRecentJar(prefix, path):
    url = f'{prefix}{path}/maven-metadata.xml'
    document = requests.get(url).content.decode('utf-8')
    tree = ET.fromstring(document)
    lst = tree.find('./versioning/release')
    latestVersion = lst.text
    artifactId = tree.find('./artifactId').text
    expectedJar = f'{artifactId}-{latestVersion}.jar'
    if not os.path.exists(expectedJar):
        downloadUrl = f'{prefix}{path}/{latestVersion}/{expectedJar}'
        print(f'downloading {expectedJar} @ {downloadUrl}')
        downloadFile(downloadUrl, expectedJar)
        print(f'downloaded {expectedJar}')
    else:
        print(f'already had {expectedJar}')
    return expectedJar

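# Hypothetical usage of checkAndDownloadMostRecentJar: the repository prefix and
# artifact path below are placeholders, not taken from the original project. The
# helper reads maven-metadata.xml, picks the <release> version, and downloads the
# jar only when it is not already present in the working directory.
if __name__ == '__main__':
    jar_name = checkAndDownloadMostRecentJar(
        'https://repo.maven.apache.org/maven2/', 'com/example/example-tool')
    print('using', jar_name)
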
def downloadEntity(self, entity):
    """Download an entity and files associated with an entity to local cache
    TODO: Add storing of files in cache
    TODO: Add unpacking of files.

    Arguments:
    - `entity`: A synapse ID of entity (i.e dictionary describing an entity)
    Returns:
    - A dictionary representing an entity
    """
    entity = self.getEntity(entity)
    if not entity.has_key('locations'):
        return entity
    location = entity['locations'][0]  #TODO verify that this doesn't fail for unattached files
    url = location['path']
    parseResult = urlparse.urlparse(url)
    pathComponents = string.split(parseResult.path, '/')

    filename = os.path.join(self.cacheDir, entity['id'], pathComponents[-1])
    if os.path.exists(filename):
        #print filename, "cached"
        md5 = utils.computeMd5ForFile(filename)
        if md5.hexdigest() != entity.get('md5', ''):
            print filename, "changed, redownloading"
            utils.downloadFile(url, filename)
    else:
        print filename, 'downloading...',
        utils.downloadFile(url, filename)

    if entity['contentType'] == 'application/zip':
        ## Unpack file
        filepath = os.path.join(os.path.dirname(filename),
                                os.path.basename(filename) + '_unpacked')
        #TODO!!!FIX THIS TO BE PATH SAFE!  DON'T ALLOW ARBITRARY UNZIPING
        z = zipfile.ZipFile(filename, 'r')
        z.extractall(filepath)  #WARNING!!!NOT SAFE
        entity['cacheDir'] = filepath
        entity['files'] = z.namelist()
    else:
        entity['cacheDir'] = os.path.dirname(filename)
        entity['files'] = [os.path.basename(filename)]
    return entity

def test_downloadFile(self):
    "test download file function in utils.py"
    url = "http://dev-versions.synapse.sagebase.org/sage_bionetworks_logo_274x128.png"
    result = utils.downloadFile(url)
    if result:
        # print("status: \"%s\"" % str(result[1].status))
        # print("filename: \"%s\"" % str(result[0]))
        filename = result[0]
        assert os.path.exists(filename)

        ## cleanup
        try:
            os.remove(filename)
        except:
            print("warning: couldn't delete file: \"%s\"\n" % filename)
    else:
        print("failed to download file: \"%s\"" % url)
        assert False

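# The snippets above call utils.downloadFile but never show it. As a rough sketch
# only (not the actual implementation of any of these projects), a helper that is
# compatible with the test above -- returning a (filename, response) pair where the
# response exposes a .status attribute -- could look roughly like this:
import os
import urllib.request


def downloadFile(url, localFilepath=None):
    """Download url to localFilepath (or a name derived from the URL) and
    return (filename, response); return None on a non-200 status."""
    if localFilepath is None:
        # Derive a filename from the last path component of the URL.
        localFilepath = os.path.basename(url.split('?')[0]) or 'downloaded.tmp'
    response = urllib.request.urlopen(url)
    if response.status != 200:
        return None
    with open(localFilepath, 'wb') as f:
        f.write(response.read())
    return localFilepath, response
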
def download(root, gui: "App" = None):
    error_list = []
    total_count = 0
    skipped_count = 0
    processed_count = 0
    attachments_path = os.path.join(root, "Resources", "attachments")

    html_file_count = 0
    for path, subdirs, files in os.walk(root):
        for name in files:
            if name.endswith(".html"):
                html_file_count += 1

    current_file_idx = 0
    for path, subdirs, files in os.walk(root):
        for name in files:
            if name.endswith(".html"):
                current_file_idx += 1
                fpath = os.path.join(path, name)
                print("Loading:", fpath)

                setGuiFileDownloaderInfo(gui,
                                         week="Searching",
                                         topic="All html files",
                                         filename="",
                                         url="",
                                         output="",
                                         eta="",
                                         speed="",
                                         dl_size="",
                                         file_size="",
                                         progress=0,
                                         current_no=0,
                                         total_files=0)

                f = open(fpath, "r", encoding='utf-8')
                html_text = f.read()
                f.close()

                soup = BeautifulSoup(html_text, 'html.parser')
                # print(soup.get_text)
                attachment_tags = soup.find_all('a', {"class": "cml-asset-link"})
                asset_containers = soup.find_all('div', {"class": "asset-container"})
                print(len(attachment_tags) + len(asset_containers), "attachment(s) found")

                file_modified = False
                current_file_total_count = len(attachment_tags) + len(asset_containers)
                total_count += current_file_total_count

                new_attachment_href = "../../Resources"
                if fpath.find("{}Resources{}".format(os.path.sep, os.path.sep)) >= 0:
                    new_attachment_href = "../Resources"

                if len(asset_containers) == 0:
                    # Update GUI Progress
                    dl_size = "{} of {}".format(0, 0)
                    setGuiFileDownloaderInfo(gui,
                                             week="Loading",
                                             topic="",
                                             filename=name,
                                             url="",
                                             output=fpath,
                                             dl_size=dl_size,
                                             file_size="",
                                             progress=100,
                                             current_no=current_file_idx,
                                             total_files=html_file_count)

                for idx, asset_container in enumerate(asset_containers):
                    attachment_tag = asset_container.find('a')
                    attach_filename = asset_container.find("span", {"class": "asset-name"}).text
                    attach_filename = utils.getFormattedFileName(attach_filename)
                    # print(link.get("href"))
                    attach_href = attachment_tag.get('href')

                    # Update GUI Progress
                    progress = (idx + 1) / current_file_total_count * 100
                    dl_size = "{} of {}".format(idx + 1, current_file_total_count)
                    setGuiFileDownloaderInfo(gui,
                                             week="Loading",
                                             topic="",
                                             filename=name,
                                             url=attach_href,
                                             output=fpath,
                                             dl_size=dl_size,
                                             file_size="",
                                             progress=progress,
                                             current_no=current_file_idx,
                                             total_files=html_file_count)

                    print("Attachment {}/{}:".format(idx + 1, len(asset_containers)), end=" ")
                    if attach_href.find(new_attachment_href) >= 0:
                        print("Already processed. Skipping...")
                        skipped_count += 1
                        continue
                    elif attach_href == "":
                        error_list.append({"error": "blank href", "path": fpath})
                        print("Error: Blank href")
                        continue

                    try:
                        attach_filename = utils.downloadFile(attach_href, attachments_path, attach_filename)
                        file_modified = True
                        processed_count += 1
                        attachment_tag['href'] = new_attachment_href + "/attachments/" + attach_filename
                    except Exception as e:
                        print("Error:", e)
                        error_list.append({"error": "url", "url": attach_href, "path": fpath})
                        continue

                if len(attachment_tags) == 0:
                    # Update GUI Progress
                    dl_size = "{} of {}".format(0, 0)
                    setGuiFileDownloaderInfo(gui,
                                             week="Loading",
                                             topic="",
                                             filename=name,
                                             url="",
                                             output=fpath,
                                             dl_size=dl_size,
                                             file_size="",
                                             progress=100,
                                             current_no=current_file_idx,
                                             total_files=html_file_count)

                for idx, attachment_tag in enumerate(attachment_tags):
                    attach_href = attachment_tag.get('href')
                    attach_filename = attachment_tag.text
                    attach_filename = utils.getFormattedFileName(attach_filename)

                    # Update GUI Progress
                    progress = (len(asset_containers) + idx + 1) / current_file_total_count * 100
                    dl_size = "{} of {}".format(len(asset_containers) + idx + 1, current_file_total_count)
                    setGuiFileDownloaderInfo(gui,
                                             week="Loading",
                                             topic="",
                                             filename=name,
                                             url=attach_href,
                                             output=fpath,
                                             dl_size=dl_size,
                                             file_size="",
                                             progress=progress,
                                             current_no=current_file_idx,
                                             total_files=html_file_count)

                    print("Attachment {}/{}:".format(idx + 1, len(attachment_tags)), end=" ")
                    if attach_href.find(new_attachment_href) >= 0:
                        print("Already processed. Skipping...")
                        skipped_count += 1
                        continue
                    elif attach_href == "":
                        error_list.append({"error": "blank href", "path": fpath})
                        print("Error: Blank href")
                        continue

                    try:
                        attach_filename = utils.downloadFile(attach_href, attachments_path, attach_filename)
                        file_modified = True
                        processed_count += 1
                        attachment_tag['href'] = new_attachment_href + "/attachments/" + attach_filename
                    except Exception as e:
                        print("Error:", e)
                        error_list.append({"error": "url", "url": attach_href, "path": fpath})
                        continue

                if file_modified:
                    utils.savePlainFile(fpath, str(soup))

    print()
    print("Total:", total_count, "attachment(s)")
    print("Processed:", processed_count, "attachment(s)")
    print("Skipped:", skipped_count, "attachment(s)")
    print("Errors:", len(error_list))
    print(error_list)
    # setGuiFileDownloaderInfo(gui, week="Success", topic="Attachment processing finished successfully!")

    with open("data/attach_errors.json", "w") as out_file:
        json.dump(error_list, out_file)

def make_cat(opt_survey, nir_survey, radius=15 * u.arcmin, poscorr=False,
             make_mocs=False, getXdata=False, getOPTdata=False,
             getWSdata=False, getNIRdata=False, define_bins=False,
             make_bins=False, make_xmatch=False):

    cat_url = 'http://xmmssc.irap.omp.eu/Catalogue/3XMM-DR8'
    obs_filename = '3xmmdr8_obslist.fits'
    det_filename = '3XMM_DR8cat_v1.0.fits'
    src_filename = '3XMM_DR8cat_slim_v1.0.fits'

    # Increase in radius to avoid border effects in the crossmatch:
    delta_radius = 0.3 * u.arcmin  # (18 arcsec, like ARCHES)

    # Define structure of data directories
    dirs = dir_structure(opt_survey, nir_survey)

    # Get moc for the optical survey footprint
    if opt_survey == 'pstarrs':
        opt_label = 'PS'
        opt_moc = None
    elif opt_survey == 'sdss':
        opt_label = 'SDSS'
        opt_moc = utils.get_moc(('http://alasky.unistra.fr/footprints/tables/'
                                 'vizier/V_139_sdss9/MOC?nside=2048'),
                                opt_survey, dirs['opt'])
    else:
        raise ValueError('Unknown optical survey!')

    # Get moc for the nir survey footprint
    if nir_survey == '2MASS':
        nir_label = 'NTM'
        url_moc = None
        errtype_nir = 'ellipse'
    elif nir_survey == 'UKIDSS':
        nir_label = 'NUK'
        url_moc = 'http://horus.roe.ac.uk/vsa/coverage-maps/UKIDSS/DR10/'
        errtype_nir = 'rcd_dec_ellipse'
    elif nir_survey == 'VISTA':
        nir_label = 'NVT'
        url_moc = 'http://horus.roe.ac.uk/vsa/coverage-maps/VISTA/VHS/'
        errtype_nir = 'circle'
    else:
        raise ValueError('Unknown near-infrared survey!')

    nir_moc = utils.get_moc(url_moc, nir_survey, dirs['nir'])

    ### Get the list of XMM-Newton observations in the XMM catalogue
    xmmobsids_file_org = os.path.join(dirs['xmm'], obs_filename)
    if not os.path.isfile(xmmobsids_file_org):
        utils.downloadFile(os.path.join(cat_url, obs_filename), dirs['xmm'])

    ### Select valid obsids (clean observations and in the optical footprint)
    try:
        xmmobs = Table.read(xmmobsids_file_org)
    except:
        message = 'Unable to open XMM OBSIDs table!!!\nFile: {}'
        logging.error(message.format(xmmobsids_file_org))
        return

    xmmobs_clean_opt = clean_obsids(xmmobs, radius, opt_survey, opt_moc)

    ### Define non-overlapping mocs for the obsids
    if make_mocs:
        xmmmocs.make_mocs(xmmobs_clean_opt, dirs['xmm'], moc_order=15,
                          radius=radius + delta_radius, remove_stars=True,
                          remove_large_galaxies=True)

    ### Get data for the cross-match
    ## X-rays
    # Check if the XMM sources catalogue exists, and download otherwise
    xmmcat_file = os.path.join(dirs['xmm'], src_filename)
    if not os.path.isfile(xmmcat_file):
        src_filename_gz = '{}.gz'.format(src_filename)
        utils.downloadFile(os.path.join(cat_url, src_filename_gz), dirs['xmm'])
        utils.gunzipFile(os.path.join(dirs['xmm'], src_filename_gz), xmmcat_file)

    # Correct astrometry of XMM sources
    if poscorr:
        # Check if the detections catalogue exists and download otherwise
        xmmdet_file = os.path.join(dirs['xmm'], det_filename)
        if not os.path.isfile(xmmdet_file):
            det_filename_gz = '{}.gz'.format(det_filename)
            utils.downloadFile(os.path.join(cat_url, det_filename_gz), dirs['xmm'])
            utils.gunzipFile(os.path.join(dirs['xmm'], det_filename_gz), xmmdet_file)

        xposcorr.run(xmmdet_file, xmmcat_file, xmmobs_clean_opt, dirs['xmm'])

    # Make files with X-ray sources per non-overlaping field
    file_name, file_ext = os.path.splitext(xmmobsids_file_org)
    xmmobsids_file = '{}_{}_clean_{}{}'.format(file_name, nir_survey.lower(),
                                               opt_survey, file_ext)
    if getXdata:
        xmmobs_xdata = getdata.xmm(xmmobs_clean_opt, dirs['xmm'], xmmcat_file,
                                   nir_moc=nir_moc, opt_moc=opt_moc,
                                   moc_order=15, radius=radius,
                                   use_poscorr=False)

        # Save selected obsids
        # (with Texp and sky area, remove fields with no sources)
        xmmobs_xdata.write(xmmobsids_file, overwrite=True)
    else:
        xmmobs_xdata = Table.read(xmmobsids_file)

    ## Optical
    file_name, file_ext = os.path.splitext(xmmobsids_file)
    xmmobsids_file = '{}_n{}src{}'.format(file_name, opt_label, file_ext)

    if getOPTdata:
        if opt_survey == 'pstarrs':
            xmmobs_optdata = getdata.pstarrs(xmmobs_xdata, dirs['opt'],
                                             dirs['xmm'], nir_moc=nir_moc,
                                             radius=radius + delta_radius,
                                             moc_order=15, overwrite=False)
        elif opt_survey == 'sdss':
            xmmobs_optdata = getdata.sdss(xmmobs_xdata, dirs['opt'],
                                          dirs['xmm'], nir_moc=nir_moc,
                                          radius=radius + delta_radius,
                                          moc_order=15, overwrite=False)
        else:
            raise ValueError('Unknown optical survey!')

        xmmobs_optdata.write(xmmobsids_file, overwrite=True)
    else:
        xmmobs_optdata = Table.read(xmmobsids_file)

    ## All-WISE
    file_name, file_ext = os.path.splitext(xmmobsids_file)
    xmmobsids_file = '{}_nWSsrc{}'.format(file_name, file_ext)

    if getWSdata:
        xmmobs_wsdata = getdata.wise(xmmobs_optdata, dirs['wise'], dirs['xmm'],
                                     nir_moc=nir_moc, opt_moc=opt_moc,
                                     radius=radius + delta_radius,
                                     moc_order=15, overwrite=False)
        xmmobs_wsdata.write(xmmobsids_file, overwrite=True)
    else:
        xmmobs_wsdata = Table.read(xmmobsids_file)

    ## NIR data
    file_name, file_ext = os.path.splitext(xmmobsids_file)
    xmmobsids_file = '{}_n{}src{}'.format(file_name, nir_label, file_ext)

    if getNIRdata:
        if nir_survey == '2MASS':
            xmmobs_nirdata = getdata.tmass(xmmobs_wsdata, dirs['nir'],
                                           dirs['xmm'], moc_order=15,
                                           opt_moc=opt_moc,
                                           radius=radius + delta_radius,
                                           overwrite=False)
        elif nir_survey == 'UKIDSS':
            xmmobs_nirdata = getdata.ukidss(xmmobs_wsdata, dirs['nir'],
                                            dirs['xmm'], moc_order=15,
                                            opt_moc=opt_moc,
                                            radius=radius + delta_radius,
                                            overwrite=False)
        elif nir_survey == 'VISTA':
            xmmobs_nirdata = getdata.vista(xmmobs_wsdata, dirs['nir'],
                                           dirs['xmm'], moc_order=15,
                                           opt_moc=opt_moc,
                                           radius=radius + delta_radius,
                                           overwrite=False)

        xmmobs_nirdata.write(xmmobsids_file, overwrite=True)
    else:
        xmmobs_nirdata = Table.read(xmmobsids_file)

    ### Calculate bins according to XMM exposure time and galactic latitude
    file_name, file_ext = os.path.splitext(xmmobsids_file_org)
    xmmobsids_file = '{}_{}_bins{}'.format(file_name, nir_survey.lower(), file_ext)

    if define_bins:
        ## Galactic latitude binning
        xmmobs_optbin = binning.optical(xmmobs_nirdata, dirs['data'],
                                        nir_survey, opt_survey)
        ## XMM exposure binning
        xmmobs_bins = binning.final(xmmobs_optbin, dirs['data'], nir_survey)
        xmmobs_bins.write(xmmobsids_file, overwrite=True)
    else:
        xmmobs_bins = Table.read(xmmobsids_file)

    ### Make bins
    if make_bins:
        binning.makebins(xmmobs_bins, dirs['xmm'], 'XMM', nir_survey,
                         errtype='circle')
        binning.makebins(xmmobs_bins, dirs['opt'], opt_survey, nir_survey,
                         errtype='rcd_dec_ellipse')
        binning.makebins(xmmobs_bins, dirs['wise'], 'WISE', nir_survey,
                         errtype='ellipse')
        binning.makebins(xmmobs_bins, dirs['nir'], nir_survey,
                         errtype=errtype_nir)

    ### Crossmatching of catalogues
    xmatchcat_filename = '{}_xmatchcat.fits'.format(nir_survey.lower())
    xmatchcat_filename = os.path.join(dirs['xmatch'], xmatchcat_filename)

    if make_xmatch:
        stats_filename = '{}_bins.fits'.format(nir_survey.lower())
        bin_stats = Table.read(os.path.join(dirs['data'], stats_filename))

        crossmatching.run(bin_stats, dirs, opt_survey, opt_label,
                          nir_survey, nir_label)
        crossmatching.merge_bins(bin_stats, dirs, opt_survey, nir_survey)

        xmatch_cat = crossmatching.merge_cats(dirs, opt_survey, opt_label,
                                              nir_survey, nir_label)
        xmatch_cat.write(xmatchcat_filename, overwrite=True)
    else:
        xmatch_cat = Table.read(xmatchcat_filename, memmap=True)

    return xmatch_cat

def downloadImages(self):
    root = self.root
    error_list = []
    total_count = 0
    skipped_count = 0
    processed_count = 0
    img_path = os.path.join(root, "Resources", "html", "img")

    self.setGuiFileDownloaderInfo(week="Searching",
                                  topic="All html files",
                                  filename="",
                                  url="",
                                  output="",
                                  eta="",
                                  speed="",
                                  dl_size="",
                                  file_size="",
                                  progress=0,
                                  current_no=0,
                                  total_files=0)

    html_file_count = 0
    for path, subdirs, files in os.walk(root):
        for name in files:
            if name.endswith(".html"):
                html_file_count += 1

    current_file_idx = 0
    for path, subdirs, files in os.walk(root):
        for name in files:
            if name.endswith(".html"):
                current_file_idx += 1
                fpath = os.path.join(path, name)
                print("Loading:", fpath)

                f = open(fpath, "r", encoding='utf-8')
                html_text = f.read()
                f.close()

                soup = BeautifulSoup(html_text, 'html.parser')
                # print(soup.get_text)
                imgTags = soup.find_all('img')
                print(len(imgTags), "image(s) found")

                file_modified = False
                total_count += len(imgTags)

                new_img_src = "../../Resources"
                if fpath.find("{}Resources{}".format(os.path.sep, os.path.sep)) >= 0:
                    new_img_src = "../Resources"

                if len(imgTags) == 0:
                    # Update GUI Progress
                    dl_size = "{} of {}".format(0, len(imgTags))
                    self.setGuiFileDownloaderInfo(week="Loading",
                                                  topic="",
                                                  filename=name,
                                                  url="",
                                                  output=fpath,
                                                  dl_size=dl_size,
                                                  file_size="",
                                                  progress=100,
                                                  current_no=current_file_idx,
                                                  total_files=html_file_count)

                for idx, img in enumerate(imgTags):
                    imgUrl = img.get('src')

                    # Update GUI Progress
                    progress = (idx + 1) / len(imgTags) * 100
                    dl_size = "{} of {}".format(idx + 1, len(imgTags))
                    self.setGuiFileDownloaderInfo(week="Loading",
                                                  topic="",
                                                  filename=name,
                                                  url=imgUrl,
                                                  output=fpath,
                                                  dl_size=dl_size,
                                                  file_size="",
                                                  progress=progress,
                                                  current_no=current_file_idx,
                                                  total_files=html_file_count)

                    print("Image {}/{}:".format(idx + 1, len(imgTags)), end=" ")
                    if imgUrl.find(new_img_src) >= 0:
                        print("Already processed. Skipping...")
                        skipped_count += 1
                        continue
                    elif imgUrl == "":
                        error_list.append({"error": "blank img src", "path": fpath})
                        print("Error: Blank img src")
                        continue

                    # print(imgUrl)
                    try:
                        imgFilename = utils.downloadFile(imgUrl, img_path)
                        file_modified = True
                        processed_count += 1
                        img['src'] = new_img_src + "/html/img/" + imgFilename
                    except Exception as e:
                        print("Error:", e)
                        error_list.append({"error": "url", "url": imgUrl, "path": fpath})
                        continue

                if file_modified:
                    utils.savePlainFile(fpath, str(soup))

    print()
    print("Total:", total_count, "image(s)")
    print("Processed:", processed_count, "image(s)")
    print("Skipped:", skipped_count, "image(s)")
    print("Errors:", len(error_list))
    print(error_list)

def downloadExternalExercise(self):
    root = self.root
    links = self.download_queue_assignment
    if not links:
        print("Empty Links")
        return False

    error_list = []
    total_count = 0
    skipped_count = 0
    processed_count = 0

    self.setGuiFileDownloaderInfo(week="Loading",
                                  topic="External Exercise",
                                  filename="",
                                  url="",
                                  output="",
                                  eta="",
                                  speed="",
                                  dl_size="",
                                  file_size="",
                                  progress=0,
                                  current_no=0,
                                  total_files=0)

    total_links = len(links)
    for link_idx, item in enumerate(links):
        path = item["path"]
        tmp = path.split("\\")
        week = tmp[0]
        topic = tmp[1]
        prefix = week.replace("Week ", "0") + topic[:2]

        base_link = item["url"]
        html = utils.getFile(base_link)
        soup = BeautifulSoup(html, 'html.parser')
        # print(soup.get_text)
        title_tag = soup.find('title')
        link_tags = soup.find_all('link')
        script_tags = soup.find_all('script')
        img_tags = soup.find_all('img')

        title = title_tag.text
        folder_name = prefix + "_" + utils.getFormattedFileName(title.lower().replace(" ", "_"))
        resource_path = os.path.join(root, "Resources", 'html', folder_name)
        media_path = os.path.join(root, "Resources", 'html', "media")
        # index_file_name = utils.getFormattedFileName(title) + ".html"
        index_file_name = item['filename']
        # print(folder_name)

        print(len(link_tags), "links(s) found")
        print(len(script_tags), "script(s) found")
        print(len(img_tags), "image(s) found")
        # print(link_tags)

        link_total_count = len(link_tags) + len(script_tags)
        total_count += link_total_count

        # print(script_tags)
        for idx, link_tag in enumerate(link_tags):
            src = link_tag.get("href")
            url = utils.getFullUrl(base_link, src)
            # print(url)

            # Update GUI Progress
            progress = (idx + 1) / link_total_count * 100
            dl_size = "{} of {}".format(idx + 1, link_total_count)
            self.setGuiFileDownloaderInfo(week=week,
                                          topic=topic,
                                          filename=index_file_name,
                                          url=url,
                                          output=resource_path,
                                          dl_size=dl_size,
                                          file_size="",
                                          progress=progress,
                                          current_no=link_idx + 1,
                                          total_files=total_links)

            print("Link {}/{}:".format(idx + 1, len(link_tags)), end=" ")
            if src == "":
                error_list.append({"error": "blank href", "path": path})
                print("Error: Blank href")
                continue

            try:
                link_filename = utils.downloadFile(url, resource_path)
                processed_count += 1
                link_tag['href'] = "../../Resources/html/" + folder_name + "/" + link_filename
            except Exception as e:
                print("Error:", e)
                error_list.append({"error": "url", "url": url, "path": path})
                continue

        for idx, script_tag in enumerate(script_tags):
            progress = (len(link_tags) + idx + 1) / link_total_count * 100
            dl_size = "{} of {}".format(len(link_tags) + idx + 1, link_total_count)

            # Update GUI Progress
            self.setGuiFileDownloaderInfo(week=week,
                                          topic=topic,
                                          filename=index_file_name,
                                          output=resource_path,
                                          dl_size=dl_size,
                                          file_size="",
                                          progress=progress,
                                          current_no=link_idx + 1,
                                          total_files=total_links)

            src = script_tag.get("src")
            if src is None:
                print("External src not found. Maybe internal script. Skipping...")
                skipped_count += 1
                continue

            url = utils.getFullUrl(base_link, src)

            # Update GUI Progress
            self.setGuiFileDownloaderInfo(week=week,
                                          topic=topic,
                                          filename=index_file_name,
                                          url=url,
                                          output=resource_path,
                                          dl_size=dl_size,
                                          file_size="",
                                          progress=progress,
                                          current_no=link_idx + 1,
                                          total_files=total_links)

            print("Script {}/{}:".format(idx + 1, len(script_tags)), end=" ")
            if src == "":
                error_list.append({"error": "blank src", "path": path})
                print("Error: Blank src")
                continue

            try:
                if src.find("main") >= 0:
                    js_file = utils.getFile(url).decode("utf-8")
                    count_static = js_file.count("static")
                    external_links = re.findall("(static[/a-zA-Z._0-9-@]*)", js_file)
                    external_links_count = len(external_links)
                    print("Found {} external links in main.js, now downloading".format(external_links_count))

                    for ext_idx, external_link in enumerate(external_links):
                        external_link_url = urljoin(base_link, external_link)

                        # Update GUI Progress
                        curr_progress = (ext_idx + 1) / len(external_links)
                        prev_progress = (len(link_tags) + idx) / link_total_count * 100
                        progress = prev_progress + (100 * curr_progress / link_total_count)
                        # progress = (len(link_tags) + idx + 1 + ext_idx + 1) / (link_total_count + len(external_links)) * 100
                        # dl_size = "{} of {}".format(len(link_tags) + idx + 1 + ext_idx + 1, link_total_count + len(external_links))
                        dl_size = "{} of {}".format(len(link_tags) + idx + 1, link_total_count)
                        self.setGuiFileDownloaderInfo(week=week,
                                                      topic=topic,
                                                      filename=index_file_name,
                                                      url=external_link_url,
                                                      output=resource_path,
                                                      dl_size=dl_size,
                                                      file_size="",
                                                      progress=progress,
                                                      current_no=link_idx + 1,
                                                      total_files=total_links)

                        print("External Link {}/{}:".format(ext_idx + 1, external_links_count), end=" ")
                        utils.downloadFile(external_link_url, media_path)

                    if count_static != external_links_count:
                        print("WARNING: Downloaded {} external links but found {}".format(external_links_count, count_static))

                    js_file = js_file.replace("static/", "../../Resources/html/")
                    js_file_path = os.path.join(root, "Resources", 'html', folder_name, "main.js")
                    link_filename = utils.savePlainFile(js_file_path, js_file)
                else:
                    link_filename = utils.downloadFile(url, resource_path)

                processed_count += 1
                script_tag['src'] = "../../Resources/html/" + folder_name + "/" + link_filename
            except Exception as e:
                print("Error:", e)
                error_list.append({"error": "url", "url": url, "path": path})
                continue

        save_path = os.path.join(root, path, index_file_name)
        utils.savePlainFile(save_path, str(soup))

    print()
    print("Total:", total_count, "file(s)")
    print("Processed:", processed_count, "file(s)")
    print("Skipped:", skipped_count, "file(s)")
    print("Errors:", len(error_list))
    print(error_list)

def main():
    if utils.DATA['version'] != VERSION:
        print('Your version of Launchcraft ({}) does not match the minimum version of Launchcraft ({}). Please update.'.format(VERSION, utils.DATA['version']))
        utils.exit()

    print('This script will ask you yes or no questions.')
    print('Any answers in square brackets (e.g. [1.7.10]), or that are capitalized (e.g. [Y/n]) are the default answers, and will be selected when you press enter.')
    utils.print_separator()

    version = raw_input('Which version of Minecraft would you like to use? [1.7.10]: ').lower()
    if version == '':
        version = '1.7.10'
    if version not in utils.DATA['versions']:
        print("Invalid version selected.")
        utils.exit()

    utils.MODS = utils.DATA['versions'][version]

    JAR_DIR = os.path.join(VERSIONS_DIR, version)
    FORGE_VERSION = '{}-Forge{}'.format(version, utils.MODS['mods']['forge']['version'])
    FORGE_DIR = os.path.join(VERSIONS_DIR, FORGE_VERSION)

    print('Entering directory "{}".'.format(MINECRAFT_DIR))
    try:
        os.chdir(MINECRAFT_DIR)
    except:
        print('Failed to enter minecraft directory, please install minecraft first.')
        utils.exit()
    utils.print_separator()

    # Set the directory to which the custom profile will be installed.
    profile_name = raw_input('What would you like to call the profile being created? [launchcraft]: ').lower()
    if profile_name == '':
        profile_name = 'launchcraft'
    PROFILE_DIR = os.path.join(VERSIONS_DIR, profile_name)
    print('Creating profile {}'.format(profile_name))

    # Delete the old profile directory so we can start from scratch.
    try:
        shutil.rmtree(PROFILE_DIR)
        print('Removed old profile directory.')
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            print('No old profile directory found.')
        else:
            print(ex)
            print('Failed to remove old profile directory, exiting...')
            utils.exit()
    utils.print_separator()

    forge = utils.query_yes_no('Would you like to use Forge?', default='no')
    if forge:
        if os.path.exists(FORGE_DIR):
            print('The required Forge version has been detected on your system.')
            message = 'reinstall'
        else:
            print('The required Forge version has not been detected on your system.')
            message = 'install'

        # Ask the user whether or not they need Forge.
        if utils.query_yes_no('Do you need to {} Forge?'.format(message), default='no'):
            forge = utils.MODS['mods']['forge']
            name = forge['name']
            version = forge['version']
            jarName = 'forge.jar'

            if sys.platform == 'win32' or sys.platform == 'cygwin':
                os.chdir(BASE_DIR)

            # Download the Forge installer.
            print('Downloading {} version {}'.format(name, version))
            utils.downloadFile(forge['url'], jarName)

            if sys.platform == 'win32' or sys.platform == 'cygwin':
                print('You must now run the {} that has been downloaded to your Launchcraft directory.'.format(jarName))
                utils.exit()
            else:
                # Run the installer so the user can install Forge.
                print('You will now be asked to install Forge version {}.'.format(version))
                with open(os.devnull, 'w') as devnull:
                    subprocess.call('java -jar {}'.format(jarName), shell=True, stdout=devnull)
                os.remove(jarName)

    utils.print_separator()

    JAR_FILE = os.path.join(PROFILE_DIR, '{}.jar'.format(profile_name))
    JSON_FILE = os.path.join(PROFILE_DIR, '{}.json'.format(profile_name))
    if forge:
        print('Using Forge {} as the base for the profile'.format(utils.MODS['mods']['forge']['version']))
        if not os.path.exists(MOD_DIR):
            os.makedirs(MOD_DIR)

        utils.INSTALLED_MODS.append('forge')

        JAR_DIR = FORGE_DIR

        print('Creating new profile directory.')
        shutil.copytree(FORGE_DIR, PROFILE_DIR)
        print('Renaming Forge jar.')
        shutil.move(os.path.join(PROFILE_DIR, '{}.jar'.format(FORGE_VERSION)), JAR_FILE)
        SOURCE_JSON_FILE = '{}.json'.format(FORGE_VERSION)

        print('Entering newly created profile directory.')
        os.chdir(PROFILE_DIR)
    else:
        print('Using Minecraft {} as the base for the profile'.format(version))

        # Create the profile directory.
        try:
            print('Creating new profile directory.')
            os.makedirs(PROFILE_DIR)
        except OSError as ex:
            print(ex)
            print('Failed to create new profile directory, exiting...')
            utils.exit()

        print('Entering newly created profile directory.')
        os.chdir(PROFILE_DIR)

        print('Downloading "{0}.jar" and "{0}.json".'.format(version))
        utils.downloadFile('https://s3.amazonaws.com/Minecraft.Download/versions/{0}/{0}.jar'.format(version), '{}.jar'.format(profile_name))
        utils.downloadFile('https://s3.amazonaws.com/Minecraft.Download/versions/{0}/{0}.json'.format(version), '{}.json'.format(version))
        SOURCE_JSON_FILE = '{}.json'.format(version)

    print('Creating "{}.json".'.format(profile_name))
    with open('{}'.format(SOURCE_JSON_FILE), "r") as file:
        data = json.load(file)
    data['id'] = profile_name
    with open(JSON_FILE, "w") as file:
        json.dump(data, file, indent=4)

    print('Deleting "{}".'.format(SOURCE_JSON_FILE))
    os.remove(SOURCE_JSON_FILE)
    utils.print_separator()

    if utils.query_yes_no('Do you want to install mods?', default='no'):
        print('Which mods would you like to install?')
        toInstall = utils.printAskOptions(utils.MODS['mods'])
        print('Installing mods.')
        print('')
        for mod in toInstall:
            modData = utils.MODS['mods'][mod]
            skip = False
            conflicts = [i for i in modData['conflicts'] if i in utils.INSTALLED_MODS]
            if mod == 'forge':
                continue
            # Do not install forge-dependant mods if Forge is not installed.
            if 'forge' in modData['deps'] and 'forge' not in utils.INSTALLED_MODS:
                print('Skipping {} due to missing Forge'.format(modData['name']))
                skip = True
            # Skip conflicting mods
            elif conflicts:
                conflicting_mods = ""
                for i in conflicts:
                    conflicting_mods += utils.MODS['mods'][i]['name'] + ", "
                print('Skipping {} because it conflicts with {}'.format(modData['name'], conflicting_mods[:-2]))
                skip = True
            if skip:
                print('')
                continue

            utils.installDep(mod, JAR_FILE)
            print('')
        utils.removeMETAINF(JAR_FILE)
        utils.print_separator()

    if utils.query_yes_no('Do you want to install texture packs?', default='no'):
        if not os.path.exists(RESOURCEPACK_DIR):
            os.makedirs(RESOURCEPACK_DIR)
        print("What texture packs would you like to install?")
        toInstall = utils.printAskOptions(utils.MODS['resourcepacks'])
        print('Installing resourcepacks.')
        print('')
        for pack in toInstall:
            packData = utils.MODS['resourcepacks'][pack]
            utils.installResourcePack(pack)
            print('')
        utils.print_separator()

    if utils.query_yes_no('Do you want to install shader packs?', default='no'):
        if not os.path.exists(SHADERPACK_DIR):
            os.makedirs(SHADERPACK_DIR)
        print("What shader packs would you like to install?")
        toInstall = utils.printAskOptions(utils.MODS['shaderpacks'])
        print('Installing shaderpacks.')
        print('')
        for pack in toInstall:
            packData = utils.MODS['shaderpacks'][pack]
            utils.installShaderPack(pack)
            print('')
        utils.print_separator()

    print('Completed successfully!')
    utils.exit()


try:
    input('Press any key to exit...')
except:
    pass