def fetch(packages=[], path=os.path.curdir):
    """
    Fetches the given packages from the repository without installing them;
    the packages are only downloaded.

    @param packages: list of package names -> list_of_strings
    @param path: path to where the packages will be downloaded. If not given,
                 packages will be downloaded to the current working directory.
    """
    packagedb = pisi.db.packagedb.PackageDB()
    repodb = pisi.db.repodb.RepoDB()
    for name in packages:
        package, repo = packagedb.get_package_repo(name)
        ctx.ui.info(_("%s package found in %s repository") % (package.name, repo))
        uri = pisi.uri.URI(package.packageURI)
        output = os.path.join(path, uri.path())
        if os.path.exists(output) and package.packageHash == pisi.util.sha1_file(output):
            ctx.ui.warning(_("%s package already fetched") % uri.path())
            continue
        if uri.is_absolute_path():
            url = str(uri.path())
        else:
            url = os.path.join(os.path.dirname(repodb.get_repo_url(repo)),
                               str(uri.path()))
        fetcher.fetch_url(url, path, ctx.ui.Progress)
def fetch_remote_file(self, url):
    dest = ctx.config.cached_packages_dir()
    self.filepath = os.path.join(dest, url.filename())

    # Download into the package cache unless a copy is already present.
    if not os.path.exists(self.filepath):
        try:
            fetcher.fetch_url(url, dest, ctx.ui.Progress)
        except pisi.fetcher.FetchError:
            # Bug 3465
            if ctx.get_option('reinstall'):
                raise Error(_("There was a problem while fetching '%s'.\nThe package "
                              "may have been upgraded. Please try to upgrade the package.") % url)
            raise
    else:
        ctx.ui.info(_('%s [cached]') % url.filename())
def read_uri(self, filename, repo=None):
    """Read PSPEC file"""
    self.filepath = filename

    url = URI(filename)
    if url.is_remote_file():
        from fetcher import fetch_url
        assert repo
        dest = os.path.join(ctx.config.index_dir(), repo)
        if not os.path.exists(dest):
            os.makedirs(dest)
        fetch_url(url, dest, ctx.ui.Progress)
        self.filepath = os.path.join(dest, url.filename())

    self.read(self.filepath)
def __init__(self, packagefn, mode='r'):
    self.filepath = packagefn
    url = URI(packagefn)

    if url.is_remote_file():
        from fetcher import fetch_url
        dest = ctx.config.packages_dir()
        self.filepath = join(dest, url.filename())

        # FIXME: exists is not enough, also sha1sum check needed
        # when implemented in pisi-index.xml
        if not exists(self.filepath):
            fetch_url(url, dest, ctx.ui.Progress)
        else:
            ctx.ui.info(_('%s [cached]') % url.filename())

    self.impl = archive.ArchiveZip(self.filepath, 'zip', mode)
def fetch_remote_file(self, url):
    dest = ctx.config.cached_packages_dir()
    self.filepath = os.path.join(dest, url.filename())

    if not os.path.exists(self.filepath):
        try:
            fetcher.fetch_url(url, dest, ctx.ui.Progress)
        except pisi.fetcher.FetchError:
            # Bug 3465
            if ctx.get_option('reinstall'):
                raise Error(_("There was a problem while fetching '%s'.\nThe package "
                              "may have been upgraded. Please try to upgrade the package.") % url)
            raise
    else:
        ctx.ui.info(_('%s [cached]') % url.filename())
def fetch(packages=[], path=os.path.curdir):
    """
    Fetches the given packages from the repository without installing them;
    the packages are only downloaded.

    @param packages: list of package names -> list_of_strings
    @param path: path to where the packages will be downloaded. If not given,
                 packages will be downloaded to the current working directory.
    """
    packagedb = pisi.db.packagedb.PackageDB()
    repodb = pisi.db.repodb.RepoDB()
    for name in packages:
        package, repo = packagedb.get_package_repo(name)
        uri = pisi.uri.URI(package.packageURI)

        if uri.is_absolute_path():
            url = str(uri.path())
        else:
            url = os.path.join(os.path.dirname(repodb.get_repo_url(repo)),
                               str(uri.path()))

        fetcher.fetch_url(url, path, ctx.ui.Progress)
def fetch_remote_file(self, url):
    from fetcher import fetch_url

    dest = ctx.config.packages_dir()
    self.filepath = join(dest, url.filename())

    sha1sum = None
    if exists(self.filepath):
        sha1sum = util.sha1_file(self.filepath)

    # Refetch only if the cached copy's checksum does not match the repository record.
    name, version = util.parse_package_name(basename(self.filepath))
    if sha1sum != ctx.packagedb.get_package(name).packageHash:
        try:
            fetch_url(url, dest, ctx.ui.Progress)
        except pisi.fetcher.FetchError:
            # Bug 3465
            if ctx.get_option('reinstall'):
                raise Error(_("There was a problem while fetching '%s'.\nThe package "
                              "may have been upgraded. Please try to upgrade the package.") % url)
            raise
    else:
        ctx.ui.info(_('%s [cached]') % url.filename())
def read(self, filename, repo=None):
    """Read PSPEC file"""
    self.filepath = filename

    url = URI(filename)
    if url.is_remote_file():
        from fetcher import fetch_url
        dest = os.path.join(ctx.config.index_dir(), repo)
        if not os.path.exists(dest):
            os.makedirs(dest)
        fetch_url(url, dest, ctx.ui.Progress)
        self.filepath = os.path.join(dest, url.filename())

    self.readxml(self.filepath)

    # find all binary packages
    packageElts = self.getAllNodes("Package")
    self.packages = [metadata.PackageInfo(p) for p in packageElts]

    self.unlink()
def fetch(packages=[], path=os.path.curdir):
    """
    Fetches the given packages from the repository without installing them;
    the packages are only downloaded.

    @param packages: list of package names -> list_of_strings
    @param path: path to where the packages will be downloaded. If not given,
                 packages will be downloaded to the current working directory.
    """
    packagedb = pisi.db.packagedb.PackageDB()
    repodb = pisi.db.repodb.RepoDB()
    for name in packages:
        package, repo = packagedb.get_package_repo(name)
        ctx.ui.info(_("%s package found in %s repository") % (package.name, repo))
        uri = pisi.uri.URI(package.packageURI)
        output = os.path.join(path, uri.path())
        if os.path.exists(output) and package.packageHash == pisi.util.sha1_file(output):
            ctx.ui.warning(_("%s package already fetched") % uri.path())
            continue
        if uri.is_absolute_path():
            url = str(uri.path())
        else:
            url = os.path.join(os.path.dirname(repodb.get_repo_url(repo)),
                               str(uri.path()))
        fetcher.fetch_url(url, path, ctx.ui.Progress)
def feed_add(self, request):
    form_message = ''
    groups = get_groups(self.user)

    # URL could be passed via a GET (bookmarklet) or POST
    self_link = request.params.get('self_link', '').strip()

    if request.method == 'GET':
        return self.respond_with_template('_feed_add_wizard_1.html', locals())

    if not is_valid_url(self_link):
        form_message = u'ERROR Error, specify a valid web address'
        return self.respond_with_template('_feed_add_wizard_1.html', locals())

    response = fetcher.fetch_url(self_link)
    if response:
        if response.status_code not in fetcher.POSITIVE_STATUS_CODES:
            form_message = u'ERROR Error, feed host returned: %s' % filters.status_title(response.status_code)
            return self.respond_with_template('_feed_add_wizard_1.html', locals())
    else:
        form_message = u'ERROR Error, a network error occurred'
        return self.respond_with_template('_feed_add_wizard_1.html', locals())

    group_id = int(request.POST.get('group', 0))
    if group_id:
        group = Group.get(Group.id == group_id)
    else:
        group = Group.get(Group.title == Group.DEFAULT_GROUP)

    fetcher.load_plugins()

    trigger_event('fetch_started')
    feed = Feed()
    feed.self_link = self_link
    feed = fetcher.add_feed(feed, fetch_icon=True, add_entries=True)
    trigger_event('fetch_done', [feed])

    subscription = fetcher.add_subscription(feed, self.user, group)
    if subscription:
        self.alert_message = u'SUCCESS Feed has been added to <i>%s</i> group' % group.title
    else:
        self.alert_message = u'INFO Feed is already in <i>%s</i> group' % group.title

    return self.respond_with_script('_modal_done.js',
                                    {'location': '%s/?feed=%d' % (request.application_url, feed.id)})
def extract_privacy_url(url):
    try:
        data = fetch_url(url)
        soup = BeautifulSoup(data)
    except Exception as e:
        return "%s" % (e)

    a_list = soup.findAll("a")
    lst = []
    for node in a_list:
        if node.get_text().lower().find("privacy") != -1:
            lst.append(node)

    if len(lst) == 1:
        return lst[0]

    for node in lst:
        if node.get_text().lower().find("policy") != -1 or \
           node.get_text().lower().find("notice") != -1 or \
           node.get_text().lower().find("promise") != -1:
            return node

    return None
def process_feed(self, overflow, overflow_reason):
    # Sync pull down the latest feeds
    resp = yield fetch_url(self.feed_url, user_agent=self.user_agent)
    parsed_feed = json.loads(resp.content)
    posts = parsed_feed.get('data', [])

    new_entries = 0
    for post in posts:
        key = ndb.Key(Entry, post.get('id'), parent=self.key)
        entry = yield key.get_async()
        if not entry:
            standard_resolution = post.get('images', {}).get('standard_resolution')
            kwargs = {}
            kwargs['image_url'] = standard_resolution.get('url')
            kwargs['image_width'] = standard_resolution.get('width')
            kwargs['image_height'] = standard_resolution.get('height')

            low_resolution = post.get('images', {}).get('low_resolution')
            kwargs['thumbnail_image_url'] = low_resolution.get('url')
            kwargs['thumbnail_image_width'] = low_resolution.get('width')
            kwargs['thumbnail_image_height'] = low_resolution.get('height')

            caption = post.get('caption')
            if not caption:
                kwargs['title'] = '.'
            else:
                kwargs['title'] = caption.get('text', '')

            kwargs['link'] = post.get('link')
            kwargs['feed_item'] = post
            kwargs['creating'] = False
            if overflow:
                kwargs['overflow'] = overflow
                kwargs['overflow_reason'] = overflow_reason
                kwargs['published'] = True

            entry = Entry(key=key, guid=post.get('id'), **kwargs)
            new_entries += 1
            yield entry.put_async()

    raise ndb.Return((self, new_entries))
def crawl_alexa_category(category):
    URLFORMAT = "http://www.alexa.com/topsites/category%s/Top/%s"
    urls = []
    for i in xrange(10, 20):
        if i == 0:
            url = URLFORMAT % ('', category)
        else:
            url = URLFORMAT % (";%d" % i, category)
        try:
            data = fetch_url(url)
            soup = BeautifulSoup(data)
        except:
            continue
        links = soup.findAll("a")
        for link in links:
            if link.has_attr("href"):
                url = link.attrs["href"]
                offset = url.find("/siteinfo/")
                if offset != -1:
                    urls.append(url[len("/siteinfo/"):])
    urls = ["http://%s" % u for u in urls]
    return urls
def fetch(self, appendDest=""):
    from fetcher import fetch_url

    ctx.ui.info(_("Fetching %s") % self.url.uri)
    dest = join(self.dest, appendDest)
    fetch_url(self.url, dest)