def import_feeds(request):
    """Import feeds from an OPML source"""
    if request.method == 'POST':
        form = OPMLImportForm(request.POST, request.FILES)
        if form.is_valid():
            # get the list of existing feeds
            existing_feeds = set(
                request.user.feeds.values_list('url', flat=True))
            entries = opml.parse(request.FILES['file'])
            try:
                with user_lock('opml_import', request.user.pk, timeout=30):
                    imported = save_outline(request.user, None, entries,
                                            existing_feeds)
            except ValidationError:
                logger.info("Prevented duplicate import for user {0}".format(
                    request.user.pk))
            else:
                message = " ".join([
                    ungettext(u'%s feed has been imported.',
                              u'%s feeds have been imported.',
                              imported) % imported,
                    _('New content will appear in a moment when you refresh '
                      'the page.')
                ])
                messages.success(request, message)
                return redirect('feeds:entries')
    else:
        form = OPMLImportForm()
    context = {
        'form': form,
    }
    return render(request, 'feeds/import_feeds.html', context)
def load_rss_feeds():
    file_to_load = BOT_OPML
    outline = opml.parse(file_to_load)
    # upload each feed into the database; insert a feed only if it is
    # not already there
    con = parentobj.getDBConnection()
    cur = con.cursor()
    for i in range(len(outline)):
        feed_title = outline[i].title
        feed_xmlurl = outline[i].xmlUrl
        feed_htmlurl = outline[i].htmlUrl
        feed_type = outline[i].type
        sqlquery = ("SELECT id from slackbot_feeds WHERE title LIKE ? "
                    "and type LIKE ? and xmlurl LIKE ? and htmlurl LIKE ?")
        data = (feed_title, feed_type, feed_xmlurl, feed_htmlurl)
        cur.execute(sqlquery, data)
        if cur.fetchone() is None:
            # feed not found: insert it
            sqlquery = ("INSERT INTO slackbot_feeds "
                        "(title, type, xmlurl, htmlurl, borked) "
                        "VALUES (?,?,?,?,?)")
            data = (feed_title, feed_type, feed_xmlurl, feed_htmlurl, "false")
            cur.execute(sqlquery, data)
            con.commit()
    parentobj.closeDBConnection(con)
def get_form_initial(self, step):
    if step == '1':
        src = None
        uploaddata = self.get_cleaned_data_for_step('0')
        if uploaddata['file']:
            fsrc = uploaddata['file']
            content = b""  # accumulate the upload; chunks are bytes
            for chunk in fsrc.chunks():
                content += chunk
            ofile = opml.from_string(content)
        else:
            src = uploaddata['url']
            ofile = opml.parse(src)
        initial = []
        for entry in ofile:
            init_entry = {
                'enabled': True,
                'title': entry.title,
                'feedurl': entry.xmlUrl,
                'wwwurl': entry.htmlUrl,
            }
            initial.append(init_entry)
        return initial
    else:
        return super(OPMLImport, self).get_form_initial(step)
def get_rss_feed(opml_file):
    file = opml.parse(opml_file)
    podcast_library = []
    for data in file:
        podcast_data = {'rss_link': data.xmlUrl}
        podcast_library.append(podcast_data)
    return podcast_library
def GetPodcastsFromOPML(request):
    sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())
    outline = opml.parse(
        "file:///home/viniciusdof/Downloads/podcasts_opml.xml")
    foundPodcastsList = []
    podcastsOPML = OPMLPodcasts(outline.title, foundPodcastsList)
    for val in outline[0]:
        podcastList = []
        podcasts = OPMLPodcastsPodcast(val.text, podcastList)
        result = sp.search(val.text, type="show", market='BR')
        for item in result["shows"]["items"]:
            imageList = []
            for image in item["images"]:
                podcastImage = Image(image["url"], image["height"],
                                     image["width"])
                imageList.append(podcastImage)
            podcast = PodcastPodcast(item["name"], item["publisher"],
                                     item["id"], item["description"],
                                     imageList)
            podcastList.append(podcast)
        podcasts.podcasts = podcastList
        foundPodcastsList.append(podcasts)
    podcastsOPML.podcasts = foundPodcastsList
    return HttpResponse(podcastsOPML.toJSON())
def getRSSSources():
    """Gets all the RSS sources we want to mine from and returns a list
    of rss objects"""
    print os.getcwd()
    rss_sources = []
    # add required sources
    # add techmeme sources
    #techmeme_sources = opml.parse('http://www.techmeme.com/lb.opml')
    # load from the list on file
    techmeme_sources = opml.parse(
        'articurate/sources/feed_lists/tech_feed_list.opml')
    for item in techmeme_sources:
        try:
            if hasattr(item, 'htmlUrl'):
                rss_sources.append(RSSObj(item.text, item.xmlUrl,
                                          item.htmlUrl))
            else:
                rss_sources.append(RSSObj(item.text, item.xmlUrl))
        except AttributeError:
            # skip outline entries that have no xmlUrl
            pass
    return {'rss': rss_sources}
def import_feeds(request):
    """Import feeds from an OPML source"""
    if request.method == 'POST':
        form = OPMLImportForm(request.POST, request.FILES)
        if form.is_valid():
            # get the list of existing feeds
            existing_feeds = set([f.url for f in Feed.objects.filter(
                category__in=request.user.categories.all(),
            )])
            # try to get the "Imported" category, create it if needed
            category, created = request.user.categories.get_or_create(
                slug='imported', defaults={'name': _('Imported')},
            )
            entries = opml.parse(request.FILES['file'])
            imported = save_outline(request.user, category, entries,
                                    existing_feeds)
            messages.success(
                request,
                # interpolate after translating so the catalogue lookup works
                _('%(num)s feeds have been imported') % {'num': imported},
            )
            return redirect('feeds:home')
    else:
        form = OPMLImportForm()
    context = {
        'form': form,
    }
    return render(request, 'feeds/import_feeds.html', context)
def load_rss_feeds():
    file_to_load = "security.opml"
    outline = opml.parse(file_to_load)
    # upload each feed into the database; insert a feed only if it is
    # not already there
    con = psycopg2.connect("dbname='%s' user='%s'" %
                           (database_name, database_username))
    cur = con.cursor()
    for i in range(len(outline)):
        feed_title = outline[i].title
        feed_xmlurl = outline[i].xmlUrl
        feed_htmlurl = outline[i].htmlUrl
        feed_type = outline[i].type
        sqlquery = ("SELECT id from slackbot_feeds WHERE title LIKE %s "
                    "and type LIKE %s and xmlurl LIKE %s and htmlurl LIKE %s")
        data = (feed_title, feed_type, feed_xmlurl, feed_htmlurl)
        cur.execute(sqlquery, data)
        if cur.fetchone() is None:
            # feed not found: insert it
            sqlquery = ("INSERT INTO slackbot_feeds "
                        "(title, type, xmlurl, htmlurl, borked) "
                        "VALUES (%s,%s,%s,%s,%s)")
            data = (feed_title, feed_type, feed_xmlurl, feed_htmlurl, "false")
            cur.execute(sqlquery, data)
            con.commit()
    con.close()
def read(self):
    """ reads and parses OPML file (set by constructor) """
    data = opml.parse(self.filename)
    for entry in data:
        self.list.append(entry.xmlUrl)
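# NOTE (sketch): the minimal opml object model that all of these snippets rely
# on -- opml.parse() accepts a path, URL, or file object; head attributes
# (title, ownerName, ...) and outline attributes (text, title, xmlUrl,
# htmlUrl, type) are exposed as plain Python attributes, and nested <outline>
# elements are reached by iteration, indexing, or len(). The filename is a
# placeholder.
import opml

outline = opml.parse('subscriptions.opml')
print(outline.title)                 # head attribute

for entry in outline:                # top-level <outline> elements
    if len(entry):                   # a folder: iterate its children
        for feed in entry:
            print(feed.text, feed.xmlUrl)
    elif hasattr(entry, 'xmlUrl'):   # a leaf feed
        print(entry.text, entry.xmlUrl)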
def fill_out_file(inputfile, outputfile):
    nested = opml.parse(inputfile)
    my_dict = {}
    for entry in nested[0]:
        # Remove special characters
        title = re.sub(r'[^A-Za-z0-9]+', '', entry.text)
        url = entry.xmlUrl
        ## change feed for channel url
        url = url.replace('feeds/videos.xml?channel_id=', 'channel/')
        my_dict[title] = url
    with open(outputfile, 'a') as f_out:
        for title in sorted(my_dict.keys(),
                            key=lambda line: line.lower().split()):
            # print (title, my_dict[title])
            f_out_content = f'''
[feeds.{title}]
url = "{my_dict[title]}"
page_size = 3 # The number of episodes to query each update (keep in mind, that this might drain API token)
update_period = "12h" # How often query for updates, examples: "60m", "4h", "2h45m"
quality = "high" # or "low"
format = "video" # or "audio"'''
            f_out.write(f_out_content)
def convert_opml_to_rest(opmlPath, restFile, **kwargs):
    'write reST for an OPML outline'
    opmlData = opml.parse(opmlPath)
    print >>restFile, '=' * len(opmlData.title)
    print >>restFile, opmlData.title
    print >>restFile, ('=' * len(opmlData.title)) + '\n'
    write_opml_to_rest(opmlData, restFile, **kwargs)
def dl(opml_filename, database, outtmpl, fake=False, quiet=False,
       verbose=False):
    engine = create_engine(database)
    Base.metadata.create_all(engine)
    Base.metadata.bind = engine
    session = sessionmaker(bind=engine)()
    opts = {}
    if outtmpl:
        opts['outtmpl'] = outtmpl
    if quiet:
        opts['quiet'] = quiet
    if verbose:
        opts['verbose'] = verbose
    for channel in opml.parse(opml_filename)[0]:
        logger.debug('Parsing channel "{}".'.format(channel.title))
        for item in feedparser.parse(channel.xmlUrl)['items']:
            session.add(Video(url=item['link']))
            try:
                session.commit()
            except IntegrityError:
                session.rollback()
            else:
                msg = 'Downloading "{}" from "{}".'
                logger.info(msg.format(item['title'], channel.title))
                if not fake:
                    try:
                        download(item['link'], opts)
                    except Exception:
                        session.rollback()
def start_to_subscribe(self):
    outline = opml.parse('subscription_manager')
    for x in outline[0]:
        channel = x.title
        # clean search box input
        search_for_input = self.driver.find_element_by_xpath(
            '//*[@id="search"]')
        search_for_input.send_keys(Keys.CONTROL + "a")
        search_for_input.send_keys(Keys.DELETE)
        search_for_input.send_keys(channel)
        search_btn = self.driver.find_element_by_xpath(
            '//*[@id="search-icon-legacy"]')
        search_btn.click()
        sleep(1)
        button_msg = self.driver.find_element_by_xpath(
            '//*[@id="subscribe-button"]/ytd-subscribe-button-renderer/paper-button/yt-formatted-string'
        ).text
        print(button_msg)
        if button_msg == 'SUBSCRIBE':
            subscribe_btn = self.driver.find_element_by_xpath(
                '//*[@id="subscribe-button"]/ytd-subscribe-button-renderer/paper-button'
            )
            subscribe_btn.click()
        print(f"Processed {channel}")
        sleep(1)
def fetch_opml(url):
    try:
        outline = opml.parse(url)
    except IOError:
        print_d("Failed opening OPML %s" % url)
        return []
    GObject.idle_add(lambda: update_feeds([x.xmlUrl for x in outline]))
def handle(self, *args, **options):
    if options['subscriptions']:
        try:
            outline = opml.parse(options['subscriptions'][0])
        except Exception:
            raise CommandError('Not a valid OPML file')
    else:
        raise CommandError('OPML file to import subscriptions is needed')

    if options['user_ids']:
        user_list = options['user_ids']
    else:
        user_list = User.objects.values_list('id', flat=True)

    for user_id in user_list:
        self.stdout.write('Importing subscriptions for user %s' % user_id)
        try:
            user = User.objects.get(pk=user_id)
        except User.DoesNotExist:
            raise CommandError('User "%s" does not exist' % user_id)
        for category in outline:
            if category:
                category_title = category.title
            else:
                category_title = "?"
            self.stdout.write('\tCategory: %s' % category_title)
            for entry in category:
                self.stdout.write('\t\tSubscribing to: %s' % entry.title)
                if _is_valid_rss_feed(entry.xmlUrl) and hasattr(entry, 'title'):
                    _create_subscription_and_fetch_articles(user, entry.xmlUrl,
                                                            entry.title)
                    self.stdout.write(self.style.SUCCESS(
                        '\t\tSuccessfully subscribed user %s to %s'
                        % (user_id, entry.title)))
                else:
                    self.stdout.write(self.style.ERROR(
                        '\t\tCould not subscribe user %s to %s: '
                        'not a valid rss feed' % (user_id, entry.title)))
def load(opml_resource):
    """
    import an OPML file
    """
    if opml_resource.endswith('.opml'):
        o_resource = opml.parse(opml_resource)
        for folder in o_resource:
            for feed in folder:
                log = f"{folder.text}, {feed.text}"
                # console.print(log, style="blue")
                # create the target folder if it does not exist
                try:
                    f = Folders.objects.get(title=folder.text)
                except Folders.DoesNotExist:
                    f = Folders.objects.create(title=folder.text)
                # create the Feeds source if it does not exist
                obj, created = Feeds.objects.get_or_create(
                    title=feed.text,
                    defaults={
                        'title': feed.text,
                        'url': feed.xmlUrl,
                        'folder': f
                    },
                )
                if created:
                    console.print(log + ' created', style="blue")
                else:
                    console.print(log + ' already created', style="yellow")
        console.print('Nyuseu - 뉴스 - Feeds Loaded', style="green")
    else:
        console.print(f"File {opml_resource} is not an OPML file",
                      style="bold red")
def read_opml(target_file):
    '''parses the opml file starting from the top most header
    assumes there is only one top node and it is h1
    '''
    outline = opml.parse(target_file)
    topics = outline[0]
    topic_data_all = []
    topic_content_subnames = [
        "category", "competency", "reason", "description", "summary",
        "rating", "status", "plan", "dependencies", "related", "tags",
        "resources", "links"
    ]
    counter = 1
    for topic in topics:
        topic_title = topic.text
        topic_data_current = {}
        topic_data_current["name"] = topic_title
        topic_content_subnames_found = []
        for topic_content in topic:
            for topic_content_subname in topic_content_subnames:
                if topic_content.text.lower() == topic_content_subname:
                    subdata = get_topic_subdata(topic_content,
                                                topic_content_subname)
                    topic_data_current[topic_content_subname] = subdata
                    topic_content_subnames_found.append(
                        topic_content_subname.title())
        # generated data
        topic_data_current["index"] = topic_content_subnames_found
        topic_data_current["priority"] = counter
        # counter += 1
        topic_data_all.append(topic_data_current)
    return topic_data_all
def main():
    with open("Subscriptions.opml", "r") as f:
        outline = opml.parse(f)
        feeds = []
        for feed in outline:
            feeds.append({"title": feed.text, "htmlUrl": feed.htmlUrl})
    write_markdown(feeds)
def _parse_file(self, file_name):
    """
    creates a nested structure of FeedInfos and feed categories
    from the OPML file
    :param file_name: the file to parse
    :return: a FeedInfo object containing all information from the file
    """
    outline = opml.parse(file_name)
    return self.get_feeds_from_outline(outline)
def opml_import(request, url):
    import opml
    from opml_import import import_outline_element
    try:
        o = opml.parse(url)
    except Exception:
        return HttpResponse('Cannot parse opml file %s' % url)
    import_outline_element(o)
    return HttpResponse('OK')
def __init__(self, f):
    listx = []
    with open(f, "r") as opmlfile:
        outline = opml.parse(opmlfile)
        for entry in outline:
            feed = Feed(entry.text, entry.title, entry.xmlUrl)
            listx.append(feed)
    self.feeds = list(listx)
def __init__(self, subscription_manager_file):
    self.file = subscription_manager_file
    try:
        self.parsed = opml.parse(self.file)
    except Exception:
        print("Please download \"subscription_manager\" from youtube "
              "and place in current directory")
        sys.exit(1)
    self.channels = self.get_channels()
    self.channels_cached_list = []
def opml_url_parser():
    # return a dictionary (title, url)
    opmlFile = config.RSSOPT_CONFIG['OPML_LOCATION']
    feeds = dict()
    rss = opml.parse(opmlFile)
    for folder in rss:
        for feed in folder:
            feeds[feed.title] = feed.xmlUrl
    return feeds
def handle(self, *args, **options):
    archive = opml.parse('na-archive.opml')
    shows = [show for year in archive for month in year for show in month]
    for show in shows:
        number = show_number(show.text)
        if number and not Show.objects.filter(id=number):
            if not hasattr(show, 'type'):
                continue
            # print('Handling {} [{}]'.format(show.url, number))
            handle_redirect(show.url)
def load_new_feeds(self):
    """
    go through feeds to which we subscribe and add any new feeds to
    our database
    """
    sub_file = os.path.join(self.config.sub_dir, 'subscriptions.xml')
    outline = opml.parse(sub_file)
    # if there are feeds in subscriptions.xml that are not in the database,
    # add them
    for feed in outline:
        if not self.dal.session.query(Feed).filter(
                Feed.xmlUrl == feed.xmlUrl).all():
            self.dal.session.add(Feed(feed.text, feed.xmlUrl))
            self.dal.session.commit()
def parse_opml_file():
    file = request.files['file']
    outline = opml.parse(file)  # validates the upload
    file.seek(0)  # rewind: parsing consumed the uploaded stream
    logged_in_user_id = g.user['user_email']
    from feeds_helper import import_opml_file
    try:
        import_opml_file(logged_in_user_id, file, mongo_lib)
        return json.dumps({"message": "OPML imported successfully"})
    except Exception:
        return json.dumps({"message": "Could not import OPML file"})
def import_opml(user_id, opml_url=None, data=None):
    outline = None
    if opml_url is not None:
        outline = opml.parse(opml_url)
    if data is not None:
        outline = opml.from_string(data)
    outline = outline or []
    for entry in outline:
        url = entry.xmlUrl
        print(url)
        subscribe_to_url(url, user_id)
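# NOTE (sketch): hypothetical call sites for the importer above -- opml.parse
# for a URL or path, opml.from_string for OPML already in memory; the user id
# and paths are placeholders.
import_opml(42, opml_url='https://example.com/subscriptions.opml')

with open('subscriptions.opml') as f:
    import_opml(42, data=f.read())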
def parse_opml(filename):
    """Parses the given opml of youtube subscriptions."""
    global feeds
    outline = opml.parse(filename)
    assert outline[0].text == 'YouTube Subscriptions'
    with open(SUBSCRIPTION_FILE, 'a+') as subscription_file:
        # yes_for_all = False
        for feed in outline[0]:
            if feed.text not in feeds:
                subscription_file.write('{0:s} : {1:s}\n'.format(
                    feed.text, feed.xmlUrl))
def opml_import(request, url):
    try:
        o = opml.parse(url)
    except Exception:
        return HttpResponse('Cannot parse opml file %s' % url)
    for f in o:
        new = Feed.objects.create(url=f.xmlUrl,
                                  #tags = self.cleaned_data['tags'],
                                  name=f.title,
                                  )
        new.save()
    return HttpResponse('OK')
def load_subs(f):
    """Load subscriptions from an exported youtube subscription xml

    Returns the channel feed URLs
    """
    outline = opml.parse(f)
    recs = outline[0]
    urls = [rec.xmlUrl for rec in recs]
    return urls
def index(request):
    opmlfile = settings.OPML_PATH
    outline = opml.parse(opmlfile)
    opmlstruct = {'title': 'default', 'subs': [], 'subfolders': []}

    def handlesub(sub, subs, folders):
        if len(sub) > 0:
            subf = {'title': sub.title, 'subs': [], 'subfolders': []}
            folders.append(subf)
            for s in sub:
                handlesub(s, subf['subs'], subf['subfolders'])
        else:
            feedobj = None
            try:
                feedobj = Feed.objects.get(feedurl=sub.xmlUrl)
            except Exception:
                feedobj = None
            subs.append(feedobj)

    handlesub(outline, opmlstruct['subs'], opmlstruct['subfolders'])

    def sortfolders(ostruct):
        ostruct['subs'].sort(key=lambda x: x.title.lower())
        ostruct['subfolders'].sort(key=lambda x: x['title'].lower())
        for f in ostruct['subfolders']:
            sortfolders(f)

    sortfolders(opmlstruct)
    linearopml = []

    def linearizeopml(ostruct):
        linearopml.append(('folder', ostruct['title'], None))
        for subf in ostruct['subfolders']:
            linearizeopml(subf)
            linearopml.append(('outdent', '', None))
        for sub in ostruct['subs']:
            linearopml.append(('sub', sub.title, sub))

    struct_to_linearize = opmlstruct['subfolders'][0]
    for s in struct_to_linearize['subfolders']:
        linearizeopml(s)
        linearopml.append(('outdent', '', None))
    for s in struct_to_linearize['subs']:
        linearopml.append(('sub', s.title, s))
    context = {
        #'feeds': Feed.objects.order_by('title').all()
        'sublist': linearopml
    }
    return render(request, 'feeds/index.html', context)
def handleopml(path, stdout):
    outline = opml.parse(path)

    def handlesub(sub, indent=0):
        indentch = ' ' * indent
        if len(sub) < 1:
            handlefeed(sub, indentch, stdout)
        else:
            print >> stdout, "%sFOLDER: %s" % (indentch, sub.title)
            for s in sub:
                handlesub(s, indent + 1)

    handlesub(outline)
def import_opml_file(logged_in_user, opml_file, mongo_lib):
    import opml
    default_tag = "default"
    outline = opml.parse(opml_file)
    for entry in outline:
        if hasattr(entry, "xmlUrl"):
            # this entry has no tags, so "default" is used as the tag
            add_feed_to_feeds_meta(mongo_lib, entry.xmlUrl, entry.title)
            associate_tags_to_user_feed(mongo_lib, logged_in_user,
                                        ['default'], entry.xmlUrl)
        if hasattr(entry, '_outlines'):
            # _outlines is the library's internal list of child outlines
            for ent in entry._outlines:
                add_feed_to_feeds_meta(mongo_lib, ent.xmlUrl, ent.title)
                #f.tags = [Tag(entry.text)]
                associate_tags_to_user_feed(mongo_lib, logged_in_user,
                                            [entry.text], ent.xmlUrl)
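# NOTE (sketch): the same traversal as import_opml_file above, but through the
# library's public interface -- iterating an entry (or testing len(entry))
# reaches the children that _outlines holds internally. The function name is
# hypothetical; the helpers are the ones the snippet above assumes.
def import_opml_file_public(logged_in_user, opml_file, mongo_lib):
    import opml
    outline = opml.parse(opml_file)
    for entry in outline:
        if hasattr(entry, "xmlUrl"):      # a bare feed at the top level
            add_feed_to_feeds_meta(mongo_lib, entry.xmlUrl, entry.title)
            associate_tags_to_user_feed(mongo_lib, logged_in_user,
                                        ['default'], entry.xmlUrl)
        elif len(entry):                  # a folder: tag children with its text
            for ent in entry:
                add_feed_to_feeds_meta(mongo_lib, ent.xmlUrl, ent.title)
                associate_tags_to_user_feed(mongo_lib, logged_in_user,
                                            [entry.text], ent.xmlUrl)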
def get_source_info():
    """ get stories from opml list """
    DEFAULT_OPML = "http://hosting.opml.org/dave/validatorTests/clean/subscriptionList.opml"
    titles = []
    htmlUrls = []
    xmlUrls = []
    outline = opml.parse(DEFAULT_OPML)
    for ii in outline:
        titles.append(ii.text)
        htmlUrls.append(ii.htmlUrl)
        xmlUrls.append(ii.xmlUrl)
    return dict(htmlUrls=htmlUrls, titles=titles, xmlUrls=xmlUrls)
def import_pending(cls):
    pending_files = cls.objects.filter(import_status='P')
    # mark the files as being imported so they are not picked up
    # by another task
    # pending_files.update(import_status='R')
    for pending_file in pending_files:
        #outline = opml.parse(pending_file.opml_file.url)
        outlines = opml.parse(
            '/vagrant/drood/media/opml/2015/5/sarahmarshallfeedly.opml')
        for outline in outlines:
            print outline.title
            try:
                print outline.xmlUrl
            except AttributeError:
                # folders have no xmlUrl
                pass
def form2_success(request, appstruct):
    opml_file = appstruct['opml']
    opml_data = opml_file['fp']
    outline = opml.parse(opml_data)
    worklist = [e for e in outline]
    n = 0
    while worklist:
        element = worklist.pop(0)
        if hasattr(element, 'xmlUrl'):
            url = element.xmlUrl
            tasks.import_feed(request, url)
            n += 1
        else:
            worklist += element
    return '%d feeds imported' % n
def get_article_words(opmlfile=None):
    allwords = {}
    articlewords = []
    articletitles = []
    articlelinks = {}
    ec = 0
    if opmlfile is not None:
        outline = opml.parse(opmlfile)
        # get the list of feeds from the OPML file
        feedlist = []
        for folder in outline:
            for feed in folder:
                feedlist.append(feed.xmlUrl)
    else:
        feedlist = defflist
    # Loop over every feed
    for feed in feedlist:
        f = feedparser.parse(feed)
        # Loop over every article
        for e in f.entries:
            # Ignore entries without these fields
            if 'title' not in e or 'link' not in e or 'description' not in e:
                continue
            # Ignore identical articles
            if e.title in articletitles:
                continue
            # Extract the words
            txt = e.title.encode('utf8') + \
                strip_HTML(e.description.encode('utf8'))
            words = separate_words(txt)
            articlewords.append({})
            articletitles.append(e.title)
            articlelinks[e.title] = e.link
            # Increase the counts for this word in allwords and articlewords
            for word in words:
                allwords.setdefault(word, 0)
                allwords[word] += 1
                articlewords[ec].setdefault(word, 0)
                articlewords[ec][word] += 1
            ec += 1
    return allwords, articlewords, articletitles, articlelinks
def main(argv=None):
    if argv is None:
        argv = sys.argv
    username = argv[1]
    opmlfile = argv[2]
    try:
        user_id = get_user_id(username)
    except IndexError:
        db.insert('users', username=username)
        user_id = get_user_id(username)
    with open(opmlfile, 'r') as f:
        parsedopml = opml.parse(f)
    process_opml(parsedopml, user_id)
def substract_subs(inputfile):
    nested = opml.parse(inputfile)
    subs = len(nested[0])
    titles = []
    urls = []
    for entry in nested[0]:
        # Remove special characters
        title = re.sub(r'[^A-Za-z0-9]+', '', entry.text)
        urls.append(entry.xmlUrl)
        titles.append(title)
    return (urls, titles, subs)
def getUpdate():
    json_data = []
    outline = opml.parse("podcasts.xml")
    logging.info("Getting Feeds")
    # try:
    if outline[0].text == "feeds":
        # This is a pocket casts feed
        for podcast_feed in outline[0]:
            json_data.extend(Update_feeds.getFeeds(podcast_feed.xmlUrl))
    else:
        for podcast_feed in outline:
            json_data.extend(Update_feeds.getFeeds(podcast_feed.xmlUrl))
    # print("Writing JSON data")
    Update_feeds.writeFeeds(json_data)
def import_opml(opml_source):
    result = []

    def import_outline(outline):
        try:
            if outline.type == 'rss':
                result.append(add_feed(outline.xmlUrl, outline.title))
        except AttributeError:
            if len(outline):
                for o in outline:
                    import_outline(o)

    outlines = opml.parse(opml_source)
    import_outline(outlines)
    return result
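# NOTE (sketch): a hypothetical invocation of the recursive importer above.
# Because import_outline recurses whenever an outline lacks a type attribute,
# a flat feed list and a nested folder tree take the same call; the path is a
# placeholder.
added = import_opml('subscriptions.opml')
print('{} feeds imported'.format(len(added)))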
def handle(self, *args, **kwargs):
    opml_file, username = args
    try:
        user = User.objects.get(username=username)
    except User.DoesNotExist:
        raise CommandError('User {0} does not exist'.format(username))
    stats = defaultdict(int)
    outlines = opml.parse(opml_file)
    for outline in outlines:
        parse_outline(outline, user, stats)
    self.stdout.write(
        'Imported %d new feeds from %s (%d already present)' % (
            stats['created'], opml_file, stats['existing'])
    )
def main():
    parser = argparse.ArgumentParser(
        description='Read an OPML file and print spiderss TOML format '
                    'to stdout.')
    parser.add_argument('file', help='OPML input file')
    args = parser.parse_args()
    file = args.file
    try:
        outline = opml.parse(file)
        for o in outline:
            print_outline(o, '')
    except Exception as e:
        print('ERROR: {}'.format(e))
        sys.exit(1)
def create_entries():
    outline = opml.parse(
        "/jchandrashekar/Projects/hqfeeds/google-reader-subscriptions.xml")
    default_tag = Tag("hqfeed_default")
    for entry in outline:
        if hasattr(entry, "xmlUrl"):
            f = Feeds()
            f.mongo_feed_id = entry.xmlUrl
            f.feed_title = entry.title
            dbsession.add(f)
            continue
        if hasattr(entry, '_outlines'):
            for ent in entry._outlines:
                f = Feeds()
                f.mongo_feed_id = ent.xmlUrl
                f.feed_title = ent.title
                f.tags = [Tag(entry.text)]
                dbsession.add(f)
    dbsession.commit()
    print "Total feeds created ", dbsession.query(Feeds).count()
def import_opml(store, opml_source):
    result = []

    def import_outline(outline):
        try:
            if outline.type == 'rss':
                result.append(add_feed(store, outline.xmlUrl, outline.title))
                store.commit()  # TODO: handle commit exceptions
        except AttributeError:
            if len(outline):
                for o in outline:
                    import_outline(o)

    outlines = opml.parse(opml_source)
    import_outline(outlines)
    return result
def opml_import():
    url = request.args.get('url')
    if not url:
        return 'Missing url'
    import opml
    try:
        o = opml.parse(url)
    except Exception:
        return 'Cannot parse opml file %s' % url

    def import_outline_element(o):
        for f in o:
            if hasattr(f, 'xmlUrl'):
                s = Source(f.title, 'feed', f.xmlUrl)
                db.session.add(s)
            else:
                import_outline_element(f)

    import_outline_element(o)
    db.session.commit()
    flash('import succeeded')
    return redirect(request.referrer or '/')
def import_opml(user_id, path):
    _opml = opml.parse(path)
    uncategorized = None
    for outline in _opml:
        if hasattr(outline, 'xmlUrl'):
            if uncategorized is None:  # not looked up yet
                uncategorized = Category.query.filter_by(
                    user_id=user_id, name="Uncategorized").first()
                if uncategorized is None:  # not found
                    uncategorized = Category(user_id, "Uncategorized",
                                             order_id=9999)
                    uncategorized.save()
            feed = Feed(outline.xmlUrl)
            feed.save()
            user_feed = UserFeed(user_id, uncategorized.id, feed.id,
                                 outline.text)
            user_feed.save()
        else:
            category = Category.query.filter_by(
                user_id=user_id, name=outline.text).first()
            if category is None:
                category = Category(user_id, outline.text)
                category.save()
            for child in outline:
                if hasattr(child, 'xmlUrl'):
                    # hash the feed URL for the lookup; assuming the model
                    # stores xxhash hex digests
                    url_hash = xxhash.xxh64(child.xmlUrl).hexdigest()
                    feed = Feed.query.filter_by(
                        feed_url_hash=url_hash).first()
                    if feed is None:
                        feed = Feed(child.xmlUrl)
                        feed.save()
                    user_feed = UserFeed(user_id=user_id,
                                         category_id=category.id,
                                         feed_id=feed.id,
                                         feed_name=child.text)
                    user_feed.save()
                else:
                    logger.warning(
                        "Nested category is not supported yet, ignored!")
import opml
import MySQLdb
import ConfigParser

config = ConfigParser.RawConfigParser()
config.read('reader.cfg')

#import feedparser
#feedparser._HTMLSanitizer.acceptable_elements = feedparser._HTMLSanitizer.acceptable_elements + ['object', 'embed','iframe']
#import bs4

db = MySQLdb.connect(host="localhost",
                     user=config.get('Database', 'username'),
                     passwd=config.get('Database', 'password'),
                     db="test_rss", charset='utf8')
cur = db.cursor()

opml_file = 'subscriptions.xml'
o = opml.parse(opml_file)
print 'reading ' + o.title + ' ...'

#create list of existing labels
cur.execute('SELECT * FROM reader_label')
labels = []
for item in cur.fetchall():
    labels.append(item[1])

#add labels
for item in o:
    if len(item) > 0:
        if item.title in labels:
            print 'LABEL EXISTS', item.title
        else:
            cur.execute('insert into reader_label (label) VALUES (%s)',
                        item.title)
import opml
import sys

opml_file = 'feeds/Reabble-zh.opml'
opml_file = 'feeds/Kindle4RSS-Feeds.xml'
opml_file = 'feeds/feedly_askender.opml'
outline = opml.parse(opml_file)

# meta optional
# print(outline.title)
# print(outline.ownerName)
# print(outline.ownerEmail)

# info
# print(len(outline))

level = int(sys.argv[1])
assert len(outline)

for one in outline:
    if hasattr(one, 'type'):
        assert one.type == 'rss'
    # if not len(one):
    if level == 1 and not len(one):
        print()
        print('### {}'.format(one.text))
        if one.title != one.text:
            print(one.title)
            raise  # level-1 category
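# NOTE (sketch): several snippets above special-case one or two levels of
# nesting; this is a depth-first walk that flattens an arbitrarily nested
# outline into (text, xmlUrl) pairs, assuming only the attribute model shown
# earlier. The filename is a placeholder.
import opml

def walk(outline):
    for node in outline:
        if hasattr(node, 'xmlUrl'):      # a feed leaf
            yield node.text, node.xmlUrl
        if len(node):                    # descend into folders
            for pair in walk(node):
                yield pair

for title, url in walk(opml.parse('subscriptions.opml')):
    print(title, url)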