def _clean_content(self, content):
    """Scrub ``content`` and strip every ``!important`` marker from it.

    The text is passed through a default-constructed ``Scrubber``. If the
    scrubbed result is shorter than 1% of the input length, the scrubbed
    result is discarded and the unscrubbed input is used instead
    (presumably a guard against the scrubber removing nearly everything --
    confirm against Scrubber's behavior).

    Returns the chosen text with all occurrences of ``!important`` removed.
    """
    scrubbed = Scrubber().scrub(content)

    # Fall back to the raw input when scrubbing left under 1% of it.
    if len(scrubbed) < len(content) * 0.01:
        scrubbed = content

    return scrubbed.replace('!important', '')
def _clean_content(self, content):
    """Return ``content`` scrubbed, with ``!important`` markers removed.

    ``content`` is run through ``Scrubber().scrub``. When the scrubbed
    output is shorter than 1% of the input's length, the original input is
    kept in its place. In both cases every literal ``!important`` is then
    removed from the text that is returned.
    """
    cleaned = Scrubber().scrub(content)
    # True when the scrubber left less than 1% of the original length.
    mostly_removed = len(cleaned) < len(content)*0.01
    kept = content if mostly_removed else cleaned
    return kept.replace('!important', '')
def _clean_content(self, content):
    """Scrub ``content`` with a fresh ``Scrubber`` and drop ``!important``.

    Returns the scrubber's output with every literal ``!important``
    removed.
    """
    return Scrubber().scrub(content).replace('!important', '')
def clean_content(self, content):
    """Return ``content`` after scrubbing and ``!important`` removal.

    The input is passed through ``Scrubber().scrub``; every literal
    ``!important`` is then removed from the result.
    """
    scrubbed = Scrubber().scrub(content)
    return scrubbed.replace('!important', '')
# Load the input file; fields are delimited by the '\031' control character.
df = pd.read_csv(ppeFinalFile, sep='\031')

# Initial formatting
print('Initial Formatting...')
df = format.initial_format(df)
# !!!! do initial scrubbing here as well

# Target concept
print('Extracting target concept..')
tc = TargetConcept(df)
df_target_concept = tc.target_concept(config.has_label)
print('\nTarget concept: ' + str(df_target_concept.columns))

# Null column scrubbing
print('Scrubbing sparse features..')
scrubber = Scrubber(df)
scrubber.initial_nullscrubber_percent()
print('\nNull scrubbed features: ' + str(scrubber.scrubbed_list))

# Column typing
print('\nLoad column typer keywords...')
ct = Typer(df)
master_list = ct.column_typer()
# Hand the null-scrubbed columns to the scrubber together with each of the
# typer's per-type column lists.
scrubber.remove(
    scrubber.scrubbed_list,
    master_list['cat_list'],
    master_list['num_list'],
    master_list['date_list'],
    master_list['zip_list'],
)
print('\nDates: ' + str(master_list['date_list']))
print('\nGeos: ' + str(master_list['zip_list']))

# Initial scrubbing
print('Initial scrubbing...')
# NOTE: the absolute scrubber pass is currently disabled.
#scrubber.initial_scrubber_abs()
def clean_content(self, content):
    """Return ``content`` as processed by a default-constructed ``Scrubber``."""
    return Scrubber().scrub(content)
def share_story(request, token):
    """Create or update a shared story for the profile identified by ``token``.

    Reads ``story_url``, ``comments``, ``title`` and ``content`` (required)
    plus ``rss_url`` and ``feed_id`` (optional) from ``request.POST``.

    The request is rejected with ``code == -1`` when ``story_url`` is empty
    or when no ``Profile`` has ``secret_token == token``. In that case no
    feed lookup, scrubbing or database write is performed. Previously the
    function carried on and failed on the unbound ``profile`` name.

    Otherwise the feed is resolved (by ``feed_id``, else from ``rss_url``,
    else from ``story_url``), the title and content are scrubbed, and the
    user's ``MSharedStory`` for this URL is created or updated and then
    published to subscribers.

    Returns a ``text/plain`` ``HttpResponse`` whose body is the encoded
    ``{"code", "message", "story"}`` dict, with permissive CORS headers for
    POST.

    Raises ``KeyError`` (or Django's equivalent for missing POST keys) when
    a required POST field is absent.
    """
    code = 0
    story_url = request.POST["story_url"]
    comments = request.POST["comments"]
    title = request.POST["title"]
    content = request.POST["content"]
    rss_url = request.POST.get("rss_url")
    feed_id = request.POST.get("feed_id") or 0
    feed = None
    message = None
    # Stays None unless both the URL check and the token lookup succeed.
    profile = None

    if not story_url:
        code = -1
    else:
        try:
            profile = Profile.objects.get(secret_token=token)
        except Profile.DoesNotExist:
            code = -1

    # Only touch feeds/stories when the request was validated; everything
    # below dereferences ``profile``.
    if profile is not None:
        if feed_id:
            feed = Feed.objects.get(pk=feed_id)
        else:
            if rss_url:
                feed = Feed.get_feed_from_url(rss_url, create=True, fetch=True)
            if not feed:
                feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
            if feed:
                feed_id = feed.pk

        # Base URL handed to the scrubber: scheme, host and path of the story
        # URL (query string and fragment are dropped).
        parsed_url = urlparse.urlparse(story_url)
        base_url = "%s://%s%s" % (parsed_url.scheme, parsed_url.hostname, parsed_url.path)
        scrubber = Scrubber(base_url=base_url)
        content = scrubber.scrub(content)
        title = scrubber.scrub(title)

        shared_story = (
            MSharedStory.objects.filter(
                user_id=profile.user.pk, story_feed_id=feed_id, story_guid=story_url
            )
            .limit(1)
            .first()
        )

        if not shared_story:
            story_db = {
                "story_guid": story_url,
                "story_permalink": story_url,
                "story_title": title,
                "story_feed_id": feed_id,
                "story_content": content,
                "story_date": datetime.datetime.now(),
                "user_id": profile.user.pk,
                "comments": comments,
                "has_comments": bool(comments),
            }
            shared_story = MSharedStory.objects.create(**story_db)

            # Flag every social subscription on this user for an unread
            # recalculation now that a new shared story exists.
            socialsubs = MSocialSubscription.objects.filter(subscription_user_id=profile.user.pk)
            for socialsub in socialsubs:
                socialsub.needs_unread_recalc = True
                socialsub.save()
            logging.user(profile.user, "~BM~FYSharing story from site: ~SB%s: %s" % (story_url, comments))
        else:
            shared_story.story_content = content
            shared_story.story_title = title
            shared_story.comments = comments
            shared_story.story_permalink = story_url
            shared_story.story_guid = story_url
            shared_story.has_comments = bool(comments)
            shared_story.story_feed_id = feed_id
            shared_story.save()
            logging.user(
                profile.user,
                "~BM~FY~SBUpdating~SN shared story from site: ~SB%s: %s" % (story_url, comments),
            )

        shared_story.publish_update_to_subscribers()

    response = HttpResponse(
        json.encode({"code": code, "message": message, "story": None}),
        mimetype="text/plain",
    )
    response["Access-Control-Allow-Origin"] = "*"
    response["Access-Control-Allow-Methods"] = "POST"
    return response
def clean_content(self, content):
    """Scrub ``content`` and remove every literal ``!important`` from it.

    Returns the output of ``Scrubber().scrub(content)`` with all
    ``!important`` occurrences deleted.
    """
    scrub = Scrubber().scrub
    return scrub(content).replace("!important", "")