Пример #1
0
 def _clean_content(self, content):
     """Scrub ``content`` and strip ``!important`` markers from the result.

     If scrubbing leaves less than 1% of the input's length, the scrubbed
     text is discarded and the unscrubbed input is used instead.
     """
     scrubbed = Scrubber().scrub(content)
     # Fall back to the raw input when the scrubber removed nearly everything.
     if len(scrubbed) < len(content) * 0.01:
         scrubbed = content
     return scrubbed.replace('!important', '')
Пример #2
0
 def _clean_content(self, content):
     """Return ``content`` scrubbed, with every ``!important`` removed.

     The scrubbed text is kept only when it is at least 1% as long as the
     input; otherwise the untouched input is used in its place.
     """
     cleaned = Scrubber().scrub(content)
     minimum_length = len(content) * 0.01
     # A near-empty scrub result is treated as a failure of the scrubber.
     kept = content if len(cleaned) < minimum_length else cleaned
     return kept.replace('!important', '')
Пример #3
0
 def _clean_content(self, content):
     """Scrub ``content`` and remove every ``!important`` from the result."""
     return Scrubber().scrub(content).replace('!important', '')
Пример #4
0
 def clean_content(self, content):
     """Run ``content`` through a fresh ``Scrubber`` and drop ``!important``."""
     scrubbed = Scrubber().scrub(content)
     return scrubbed.replace('!important', '')
Пример #5
0
# Load the file at ppeFinalFile into a DataFrame. The field separator is the
# single control character written as the octal escape '\031' (0x19).
df = pd.read_csv(ppeFinalFile, sep='\031')

# Initial formatting
print 'Initial Formatting...'
df = format.initial_format(df)
# TODO: do initial scrubbing here as well

# Target concept: built from the formatted frame and config.has_label.
print 'Extracting target concept..'
tc = TargetConcept(df)
df_target_concept = tc.target_concept(config.has_label)
print '\nTarget concept: ' + str(df_target_concept.columns)

# Null column scrubbing; the affected features are reported through
# scrubber.scrubbed_list.
print 'Scrubbing sparse features..'
scrubber = Scrubber(df)
scrubber.initial_nullscrubber_percent()
print '\nNull scrubbed features: ' + str(scrubber.scrubbed_list)

# Column typing: column_typer() returns a mapping with (at least) the keys
# 'cat_list', 'num_list', 'date_list' and 'zip_list'.
print '\nLoad column typer keywords...'
ct = Typer(df)
master_list = ct.column_typer()
# NOTE(review): presumably drops the null-scrubbed features from each typed
# list -- confirm against Scrubber.remove.
scrubber.remove(scrubber.scrubbed_list, master_list['cat_list'],
                master_list['num_list'], master_list['date_list'],
                master_list['zip_list'])
print '\nDates: ' + str(master_list['date_list'])
print '\nGeos: ' + str(master_list['zip_list'])
# Initial scrubbing (the absolute-scrubber call below is currently disabled)
print 'Initial scrubbing...'
#scrubber.initial_scrubber_abs()
Пример #6
0
 def clean_content(self, content):
     """Return ``content`` as scrubbed by a newly created ``Scrubber``."""
     return Scrubber().scrub(content)
Пример #7
0
 def clean_content(self, content):
     """Scrub ``content`` with a default-constructed ``Scrubber``."""
     scrubbed = Scrubber().scrub(content)
     return scrubbed
Пример #8
0
def _share_story_response(code, message):
    """Build the plain-text JSON reply for ``share_story``, including CORS headers."""
    response = HttpResponse(json.encode({"code": code, "message": message, "story": None}), mimetype="text/plain")
    response["Access-Control-Allow-Origin"] = "*"
    response["Access-Control-Allow-Methods"] = "POST"
    return response


def share_story(request, token):
    """Create or update a shared story for the user identified by ``token``.

    Reads ``story_url``, ``comments``, ``title`` and ``content`` (required)
    and ``rss_url`` / ``feed_id`` (optional) from ``request.POST``. The title
    and content are scrubbed before being stored. An existing shared story
    for the same user, feed and URL is updated in place; otherwise a new one
    is created and every social subscription of the user is flagged for an
    unread recalculation.

    Args:
        request: The incoming request; only ``request.POST`` is read.
        token: Secret token used to look up the sharing user's ``Profile``.

    Returns:
        A ``text/plain`` ``HttpResponse`` whose body is the JSON object
        ``{"code", "message", "story"}``. ``code`` is ``0`` on success and
        ``-1`` when ``story_url`` is empty or no profile matches ``token``.

    Raises:
        Whatever ``request.POST[...]`` raises for a missing required field,
        and whatever ``Feed.objects.get`` raises for an unknown ``feed_id``.
    """
    code = 0
    story_url = request.POST["story_url"]
    comments = request.POST["comments"]
    title = request.POST["title"]
    content = request.POST["content"]
    rss_url = request.POST.get("rss_url")
    feed_id = request.POST.get("feed_id") or 0
    feed = None
    message = None

    # Bail out on either failure: continuing would reach the code below with
    # ``profile`` never assigned and crash instead of reporting the error.
    if not story_url:
        return _share_story_response(-1, "No story_url was supplied.")
    try:
        profile = Profile.objects.get(secret_token=token)
    except Profile.DoesNotExist:
        return _share_story_response(-1, "Couldn't find a user for the supplied token.")

    if feed_id:
        feed = Feed.objects.get(pk=feed_id)
    else:
        # No explicit feed: try the RSS URL first, then the story URL itself.
        if rss_url:
            feed = Feed.get_feed_from_url(rss_url, create=True, fetch=True)
        if not feed:
            feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
        if feed:
            feed_id = feed.pk

    # The scrubber is given the story's own URL (without query/fragment) as base.
    parsed_url = urlparse.urlparse(story_url)
    base_url = "%s://%s%s" % (parsed_url.scheme, parsed_url.hostname, parsed_url.path)
    scrubber = Scrubber(base_url=base_url)
    content = scrubber.scrub(content)
    title = scrubber.scrub(title)

    shared_story = (
        MSharedStory.objects.filter(user_id=profile.user.pk, story_feed_id=feed_id, story_guid=story_url)
        .limit(1)
        .first()
    )
    if not shared_story:
        story_db = {
            "story_guid": story_url,
            "story_permalink": story_url,
            "story_title": title,
            "story_feed_id": feed_id,
            "story_content": content,
            "story_date": datetime.datetime.now(),
            "user_id": profile.user.pk,
            "comments": comments,
            "has_comments": bool(comments),
        }
        shared_story = MSharedStory.objects.create(**story_db)
        socialsubs = MSocialSubscription.objects.filter(subscription_user_id=profile.user.pk)
        for socialsub in socialsubs:
            socialsub.needs_unread_recalc = True
            socialsub.save()
        logging.user(profile.user, "~BM~FYSharing story from site: ~SB%s: %s" % (story_url, comments))
    else:
        shared_story.story_content = content
        shared_story.story_title = title
        shared_story.comments = comments
        shared_story.story_permalink = story_url
        shared_story.story_guid = story_url
        shared_story.has_comments = bool(comments)
        shared_story.story_feed_id = feed_id
        shared_story.save()
        logging.user(profile.user, "~BM~FY~SBUpdating~SN shared story from site: ~SB%s: %s" % (story_url, comments))

    shared_story.publish_update_to_subscribers()

    return _share_story_response(code, message)
Пример #9
0
 def clean_content(self, content):
     """Scrub ``content``, then strip every ``!important`` occurrence."""
     cleaned = Scrubber().scrub(content)
     cleaned = cleaned.replace("!important", "")
     return cleaned