示例#1
0
    def write(self, stories, team=None):
        """
        Save scraped rows to the database as Story records.

        A row is skipped when the API has no details for its URL or when a
        Story with the same URL is already stored.

        stories: iterable of dicts with the keys 'story_url',
            'story_headline', 'graphic_slug' and 'graphic_type'.
        team: value stored in the `team` field of every created Story.

        Returns the list of Story records created by this call.
        """
        new_stories = []
        for story in stories:
            headline = story['story_headline']
            info_from_api = npr_api_scraper.get_story_details(story['story_url'])

            if not info_from_api:
                logger.info('Not adding %s to database: could not get story', headline)
                # The date and image saved below come from the API response,
                # so there is nothing to store for this row; move on instead
                # of failing on info_from_api['date'] further down.
                continue

            exists = Story.select().where(Story.url == story['story_url'])
            if exists:
                logger.info('Not adding %s to database: already exists', headline)
                continue

            try:
                screenshot_url = screenshotter.get_story_image(story['story_url'])
                created = Story.create(
                    name=headline.strip(),
                    slug=story['graphic_slug'].strip(),
                    date=info_from_api['date'],
                    story_type=story['graphic_type'].strip(),
                    url=story['story_url'].strip(),
                    image=info_from_api['image'],
                    team=team,
                    screenshot=screenshot_url
                )
                new_stories.append(created)
            except IntegrityError:
                # Story probably already exists.
                logger.info('Not adding %s to database: probably already exists', headline)
        return new_stories
示例#2
0
def add_story_screenshots(regenerate=False, article_id='storytext'):
    """
    Utility. Generate article screenshots and store them on the stories.

    With regenerate left false, only stories matched by
    `Story.screenshot == None` are processed; pass regenerate=True to
    process every story again.

    article_id is the CSS ID of the article; the image will be cropped to
    that ID.
    """
    if regenerate:
        candidates = Story.select()
    else:
        # ORM column comparison: this must stay `== None`, not `is None`.
        candidates = Story.select().where(Story.screenshot == None)

    for story in candidates:
        logger.info("About to check {0}".format(story.name))

        image_url = screenshotter.get_story_image(
            story_url=story.url,
            article_id=article_id)
        story.screenshot = image_url

        # Only the fill-in-the-blanks run reports the URL it received.
        if not regenerate:
            logger.info("Got screenshot {0}".format(story.screenshot))
        story.save()
示例#3
0
    def respond(self, message):
        """
        Start tracking a story, or update the slug of one already tracked.

        The slug and the story URL are both read from message.body['text'].

        Returns False when the text does not match START_TRACKING_REGEX.
        Otherwise returns a dict whose 'text' key holds the reply, including
        the apology used when the URL is missing or the API lookup fails.
        """
        body_text = message.body['text']
        m = re.search(self.START_TRACKING_REGEX, body_text)
        if not m:
            return False

        slug = m.group(1)
        if not slug:
            return {
                'text': "Sorry, I wasn't able to start tracking `%s` right now." % slug
            }

        # The URL pattern may not match at all, so check the match object
        # before calling .group() on it.
        url_match = re.search(self.GRUBER_URLINTEXT_PAT, body_text)
        url = url_match.group(1) if url_match else None
        if not url:
            logger.error("Couldn't find story URL in message %s", body_text)
            return {'text': "Sorry, I need a story URL to start tracking."}

        # Check if the story is in the database.
        try:
            story = Story.select().where(Story.url.contains(url)).get()
        except Story.DoesNotExist:
            story = None

        if story is not None:
            # Already tracked: only the slug needs updating.
            story.slug = slug
            story.save()
            return {
                'text': "Ok! I'm already tracking `%s`, and I've updated the slug." % url
            }

        # If it's not in the database, start tracking it.
        details = npr_api_scraper.get_story_details(url)
        if not details:
            logger.error("Couldn't find story in API for URL %s", url)
            return {
                'text': "Sorry, I wasn't able to find that story in the API, so I couldn't start tracking it."
            }

        # Find out what team we need to save this story to
        channel = slack_tools.get_channel_name(message.body['channel'])
        team = self.config.get_team_for_channel(channel)

        # Create the story
        story = Story.create(name=details['title'],
                             slug=slug,
                             date=details['date'],
                             url=url,
                             image=details['image'],
                             team=team
                            )
        story.save()
        return {
            'text': "Ok, I've started tracking `%s` on %s. The first stats should arrive in 4 hours or less." % (slug, url)
        }
示例#4
0
def start_tracking(message):
    """
    Handle a request to start tracking a graphic slug.

    Returns False when the message text does not match
    START_TRACKING_REGEX. Every other path answers through message.reply
    and returns None.
    """
    request = re.search(START_TRACKING_REGEX, message.body['text'])
    if not request:
        return False

    slug = request.group(1)
    if not slug:
        message.reply(
            "Sorry, I wasn't able to start tracking `%s` right now." % slug)
        return

    # Check if the slug is in the database.
    try:
        # .get() raises Story.DoesNotExist when nothing matches.
        Story.select().where(Story.slug.contains(slug)).get()
        message.reply(
            "Thanks! I'm already tracking `%s`, and you should start seeing results within a couple hours."
            % slug)
        return
    except Story.DoesNotExist:
        pass

    # Not in the database yet, so a story URL is needed to start tracking.
    url_match = re.search(GRUBER_URLINTEXT_PAT, message.body['text'])
    if not url_match:
        logger.error("Couldn't find story URL in message %s",
                     message.body['text'])
        message.reply("Sorry, I need a story URL to start tracking.")
        return

    details = npr_api_scraper.get_story_details(url_match.group(1))
    if not details:
        logger.error("Couldn't find story in API for URL %s",
                     url_match.group(1))
        message.reply(
            "Sorry, I wasn't able to find that story in the API, so I couldn't start tracking it."
        )
        return

    # The team is looked up from the channel the request came from.
    channel_name = slackTools.get_channel_name(message.body['channel'])
    team = config.get_team_for_channel(channel_name)

    new_story = Story.create(name=details['title'],
                             slug=slug,
                             date=details['date'],
                             url=url_match.group(1),
                             image=details['image'],
                             team=team)
    new_story.save()
    message.reply(
        "Ok, I've started tracking `%s`. The first stats should arrive in 4 hours or less."
        % slug)
示例#5
0
    def respond(self, message):
        """
        Start tracking a story, or update the slug of one already tracked.

        The slug and the story URL are both read from message.body['text'].

        Returns False when the text does not match START_TRACKING_REGEX.
        Otherwise returns a dict whose 'text' key holds the reply, including
        the apology used when the URL is missing or the API lookup fails.
        """
        body_text = message.body['text']
        m = re.search(self.START_TRACKING_REGEX, body_text)
        if not m:
            return False

        slug = m.group(1)
        if not slug:
            return {
                'text': "Sorry, I wasn't able to start tracking `%s` right now." % slug
            }

        # The URL pattern may not match at all, so check the match object
        # before calling .group() on it.
        url_match = re.search(self.GRUBER_URLINTEXT_PAT, body_text)
        url = url_match.group(1) if url_match else None
        if not url:
            logger.error("Couldn't find story URL in message %s", body_text)
            return {'text': "Sorry, I need a story URL to start tracking."}

        # Check if the story is in the database.
        try:
            story = Story.select().where(Story.url.contains(url)).get()
        except Story.DoesNotExist:
            story = None

        if story is not None:
            # Already tracked: only the slug needs updating.
            story.slug = slug
            story.save()
            return {
                'text': "Ok! I'm already tracking `%s`, and I've updated the slug." % url
            }

        # If it's not in the database, start tracking it.
        details = npr_api_scraper.get_story_details(url)
        if not details:
            logger.error("Couldn't find story in API for URL %s", url)
            return {
                'text': "Sorry, I wasn't able to find that story in the API, so I couldn't start tracking it."
            }

        # Find out what team we need to save this story to
        channel = slack_tools.get_channel_name(message.body['channel'])
        team = self.config.get_team_for_channel(channel)

        # Create the story
        story = Story.create(name=details['title'],
                             slug=slug,
                             date=details['date'],
                             url=url,
                             image=details['image'],
                             team=team)
        story.save()
        return {
            'text': "Ok, I've started tracking `%s` on %s. The first stats should arrive in 4 hours or less." % (
                slug, url)
        }
示例#6
0
def add_story_screenshots(regenerate=False):
    """
    Fetch a screenshot for stories and save its result on each record.

    regenerate: when true, every story is processed; otherwise only the
        stories matched by `Story.screenshot == None` are.
    """
    if regenerate:
        pending = Story.select()
    else:
        # ORM column comparison: this must stay `== None`, not `is None`.
        pending = Story.select().where(Story.screenshot == None)

    for story in pending:
        logger.info("About to check %s" % (story.name))

        screenshot = screenshotter.get_story_image(story.url)
        story.screenshot = screenshot
        story.save()
示例#7
0
def add_story_screenshots(regenerate=False):
    """
    Store a screenshot on stories, one story at a time.

    By default only stories matched by `Story.screenshot == None` are
    handled. Pass regenerate=True to handle every story.
    """
    stories = Story.select()
    if not regenerate:
        # Narrow the query; the `== None` form is an ORM expression and
        # must not be rewritten as `is None`.
        stories = Story.select().where(Story.screenshot == None)

    for story in stories:
        logger.info("About to check %s" % (story.name))

        story.screenshot = screenshotter.get_story_image(story.url)
        story.save()
示例#8
0
def handle_overview_question(message):
    """
    Answer an overview question with a summary of the last seven days.

    Replies to the message straight away, then sends three messages to the
    channel named in message.body['channel']: the story and graphic counts,
    the user total with a linger histogram attachment, and a list of every
    story selected.
    """
    message.reply(
        "Let me check what's been happening. This may take a second.")
    seven_days_ago = datetime.datetime.now() - datetime.timedelta(days=7)
    stories = Story.select().where(Story.tracking_started > seven_days_ago)

    # A story's slug field can hold several comma-separated slugs; collect
    # the distinct ones. The builtin set replaces the deprecated sets.Set.
    slugs = set()
    for story in stories:
        slugs.update(story.slug.split(','))

    total_users = analytics.get_user_data(start_date='7daysAgo')
    total_users = int(total_users['rows'][0][0])
    total_users = "{:,}".format(total_users)

    median = analytics.get_linger_rate(start_date='7daysAgo')
    linger_rows = analytics.get_linger_rows(start_date='7daysAgo')
    linger_histogram_url = ChartTools.linger_histogram_link(
        linger_rows, median)

    attachments = [{
        "fallback": "linger update",
        "color": "#eeeeee",
        "title": "Time spent on graphics over the last week",
        "image_url": linger_histogram_url
    }]

    slackTools.send_message(
        message.body['channel'],
        "In the past 7 days, I've tracked %s stories and %s graphics." %
        (len(stories), len(slugs)))
    slackTools.send_message(
        message.body['channel'],
        "%s people looked at graphics on those stories. Here's how much time they spent:"
        % total_users,
        attachments,
        unfurl_links=False)

    # One short field per story: its name, linked to its URL via the slug.
    fields = [{
        "title": story.name.strip(),
        "value": '<' + story.url + '|' + story.slug.strip() + '>',
        "short": True
    } for story in stories]

    attachments = [{
        "fallback": "linger update",
        "color": "#eeeeee",
        # "title": "What we have done",
        "fields": fields
    }]

    slackTools.send_message(message.body['channel'],
                            "Here's everything:",
                            attachments,
                            unfurl_links=False)
示例#9
0
    def write(self, stories, team=None):
        """
        Save scraped rows to the database as Story records.

        A row is skipped when the API has no details for its URL, or when
        creating the record raises IntegrityError.

        stories: iterable of dicts with the keys 'story_url',
            'story_headline', 'graphic_slug' and 'graphic_type'.
        team: value stored in the `team` field of every created Story.

        Returns the list of Story records created by this call.
        """
        new_stories = []
        for story in stories:
            headline = story['story_headline']
            info_from_api = npr_api_scraper.get_story_details(story['story_url'])

            if not info_from_api:
                logger.info('Not adding %s to database: could not get story', headline)
                # The date and image saved below come from the API response,
                # so skip the row rather than fail on info_from_api['date'].
                continue

            try:
                created = Story.create(
                    name=headline,
                    slug=story['graphic_slug'],
                    date=info_from_api['date'],
                    article_posted=info_from_api['date'],
                    story_type=story['graphic_type'],
                    url=story['story_url'],
                    image=info_from_api['image'],
                    team=team
                )
                new_stories.append(created)
            except IntegrityError:
                # Story probably already exists.
                logger.info('Not adding %s to database: probably already exists', headline)

        return new_stories
示例#10
0
    def write(self, stories, team=None):
        """
        Store scraped rows as Story records of type 'pocky'.

        Each row's slug is its 'official flavor description' and 'taster'
        values joined by ' - '. Rows whose insert raises IntegrityError are
        logged and left out.

        Returns the list of Story records that were created.
        """
        created = []
        for row in stories:
            slug = ' - '.join(
                [row['official flavor description'], row['taster']])

            try:
                record = Story.create(
                    name=row['name'].strip(),
                    slug=slug,
                    date=PockyScraper.parse_date(row['date tasted']),
                    story_type='pocky',
                    team=team,
                )
                logger.info('Added {0}'.format(record.name))
                created.append(record)
            except IntegrityError:
                # The insert was rejected, most likely because the story is
                # already stored.
                logger.info(
                    'Not adding %s to database: probably already exists' %
                    (slug))

        return created
示例#11
0
    def test_change_tracking(self, mock_get_channel_name):
        """
        Track a URL first, then send a second request for the same URL with
        a longer slug list and check the stored story was updated in place.
        """
        story_url = 'http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong'
        mock_get_channel_name.return_value = 'default-channel'
        clear_stories()
        tracker = NPRStartTracking()

        class FakeMessage(object):
            body = {
                'text': '@carebot track slug-a-b-c on ' + story_url,
                'channel': 'default-channel'
            }

        # The first request creates the story.
        reply = tracker.respond(FakeMessage)
        expected = "Ok, I've started tracking `slug-a-b-c` on " + story_url
        assert expected in reply['text']

        # The second request only changes the slug.
        FakeMessage.body['text'] = '@carebot track slug-a-b-c,slug-x-y-z on ' + story_url
        reply = tracker.respond(FakeMessage)

        stored = Story.select()
        self.assertEqual(len(stored), 1)
        first = stored[0]
        self.assertEqual(first.url, story_url)
        self.assertEqual(stored[0].slug, 'slug-a-b-c,slug-x-y-z')
示例#12
0
def get_story_stats():
    """
    Loop through every story and send a linger-time update where one is due.

    A story is skipped when its current time bucket equals the bucket saved
    on it, or when time_bucket() returns nothing for its date. Stories that
    are not skipped get their last_checked and last_bucket fields updated.
    """
    analytics = GoogleAnalyticsScraper()

    # TODO use a SQL query instead of app logic to exclude stories that are
    # too old.
    for story in Story.select():
        logger.info("About to check %s", story.name)

        story_time_bucket = time_bucket(story.date)
        last_bucket = story.last_bucket

        # Skip stories whose bucket has not changed since the last report.
        if last_bucket and last_bucket == story_time_bucket:
            logger.info("Checked recently. Bucket is still %s", story_time_bucket)
            continue

        if not story_time_bucket:
            logger.info("Story is too new; skipping for now")
            continue

        # Some stories have multiple slugs
        stats_per_slug = analytics.get_linger_data_for_story(story)

        # Truthiness replaces `len(...) is not 0`, which compared object
        # identity with an int literal instead of comparing values.
        if stats_per_slug:
            slackTools.send_linger_time_update(story, stats_per_slug, story_time_bucket)

        # Mark the story as checked
        story.last_checked = datetime.datetime.now(pytz.timezone('US/Eastern'))
        story.last_bucket = story_time_bucket
        story.save()
示例#13
0
    def test_change_tracking(self, mock_get_channel_name):
        """
        Start tracking a URL, then re-send the request with an extra slug
        and verify the single stored story carries the new slug list.
        """
        tracked_url = "http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong"
        mock_get_channel_name.return_value = "default-channel"
        clear_stories()
        tracker = NPRStartTracking()

        class FakeMessage(object):
            body = {
                "text": "@carebot track slug-a-b-c on " + tracked_url,
                "channel": "default-channel",
            }

        # First request: the story does not exist yet.
        response = tracker.respond(FakeMessage)
        expected = "Ok, I've started tracking `slug-a-b-c` on " + tracked_url
        assert expected in response["text"]

        # Second request: same URL, longer slug list.
        FakeMessage.body["text"] = "@carebot track slug-a-b-c,slug-x-y-z on " + tracked_url
        response = tracker.respond(FakeMessage)

        rows = Story.select()
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0].url, tracked_url)
        self.assertEqual(rows[0].slug, "slug-a-b-c,slug-x-y-z")
示例#14
0
    def respond(self, message):
        """
        Respond to requests about the last seven days of data.

        Returns a dict with a 'text' summary and an 'attachments' list
        holding the linger histogram and one field per story selected.
        The message argument is not read.

        TODO: Loop over all stories and report stats on each
        """
        seven_days_ago = datetime.datetime.now() - datetime.timedelta(days=7)
        stories = Story.select().where(Story.tracking_started > seven_days_ago)

        # A story's slug field can hold several comma-separated slugs; collect
        # the distinct ones. The builtin set replaces the deprecated sets.Set.
        slugs = set()
        for story in stories:
            slugs.update(story.slug.split(','))

        # Best effort: use the first story's team, else the default team.
        # `except Exception` keeps the fallback without also swallowing
        # KeyboardInterrupt and SystemExit the way a bare except does.
        try:
            team = self.config.get_team_for_story(stories[0])
        except Exception:
            team = self.config.get_default_team()

        total_users = self.get_user_data(team=team, start_date='7daysAgo')
        total_users = int(total_users['rows'][0][0])
        total_users = "{:,}".format(total_users)

        npr_linger = NPRLingerRate()
        linger_rows = npr_linger.get_linger_data(team=team, start_date='7daysAgo')
        median = NPRLingerRate.get_median(linger_rows)
        linger_histogram_url = npr_linger.get_histogram_url(linger_rows, median)

        attachments = [{
            "fallback": "linger update",
            "color": "#eeeeee",
            "title": "Time spent on graphics over the last week",
            "image_url": linger_histogram_url
        }]

        text = "In the past 7 days, I've tracked {0} stories and {1} graphics.".format(len(stories), len(slugs))
        text += "\n\n"
        text += "{0} people looked at graphics on the property. Here's how much time they spent:".format(total_users)

        # One short field per story: its name, linked to its URL via the slug.
        fields = [{
            "title": story.name.strip(),
            "value": "<{0}|{1}>".format(story.url, story.slug.strip()),
            "short": True
        } for story in stories]

        attachments.append({
            "fallback": "linger update",
            "color": "#eeeeee",
            # "title": "What we have done",
            "fields": fields
        })

        return {
            'text': text,
            'attachments': attachments
        }
示例#15
0
    def test_write_spreadsheet_duplicates(self, mock_upload):
        """Writing the same scraped rows twice must still leave four stories."""
        mock_upload.return_value = 'http://image-url-here'

        clear_stories()

        scraper = SpreadsheetScraper()
        rows = scraper.scrape_spreadsheet('tests/data/stories.xlsx')

        # Pass one inserts the stories; pass two submits the same rows again
        # and must not add duplicates.
        for _attempt in range(2):
            scraper.write(rows)
            stored = Story.select()
            self.assertEqual(len(stored), 4)
示例#16
0
    def test_write_spreadsheet_duplicates(self, mock_upload):
        """The story count stays at four when the same rows are written twice."""
        mock_upload.return_value = 'http://image-url-here'
        clear_stories()

        scraper = SpreadsheetScraper()
        scraped_rows = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
        expected_count = 4

        # First write: the stories are inserted.
        scraper.write(scraped_rows)
        self.assertEqual(len(Story.select()), expected_count)

        # Second write of the same rows: no duplicates may appear.
        scraper.write(scraped_rows)
        self.assertEqual(len(Story.select()), expected_count)
示例#17
0
    def handle_slug_inquiry(self, message):
        """
        Respond to an inquiry about the slug with stats and charts.

        Returns None when message.body["text"] does not match
        SLUG_SEARCH_REGEX or the captured slug is empty. Otherwise returns a
        dict with a "text" reply, plus an "attachments" list of two
        histograms when linger data was found for the slug.
        """
        match = re.search(self.SLUG_SEARCH_REGEX, message.body["text"])
        # No match means there is no slug to look up; bail out instead of
        # calling .group() on None.
        if not match:
            return None

        slug = match.group(1)
        if not slug:
            return None

        # Try to match the story to a slug to accurately get a team
        # The Google Analytics property ID comes from the team config
        # We use the default team if none is found
        stories = Story.select().where(Story.slug.contains(slug))
        team = self.config.get_team_for_stories(stories)

        linger_rows = self.get_linger_data(team=team, slug=slug)

        if not linger_rows:
            return {"text": "Sorry, I wasn't able to find linger rate stats for %s" % slug}

        median = NPRLingerRate.get_median(linger_rows)
        people = "{:,}".format(median["total_people"])
        time_text = TimeTools.humanist_time_bucket(median)

        reply = u"*%s* people spent a median *%s* on `%s`." % (people, time_text, slug)

        reply += "\n\nThis graphic appears in %s %s I am tracking:" % (
            inflector.number_to_words(len(stories)),
            inflector.plural("story", len(stories)),
        )

        for story in stories:
            reply += "\n" + "*<%s|%s>*" % (story.url, story.name.strip())

        # Get linger rate data for charting.
        all_graphics_rows = self.get_linger_data(team=team)
        all_graphics_median = NPRLingerRate.get_median(all_graphics_rows)

        attachments = [
            {
                "fallback": slug + " update",
                "color": "#eeeeee",
                "title": slug,
                "image_url": self.get_histogram_url(linger_rows, median),
            },
            {
                "fallback": slug + " update",
                "color": "#eeeeee",
                "title": "How all graphics performed",
                "image_url": self.get_histogram_url(all_graphics_rows, all_graphics_median),
            },
        ]

        return {"text": reply, "attachments": attachments}
示例#18
0
    def test_handle_slug_inquiry(self,
                                 mock_upload,
                                 mock_histogram,
                                 mock_linger,
                                ):
        """
        A slug inquiry for a stored story reports the people count and median
        time from the mocked linger rows, and titles the first attachment
        with the slug.
        """
        fake_image_url = 'http://image-url-here'

        # Fake analytics: ten rows, each pairing a value with 10.
        mock_linger.return_value = [
            [value, 10]
            for value in (10, 20, 30, 40, 50, 60, 120, 180, 240, 300)
        ]
        mock_histogram.return_value = fake_image_url
        mock_upload.return_value = fake_image_url

        slug = 'x-y-z'
        linger = NPRLingerRate()

        class FakeMessage(object):
            body = {'text': 'check slug ' + slug}

        clear_stories()
        Story.create(name='example',
                     slug=slug,
                     date=datetime.datetime.now(),
                     url='example.com',
                     team='deafult')

        response = linger.handle_slug_inquiry(FakeMessage)
        print(response)
        expected = u'*100* people spent a median *55 seconds* on `x-y-z`'
        assert expected in response['text']
        first_attachment = response['attachments'][0]
        self.assertEqual(first_attachment['title'], slug)
示例#19
0
    def test_handle_slug_inquiry(
        self,
        mock_upload,
        mock_histogram,
        mock_linger,
    ):
        """
        Check the reply text and first attachment title produced for a slug
        inquiry when the linger data, histogram and upload are mocked.
        """
        image_url = 'http://image-url-here'
        slug = 'x-y-z'

        # Fake analytics: every row pairs a value with 10.
        values = [10, 20, 30, 40, 50, 60, 120, 180, 240, 300]
        mock_linger.return_value = [[value, 10] for value in values]
        mock_histogram.return_value = image_url
        mock_upload.return_value = image_url

        linger = NPRLingerRate()

        class FakeMessage(object):
            body = {'text': 'check slug ' + slug}

        # Start from an empty table holding just one story for the slug.
        clear_stories()
        Story.create(
            name='example',
            slug=slug,
            date=datetime.datetime.now(),
            url='example.com',
            team='deafult',
        )

        result = linger.handle_slug_inquiry(FakeMessage)
        print(result)
        assert u'*100* people spent a median *55 seconds* on `x-y-z`' in result['text']
        self.assertEqual(result['attachments'][0]['title'], slug)
示例#20
0
def add_story_screenshots(regenerate=False, article_id="storytext"):
    """
    Utility. Take a screenshot of articles and save it on the stories.

    Only stories matched by `Story.screenshot == None` are handled unless
    regenerate=True is passed, in which case every story is handled.

    article_id names the CSS ID of the article; the image will be cropped
    to that ID.
    """
    stories = Story.select()
    if not regenerate:
        # ORM column comparison: this must stay `== None`, not `is None`.
        stories = Story.select().where(Story.screenshot == None)

    for story in stories:
        logger.info("About to check {0}".format(story.name))

        story.screenshot = screenshotter.get_story_image(
            story_url=story.url,
            article_id=article_id,
        )
        if not regenerate:
            # Only the default run logs the URL that came back.
            logger.info("Got screenshot {0}".format(story.screenshot))
        story.save()
示例#21
0
    def write(self, stories, team=None):
        """
        Save scraped rows to the database as Story records.

        A row is skipped when the API has no details for its URL or when a
        Story with the same URL is already stored.

        stories: iterable of dicts with the keys 'story_url',
            'story_headline', 'graphic_slug' and 'graphic_type'.
        team: value stored in the `team` field of every created Story.

        Returns the list of Story records created by this call.
        """
        new_stories = []
        for story in stories:
            headline = story['story_headline']
            info_from_api = npr_api_scraper.get_story_details(
                story['story_url'])

            if not info_from_api:
                logger.info('Not adding %s to database: could not get story',
                            headline)
                # The date and image saved below come from the API response,
                # so there is nothing to store for this row; move on instead
                # of failing on info_from_api['date'] further down.
                continue

            exists = Story.select().where(Story.url == story['story_url'])
            if exists:
                logger.info('Not adding %s to database: already exists',
                            headline)
                continue

            try:
                screenshot_url = screenshotter.get_story_image(
                    story['story_url'])
                created = Story.create(
                    name=headline.strip(),
                    slug=story['graphic_slug'].strip(),
                    date=info_from_api['date'],
                    story_type=story['graphic_type'].strip(),
                    url=story['story_url'].strip(),
                    image=info_from_api['image'],
                    team=team,
                    screenshot=screenshot_url)
                new_stories.append(created)
            except IntegrityError:
                # Story probably already exists.
                logger.info(
                    'Not adding %s to database: probably already exists',
                    headline)
        return new_stories
示例#22
0
    def handle_pageviews_inquiry(self, message):
        """
        Reply with the unique pageview count recorded for a slug.

        Returns None when message.body["text"] does not match
        UNSUCK_SLUG_SEARCH_REGEX. Otherwise returns a dict whose "text" key
        holds either the formatted count or an apology when the value from
        get_unique_visitor_data cannot be converted to an integer.
        """
        match = re.search(self.UNSUCK_SLUG_SEARCH_REGEX, message.body["text"])
        # No match means there is no slug to look up; bail out instead of
        # calling .group() on None.
        if not match:
            return None
        slug = match.group(1)

        stories = Story.select().where(Story.slug.contains(slug))
        team = config.get_team_for_stories(stories)

        unique_pageviews = self.get_unique_visitor_data(team=team, slug=slug)

        # The old emptiness check ran after formatting, when the value was
        # already a non-empty string, so it could never fire and missing data
        # raised in int(). Treat a value that cannot be converted as
        # "no stats found".
        try:
            formatted_pageviews = "{:,}".format(int(unique_pageviews))
        except (TypeError, ValueError):
            return {"text": "Sorry, I wasn't able to find unique visitor stats for %s" % slug}

        return {"text": "`{0}` has had *{1}* unique pageviews".format(slug, formatted_pageviews)}
示例#23
0
def get_story_stats():
    """
    Walk over every stored story and post plugin updates for those due one.

    A story is skipped when its current time bucket equals the bucket saved
    on it, or when time_bucket() returns nothing for its date. For the rest,
    each plugin listed for the story's team may produce a message, which is
    sent to the team's channel; the story is then marked as checked.
    """

    # TODO use a SQL query instead of app logic to exclude stories that are
    # too old.
    for story in Story.select():
        logger.info("About to check %s" % (story.name))

        team = config.get_team_for_story(story)
        story_time_bucket = time_bucket(story.date)
        last_bucket = story.last_bucket

        # Same bucket as the last report: nothing new to say yet.
        if last_bucket and last_bucket == story_time_bucket:
            logger.info("Checked recently. Bucket is still %s", story_time_bucket)
            continue

        if not story_time_bucket:
            logger.info("Story is too new; skipping for now")
            continue

        # Resolve every dotted "module.Class" path before running any plugin.
        plugin_classes = []
        for dotted_path in team['plugins']:
            (module_name, class_name) = dotted_path.rsplit(".", 1)
            module = importlib.import_module(module_name)
            plugin_classes.append(getattr(module, class_name))

        for plugin_class in plugin_classes:
            instance = plugin_class()

            try:
                message = instance.get_update_message(story)
                if message:
                    slack_tools.send_message(
                        team['channel'],
                        message['text'],
                        message.get('attachments', None)
                    )
            except NotImplementedError:
                # This plugin does not provide update messages.
                pass

        # Mark the story as checked
        now = datetime.datetime.now(pytz.timezone(app_config.PROJECT_TIMEZONE))
        story.last_checked = now
        story.last_bucket = story_time_bucket
        story.save()
示例#24
0
    def test_write_spreadsheet(self, mock_upload):
        """
        Writing the scraped spreadsheet stores four stories whose names and
        URLs match the scraped rows, in order.
        """
        mock_upload.return_value = 'http://image-url-here'
        clear_stories()

        scraper = SpreadsheetScraper()
        rows = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
        scraper.write(rows)

        saved = Story.select()
        self.assertEqual(len(saved), 4)

        # Compare each scraped row with the record at the same position.
        for position, row in enumerate(rows):
            self.assertEqual(saved[position].name, row['story_headline'])
            self.assertEqual(saved[position].url, row['story_url'])
示例#25
0
    def test_write_spreadsheet(self, mock_upload):
        """
        After a write, the database holds four stories and each one carries
        the headline and URL of the scraped row at the same index.
        """
        mock_upload.return_value = 'http://image-url-here'

        clear_stories()

        spreadsheet = SpreadsheetScraper()
        scraped = spreadsheet.scrape_spreadsheet('tests/data/stories.xlsx')
        spreadsheet.write(scraped)

        records = Story.select()
        self.assertEqual(len(records), 4)

        for index, scraped_row in enumerate(scraped):
            headline = scraped_row['story_headline']
            url = scraped_row['story_url']
            self.assertEqual(records[index].name, headline)
            self.assertEqual(records[index].url, url)
示例#26
0
def get_story_stats():
    """
    Walk every stored story and, for each one whose time bucket has changed
    since it was last checked, let the team's plugins post an update.
    """

    # TODO use a SQL query instead of app logic to exclude stories that are
    # too old.
    for story in Story.select():
        logger.info("About to check %s" % (story.name))

        team = config.get_team_for_story(story)
        current_bucket = time_bucket(story.date)
        previous_bucket = story.last_bucket

        # Nothing new to report while the story is still in the bucket it was
        # last reported in.
        if previous_bucket and previous_bucket == current_bucket:
            logger.info("Checked recently. Bucket is still %s", current_bucket)
            continue

        if not current_bucket:
            logger.info("Story is too new; skipping for now")
            continue

        # Resolve every dotted "module.path.ClassName" entry to its class
        # before any plugin is run.
        plugin_classes = []
        for dotted_path in team["plugins"]:
            module_name, class_name = dotted_path.rsplit(".", 1)
            module = importlib.import_module(module_name)
            plugin_classes.append(getattr(module, class_name))

        for plugin_class in plugin_classes:
            instance = plugin_class()

            try:
                update = instance.get_update_message(story)
                if update:
                    slack_tools.send_message(
                        team["channel"],
                        update["text"],
                        update.get("attachments", None)
                    )
            except NotImplementedError:
                # Plugins without an update message are simply skipped.
                pass

        # Mark the story as checked
        checked_at = datetime.datetime.now(
            pytz.timezone(app_config.PROJECT_TIMEZONE))
        story.last_checked = checked_at
        story.last_bucket = current_bucket
        story.save()
示例#27
0
def handle_linger_update(message):
    """
    Answer a "how is <url> doing" message with linger stats for that story.

    Nothing is sent when the text lacks the word 'doing', contains no URL, or
    the story has no linger stats. When the story lookup fails, an apology is
    sent via message.reply().
    """
    text = message.body['text']
    if 'doing' not in text:
        return

    matches = GRUBER_URLINTEXT_PAT.findall(text)

    # findall() returns an empty list when the text has no URL, so the list
    # itself must be tested before indexing into it.
    if not matches or not matches[0]:
        return

    url = str(matches[0][0])
    # Turn escaped ampersands back into plain ones before the lookup.
    url = url.replace('&amp;', '&')
    logger.info("Looking for url %s" % url)

    try:
        story = Story.select().where(Story.url == url).get()
    except Exception:
        # Best effort: any failed lookup is reported as "no stats", but
        # SystemExit / KeyboardInterrupt are no longer swallowed.
        message.reply("Sorry, I don't have stats for %s" % url)
        return

    stats_per_slug = analytics.get_linger_data_for_story(story)
    if not stats_per_slug:
        return

    reply = ("Here's what I know about the graphics on _%s_:") % (
        story.name.strip())

    # One short field per slug, showing the humanized linger time.
    fields = [
        {
            "title": stat['slug'],
            "value": TimeTools.humanist_time_bucket(stat['stats']),
            "short": True
        }
        for stat in stats_per_slug
    ]

    attachments = [{
        "fallback": story.name + " update",
        "color": "#eeeeee",
        "title": story.name,
        "title_link": story.url,
        "fields": fields
    }]

    # Use send_message instead of message.reply, otherwise we lose
    # the bot icon.
    slackTools.send_message(message.body['channel'], reply, attachments)
示例#28
0
    def test_write_spreadsheet(self, mock_upload):
        """
        Stories scraped from a spreadsheet must be stored in the database
        by the scraper's write step.
        """
        clear_stories()

        spreadsheet_scraper = SpreadsheetScraper(self.source)
        scraped_rows = spreadsheet_scraper.scrape_spreadsheet(
            'tests/data/stories.xlsx')
        spreadsheet_scraper.write(scraped_rows)

        saved_stories = Story.select()
        self.assertEqual(len(saved_stories), 4)

        # Saved records are compared position-by-position with the scraped rows.
        for position, row in enumerate(scraped_rows):
            saved = saved_stories[position]
            self.assertEqual(saved.name, row['story_headline'])
            self.assertEqual(saved.url, row['story_url'])
示例#29
0
    def test_write_spreadsheet(self, mock_upload):
        """
        After scraping the fixture spreadsheet and writing it, the database
        should hold one Story per row with the same headline and URL.
        """
        clear_stories()

        sheet_scraper = SpreadsheetScraper(self.source)
        fixture_rows = sheet_scraper.scrape_spreadsheet(
            'tests/data/stories.xlsx')
        sheet_scraper.write(fixture_rows)

        stored = Story.select()
        self.assertEqual(len(stored), 4)

        # Stored records are checked in the same order as the fixture rows.
        for index, fixture_row in enumerate(fixture_rows):
            record = stored[index]
            self.assertEqual(record.name, fixture_row['story_headline'])
            self.assertEqual(record.url, fixture_row['story_url'])
示例#30
0
    def write(self, stories, team=None):
        """
        Create a Story record for each story dict and return the new records.

        Each dict supplies 'name', 'slug', 'date' and 'url'. A story whose
        creation raises IntegrityError is logged and left out of the result.
        """
        # TODO
        # this should be abstracted here and in spreadsheet.py
        created = []
        for entry in stories:
            try:
                record = Story.create(name=entry['name'],
                                      slug=entry['slug'],
                                      date=entry['date'],
                                      url=entry['url'],
                                      team=team)
            except IntegrityError:
                # Story probably already exists.
                logger.info(
                    'Not adding %s to database: probably already exists' %
                    (entry['name']))
            else:
                created.append(record)

        return created
示例#31
0
def handle_scroll_slug_question(message):
    """
    Reply with a scroll-depth histogram for the slug named in the message.

    Nothing is sent when the text does not match SCROLL_RATE_REGEX or the
    captured slug is empty. When no scroll data comes back for the slug, a
    short "not found" reply is sent instead of the chart.
    """
    m = re.search(SCROLL_RATE_REGEX, message.body['text'])
    if not m:
        return

    slug = m.group(1)
    if not slug:
        return

    stories = Story.select().where(Story.slug.contains(slug))
    rows = analytics.get_depth_rate(slug)

    if not rows:
        message.reply("I wasn't able to find scroll data for %s" % slug)
        return

    reply = u"Here's what I know about `%s`." % slug

    reply += '\n\nThis graphic appears in %s %s:' % (
        inflector.number_to_words(len(stories)),
        inflector.plural('story', len(stories)))

    # Remember the last story listed: its screenshot decorates the chart.
    # Tracking it explicitly avoids reading the loop variable after the loop,
    # which is unbound when no story matches the slug.
    last_story = None
    for story in stories:
        reply += '\n' + '*<%s|%s>*' % (story.url, story.name.strip())
        last_story = story

    histogram_url = ChartTools.scroll_histogram_link(rows)

    if last_story is not None and last_story.screenshot:
        histogram_url = ChartTools.add_screenshot_to_chart(
            last_story.screenshot, histogram_url)

    attachments = [{
        "fallback": slug + " update",
        "color": "#eeeeee",
        "title": slug,
        "image_url": histogram_url
    }]

    slackTools.send_message(message.body['channel'],
                            reply,
                            attachments,
                            unfurl_links=False)
示例#32
0
文件: rss.py 项目: thecarebot/carebot
    def write(self, stories, team=None):
        """
        Store each story dict as a Story record and return those that were
        newly created.

        Dicts are read for 'name', 'slug', 'date' and 'url'. When creation
        raises IntegrityError the story is logged and skipped.
        """
        # TODO
        # this should be abstracted here and in spreadsheet.py
        added = []
        for item in stories:
            try:
                record = Story.create(
                    name=item['name'],
                    slug=item['slug'],
                    date=item['date'],
                    url=item['url'],
                    team=team
                )
            except IntegrityError:
                # Story probably already exists.
                logger.info('Not adding %s to database: probably already exists' % (item['name']))
                continue

            added.append(record)

        return added
示例#33
0
    def get_slug_message(self, slug, story=None):
        """
        Build the scroll-depth message for a graphic slug.

        Returns a dict with 'text' and 'attachments' keys, or None when the
        analytics query comes back without rows. When a story is given, it is
        passed to ChartTools.add_screenshot_to_chart together with the chart
        URL.
        """
        # Try to match the story to a slug to accurately get a team
        # The Google Analytics property ID comes from the team config
        # We use the default team if none is found
        matching_stories = Story.select().where(Story.slug.contains(slug))
        team = self.config.get_team_for_stories(matching_stories)

        query_params = self.get_slug_query_params(team=team, slug=slug)
        response = GoogleAnalytics.query_ga(query_params)
        if not response.get('rows'):
            logger.info('No rows found for slug %s' % slug)
            return

        # Clean up the data
        rows = self.clean_data(response.get('rows'))
        total_people = self.get_total_people(rows)
        friendly_people = "{:,}".format(total_people) # Comma-separated #s
        median = self.get_median(rows)

        # Set up the chart
        chart_url = self.get_chart(rows)
        if story:
            chart_url = ChartTools.add_screenshot_to_chart(story, chart_url)

        # TODO: Not confident in median calculations so far
        # text = "*%s people* got a median of *%s percent* down the page." % (friendly_people, median)
        chart_attachment = {
            "fallback": slug + " update",
            "color": "#eeeeee",
            "title": "How far down did people scroll?",
            "image_url": chart_url
        }

        return {
            'text': '',
            'attachments': [chart_attachment]
        }
示例#34
0
    def handle_url_inquiry(self, message):
        """
        Respond to "How is http://example.com/foo doing?"

        Returns None when the text lacks the word 'doing' or contains no URL,
        an apology dict when the story lookup fails, and otherwise whatever
        get_update_message() returns for the story found.
        """
        text = message.body['text']
        if 'doing' not in text:
            return

        matches = self.GRUBER_URLINTEXT_PAT.findall(text)

        # findall() returns an empty list when the text has no URL, so the
        # list itself must be tested before indexing into it.
        if not matches or not matches[0]:
            return

        url = str(matches[0][0])
        # Turn escaped ampersands back into plain ones before the lookup.
        url = url.replace('&amp;', '&')
        logger.info("Looking for url %s" % url)

        try:
            story = Story.select().where(Story.url == url).get()
        except Exception:
            # Best effort: any failed lookup becomes an apology, but
            # SystemExit / KeyboardInterrupt are no longer swallowed.
            return {'text': "Sorry, I don't have stats for %s" % url}

        return self.get_update_message(story)
示例#35
0
    def handle_url_inquiry(self, message):
        """
        Respond to "How is http://example.com/foo doing?"

        Returns None when the text lacks the word "doing" or contains no URL,
        an apology dict when the story lookup fails, and otherwise whatever
        get_update_message() returns for the story found.
        """
        text = message.body["text"]
        if "doing" not in text:
            return

        matches = self.GRUBER_URLINTEXT_PAT.findall(text)

        # findall() returns an empty list when the text has no URL, so the
        # list itself must be tested before indexing into it.
        if not matches or not matches[0]:
            return

        url = str(matches[0][0])
        # Turn escaped ampersands back into plain ones before the lookup.
        url = url.replace("&amp;", "&")
        logger.info("Looking for url %s" % url)

        try:
            story = Story.select().where(Story.url == url).get()
        except Exception:
            # Best effort: any failed lookup becomes an apology, but
            # SystemExit / KeyboardInterrupt are no longer swallowed.
            return {"text": "Sorry, I don't have stats for %s" % url}

        return self.get_update_message(story)
示例#36
0
    def test_start_tracking(self, mock_get_channel_name):
        """
        Test if we can start tracking a new story given only a NPR URL and a
        graphic slug
        """
        mock_get_channel_name.return_value = 'default-channel'
        clear_stories()
        tracker = NPRStartTracking()

        # One definition of the URL keeps the request, the expected reply and
        # the database check in agreement.
        story_url = 'http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong'

        class FakeMessage(object):
            body = {
                'text': '@carebot track slug-a-b-c on ' + story_url,
                'channel': 'default-channel'
            }

        expected = "Ok, I've started tracking `slug-a-b-c` on " + story_url
        message = tracker.respond(FakeMessage)
        # Written as a call so the module parses on Python 2 and Python 3.
        print(message)
        self.assertIn(expected, message['text'])

        results = Story.select()
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0].url, story_url)
示例#37
0
    def handle_pageviews_inquiry(self, message):
        """
        Report the unique pageview count for the slug named in the message.

        Returns None when the text does not match UNSUCK_SLUG_SEARCH_REGEX.
        Otherwise returns a dict whose 'text' is the formatted count, or an
        apology when get_unique_visitor_data() returns a falsy value.
        """
        match = re.search(self.UNSUCK_SLUG_SEARCH_REGEX, message.body['text'])

        # re.search() returns None on no match; bail out rather than fail on
        # match.group().
        if not match:
            return
        slug = match.group(1)

        stories = Story.select().where(Story.slug.contains(slug))
        team = config.get_team_for_stories(stories)

        unique_pageviews = self.get_unique_visitor_data(team=team, slug=slug)

        # Test for "no data" BEFORE formatting: the formatted string is never
        # empty, so checking it afterwards could not detect a missing value.
        if not unique_pageviews:
            return {
                'text':
                "Sorry, I wasn't able to find unique visitor stats for %s" %
                slug
            }

        formatted_pageviews = "{:,}".format(int(unique_pageviews))

        return {
            'text':
            '`{0}` has had *{1}* unique pageviews'.format(
                slug, formatted_pageviews)
        }
示例#38
0
    def test_start_tracking(self, mock_get_channel_name):
        """
        Test if we can start tracking a new story given only a NPR URL and a
        graphic slug
        """
        mock_get_channel_name.return_value = "default-channel"
        clear_stories()
        tracker = NPRStartTracking()

        # One definition of the URL keeps the request, the expected reply and
        # the database check in agreement.
        story_url = "http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong"

        class FakeMessage(object):
            body = {
                "text": "@carebot track slug-a-b-c on " + story_url,
                "channel": "default-channel",
            }

        expected = "Ok, I've started tracking `slug-a-b-c` on " + story_url
        message = tracker.respond(FakeMessage)
        # Written as a call so the module parses on Python 2 and Python 3.
        print(message)
        self.assertIn(expected, message["text"])

        results = Story.select()
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0].url, story_url)
示例#39
0
    def write(self, stories, team=None):
        """
        Save rows to the database

        Each row becomes a Story of type 'pocky' whose slug joins the row's
        'official flavor description' and 'taster' values. Returns the list
        of newly created records; rows that raise IntegrityError are logged
        and skipped.
        """
        saved = []
        for row in stories:
            slug = ' - '.join(
                [row['official flavor description'], row['taster']])

            try:
                record = Story.create(
                    name=row['name'].strip(),
                    slug=slug,
                    date=PockyScraper.parse_date(row['date tasted']),
                    story_type='pocky',
                    team=team,
                )
                logger.info('Added {0}'.format(record.name))
            except IntegrityError:
                # Story probably already exists.
                logger.info('Not adding %s to database: probably already exists' % (slug))
            else:
                saved.append(record)

        return saved
示例#40
0
def get_story_stats():
    """
    Walk every stored story and send a linger-time update for each one whose
    time bucket has changed since it was last checked.

    Stories still in their last reported bucket, and stories without a
    bucket yet, are skipped without being marked as checked.
    """
    analytics = GoogleAnalyticsScraper()

    # TODO use a SQL query instead of app logic to exclude stories that are
    # too old.
    for story in Story.select():
        logger.info("About to check %s" % (story.name))

        story_time_bucket = time_bucket(story.date)
        last_bucket = story.last_bucket

        # Skip stories already reported on in their current bucket.
        if last_bucket and last_bucket == story_time_bucket:
            logger.info("Checked recently. Bucket is still %s" %
                        (story_time_bucket))
            continue

        if not story_time_bucket:
            logger.info("Story is too new; skipping for now")
            continue

        # Some stories have multiple slugs
        stats_per_slug = analytics.get_linger_data_for_story(story)

        # Plain truthiness instead of `len(...) is not 0`: identity
        # comparison against an int literal is implementation-dependent.
        if stats_per_slug:
            slackTools.send_linger_time_update(story, stats_per_slug,
                                               story_time_bucket)

        # Mark the story as checked
        story.last_checked = datetime.datetime.now(pytz.timezone('US/Eastern'))
        story.last_bucket = story_time_bucket
        story.save()
示例#41
0
    def respond(self, message):
        """
        Respond to requests about the last seven days of data
        TODO: Loop over all stories and report stats on each

        Returns a dict with 'text' and 'attachments' keys: a summary line, a
        linger-time histogram, and one field per story whose tracking started
        within the last seven days.
        """
        seven_days_ago = datetime.datetime.now() - datetime.timedelta(days=7)
        stories = Story.select().where(Story.tracking_started > seven_days_ago)

        # A story's slug field is split on commas; the set keeps each
        # distinct slug once. The builtin set replaces the deprecated
        # sets.Set.
        slugs = set()
        for story in stories:
            slugs.update(story.slug.split(','))

        try:
            team = self.config.get_team_for_story(stories[0])
        except Exception:
            # Covers an empty result (stories[0] fails) and any lookup error,
            # without also swallowing SystemExit / KeyboardInterrupt.
            team = self.config.get_default_team()

        total_users = self.get_user_data(team=team, start_date='7daysAgo')
        total_users = int(total_users['rows'][0][0])
        total_users = "{:,}".format(total_users)

        npr_linger = NPRLingerRate()
        linger_rows = npr_linger.get_linger_data(team=team,
                                                 start_date='7daysAgo')
        median = NPRLingerRate.get_median(linger_rows)
        linger_histogram_url = npr_linger.get_histogram_url(
            linger_rows, median)

        attachments = [{
            "fallback": "linger update",
            "color": "#eeeeee",
            "title": "Time spent on graphics over the last week",
            "image_url": linger_histogram_url
        }]

        text = "In the past 7 days, I've tracked {0} stories and {1} graphics.".format(
            len(stories), len(slugs))
        text += "\n\n"
        text += "{0} people looked at graphics on the property. Here's how much time they spent:".format(
            total_users)

        # One short field per story, linking the slug text to the story URL.
        fields = [
            {
                "title": story.name.strip(),
                "value": "<{0}|{1}>".format(story.url, story.slug.strip()),
                "short": True
            }
            for story in stories
        ]

        attachments.append({
            "fallback": "linger update",
            "color": "#eeeeee",
            # "title": "What we have done",
            "fields": fields
        })

        return {'text': text, 'attachments': attachments}
示例#42
0
    def handle_slug_inquiry(self, message):
        """
        Respond to an inquiry about the slug with stats and charts

        Returns None when the text does not match SLUG_SEARCH_REGEX or the
        captured slug is empty, an apology dict when no linger rows are
        found, and otherwise a dict with 'text' and 'attachments' keys.
        """
        match = re.search(self.SLUG_SEARCH_REGEX, message.body['text'])

        # re.search() returns None on no match; bail out rather than fail on
        # match.group().
        if not match:
            return

        slug = match.group(1)
        if not slug:
            return

        # Try to match the story to a slug to accurately get a team
        # The Google Analytics property ID comes from the team config
        # We use the default team if none is found
        stories = Story.select().where(Story.slug.contains(slug))
        team = self.config.get_team_for_stories(stories)

        linger_rows = self.get_linger_data(team=team, slug=slug)

        if not linger_rows:
            return {
                'text':
                "Sorry, I wasn't able to find linger rate stats for %s" %
                slug
            }

        median = NPRLingerRate.get_median(linger_rows)
        # Debug output, written as calls so the module parses on Python 2
        # and Python 3.
        print("Got median")
        print(median)
        people = "{:,}".format(median['total_people'])
        time_text = TimeTools.humanist_time_bucket(median)

        reply = u"*%s* people spent a median *%s* on `%s`." % (
            people, time_text, slug)

        reply += '\n\nThis graphic appears in %s %s I am tracking:' % (
            inflector.number_to_words(len(stories)),
            inflector.plural('story', len(stories)))

        for story in stories:
            reply += '\n' + '*<%s|%s>*' % (story.url, story.name.strip())

        # Get linger rate data for charting.
        all_graphics_rows = self.get_linger_data(team=team)
        all_graphics_median = NPRLingerRate.get_median(all_graphics_rows)

        # First chart: this slug. Second chart: every graphic, for comparison.
        attachments = [{
            "fallback": slug + " update",
            "color": "#eeeeee",
            "title": slug,
            "image_url": self.get_histogram_url(linger_rows, median)
        }, {
            "fallback": slug + " update",
            "color": "#eeeeee",
            "title": "How all graphics performed",
            "image_url": self.get_histogram_url(all_graphics_rows,
                                                all_graphics_median)
        }]

        return {'text': reply, 'attachments': attachments}
示例#43
0
def handle_slug_question(message):
    """
    Reply with linger-rate (and, when available, scroll-depth) charts for the
    slug named in the message.

    Nothing is sent when the text does not match LINGER_RATE_REGEX or the
    captured slug is empty. When no median comes back for the slug, a short
    failure reply follows the initial acknowledgement.
    """
    m = re.search(LINGER_RATE_REGEX, message.body['text'])
    if not m:
        return

    slug = m.group(1)
    if not slug:
        return

    median = analytics.get_linger_rate(slug)
    stories = Story.select().where(Story.slug.contains(slug))

    message.reply("Ok! I'm looking up %s. This may take a second." % slug)

    if not median:
        message.reply("I wasn't able to figure out the linger rate of %s" %
                      slug)
        return

    people = "{:,}".format(median['total_people'])
    time_text = TimeTools.humanist_time_bucket(median)
    reply = u"*%s* people spent a median *%s* on `%s`." % (
        people, time_text, slug)

    # List the stories this slug appears on
    reply += '\n\nThis graphic appears in %s %s:' % (
        inflector.number_to_words(len(stories)),
        inflector.plural('story', len(stories)))

    # Keep hold of the first story: its screenshot is used for the scroll
    # chart below. Capturing it here avoids indexing stories[0], which fails
    # when no story matches the slug.
    first_story = None
    for story in stories:
        if first_story is None:
            first_story = story
        reply += '\n' + '*<%s|%s>*' % (story.url, story.name.strip())

    # Get linger rate data
    linger_rows = analytics.get_linger_rows(slug)
    linger_histogram_url = ChartTools.linger_histogram_link(
        linger_rows, median)

    all_graphics_rows = analytics.get_linger_rows()
    all_graphics_median = analytics.get_linger_rate()
    all_histogram = ChartTools.linger_histogram_link(
        all_graphics_rows, all_graphics_median)

    attachments = [{
        "fallback": slug + " update",
        "color": "#eeeeee",
        "title": slug,
        "image_url": linger_histogram_url
    }, {
        "fallback": slug + " update",
        "color": "#eeeeee",
        "title": "How all graphics performed",
        "image_url": all_histogram
    }]

    # Get scroll data, if any.
    scroll_depth_rows = analytics.get_depth_rate(slug)
    if scroll_depth_rows:
        scroll_histogram_url = ChartTools.scroll_histogram_link(
            scroll_depth_rows)

        if first_story is not None and first_story.screenshot:
            scroll_histogram_url = ChartTools.add_screenshot_to_chart(
                first_story.screenshot, scroll_histogram_url)

        attachments.append({
            "fallback": slug + " update",
            "color": "#eeeeee",
            "title": "How far down did people scroll?",
            "image_url": scroll_histogram_url
        })

    slackTools.send_message(message.body['channel'],
                            reply,
                            attachments,
                            unfurl_links=False)
示例#44
0
def clear_stories():
    """Build an unfiltered delete query on Story and execute it."""
    Story.delete().execute()
示例#45
0
文件: db.py 项目: PotterSys/carebot
def clear_stories():
  """Build an unfiltered delete query on Story and execute it."""
  Story.delete().execute()