Exemplo n.º 1
0
class ArticleRankerTest(unittest.TestCase):
    """Tests for ArticleRanker backed by a TfidfFeatureExtractor.

    Every test works on the same boingboing article, which is supplied as a
    JSON document and parsed into ``self.article_as_dict`` by ``setUp``.
    """

    # Raw JSON payload of the article used by every test in this class.
    _ARTICLE_JSON = (
        '{"news_vendor": "boingboing", '
        '"features": '
        '{"version": "TF-IDF-1.1", '
        '"data": [[87, 1.0]]}, '
        '"author": "David Pescovitz", '
        '"headline": "Documentary about Astro Boy creator Osamu\\u00a0Tezuka",'
        ' "content": "<p class=\\"byline permalink\\"><a href=\\"http://boingboing.net/author/david_pescovitz\\" title=\\"Posts by David Pescovitz\\" rel=\\"author\\">David Pescovitz</a> at 9:33 am Wed, Oct 23, 2013   \\n\\n\\n\\n</p>", '
        '"clean_content": "David Pescovitz at 9:33 am Wed, Oct 23, 2013 ",'
        ' "link": "http://rss.feedsportal.com/c/35208/f/653965/s/32d1ba95/sc/38/l/0Lboingboing0Bnet0C20A130C10A0C230Cdocumentary0Eabout0Eastro0Eboy0Ecr0Bhtml/story01.htm"}'
    )

    def setUp(self):
        """Populate the database and build the ranker and article under test."""
        fill_database()
        add_vendors()

        settings = load_config(file_path="/vagrant/config.yaml", logger=logger)
        extractor = TfidfFeatureExtractor(prefix=settings['prefix'])
        self.feature_extractor = extractor
        self.ranker = ArticleRanker(extractor=extractor)

        self.body = self._ARTICLE_JSON
        self.article_as_dict = json.loads(self.body)

    def tearDown(self):
        """Intentionally a no-op: the database is not cleared between tests."""
        pass

    def _store_article(self):
        """Look up the article's vendor and save the article through the ranker."""
        article_vendor = self.ranker.get_vendor(self.article_as_dict)
        return self.ranker.save_article(article_vendor, self.article_as_dict)

    def test_get_vendor(self):
        """The vendor found for the article carries the article's vendor name."""
        found_vendor = self.ranker.get_vendor(self.article_as_dict)
        expected_name = self.article_as_dict.get('news_vendor')

        self.assertEqual(expected_name, found_vendor.name)

    def test_save_article(self):
        """A saved article keeps the author given in the payload."""
        saved = self._store_article()

        self.assertEqual(saved.author, 'David Pescovitz')

    def test_save_rating(self):
        """Saving a rating adds exactly one RankedArticle for the user."""
        saved = self._store_article()
        rater = User.objects(email="*****@*****.**").first()

        # Count the user's ranked articles before the new rating is stored.
        # The query set is kept so the new entry can be indexed afterwards.
        ratings = RankedArticle.objects(user_id=rater.id)
        count_before = ratings.count()

        # Store the new rating.
        self.ranker.save_rating(user=rater, article=saved, rating=1.0)

        rater.reload()
        count_after = RankedArticle.objects(user_id=rater.id).count()

        self.assertEqual(1, count_after - count_before)
        # The entry at the old count is expected to be the one just added.
        self.assertEqual(1.0, ratings[count_before].rating)

    def test_rank_article(self):
        """Smoke test: ranking the article is only required not to raise."""
        # Original author's note: some error in gensim, probably because some
        # features are not quite right.
        self.ranker.rank_article(self.article_as_dict)
Exemplo n.º 2
0
 def setUp(self):
     """Populate the database and build the ranker and the article under test."""
     fill_database()
     add_vendors()

     settings = load_config(file_path="/vagrant/config.yaml", logger=logger)
     extractor = TfidfFeatureExtractor(prefix=settings['prefix'])
     self.feature_extractor = extractor
     self.ranker = ArticleRanker(extractor=extractor)

     # Raw JSON payload of a boingboing article; the pieces are joined by
     # implicit string-literal concatenation.
     raw_article = (
         '{"news_vendor": "boingboing", '
         '"features": '
         '{"version": "TF-IDF-1.1", '
         '"data": [[87, 1.0]]}, '
         '"author": "David Pescovitz", '
         '"headline": "Documentary about Astro Boy creator Osamu\\u00a0Tezuka",'
         ' "content": "<p class=\\"byline permalink\\"><a href=\\"http://boingboing.net/author/david_pescovitz\\" title=\\"Posts by David Pescovitz\\" rel=\\"author\\">David Pescovitz</a> at 9:33 am Wed, Oct 23, 2013   \\n\\n\\n\\n</p>", '
         '"clean_content": "David Pescovitz at 9:33 am Wed, Oct 23, 2013 ",'
         ' "link": "http://rss.feedsportal.com/c/35208/f/653965/s/32d1ba95/sc/38/l/0Lboingboing0Bnet0C20A130C10A0C230Cdocumentary0Eabout0Eastro0Eboy0Ecr0Bhtml/story01.htm"}'
     )
     self.body = raw_article
     self.article_as_dict = json.loads(raw_article)
Exemplo n.º 3
0
 def setUp(self):
     """Populate the database and build an ESA-backed ranker plus a sample article."""
     fill_database()

     settings = load_config(file_path="/vagrant/config.yaml", logger=logger)
     extractor = EsaFeatureExtractor(prefix=settings['prefix'])
     self.feature_extractor = extractor
     self.ranker = ArticleRanker(extractor=extractor)

     # Feature payload of the sample article: a version tag plus pair tuples.
     sample_features = {
         'version': '1.0',
         'data': [(1, 0.5), (3, 0.6)],
     }
     self.article_as_dict = {
         'news_vendor': 'TechCrunch',
         'author': "MG Siegler",
         'link': "http://www.techcrunch.com",
         'headline': "Again Apple",
         'clean_content': "Fooobaaar!",
         'content': "<p>Fooobaaar!</p>",
         'features': sample_features,
     }
Exemplo n.º 4
0
    def __init__(self, config):
        """Connect to the database, load the feature extractor and build the ranker.

        ``config`` is indexed as ``config['database'][...]`` with the keys
        ``db-name``, ``user``, ``passwd`` and ``port``, and as
        ``config['prefix']``.

        The process exits with status 1 (``sys.exit(1)``) when the database
        connection raises ``ConnectionError`` or when constructing the
        feature extractor raises any ``Exception``.
        """
        self.config_ = config
        # NOTE(review): self.logger is stored here, but the calls below go
        # through a module-level `logger` -- confirm which one is intended.
        self.logger = logging.getLogger("main")
        self.stdout = sys.stdout

        # Connect to mongo database
        try:
            connect(config['database']['db-name'],
                    username=config['database']['user'],
                    password=config['database']['passwd'],
                    port=config['database']['port'])
        except ConnectionError as e:
            logger.error("Could not connect to mongodb: %s", e)
            sys.exit(1)

        logger.info("Load feature extractor.")
        try:
            self.feature_extractor_ = LdaFeatureExtractor(prefix=self.config_["prefix"])
        except Exception as inst:
            # The two literals are concatenated, so the first one must end
            # with a space to keep the sentences apart in the log output.
            logger.error("Could not load feature extractor. "
                         "Unknown error %s: %s", type(inst), inst)
            sys.exit(1)

        self.ranker = ArticleRanker(extractor=self.feature_extractor_)
Exemplo n.º 5
0
class ArticleRankerTest(unittest.TestCase):
    """Tests for ArticleRanker backed by an EsaFeatureExtractor.

    ``setUp`` fills the database and builds a small TechCrunch article dict;
    ``tearDown`` clears the database again after every test.
    """

    def setUp(self):
        """Populate the database and build the ranker and the sample article."""
        fill_database()

        settings = load_config(file_path="/vagrant/config.yaml", logger=logger)
        extractor = EsaFeatureExtractor(prefix=settings['prefix'])
        self.feature_extractor = extractor
        self.ranker = ArticleRanker(extractor=extractor)

        # Feature payload of the sample article: a version tag plus pair tuples.
        sample_features = {
            'version': '1.0',
            'data': [(1, 0.5), (3, 0.6)],
        }
        self.article_as_dict = {
            'news_vendor': 'TechCrunch',
            'author': "MG Siegler",
            'link': "http://www.techcrunch.com",
            'headline': "Again Apple",
            'clean_content': "Fooobaaar!",
            'content': "<p>Fooobaaar!</p>",
            'features': sample_features,
        }

    def tearDown(self):
        """Remove everything the test put into the database."""
        clear_database()

    def _store_article(self):
        """Look up the sample article's vendor and save the article."""
        article_vendor = self.ranker.get_vendor(self.article_as_dict)
        return self.ranker.save_article(article_vendor, self.article_as_dict)

    def test_get_vendor_false(self):
        """A vendor name that is not in the database yields None."""
        missing = self.ranker.get_vendor({'news_vendor': 'not in db'})

        self.assertEqual(missing, None)

    def test_get_vendor(self):
        """The vendor found for the sample article has the expected config."""
        found_vendor = self.ranker.get_vendor(self.article_as_dict)

        self.assertEqual(found_vendor.config, 'vendor config')

    def test_save_article_false(self):
        """Saving an article dict that only has a headline yields None."""
        article_vendor = self.ranker.get_vendor(self.article_as_dict)
        incomplete_article = {'headline': "Everything else is missing."}
        saved = self.ranker.save_article(article_vendor, incomplete_article)

        self.assertEqual(saved, None)

    def test_save_article(self):
        """A saved article keeps the author given in the dict."""
        saved = self._store_article()

        self.assertEqual(saved.author, 'MG Siegler')

    def test_save_rating(self):
        """After saving one rating the user has three ranked articles."""
        saved = self._store_article()
        rater = User.objects(email="*****@*****.**").first()
        self.ranker.save_rating(user=rater, article=saved, rating=1.0)

        rater.reload()
        ratings = RankedArticle.objects(user_id=rater.id)
        # NOTE(review): the expected total of 3 presumably relies on entries
        # created by fill_database() -- confirm against the fixture.
        self.assertEqual(3, ratings.count())
        self.assertEqual(1.0, ratings[0].rating)

    def test_rank_article(self):
        """Smoke test: ranking the article is only required not to raise."""
        # Original author's note: some error in gensim, probably because some
        # features are not quite right.
        self.ranker.rank_article(self.article_as_dict)
Exemplo n.º 6
0
class StompListener(object):
    """Message listener that ranks every article it receives.

    Construction connects to the database, loads an ``LdaFeatureExtractor``
    and wraps it in an ``ArticleRanker``. The ``on_*`` methods are callback
    hooks (presumably invoked by a STOMP client connection -- confirm against
    the code that registers this listener).
    """

    def __init__(self, config):
        """Connect to the database, load the feature extractor and build the ranker.

        ``config`` is indexed as ``config['database'][...]`` with the keys
        ``db-name``, ``user``, ``passwd`` and ``port``, and as
        ``config['prefix']``.

        The process exits with status 1 (``sys.exit(1)``) when the database
        connection raises ``ConnectionError`` or when constructing the
        feature extractor raises any ``Exception``.
        """
        self.config_ = config
        # NOTE(review): self.logger is stored here, but every method logs
        # through a module-level `logger` -- confirm which one is intended.
        self.logger = logging.getLogger("main")
        self.stdout = sys.stdout

        # Connect to mongo database
        try:
            connect(config['database']['db-name'],
                    username=config['database']['user'],
                    password=config['database']['passwd'],
                    port=config['database']['port'])
        except ConnectionError as e:
            logger.error("Could not connect to mongodb: %s", e)
            sys.exit(1)

        logger.info("Load feature extractor.")
        try:
            self.feature_extractor_ = LdaFeatureExtractor(prefix=self.config_["prefix"])
        except Exception as inst:
            # The two literals are concatenated, so the first one must end
            # with a space to keep the sentences apart in the log output.
            logger.error("Could not load feature extractor. "
                         "Unknown error %s: %s", type(inst), inst)
            sys.exit(1)

        self.ranker = ArticleRanker(extractor=self.feature_extractor_)

    def rank_article(self, article_as_dict):
        """Hand the already-parsed article dict to the ranker."""
        self.ranker.rank_article(article_as_dict)

    @staticmethod
    def on_error(headers, message):
        """Log an error frame.

        This is a static method, so no instance is bound: the first positional
        argument is whatever the caller passes first (named ``headers`` here;
        it was previously misnamed ``self``) and is not used.
        """
        logger.error('received an error %s', message)

    def on_message(self, headers, message):
        """Parse ``message`` as JSON and rank the resulting article.

        ``headers`` is accepted but not used. ``json.loads`` errors on a
        malformed message are not caught here.
        """
        received_message = json.loads(message)

        # save and rank article
        # The log line is written before ranking actually runs.
        logger.info("*Ranked article* -> %s", message)
        self.rank_article(received_message)

    def __print_async(self, frame_type, headers, body):
        """
        Utility function to print a message and setup the command prompt
        for the next input

        Writes the frame type, one ``key: value`` line per header, an empty
        line and the body to ``self.stdout``, followed by a ``> `` prompt.
        """
        # Overwrite the current prompt before printing the frame.
        self.__sysout("\r  \r", end='')
        self.__sysout(frame_type)
        for header_key, header_value in headers.items():
            self.__sysout('%s: %s' % (header_key, header_value))
        self.__sysout('')
        self.__sysout(body)
        self.__sysout('> ', end='')
        self.stdout.flush()

    def on_connected(self, headers, body):
        """Echo a CONNECTED frame to ``self.stdout``."""
        self.__print_async("CONNECTED", headers, body)

    def __error(self, msg, end="\n"):
        """Write ``msg`` followed by ``end`` to ``self.stdout``.

        Identical to ``__sysout``; despite the name it does not write to
        stderr, and nothing in this class calls it.
        """
        self.stdout.write(str(msg) + end)

    def __sysout(self, msg, end="\n"):
        """Write ``msg`` followed by ``end`` to ``self.stdout``."""
        self.stdout.write(str(msg) + end)

    def on_send(self, headers, body):
        """Echo a SEND frame to ``self.stdout``."""
        self.__print_async("SEND", headers, body)
Exemplo n.º 7
0
from nyan.shared_modules.models.mongodb_models import *

import unittest
from nyan.shared_modules.utils.helper import load_config
from nyan.shared_modules.models.mongodb_models import *

# Script-wide logger; basicConfig enables DEBUG output in the format
# "<time> : <level> : <message>".
# NOTE(review): `logging`, `connect`, `TfidfFeatureExtractor` and
# `ArticleRanker` are not imported explicitly in the lines shown here --
# presumably they come from the star import or from imports outside this
# excerpt; confirm.
logger = logging.getLogger("unittesting")
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)

# Connect to the "nyan_test" database on port 27017
connect("nyan_test", port=27017)


# Load the configuration and build a ranker on top of a TF-IDF feature
# extractor; only the 'prefix' entry of the config is used here.
config_ = load_config(file_path="/vagrant/config.yaml", logger=logger)
feature_extractor = TfidfFeatureExtractor(prefix=config_['prefix'])
ranker = ArticleRanker(extractor=feature_extractor)

#body='{"news_vendor": "allfacebook", "features": {"version": "TF-IDF-1.1", "data": [[30, 0.1152804266328724], [700, 0.10306959692284932], [1182, 0.11807137456445314], [1653, 0.0941681102858023], [1676, 0.10370011901114959], [1811, 0.17155946860137014], [2611, 0.21788069908826144], [2652, 0.45744644650563265], [2662, 0.21298213388546064], [3204, 0.1214239502500954], [3405, 0.11661768833389584], [4666, 0.11661768833389584], [4837, 0.10076153961190332], [5148, 0.0776009243684577], [5363, 0.23323537666779168], [5689, 0.09148928930112653], [5817, 0.1256226233038028], [5829, 0.12819799595561865], [6045, 0.08371453418650211], [6287, 0.12339173403941908], [6333, 0.32891400667298226], [6616, 0.5590571320884813], [7528, 0.09148928930112653], [7689, 0.08102851972383454]]}, "author": "Rene Nederhand", "headline": "Social Media Jobs: High 5 Games, DeSales Media Group, ESI Design", "content": "<section class=\\"postcontent col-main\\">\\n\\t\\t<p></p>\\n<p>This week, <a href=\\"http://www.mediabistro.com/High-5-Games-jobs-e31135.html\\" target=\\"_blank\\">High 5 Games</a> is hiring a <a href=\\"http://mediabistro.com/allfacebook-jobs/jobview.asp?joid=157317&amp;c=jejpaf\\" target=\\"_blank\\">Flash game developer</a>, as well as a <a href=\\"http://mediabistro.com/allfacebook-jobs/jobview.asp?joid=157315&amp;c=jejpaf\\" target=\\"_blank\\">Java developer</a>. Meanwhile, <a href=\\"http://www.mediabistro.com/DeSales-Media-Group-jobs-e34506.html\\" target=\\"_blank\\">DeSales Media Group</a> needs a <a href=\\"http://mediabistro.com/allfacebook-jobs/jobview.asp?joid=157370&amp;c=jejpaf\\" target=\\"_blank\\">bilingual social media specialist</a>, and <a href=\\"http://www.mediabistro.com/ESI-Design-jobs-e24647.html\\" target=\\"_blank\\">ESI Design</a> is on the hunt for a <a href=\\"http://mediabistro.com/allfacebook-jobs/jobview.asp?joid=157413&amp;c=jejpaf\\" target=\\"_blank\\">vice president of business development and marketing</a>. 
Get the scoop on these openings below, and find additional <a href=\\"http://mediabistro.com/Social-Media-jobs.html?c=jejpaf\\" target=\\"_blank\\">social media jobs</a> on Mediabistro.</p>\\n<p><a href=\\"http://allfacebook.com/files/2012/06/high5games.jpg\\"></a></p>\\n<ul>\\n<li>\\n<a href=\\"http://mediabistro.com/allfacebook-jobs/jobview.asp?joid=157317&amp;c=jejpaf\\" target=\\"_blank\\">Flash Game Developer</a><strong> High 5 Games </strong>(New York, NY)</li>\\n<li>\\n<a href=\\"http://mediabistro.com/allfacebook-jobs/jobview.asp?joid=157315&amp;c=jejpaf\\" target=\\"_blank\\">Java Developer</a> <strong></strong><strong></strong><strong>High 5 Games </strong>(New York, NY)</li>\\n<li>\\n<a href=\\"http://mediabistro.com/allfacebook-jobs/jobview.asp?joid=157370&amp;c=jejpaf\\" target=\\"_blank\\">Bilingual Social Media Specialist</a><strong> Future US </strong>(Brooklyn, NY)</li>\\n<li>\\n<a href=\\"http://mediabistro.com/allfacebook-jobs/jobview.asp?joid=157413&amp;c=jejpaf\\" target=\\"_blank\\">Vice President, Business Development &amp; Marketing</a> <strong></strong><strong>Warner Bros. Entertainment Group </strong>(New York, NY)</li>\\n<li>\\n<a href=\\"http://mediabistro.com/allfacebook-jobs/jobview.asp?joid=157435&amp;c=jejpaf\\" target=\\"_blank\\">Social Media Community Engagement Coordinator</a><strong>\\u00a0</strong><strong>International Fellowship of Christians &amp; Jews </strong>(Chicago, IL)</li>\\n</ul>\\n<p><em>Find more great <a href=\\"http://mediabistro.com/allfacebook-jobs?c=jejpaf\\" target=\\"_blank\\">social media jobs</a> on our job board. Looking to hire? Tap into our network of talented AllFacebook pros and <a href=\\"http://mediabistro.com/allfacebook-jobs/post_job.asp?c=jejpaf\\" target=\\"_blank\\">post a risk-free job listing</a>. 
For real-time openings and employment news, follow <a href=\\"http://twitter.com/mbjobpost\\" target=\\"_blank\\">@MBJobPost</a>.</em></p>\\n\\t</section>", "clean_content": " This week, High 5 Games is hiring a Flash game developer, as well as a Java developer. Meanwhile, DeSales Media Group needs a bilingual social media specialist, and ESI Design is on the hunt for a vice president of business development and marketing. Get the scoop on these openings below, and find additional social media jobs on Mediabistro. Flash Game Developer High 5 Games (New York, NY) Java Developer High 5 Games (New York, NY) Bilingual Social Media Specialist Future US (Brooklyn, NY) Vice President, Business Development & Marketing Warner Bros. Entertainment Group (New York, NY) Social Media Community Engagement Coordinator\\u00a0International Fellowship of Christians & Jews (Chicago, IL) Find more great social media jobs on our job board. Looking to hire? Tap into our network of talented AllFacebook pros and post a risk-free job listing. For real-time openings and employment news, follow @MBJobPost. ", "link": "http://allfacebook.com/social-media-jobs-high-5-games-desales-media-group-esi-design_b126538"}'
body='{"news_vendor": "allthingsd", "features": {"version": "TF-IDF-1.1", "data": [[22, 0.07471478070485978], [30, 0.09216158296656375], [107, 0.06304488999435026], [300, 0.18234353646009205], [542, 0.09216158296656375], [650, 0.06182478702301671], [654, 0.07006266179247984], [661, 0.09025027275624188], [767, 0.10042971004055462], [1132, 0.17281575947747868], [1332, 0.10248860613638557], [1382, 0.08781511553287091], [1514, 0.09707305734337951], [1887, 0.2735153046901381], [2214, 0.14781010961996477], [2252, 0.06438775753503796], [2351, 0.08781511553287091], [2858, 0.10042971004055462], [2888, 0.08342748816910424], [2987, 0.09864621311888302], [3057, 0.09025027275624188], [3196, 0.06182478702301671], [3738, 0.06573803636976888], [3797, 0.12801585894568449], [4011, 0.09117176823004602], [4192, 0.12386242595690393], [4212, 0.08938827130837443], [4395, 0.09439282192763115], [4785, 0.19133164309849587], [4807, 0.09216158296656375], [4824, 0.08191382641953743], [5083, 0.10248860613638557], [5307, 0.27075081826872566], [5394, 0.07151361738584092], [5432, 0.06559669762503734], [5439, 0.07471478070485978], [5478, 0.06573803636976888], [5591, 0.07893343484990234], [5687, 0.08342748816910424], [5749, 0.20085942008110924], [5930, 0.19764266036248826], [6047, 0.07971933132411649], [6117, 0.07557678215272721], [6123, 0.09566582154924794], [6271, 0.06911500296704057], [6315, 0.10248860613638557], [6334, 0.20085942008110924], [6431, 0.09025027275624188], [6475, 0.09323066432587697], [6504, 0.22673034645818166], [6531, 0.4482886842291587], [6593, 0.07108272883352532], [6683, 0.06986767458148789], [6978, 0.08191382641953743], [7206, 0.07819122242685785], [7492, 0.08575621943703997], [7728, 0.10248860613638557], [7779, 0.07364569934554656], [7792, 0.08709297281568963]]}, "author": "Arik Hesseldahl", "headline": "Oracle, Google and Red Hat Engineers Ride to the Rescue of Health Care Site", "content": "<p><a 
href=\\"http://allthingsd.com/20131031/oracle-google-and-red-hat-engineers-ride-to-the-rescue-of-health-care-site/scouts_to_the_rescue/\\" rel=\\"attachment wp-att-369719\\"></a>Engineers from at least three major tech companies are said to be helping the federal government with its troubled health insurance website, HealthCare.gov.</p><a href=\\"http://allthingsd.com/20131031/oracle-google-and-red-hat-engineers-ride-to-the-rescue-of-health-care-site/scouts_to_the_rescue/\\" rel=\\"attachment wp-att-369719\\"></a><img src=\\"http://i2.wp.com/allthingsd.com/files/2013/10/scouts_to_the_rescue-363x285.jpg?resize=363%2C285\\" alt=\\"scouts_to_the_rescue\\" class=\\"alignright size-medium wp-image-369719\\" data-recalc-dims=\\"1\\"><p>According to a <a href=\\"http://www.bloomberg.com/news/2013-10-31/google-oracle-workers-enlisted-for-obamacare-tech-surge-.html\\">Bloomberg report</a>, at least three employees on leave from Google, Oracle and Red Hat are stepping in to help get the site up and running nearly a month after its disastrous launch. </p><a href=\\"http://www.bloomberg.com/news/2013-10-31/google-oracle-workers-enlisted-for-obamacare-tech-surge-.html\\">Bloomberg report</a><p>One of those named is Michael Dickerson, a site reliability engineer at Google. According to his <a href=\\"http://www.linkedin.com/pub/mikey-dickerson/40/73/b49\\">LinkedIn profile</a>, he\\u2019s a seven-year veteran of Google who also spent five months working on the Obama campaign during the 2012 election. He\\u2019s on leave from Google, according to a person familiar with the situation.</p><a href=\\"http://www.linkedin.com/pub/mikey-dickerson/40/73/b49\\">LinkedIn profile</a><p>A second engineer is Greg Gershman, whose <a href=\\"http://www.linkedin.com/in/greggershman\\">LinkedIn profile </a> lists him as the director of innovation at <a href=\\"http://www.mobomo.com/\\">Mobomo</a>, a Baltimore-based company that has built mobile apps for the U.S. Navy and NASA. 
He\\u2019s a former presidential innovation fellow who spent six months working on <a href=\\"https://my.usa.gov/\\">Project MyUSA</a>, a site intended to \\u201cre-imagine the relationship between citizens and government through technology.\\u201d</p><a href=\\"http://www.linkedin.com/in/greggershman\\">LinkedIn profile </a><a href=\\"http://www.mobomo.com/\\">Mobomo</a><a href=\\"https://my.usa.gov/\\">Project MyUSA</a><p>The story didn\\u2019t name anyone from Oracle or Red Hat working on the effort, dubbed the \\u201ctech surge,\\u201d to try to get the site running properly. Stephanie Wonderlick, a spokeswoman for Red Hat, didn\\u2019t have any comment. Google had no comment. </p><p>Larry Ellison, CEO of Oracle, one of the companies supplying the software underpinning the site, was asked about the effort at a shareholders meeting held today. \\u201cWe think it\\u2019s our responsibility as a technology provider to serve all of our customers, and the government is one of our customers,\\u201d he said. \\u201cWe are helping them in every way we can. I will refrain from editorial comments about what has happened there. I think most of us want to see our government operating effectively.\\u201d (Ellison\\u2019s <a href=\\"http://oracle.com.edgesuite.net/ivt/wc/4000/5204/18806/29569/Lobby/default.htm\\">comments are here</a> at about the 1:15 mark during the Q&amp;A portion of the meeting.) </p><a href=\\"http://oracle.com.edgesuite.net/ivt/wc/4000/5204/18806/29569/Lobby/default.htm\\">comments are here</a><p>The HealthCare.gov site has been plagued by availability problems from the start. And yesterday Kathleen Sebelius, the secretary of health and human services, <a href=\\"http://online.wsj.com/news/articles/SB10001424052702304655104579166033844245124\\">apologized for the issues</a>. What\\u2019s missing so far is a clear and precise explanation of what went wrong.  
</p><a href=\\"http://online.wsj.com/news/articles/SB10001424052702304655104579166033844245124\\">apologized for the issues</a><p>Whatever it is they\\u2019re doing, they better hurry. Those people who haven\\u2019t bought health insurance by March 31 are, under the terms of the Affordable Care Act, subject to fines. </p><div class=\\"clearfix\\"></div>", "clean_content": "Engineers from at least three major tech companies are said to be helping the federal government with its troubled health insurance website, HealthCare.gov.According to a Bloomberg report, at least three employees on leave from Google, Oracle and Red Hat are stepping in to help get the site up and running nearly a month after its disastrous launch. Bloomberg reportOne of those named is Michael Dickerson, a site reliability engineer at Google. According to his LinkedIn profile, he\\u2019s a seven-year veteran of Google who also spent five months working on the Obama campaign during the 2012 election. He\\u2019s on leave from Google, according to a person familiar with the situation.LinkedIn profileA second engineer is Greg Gershman, whose LinkedIn profile lists him as the director of innovation at Mobomo, a Baltimore-based company that has built mobile apps for the U.S. Navy and NASA. He\\u2019s a former presidential innovation fellow who spent six months working on Project MyUSA, a site intended to \\u201cre-imagine the relationship between citizens and government through technology.\\u201dLinkedIn profile MobomoProject MyUSAThe story didn\\u2019t name anyone from Oracle or Red Hat working on the effort, dubbed the \\u201ctech surge,\\u201d to try to get the site running properly. Stephanie Wonderlick, a spokeswoman for Red Hat, didn\\u2019t have any comment. Google had no comment. Larry Ellison, CEO of Oracle, one of the companies supplying the software underpinning the site, was asked about the effort at a shareholders meeting held today. 
\\u201cWe think it\\u2019s our responsibility as a technology provider to serve all of our customers, and the government is one of our customers,\\u201d he said. \\u201cWe are helping them in every way we can. I will refrain from editorial comments about what has happened there. I think most of us want to see our government operating effectively.\\u201d (Ellison\\u2019s comments are here at about the 1:15 mark during the Q&A portion of the meeting.) comments are hereThe HealthCare.gov site has been plagued by availability problems from the start. And yesterday Kathleen Sebelius, the secretary of health and human services, apologized for the issues. What\\u2019s missing so far is a clear and precise explanation of what went wrong. apologized for the issuesWhatever it is they\\u2019re doing, they better hurry. Those people who haven\\u2019t bought health insurance by March 31 are, under the terms of the Affordable Care Act, subject to fines. ", "link": "http://allthingsd.com/20131031/oracle-google-and-red-hat-engineers-ride-to-the-rescue-of-health-care-site/"}'
#body='{"news_vendor": "TechCrunch", "features": {"version": "TF-IDF-1.1", "data": [[22, 0.07471478070485978], [30, 0.09216158296656375], [107, 0.06304488999435026], [300, 0.18234353646009205], [542, 0.09216158296656375], [650, 0.06182478702301671], [654, 0.07006266179247984], [661, 0.09025027275624188], [767, 0.10042971004055462], [1132, 0.17281575947747868], [1332, 0.10248860613638557], [1382, 0.08781511553287091], [1514, 0.09707305734337951], [1887, 0.2735153046901381], [2214, 0.14781010961996477], [2252, 0.06438775753503796], [2351, 0.08781511553287091], [2858, 0.10042971004055462], [2888, 0.08342748816910424], [2987, 0.09864621311888302], [3057, 0.09025027275624188], [3196, 0.06182478702301671], [3738, 0.06573803636976888], [3797, 0.12801585894568449], [4011, 0.09117176823004602], [4192, 0.12386242595690393], [4212, 0.08938827130837443], [4395, 0.09439282192763115], [4785, 0.19133164309849587], [4807, 0.09216158296656375], [4824, 0.08191382641953743], [5083, 0.10248860613638557], [5307, 0.27075081826872566], [5394, 0.07151361738584092], [5432, 0.06559669762503734], [5439, 0.07471478070485978], [5478, 0.06573803636976888], [5591, 0.07893343484990234], [5687, 0.08342748816910424], [5749, 0.20085942008110924], [5930, 0.19764266036248826], [6047, 0.07971933132411649], [6117, 0.07557678215272721], [6123, 0.09566582154924794], [6271, 0.06911500296704057], [6315, 0.10248860613638557], [6334, 0.20085942008110924], [6431, 0.09025027275624188], [6475, 0.09323066432587697], [6504, 0.22673034645818166], [6531, 0.4482886842291587], [6593, 0.07108272883352532], [6683, 0.06986767458148789], [6978, 0.08191382641953743], [7206, 0.07819122242685785], [7492, 0.08575621943703997], [7728, 0.10248860613638557], [7779, 0.07364569934554656], [7792, 0.08709297281568963]]}, "author": "Arik Hesseldahl", "headline": "Oracle, Google and Red Hat Engineers Ride to the Rescue of Health Care Site", "content": "<p><a 
href=\\"http://allthingsd.com/20131031/oracle-google-and-red-hat-engineers-ride-to-the-rescue-of-health-care-site/scouts_to_the_rescue/\\" rel=\\"attachment wp-att-369719\\"></a>Engineers from at least three major tech companies are said to be helping the federal government with its troubled health insurance website, HealthCare.gov.</p><a href=\\"http://allthingsd.com/20131031/oracle-google-and-red-hat-engineers-ride-to-the-rescue-of-health-care-site/scouts_to_the_rescue/\\" rel=\\"attachment wp-att-369719\\"></a><img src=\\"http://i2.wp.com/allthingsd.com/files/2013/10/scouts_to_the_rescue-363x285.jpg?resize=363%2C285\\" alt=\\"scouts_to_the_rescue\\" class=\\"alignright size-medium wp-image-369719\\" data-recalc-dims=\\"1\\"><p>According to a <a href=\\"http://www.bloomberg.com/news/2013-10-31/google-oracle-workers-enlisted-for-obamacare-tech-surge-.html\\">Bloomberg report</a>, at least three employees on leave from Google, Oracle and Red Hat are stepping in to help get the site up and running nearly a month after its disastrous launch. </p><a href=\\"http://www.bloomberg.com/news/2013-10-31/google-oracle-workers-enlisted-for-obamacare-tech-surge-.html\\">Bloomberg report</a><p>One of those named is Michael Dickerson, a site reliability engineer at Google. According to his <a href=\\"http://www.linkedin.com/pub/mikey-dickerson/40/73/b49\\">LinkedIn profile</a>, he\\u2019s a seven-year veteran of Google who also spent five months working on the Obama campaign during the 2012 election. He\\u2019s on leave from Google, according to a person familiar with the situation.</p><a href=\\"http://www.linkedin.com/pub/mikey-dickerson/40/73/b49\\">LinkedIn profile</a><p>A second engineer is Greg Gershman, whose <a href=\\"http://www.linkedin.com/in/greggershman\\">LinkedIn profile </a> lists him as the director of innovation at <a href=\\"http://www.mobomo.com/\\">Mobomo</a>, a Baltimore-based company that has built mobile apps for the U.S. Navy and NASA. 
He\\u2019s a former presidential innovation fellow who spent six months working on <a href=\\"https://my.usa.gov/\\">Project MyUSA</a>, a site intended to \\u201cre-imagine the relationship between citizens and government through technology.\\u201d</p><a href=\\"http://www.linkedin.com/in/greggershman\\">LinkedIn profile </a><a href=\\"http://www.mobomo.com/\\">Mobomo</a><a href=\\"https://my.usa.gov/\\">Project MyUSA</a><p>The story didn\\u2019t name anyone from Oracle or Red Hat working on the effort, dubbed the \\u201ctech surge,\\u201d to try to get the site running properly. Stephanie Wonderlick, a spokeswoman for Red Hat, didn\\u2019t have any comment. Google had no comment. </p><p>Larry Ellison, CEO of Oracle, one of the companies supplying the software underpinning the site, was asked about the effort at a shareholders meeting held today. \\u201cWe think it\\u2019s our responsibility as a technology provider to serve all of our customers, and the government is one of our customers,\\u201d he said. \\u201cWe are helping them in every way we can. I will refrain from editorial comments about what has happened there. I think most of us want to see our government operating effectively.\\u201d (Ellison\\u2019s <a href=\\"http://oracle.com.edgesuite.net/ivt/wc/4000/5204/18806/29569/Lobby/default.htm\\">comments are here</a> at about the 1:15 mark during the Q&amp;A portion of the meeting.) </p><a href=\\"http://oracle.com.edgesuite.net/ivt/wc/4000/5204/18806/29569/Lobby/default.htm\\">comments are here</a><p>The HealthCare.gov site has been plagued by availability problems from the start. And yesterday Kathleen Sebelius, the secretary of health and human services, <a href=\\"http://online.wsj.com/news/articles/SB10001424052702304655104579166033844245124\\">apologized for the issues</a>. What\\u2019s missing so far is a clear and precise explanation of what went wrong.  
</p><a href=\\"http://online.wsj.com/news/articles/SB10001424052702304655104579166033844245124\\">apologized for the issues</a><p>Whatever it is they\\u2019re doing, they better hurry. Those people who haven\\u2019t bought health insurance by March 31 are, under the terms of the Affordable Care Act, subject to fines. </p><div class=\\"clearfix\\"></div>", "clean_content": "Engineers from at least three major tech companies are said to be helping the federal government with its troubled health insurance website, HealthCare.gov.According to a Bloomberg report, at least three employees on leave from Google, Oracle and Red Hat are stepping in to help get the site up and running nearly a month after its disastrous launch. Bloomberg reportOne of those named is Michael Dickerson, a site reliability engineer at Google. According to his LinkedIn profile, he\\u2019s a seven-year veteran of Google who also spent five months working on the Obama campaign during the 2012 election. He\\u2019s on leave from Google, according to a person familiar with the situation.LinkedIn profileA second engineer is Greg Gershman, whose LinkedIn profile lists him as the director of innovation at Mobomo, a Baltimore-based company that has built mobile apps for the U.S. Navy and NASA. He\\u2019s a former presidential innovation fellow who spent six months working on Project MyUSA, a site intended to \\u201cre-imagine the relationship between citizens and government through technology.\\u201dLinkedIn profile MobomoProject MyUSAThe story didn\\u2019t name anyone from Oracle or Red Hat working on the effort, dubbed the \\u201ctech surge,\\u201d to try to get the site running properly. Stephanie Wonderlick, a spokeswoman for Red Hat, didn\\u2019t have any comment. Google had no comment. Larry Ellison, CEO of Oracle, one of the companies supplying the software underpinning the site, was asked about the effort at a shareholders meeting held today. 
\\u201cWe think it\\u2019s our responsibility as a technology provider to serve all of our customers, and the government is one of our customers,\\u201d he said. \\u201cWe are helping them in every way we can. I will refrain from editorial comments about what has happened there. I think most of us want to see our government operating effectively.\\u201d (Ellison\\u2019s comments are here at about the 1:15 mark during the Q&A portion of the meeting.) comments are hereThe HealthCare.gov site has been plagued by availability problems from the start. And yesterday Kathleen Sebelius, the secretary of health and human services, apologized for the issues. What\\u2019s missing so far is a clear and precise explanation of what went wrong. apologized for the issuesWhatever it is they\\u2019re doing, they better hurry. Those people who haven\\u2019t bought health insurance by March 31 are, under the terms of the Affordable Care Act, subject to fines. ", "link": "http://allthingsd.com/20131031/oracle-google-and-red-hat-engineers-ride-to-the-rescue-of-health-care-site/"}'

# Parse the JSON payload held in `body` into a dict.
article_as_dict = json.loads(body)

print "INPUT: ", article_as_dict

# Ranking is currently disabled; only the lookup and save below are run.
#ranker.rank_article(article_as_dict)

# Save article
# NOTE(review): if get_vendor() returns None for an unknown vendor (confirm),
# the `.name` access below raises AttributeError before anything is saved.
article_vendor = ranker.get_vendor(article_as_dict)
print "VENDOR :", article_vendor.name
stored_article = ranker.save_article(article_vendor, article_as_dict)