def test_get_content():
    print '\nfunction: %s ' % inspect.stack()[0][3]

    url = 'http://petewarden.com/2015/04/20/why-gemm-is-at-the-heart-of-deep-learning/'
    out_fname = 'data/test.html'
    min_length = 100
    raw_html = singel_query_raw_html_all_methods(url)
    print 'length: %i' % len(raw_html)
    print 'html written out to file  %s' % out_fname
    with open(out_fname, 'w') as in_fh:
        #in_fh.write(unicodedata.normalize('NFKD', raw_html).encode('ascii','ignore'))
        in_fh.write(raw_html.encode('ascii', 'ignore'))
    n.assert_greater(len(raw_html), min_length)
    print ''
        # NOTE(review): the three lines below appear to be an orphaned fragment
        # of a function whose definition is missing from this chunk — at this
        # indentation they are a syntax error at module level. `fname`,
        # `dir_name`, and `linksource` are not defined in the visible scope;
        # presumably the lost enclosing function iterated saved HTML files and
        # inserted their extracted links. TODO: recover or remove — confirm
        # against version history.
        #fname = '1.html'
        links = extract_urls(fname, dir_name)
        insert_links(links_collection, links, linksource=linksource)

if __name__ == '__main__':
    mongo_client = MongoClient()
    links_collection, articles_collection = get_mongodb_collections(
        mongo_client)
    sections = [
        'opinions-interviews.html', 'meetings.html', 'publications.html', 'news-features.html'
    ]
    for y in [2014, 2015]:
        m_max = 13
        if y == 2015:
            m_max = 7
        for m in xrange(1, m_max):
            dt_submit = datetime.datetime(y, m, 1).strftime('%Y-%m-%d')
            for section in sections:
                str_m = "%02d" % m
                url_newsletter = 'http://www.kdnuggets.com/%s/%s/%s' % (
                    str(y), str_m, section)
                print url_newsletter

                raw_html = singel_query_raw_html_all_methods(url_newsletter)
                links = extract_urls_from_text(raw_html)
                print links
                #insert_links(links_collection, links, linksource = linksource)
                insert_links(
                    links_collection, links, linksource=linksource, dt_submit=dt_submit)
                time.sleep(2)