def example_save_index_html():
    """Demonstrate the index-HTML workflow: build two sample articles,
    fill in their missing fields interactively, write an index page,
    then repeat the process for every real CMR article."""
    # First sample: title and URL only — index text and category missing.
    first = CMR_Article()
    first.title = "ABC, Parker’s Piece, 7 July 2017"
    first.url = "http://example_url_0.com"

    # Second sample: fully populated, including index text and category.
    second = CMR_Article()
    second.title = "Lee Hull, Corner House, Cambridge, 4 June 2017"
    second.url = "http://example_url_1.com"
    second.index_text = "Lee Hull, 4th June 2017"
    second.category = CMR_Index_Categories.live

    articles = [first, second]

    # Show the articles in their initial (partially filled) state.
    print("before filling in missing details:")
    for entry in articles:
        print("----")
        entry.print_article_details()

    # Prompt the user for whatever is missing, then render the page.
    fill_in_missing_data_interactive(articles)
    save_index_html(articles, "test.html")

    # Now do the same with the full live article set.
    articles = get_all_cmr_articles()
    fill_in_missing_data_interactive(articles)
    save_index_html(articles, "test.html")
def test_sort_articles(self):
    """sort_articles() should order by category, then index text/title."""
    # Article with no index text or category yet.
    article_a = CMR_Article()
    article_a.title = "ABC, Parker’s Piece, 7 July 2017"
    article_a.url = "http://example_url_0.com"

    # Fully populated "live" article.
    article_b = CMR_Article()
    article_b.title = "Lee Hull, Corner House, Cambridge, 4 June 2017"
    article_b.url = "http://example_url_1.com"
    article_b.index_text = "Lee Hull, 4th June 2017"
    article_b.category = CMR_Index_Categories.live

    # Two "extra" articles sharing a title; index text breaks the tie.
    article_c = CMR_Article()
    article_c.title = "z"
    article_c.url = "http://example_url_0.com"
    article_c.index_text = "z"
    article_c.category = CMR_Index_Categories.extra

    article_d = CMR_Article()
    article_d.title = "z"
    article_d.url = "http://example_url_0.com"
    article_d.index_text = "w"
    article_d.category = CMR_Index_Categories.extra

    articles = [article_a, article_b, article_c, article_d]
    sort_articles(articles)

    # Expected order: (title, index_text); None means "don't check".
    expected = [
        ('z', 'w'),
        ('z', 'z'),
        ("Lee Hull, Corner House, Cambridge, 4 June 2017", None),
        ("ABC, Parker’s Piece, 7 July 2017", None),
    ]
    for position, (want_title, want_index) in enumerate(expected):
        self.assertEqual(articles[position].title, want_title)
        if want_index is not None:
            self.assertEqual(articles[position].index_text, want_index)
def example_CMR_Article():
    """Walk through the CMR_Article lifecycle: create empty, then set
    fields one at a time, printing the object's state after each step."""
    print("------- create an article with no data in it")
    demo = CMR_Article()
    demo.print_article_details()

    print("------- set a title for the article")
    demo.title = "Ed Sheeran, Wembley Stadium, July 1st 2017"
    demo.print_article_details()

    print("------- set other data for the article")
    demo.url = "http://made_up_address.html"
    demo.index_text = "Ed Sheeran"
    demo.category = CMR_Index_Categories.live
    demo.print_article_details()

    # Titles can be reassigned after the fact.
    print("------- set a title for the article")
    demo.title = "Ed Sheeran, Wembley Stadium, August 11th 2017"
    demo.print_article_details()
def test_build_article(self):
    """Setting title and url on a CMR_Article should round-trip."""
    print("test_build_article...")
    article = CMR_Article()

    wanted_title = "ABC, Parker’s Piece, 7 July 2017"
    article.title = wanted_title
    self.assertEqual(article.title, wanted_title)
    #self.assertEqual(article.title,"hello") #uncomment to see test failure

    wanted_url = (
        "https://cambridgemusicreviews.net/2017/07/09/"
        "abc-parkers-piece-cambridge-7-july-2017/"
    )
    article.url = wanted_url
    self.assertEqual(article.url, wanted_url)
def setup_articles():
    """Build three sample articles (two with fields missing), run the
    non-interactive fill-in using the test callbacks, and return them."""
    # Missing index text and category — will be filled in by the helpers.
    incomplete_a = CMR_Article()
    incomplete_a.title = "ABC, Parker’s Piece, 7 month-year"
    incomplete_a.url = "http://example_url_0.com"

    # Fully populated article; should pass through untouched.
    complete = CMR_Article()
    complete.title = "Lee Hull, Corner House, Cambridge, 4 June 2017"
    complete.url = "http://example_url_1.com"
    complete.index_text = "Lee Hull, 4th Junish 2017"
    complete.category = CMR_Index_Categories.live

    # Another incomplete article with an unparseable title.
    incomplete_b = CMR_Article()
    incomplete_b.title = "no idea, what this is, 1 month-year"
    incomplete_b.url = "http://example_url_2.com"

    articles = [incomplete_a, complete, incomplete_b]
    fill_in_missing_data(articles,
                         _get_missing_index_text_test,
                         _get_missing_category_test,
                         _confirm_test,
                         _confirm_test,
                         _confirm_test)
    return articles
def _get_all_cmr_articles_no_index(quick_test):
    """Fetch every post from the CMR WordPress REST API and return them
    as a list of CMR_Article objects (title and url only).

    quick_test -- when truthy, request only 4 posts per page to keep
    test runs fast.
    """
    articles_found = []

    # WordPress paginates its results. The site has ~8 pages today;
    # 999 pages is generous headroom for future growth.
    page_number = 1
    while page_number < 1000:
        url = ("https://public-api.wordpress.com/rest/v1.1/sites/"
               "cambridgemusicreviews.net/posts")
        # influences the formatting of the results
        url += "?context=\"display\""
        url += "&page=" + str(page_number)
        if quick_test:
            # ask for 4 results only
            url += "&number=4"

        ret = requests.get(url)
        #print("returned value is "+str(ret.status_code))
        if ret.status_code != 200:
            print("error from REST API request")
            break

        posts = ret.json()["posts"]
        # An empty page means we've walked past the last post.
        if not posts:
            break
        #print("got "+str(len(posts))+" posts");
        for post in posts:
            # build a CMR_Article from the raw post data
            entry = CMR_Article()
            entry.title = html.unescape(post["title"])
            entry.url = post["URL"]
            articles_found.append(entry)

        page_number += 1

    return articles_found
def test_fill_in_missing_data_02(self):
    """For every day of the month 1-31, the generated index text should
    use the correct ordinal suffix (1st, 2nd, 3rd, 4th, ..., 31st).

    Bug fix: the loop previously ran ``range(1, 31)`` and therefore
    never exercised day 31 ("31st"), even though gig_dates holds all
    31 ordinals.
    """
    gig_dates = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th",
                 "8th", "9th", "10th", "11th", "12th", "13th", "14th",
                 "15th", "16th", "17th", "18th", "19th", "20th",
                 "21st", "22nd", "23rd", "24th", "25th", "26th",
                 "27th", "28th", "29th", "30th", "31st"]
    # range(1, 32) covers days 1..31 inclusive — one per gig_dates entry.
    for i in range(1, 32):
        sample_article = CMR_Article()
        sample_article.title = "ABC, Parker’s Piece, "+str(i)+" month-year"
        sample_article.url = "http://example_url_0.com"
        articles = [sample_article]
        fill_in_missing_data(articles,
                             _get_missing_index_text_test,
                             _get_missing_category_test,
                             _confirm_test,
                             _confirm_test,
                             _confirm_test)
        #print(articles[0].index_text)
        self.assertEqual(articles[0].index_text,
                         "ABC, "+gig_dates[i-1]+" month-year")
def get_index_anchors(soup, tag, category):
    """Extract every anchor from the <div class=tag> section of the
    parsed index page and return them as CMR_Article objects.

    soup     -- BeautifulSoup document for the index page
    tag      -- CSS class of the div holding the links of interest
    category -- CMR_Index_Categories value stamped onto each article
    """
    results = []

    # Locate the single div carrying the requested class, then pull
    # every anchor tag out of it.
    section = soup.find("div", { "class" : tag })
    for link in section.findAll('a'):
        # The anchor's first content node is the visible link text.
        entry = CMR_Article()
        entry.index_text = str(link.contents[0])
        entry.url = str(link["href"])
        entry.category = category
        results.append(entry)

    return results