def extract_tvseries(dom):
    '''
    Extract a list of highest ranking TV series from DOM (of IMDB page).

    Each TV series entry is a list holding the following fields, in order:
    - TV Title
    - Ranking
    - Genres (a list of strings, one per genre)
    - Actors/actresses (a list of strings, one per name)
    - Runtime (only the number, as text)

    A field whose element is missing from a row is left empty ('' or [])
    rather than raising or re-using the value of the previous row.

    Returns a list with one such entry per selected row (at most five).
    '''
    # NOTE: for this exercise it is allowed (but not required) to ignore
    # unicode characters and simply leave them out of the output.

    series = []

    # Loop over the first five matching rows of the IMDB table.
    # NOTE(review): the selector only names the "even detailed" class; if the
    # page alternates row classes (e.g. "odd detailed"), those rows are
    # presumably skipped -- confirm against the page markup.
    for row in dom.by_tag("tr.even detailed")[:5]:
        # Reset every field per row so that a missing element can never leak
        # a stale value from the previous row (or hit an unbound name on the
        # very first row).
        title = u''
        ranking = u''
        runtime_num = u''
        actors = []
        genres = []

        # Ranking lives in its own cell.
        for td in row.by_tag("td.number")[:1]:
            ranking = unicode(plaintext(td.content))

        # Everything else is nested inside the title cell.
        for td in row.by_tag("td.title")[:1]:
            # The first link in the cell is the series title.
            for a in td.by_tag("a")[:1]:
                title = unicode(plaintext(a.content))

            for span in td.by_tag("span.credit")[:1]:
                actors = [unicode(plaintext(a.content))
                          for a in span.by_tag("a")]

            for span in td.by_tag("span.genre")[:1]:
                genres = [unicode(plaintext(a.content))
                          for a in span.by_tag("a")]

            for span in td.by_tag("span.runtime")[:1]:
                # The text holds the number followed by the unit; keep only
                # the first space-separated piece.
                runtime = unicode(plaintext(span.content))
                runtime_split = split_string(runtime, ' ')
                # An empty runtime text splits to [], so guard the index.
                if runtime_split:
                    runtime_num = runtime_split[0]

        series.append([title, ranking, genres, actors, runtime_num])

    return series
def test_normal(self):
    '''
    Check split_string on non-empty input.

    The expected values treat every character of the second argument as a
    separator and contain no empty pieces.
    '''
    cases = (
        ('abacadabra', 'b', ['a', 'acada', 'ra']),
        ('aabbaa', 'b', ['aa', 'aa']),
        ('abacadabra', 'ab', ['c', 'd', 'r']),
    )
    for source, separators, expected in cases:
        self.assertEqual(split_string(source, separators), expected)
def test_empty(self):
    '''
    Check split_string when the source and/or the separators are empty.

    An empty source is expected to give an empty list; an empty separator
    string is expected to give the source back as a single piece.
    '''
    expectations = (
        (('', ''), []),
        (('', 'abc'), []),
        (('abc', ''), ['abc']),
    )
    for arguments, expected in expectations:
        self.assertEqual(split_string(*arguments), expected)
def test_dumb(self):
    '''Check that split_string rejects calls with too few arguments.'''
    # Tests for the number of parameters: zero and one argument must both
    # raise TypeError.
    self.assertRaises(TypeError, split_string)
    self.assertRaises(TypeError, split_string, '')