def extract_tvseries(dom):
    '''
    Extract up to five TV series entries from DOM (of IMDB page).

    Args:
        dom: parsed page object exposing ``by_tag(selector)``
            (presumably a pattern.web DOM -- confirm against the caller).

    Returns:
        A list with one entry per matched row, each entry being the list
        ``[title, ranking, genres, actors, runtime]`` where:
        - title and ranking are unicode strings,
        - genres and actors are lists of unicode strings,
        - runtime is the first token of the runtime text (the number
          without its unit).
        A field whose element is missing from a row is left empty
        (``u''`` or ``[]``) instead of raising or reusing the value of
        the previous row.
    '''

    series = []

    # NOTE(review): the selector names only the "even" row class -- confirm
    # that rows carrying another class (e.g. "odd") are meant to be skipped.
    for row in dom.by_tag("tr.even detailed")[:5]:

        # Reset every field for each row: the lookups below are optional,
        # so without defaults a missing element would either raise
        # UnboundLocalError (first row) or leak the previous row's value.
        title = u''
        ranking = u''
        runtime_num = u''
        actors = []
        genres = []

        # Ranking comes from the first "number" cell of the row
        for td in row.by_tag("td.number")[:1]:
            ranking = unicode(plaintext(td.content))

        # All remaining fields come from the first "title" cell of the row
        for td in row.by_tag("td.title")[:1]:
            # The first link in the cell holds the title
            for a in td.by_tag("a")[:1]:
                title = unicode(plaintext(a.content))
            # Every link inside the credit span is an actor/actress
            for span in td.by_tag("span.credit")[:1]:
                actors = [unicode(plaintext(a.content))
                          for a in span.by_tag("a")]
            # Every link inside the genre span is a genre
            for span in td.by_tag("span.genre")[:1]:
                genres = [unicode(plaintext(a.content))
                          for a in span.by_tag("a")]
            # Runtime text carries a unit; keep only the leading token
            for span in td.by_tag("span.runtime")[:1]:
                runtime = unicode(plaintext(span.content))
                runtime_split = split_string(runtime, ' ')
                # split_string may return an empty list for empty text
                if runtime_split:
                    runtime_num = runtime_split[0]

        series.append([title, ranking, genres, actors, runtime_num])

    return series
示例#2
0
 def test_normal(self):
     # Each row: (input string, second argument, expected result list)
     expectations = (
         ('abacadabra', 'b', ['a', 'acada', 'ra']),
         ('aabbaa', 'b', ['aa', 'aa']),
         ('abacadabra', 'ab', ['c', 'd', 'r']),
     )
     for text, sep, expected in expectations:
         self.assertEqual(split_string(text, sep), expected)
示例#3
0
 def test_empty(self):
     # Cases where one or both arguments are the empty string
     expectations = (
         ('', '', []),
         ('', 'abc', []),
         ('abc', '', ['abc']),
     )
     for text, sep, expected in expectations:
         self.assertEqual(split_string(text, sep), expected)
示例#4
0
 def test_dumb(self):
     # Calling with too few arguments must raise TypeError:
     # first with no arguments at all, then with only one.
     self.assertRaises(TypeError, split_string)
     self.assertRaises(TypeError, split_string, '')
示例#5
0
 def test_normal(self):
     # Pairs of (call arguments, expected result list) for non-empty inputs
     checks = [
         (('abacadabra', 'b'), ['a', 'acada', 'ra']),
         (('aabbaa', 'b'), ['aa', 'aa']),
         (('abacadabra', 'ab'), ['c', 'd', 'r']),
     ]
     for call_args, wanted in checks:
         self.assertEqual(split_string(*call_args), wanted)
示例#6
0
 def test_empty(self):
     # Pairs of (call arguments, expected result list) with empty strings
     checks = [
         (('', ''), []),
         (('', 'abc'), []),
         (('abc', ''), ['abc']),
     ]
     for call_args, wanted in checks:
         self.assertEqual(split_string(*call_args), wanted)
示例#7
0
 def test_dumb(self):
     # Tests for the number of parameters: zero and one argument
     # are both expected to be rejected with TypeError.
     for too_few in ((), ('',)):
         self.assertRaises(TypeError, split_string, *too_few)