def handle_starttag(self, tag, attrs):
    '''Update parser state when an album-listing start tag is seen.

    Recognised tags and their effect:

    * ``<table class="AlbumInfo">`` sets ``self.inAlbumInfoTable``.
    * ``<td class="Title">`` sets ``self.inTitleTd``.
    * ``<td class="Tracks">`` sets ``self.inMemoTd``.
    * ``<a name="LandingPageLink">`` sets ``self.inLinkA``, starts a fresh
      ``self.tmpalbum`` copied from ``self.albumtemplate`` and, when the
      link's ``href`` carries an ``id%3D...&resnum`` fragment, stores the
      captured id in ``self.tmpalbum['id']``.

    Args:
        tag: Name of the tag being opened.
        attrs: Attribute collection of the tag, as accepted by
            ``get_attrs_value_by_name``.
    '''
    if tag == 'table':
        if get_attrs_value_by_name(attrs, 'class') == 'AlbumInfo':
            self.inAlbumInfoTable = True
    elif tag == 'td':
        # Look the class up once; both checks compare the same value.
        td_class = get_attrs_value_by_name(attrs, 'class')
        if td_class == 'Title':
            self.inTitleTd = True
        if td_class == 'Tracks':
            self.inMemoTd = True
    elif tag == 'a':
        if get_attrs_value_by_name(attrs, 'name') == 'LandingPageLink':
            self.inLinkA = True
            href = get_attrs_value_by_name(attrs, 'href')
            self.tmpalbum = self.albumtemplate.copy()
            # re.match returns None when the pattern is absent, and href
            # may be missing altogether (presumably None -- verify against
            # get_attrs_value_by_name).  Keep the template's id in that
            # case instead of aborting the whole parse with an exception.
            found = re.match(r'.*id%3D(.*?)&resnum.*', href or '', re.S)
            if found:
                self.tmpalbum['id'] = found.group(1)
def handle_starttag(self, tag, attrs):
    '''Handle a start tag while scanning a song/album listing page.

    Recognised tags and their effect:

    * ``<a>`` sets ``self.isa``.  Inside the song table, when the current
      cell class contains ``'Icon'`` and the link's ``title`` is the
      download label, the song id is extracted from the ``onclick``
      attribute, stored in ``self.tmpsong['id']`` and the song is appended
      to ``self.songlist``.
    * ``<table id="song_list">`` sets ``self.insongtable``;
      ``<table id="album_item">`` sets ``self.inalbumtable``.
    * ``<td>`` inside either table records its ``class`` in
      ``self.tdclass``; inside the song table a class containing
      ``'Title'`` starts a fresh ``self.tmpsong`` from
      ``self.songtemplate``.
    * ``<span>`` sets ``self.ispan``.

    Args:
        tag: Name of the tag being opened.
        attrs: Attribute collection of the tag, as accepted by
            ``get_attrs_value_by_name``.
    '''
    if tag == 'a':
        self.isa = 1
        # self.tdclass may hold a non-string (presumably None for a cell
        # without a class -- verify against get_attrs_value_by_name), so
        # check it is truthy before the substring test.
        in_icon_cell = (self.insongtable and self.tdclass
                        and 'Icon' in self.tdclass)
        if in_icon_cell and get_attrs_value_by_name(attrs, 'title') == u'下载':
            onclick = get_attrs_value_by_name(attrs, 'onclick')
            # re.match returns None when the id fragment is absent; skip
            # such a link rather than crash the whole parse.
            found = re.match(r'.*id%3D(.*?)\\x26.*', onclick or '', re.S)
            if found:
                self.tmpsong['id'] = found.group(1)
                self.songlist.append(self.tmpsong)
    elif tag == 'table':
        table_id = get_attrs_value_by_name(attrs, 'id')
        if table_id == 'song_list':
            self.insongtable = 1
        if table_id == 'album_item':
            self.inalbumtable = 1
    elif tag == 'td':
        if self.insongtable or self.inalbumtable:
            td_class = get_attrs_value_by_name(attrs, 'class')
            # Store the looked-up value unchanged so other handlers that
            # read self.tdclass see exactly what they saw before.
            self.tdclass = td_class
            if self.insongtable and td_class and 'Title' in td_class:
                # A title cell marks the beginning of a new song row.
                self.tmpsong = self.songtemplate.copy()
    elif tag == 'span':
        self.ispan = 1