def handle(self, *args, **options):
    """Scrape the kana copybook pages and store every letter found.

    For each (letter type, URL) pair the page is downloaded, decoded as
    cp1251 and parsed with BeautifulSoup.  Every element carrying
    ``lang="JA"`` is treated as one letter: its text (or the text of its
    nested ``<a>`` tag, when there is one) is passed through
    ``decode_unicode_references`` and saved as an ``AlphabetLetters`` row
    together with the third child of the element's parent, which is used
    as the pronunciation.

    ``args`` and ``options`` are accepted but not used.
    """
    # Local import keeps this block self-contained; the urllib2 response
    # object is not a context manager on its own in Python 2.
    from contextlib import closing

    # Letter-type code -> source page ('HR' is the hiragana page,
    # 'KT' the katakana page, per the URLs).
    sources = (
        ('HR', 'http://www.nihongo.aikidoka.ru/59-propisi_hiragana.html'),
        ('KT', 'http://www.nihongo.aikidoka.ru/60-propisi_katakana.html'),
    )

    for letter_type, url in sources:
        # Close the HTTP response deterministically instead of relying on
        # garbage collection.
        with closing(urllib2.urlopen(url)) as response:
            html_page = response.read().decode('cp1251')
        soup_page = BeautifulSoup(html_page)

        for letter in soup_page.findAll(attrs={'lang': 'JA'}):
            # The reading is taken from the third child of the enclosing
            # element, exactly as the page lays it out.
            reading = letter.parent.contents[2]

            # Some letters are wrapped in a link; use the link text then.
            glyph_node = letter if letter.a is None else letter.a
            hieroglyph = decode_unicode_references(glyph_node.text)

            print(u"%s - %s (%s)" % (hieroglyph, reading, letter_type))
            AlphabetLetters.objects.create(
                letter_type=letter_type,
                letter=hieroglyph,
                pronunciation=reading,
            )
def handle(self, *args, **options):
    """Scrape the kanji key table and store every key found.

    The page is downloaded, decoded as cp1251 and parsed with
    BeautifulSoup.  Every element with ``class="kanji_tab"`` is treated as
    one key: the first text node under its first child is passed through
    ``decode_unicode_references`` and used as the key, and the first text
    node under its third child is used as the number.  Each pair is
    printed and saved as a ``HieroglyphKey`` row; a summary with the
    total count is printed at the end.

    ``args`` and ``options`` are accepted but not used.
    """
    # Local import keeps this block self-contained; the urllib2 response
    # object is not a context manager on its own in Python 2.
    from contextlib import closing

    url = 'http://www.nihongo.aikidoka.ru/kanji_key.html'
    # Close the HTTP response deterministically instead of relying on
    # garbage collection.
    with closing(urllib2.urlopen(url)) as response:
        html_page = response.read().decode('cp1251')
    soup_page = BeautifulSoup(html_page)

    key_blocks = soup_page.findAll(attrs={'class': 'kanji_tab'})
    for key_block in key_blocks:
        hieroglyph = decode_unicode_references(
            key_block.contents[0].find(text=True))
        number = key_block.contents[2].find(text=True)

        # Single formatted argument so the output is "<key> <number>".
        print(u"%s %s" % (hieroglyph, number))
        HieroglyphKey.objects.create(key=hieroglyph, number=number)

    # Summary message (Russian for "Read %s keys.").
    print("Считано %s ключей." % len(key_blocks))