def Add_Movie(self, movie):
    """Store *movie* as a new record under ``self.movies``.

    The record's tag is ``'mov_'`` followed by
    ``self.f_md5(movie['origin'] + movie['year'])``.  When an element with
    that tag already exists the method returns without changing anything.

    Args:
        movie: mapping providing the keys ``'name'``, ``'origin'``,
            ``'url'``, ``'genre'``, ``'director'``, ``'actor'``,
            ``'descr'``, ``'image'``, ``'year'`` and ``'category'``
            (an iterable whose items are passed one by one to
            ``self.Get_Movie_Type``).

    Returns:
        None.
    """
    movie_tag = 'mov_' + self.f_md5(movie['origin'] + movie['year'])

    # An already known movie is left untouched.
    if self.movies.find(movie_tag) is not None:
        return

    # -- create new record
    xml_movie = SubElement(self.movies, movie_tag)
    xml_movie.text = movie['name']

    # (child tag, key in *movie*) pairs, in the order the children are written.
    child_fields = (
        ("origin", 'origin'),
        ("url", 'url'),
        ("genre", 'genre'),
        ("director", 'director'),
        ("actors", 'actor'),
        ("text", 'descr'),
        ("img", 'image'),
        ("year", 'year'),
    )
    for child_tag, movie_key in child_fields:
        SubElement(xml_movie, child_tag).text = movie[movie_key]

    # Called for its effect only; the returned value is not needed here.
    # NOTE(review): presumably this registers the year in a shared index --
    # confirm against Get_Movie_Year.
    self.Get_Movie_Year(movie['year'])

    # Reference every category of the movie exactly once.
    categories = SubElement(xml_movie, "categories")
    for cat_rec in movie['category']:
        cat_hash = self.Get_Movie_Type(cat_rec)
        if categories.find(cat_hash) is None:
            SubElement(categories, cat_hash)
def _find_serial_image(info, html, ser_name, i_name):
    """Return the poster URL for one serial entry, or '-' when none is found.

    The <img> inside *info* is tried first; after that the raw page *html* is
    searched for an ``m_pic`` image whose ``alt`` text equals *ser_name*.
    """
    try:
        return info.find("img")["src"]
    except (AttributeError, KeyError, TypeError):
        # no content block, no <img> tag, or an <img> without "src"
        pass

    # The name is inserted into a regular expression, so it must be escaped:
    # unescaped brackets in a title would otherwise change the pattern.
    escaped_name = re.escape(ser_name)
    # The second variant matches pages where the alt text is closed by a
    # doubled quote.
    for closing_quote in ('"', '""'):
        search_mask = ('<p><img class="m_pic" alt="' + escaped_name +
                       closing_quote + ' align="left" src="(.+?)" /></p>')
        img_alt = re.compile(search_mask,
                             re.MULTILINE | re.DOTALL).findall(html)
        if img_alt:
            return img_alt[0]

    xbmc.log(i_name.encode('utf-8') + ' - image not found')
    return '-'


def _read_detail_fields(info):
    """Read the values that follow the <strong> captions inside *info*.

    Returns a dict with the keys 'o_name', 'year', 'country', 'genre',
    'director', 'actors' and 'text'; a caption that is missing leaves '-'.
    """
    captions = {
        'Оригинальное название:': 'o_name',
        'Год выхода на экран:': 'year',
        'Страна:': 'country',
        'Сериал относится к жанру:': 'genre',
        'Постановщик': 'director',
        'Актеры, принявшие участие в съемках:': 'actors',
        'Краткое описание:': 'text',
    }
    fields = dict.fromkeys(captions.values(), '-')
    for inf in info.findAll("strong"):
        field = captions.get(inf.text.encode('utf-8'))
        if field is None:
            continue
        value = str(inf.nextSibling)
        # the description is the only value stored without stripping
        if field != 'text':
            value = value.strip()
        fields[field] = unescape(value)
    return fields


def _link_serial_type(xml_types, categories, type_name, display_name):
    """Make sure the serial type exists in *xml_types* and in *categories*.

    The element tag is 'sc_' plus the md5 hex digest of the lower-cased
    *type_name*; *display_name* is stored only when the type is created.
    """
    type_hash = 'sc_' + f_md5(type_name.lower().encode('utf-8')).hexdigest()
    if xml_types.find(type_hash) is None:
        type_node = SubElement(xml_types, type_hash)
        SubElement(type_node, "name").text = display_name
    if categories.find(type_hash) is None:
        SubElement(categories, type_hash)


def Get_Film_Info(url, xml_serials, xml_types, xml_genres, serial_found, dp):
    """Parse one listing page and merge the serials it contains into the XML.

    Args:
        url: address of the listing page, fetched with get_HTML.
        xml_serials: parent element receiving one 'ser_<md5>' child per serial.
        xml_types: parent element of the known serial types ('sc_<md5>').
        xml_genres: parent element of the known genres ('sg_<md5>').
        serial_found: number of serials counted before this call.
        dp: progress dialog; only its iscanceled() method is used.

    Returns:
        False when the page is empty or has no serial list container,
        None when the dialog was cancelled while processing,
        otherwise *serial_found* increased by the number of entries processed.
    """
    html = get_HTML(url)
    if html == '':
        return False

    html_container = re.compile(
        '<div class="container">(.+?)<div class="navigation">',
        re.MULTILINE | re.DOTALL).findall(html)
    if not html_container:
        # Page without the expected container: report it like an empty page
        # instead of failing with IndexError.
        return False

    # -- parsing web page ------------------------------------------------------
    soup = BeautifulSoup(
        html_container[0].replace('<p>', ' ').replace('</p>', ''))

    for ser in soup.findAll("div", {"class": "entry"}):
        # check if process was cancelled
        # NOTE(review): this returns None, not the running counter -- callers
        # are assumed to check dp.iscanceled() themselves; confirm.
        if dp.iscanceled():
            return

        link = ser.find("h2").find("a")
        i_name = unescape(link.text.strip())
        i_url = link["href"]
        xbmc.log(' *** ' + i_name.encode('utf-8'))

        # -- detail info
        info = ser.find("div", {"class": "content"})
        i_image = _find_serial_image(info, html, link.text.strip(), i_name)
        fields = _read_detail_fields(info)

        # NOTE(review): missing values default to '-', never to '', so both
        # lines are appended even when the page did not provide them --
        # confirm whether '-' should be skipped as well.
        full_text = fields['text']
        if fields['o_name'] != '':
            full_text = (full_text + (u'\nОригинальное название: ') +
                         fields['o_name'])
        if fields['actors'] != '':
            full_text = full_text + (u'\nАктеры: ') + fields['actors']

        # add info to XML
        xml_serial_hash = 'ser_' + f_md5(
            (i_name + fields['year']).encode('utf-8')).hexdigest()

        # check if serial info exists
        xml_serial = xml_serials.find(xml_serial_hash)
        if xml_serial is None:
            # -- create new record
            xml_serial = SubElement(xml_serials, xml_serial_hash)
            xml_serial.text = i_name
            SubElement(xml_serial, "name").text = i_name
            SubElement(xml_serial, "url").text = i_url
            SubElement(xml_serial, "year").text = fields['year']
            SubElement(xml_serial, "genre").text = fields['genre']
            SubElement(xml_serial, "director").text = fields['director']
            SubElement(xml_serial, "text").text = full_text
            SubElement(xml_serial, "img").text = i_image
            SubElement(xml_serial, "categories")
            SubElement(xml_serial, "genres")

        # add serial category info (the generic 'Сериалы' link is ignored)
        categories = xml_serial.find("categories")
        category_found = False
        for cat in ser.find("div", {"class": "cat"}).findAll("a"):
            if cat.text.encode('utf-8') != 'Сериалы':
                cat_name = cat.text.strip()
                _link_serial_type(xml_types, categories, cat_name,
                                  unescape(cat_name).capitalize())
                category_found = True

        # add serial genre info
        genres = xml_serial.find("genres")
        is_multserial = False
        for gen in fields['genre'].split(','):
            gen_name = gen.strip()
            cur_genre_hash = 'sg_' + f_md5(
                gen_name.lower().encode('utf-8')).hexdigest()
            # check if genre exists
            if xml_genres.find(cur_genre_hash) is None:
                genre = SubElement(xml_genres, cur_genre_hash)
                SubElement(genre, "name").text = unescape(
                    gen_name).capitalize()
            if genres.find(cur_genre_hash) is None:
                SubElement(genres, cur_genre_hash)
            # check if it's multserial; the stripped token is compared so the
            # genre is also recognised after a ", " separator
            if gen_name.encode('utf-8') == 'Мультсериал' and not category_found:
                is_multserial = True

        # add multserial or russian/foreign serial type when the page gave none
        if not category_found:
            if is_multserial:
                fallback_type = u'Мультсериалы'
            elif fields['country'].encode('utf-8') == 'Россия':
                fallback_type = u'Русские сериалы'
            else:
                fallback_type = u'Зарубежные сериалы'
            _link_serial_type(xml_types, categories, fallback_type,
                              fallback_type)

        # update info in progress dialog
        serial_found = serial_found + 1

    return serial_found
import urllib2
import urllib
import re
import cookielib
import sys
import time

# load XML library
sys.path.append(r'g:\XBMC\resources\lib')
from ElementTree import Element, SubElement, ElementTree

# Build a small document with non-ASCII captions and write it out as UTF-8
# to check that the bundled ElementTree handles the encoding.
window = Element("window")

title = SubElement(window, "title", font="large")
title.text = ("Проверка АБВГД..").decode('utf-8')

text = SubElement(window, "text", wrap="word")

# Each button box holds two captioned buttons.
for box_tag, captions in (
        ("buttonbox1", (("OK"), ("Проба записи"))),
        ("buttonbox2", (("Error"), ("Але! Гараж"))),
):
    box = SubElement(text, box_tag)
    for caption in captions:
        SubElement(box, "button").text = caption.decode('utf-8')

ElementTree(window).write(r'g:\xbmc\resources\data\test.xml', encoding='utf-8')