def get_hexdigest(algorithm, salt, raw_password):
    """Return the hex digest of salt + raw_password under the named algorithm.

    Supported algorithms are 'md5' and 'sha1'; any other value raises
    ValueError.
    """
    raw_password = smart_str(raw_password)
    salt = smart_str(salt)
    if algorithm == 'md5':
        return hashlib.md5(salt + raw_password).hexdigest()
    if algorithm == 'sha1':
        return hashlib.sha1(salt + raw_password).hexdigest()
    raise ValueError('Got unknown password algorithm type in password')
def get_hexdigest(salt, raw_password):
    """
    Return the SHA1 hexdigest of the given plaintext password and salt.

    Note: only SHA1 is used here — there is no algorithm parameter.  The
    previous docstring incorrectly claimed a selectable algorithm
    ('md5', 'sha1' or 'crypt'); the code has always hashed with SHA1 only.
    """
    raw_password, salt = smart_str(raw_password), smart_str(salt)
    return hashlib.sha1(salt + raw_password).hexdigest()
def get_author_list(str_url, site_category):
    """Scrape the CSDN experts page at str_url into author records.

    Each record is a tuple:
    (site, blog_url, user_name, rss_url, scraped_at).
    NOTE: site_category is currently unused (kept for interface
    compatibility with callers).
    """
    soup = parser(str_url)
    experts = soup.find_all(SoupStrainer(id="experts"))
    entries = experts[0].select("dd")
    site = 'blog.csdn.net'
    records = []
    for entry in entries:
        link = entry.select("a")[0]
        blog_url = smart_str(link['href'].strip())
        # the username is everything after the 'net/' part of the blog URL
        user_name = blog_url.split('net/')[1]
        rss_url = blog_url + '/rss/list'
        records.append((site, blog_url, user_name, rss_url,
                        datehelper.now_datetime()))
    return records
def encodeValue(value):
    """Best-effort conversion of value to unicode.

    Falls back to a byte string on UnicodeEncodeError, and to the original
    object if that also fails.  Never raises.
    """
    string_org = value
    try:
        value = smart_unicode(value)
    except UnicodeEncodeError:
        value = smart_str(value)
    except Exception:
        # Was a bare `except:`, which would also swallow SystemExit and
        # KeyboardInterrupt; `Exception` keeps the deliberate best-effort
        # fallback without hiding interpreter-exit signals.
        value = string_org
    return value
def xml_to_dict(raw_str):
    """Parse an XML string whose root tag is <xml> into a flat dict.

    Returns {child_tag: unicode(child_text), ...} for the root's direct
    children, or None when the root element is not <xml>.
    """
    raw_str = smart_str(raw_str)
    raw_str = str(raw_str)  # ensure a native str for ElementTree
    msg = {}
    root_elem = ET.fromstring(raw_str)
    if root_elem.tag == 'xml':
        for child in root_elem:
            # The original assigned smart_unicode(child.text) and then
            # immediately overwrote it with unicode(child.text); the dead
            # first store has been removed — effective behavior unchanged.
            msg[child.tag] = unicode(child.text)
        return msg
    else:
        return None
def main(str_url): parserLinks = ParserLinks() web = urllib.urlopen(str_url) for context in web.readlines(): _str = smart_str(context).decode('utf-8') try: parserLinks.feed(_str) except HTMLParser.HTMLParseError: print traceback.format_exc() pass web.close() image_list= parserLinks.get_file_list() down_jpg_mutithread(image_list)
def feed_favorite(parser): article_list = parser.find_all('article') for my_article in article_list: state = [] all_content = '' for my_tag in my_article.h3.contents: factor = my_tag.string if factor != None: factor = factor.strip(u'\r\n') factor = factor.strip(u'\n') factor = factor.replace(u'\n','') factor = factor.strip() state.append(smart_str(factor)) content = my_article.select('.content') duration = my_article.select('.duration') if content: block_quote = content[0].select('blockquote') if block_quote: content_detail = smart_str(block_quote[0].contents[0].string) state.append(content_detail) if duration: state.append(smart_str(duration[0].string)) print ''.join(state)
def countSale(str_url):
    """Return the total sales amount (price * quantity) over all .item nodes."""
    soup = parser(str_url)
    total = 0.0  # running sales total
    for item in soup.select('.item'):
        qty = 0  # units sold for this item
        price_node = item.select('strong')[0]
        ems = item.select('em')
        if ems:
            qty = int(ems[0].contents[0])
        raw_price = smart_str(price_node.contents[0])
        # keep only digits and the decimal point from the price text
        digits = filter(lambda ch: ch in r'0123456789.', raw_price)
        total += round(float(digits) * qty, 2)
    return total
def countSale(str_url):
    """Total sales for the page: sum of unit price times quantity per .item."""
    soup = parser(str_url)
    items = soup.select('.item')
    sale = 0.0
    for entry in items:
        strong = entry.select('strong')[0]
        em_tags = entry.select('em')
        # missing <em> means zero units sold
        quantity = int(em_tags[0].contents[0]) if em_tags else 0
        price_text = smart_str(strong.contents[0])
        numeric = filter(lambda ch: ch in r'0123456789.', price_text)
        sale += round(float(numeric) * quantity, 2)
    return sale
def get_ssq_list(str_url):
    """Scrape a page of double-color-ball (ssq) lottery draws into tuples.

    Each tuple:
    (draw_date, issue_no, red1..red6, blue1, sales, first_prize_count,
     second_prize_count, first_prize_distribution).
    """
    cur_page_data_list = []
    soup = parser(str_url)
    plist = SoupStrainer("table")
    content = soup.find_all(plist)
    content_table = content[0]  # results live in the first <table>
    trs = content_table.find_all('tr')
    for i,tr in enumerate(trs):
        ssq_info_list = []
        if i >1:  # skip the two header rows
            tds = tr.find_all('td')
            if (len(tds)>5):  # guard against short / malformed rows
                kjrq = tds[0].text  # draw date
                jh = tds[1].text  # issue number
                qiu = tds[2]  # cell holding the seven <em> ball numbers
                ems = qiu.select('em')
                red1 = ems[0].text
                red2 = ems[1].text
                red3 = ems[2].text
                red4 = ems[3].text
                red5 = ems[4].text
                red6 = ems[5].text
                blue1 = ems[6].text  # the single blue ball
                sales = tds[3].contents[0].string  # total sales amount
                first = tds[4].contents[0].string  # number of first prizes
                second = tds[5].contents[0].string  # number of second prizes
                # strip the "(..)" decoration around the regional distribution
                distribution = smart_str(tds[4].contents[1].string).replace("(","").replace("..","").replace(")","").strip()  # first-prize distribution
                ssq_info_list.append(kjrq)
                ssq_info_list.append(jh)
                ssq_info_list.append(red1)
                ssq_info_list.append(red2)
                ssq_info_list.append(red3)
                ssq_info_list.append(red4)
                ssq_info_list.append(red5)
                ssq_info_list.append(red6)
                ssq_info_list.append(blue1)
                ssq_info_list.append(sales)
                ssq_info_list.append(first)
                ssq_info_list.append(second)
                ssq_info_list.append(distribution)
                cur_page_data_list.append(tuple(ssq_info_list))
                #print kjrq,jh,red1,red2,red3,red4,red5,red6,blue1,sales,first,second,distribution
    return cur_page_data_list
def get_goods_data_list(str_url):
    """Exploratory scraper for an item detail page.

    Extracts the goods name and the price / sold-out / evaluation nodes
    and prints the SKU and evaluation blocks.  NOTE(review): appears
    unfinished — it returns nothing and several extractions are
    commented out.
    """
    soup = parser(str_url)
    goods_name = smart_str(soup.select('.tb-detail-hd')[0].h3.contents[0].string)
    #print goods_name
    property = soup.select('.tb-property')  # NOTE(review): shadows the builtin `property`
    #attributes = soup.select('.attributes')
    #tb-meta
    goods_price = property[0].select('.tb-detail-price')
    j_str_price = goods_price[0].select('strong')[0].text  # listed price text
    #j_promo_price = goods_price[1].select('div')[0]
    #print goods_price
    #print j_str_price
    #print j_promo_price
    sold_out = property[0].select('.tb-sold-out.tb-clearfix')
    evaluate = property[0].select('.tb-evaluate.tb-clearfix')
    print property[0].select('.tb-key.tb-key-sku')
    print evaluate
def get_book_list(str_url):
    """Scrape a JD book best-seller page into a list of tuples.

    Each tuple:
    (rank, name, image_url, book_url, author, translator, publisher,
     list_price, jd_price).
    """
    soup = parser(str_url)
    plist = SoupStrainer(id="plist")
    content = soup.find_all(plist)
    items = soup.select('.item')
    cur_page_data_list = []
    for item in items:
        book_info_list = []
        index = smart_str(item.select('.index')[0].contents[0].string)  # rank on the page
        p_name = item.select('.p-name')[0].contents[0]
        book_url = smart_str(p_name['href'])  # link to the book page
        book_name = smart_str(p_name.text)  # book title
        book_info = item.select('.p-info')
        book_publisher_auther = book_info[0]
        links = book_publisher_auther.select('a')
        book_p_a_len = len(list(links))
        book_auther = ''  # author
        book_trans_auther = ''  # translator
        book_publisher = ''  # publisher
        if book_p_a_len == 2:
            # BUGFIX: author previously read links[1] — the same node as the
            # publisher (copy-paste error).  With two links the first is the
            # author and the second the publisher, matching the three-link
            # branch below.
            book_auther = smart_str(links[0].text)
            book_publisher = smart_str(links[1].text)
        elif book_p_a_len == 1:
            book_auther = ''
            book_publisher = smart_str(links[0].text)
        elif book_p_a_len == 3:
            book_auther = smart_str(links[0].text)
            book_trans_auther = smart_str(links[1].text)
            book_publisher = smart_str(links[2].text)
        book_img = smart_str(item.select('.p-img.bookimg')[0].img['src'])
        book_prices = book_info[1]
        del_price = (smart_str(book_prices.select('del')[0].text)).replace(
            '¥', '')  # list price
        jd_price = (smart_str(book_prices.select('span')[0].text)).replace(
            '¥', '')  # JD price
        book_info_list.append(index)
        book_info_list.append(book_name)
        book_info_list.append(book_img)
        book_info_list.append(book_url)
        book_info_list.append(book_auther)
        book_info_list.append(book_trans_auther)
        book_info_list.append(book_publisher)
        book_info_list.append(del_price)
        book_info_list.append(jd_price)
        cur_page_data_list.append(tuple(book_info_list))
    return cur_page_data_list
def get_book_list(str_url):
    """Scrape a JD book best-seller page into a list of tuples.

    Each tuple:
    (rank, name, image_url, book_url, author, translator, publisher,
     list_price, jd_price).
    """
    soup = parser(str_url)
    plist = SoupStrainer(id="plist")
    content = soup.find_all(plist)
    items = soup.select('.item')
    cur_page_data_list = []
    for item in items:
        index = smart_str(item.select('.index')[0].contents[0].string)  # rank on the page
        p_name = item.select('.p-name')[0].contents[0]
        book_url = smart_str(p_name['href'])  # link to the book page
        book_name = smart_str(p_name.text)  # book title
        book_info = item.select('.p-info')
        book_publisher_auther = book_info[0]
        anchors = book_publisher_auther.select('a')
        book_p_a_len = len(list(anchors))
        book_auther = ''  # author
        book_trans_auther = ''  # translator
        book_publisher = ''  # publisher
        if book_p_a_len == 2:
            # BUGFIX: author previously read anchors[1] — the same node as
            # the publisher (copy-paste error).  The first link is the
            # author, the second the publisher, consistent with the
            # three-link branch below.
            book_auther = smart_str(anchors[0].text)
            book_publisher = smart_str(anchors[1].text)
        elif book_p_a_len == 1:
            book_auther = ''
            book_publisher = smart_str(anchors[0].text)
        elif book_p_a_len == 3:
            book_auther = smart_str(anchors[0].text)
            book_trans_auther = smart_str(anchors[1].text)
            book_publisher = smart_str(anchors[2].text)
        book_img = smart_str(item.select('.p-img.bookimg')[0].img['src'])
        book_prices = book_info[1]
        del_price = (smart_str(book_prices.select('del')[0].text)).replace('¥','')  # list price
        jd_price = (smart_str(book_prices.select('span')[0].text)).replace('¥','')  # JD price
        book_info_list = [index, book_name, book_img, book_url,
                          book_auther, book_trans_auther, book_publisher,
                          del_price, jd_price]
        cur_page_data_list.append(tuple(book_info_list))
    return cur_page_data_list