def get_error_from_django(self, document):
    """Extract the error title and description from a Django debug page.

    Looks up the <h1> and <pre> text nodes under the div with id
    "summary" and returns them as a (title, description) tuple.
    Either element comes back as an empty string when absent.
    """
    evaluator = BSXPathEvaluator(document)
    title_nodes = evaluator.getItemList('.//div[@id="summary"]/h1/text()')
    description_nodes = evaluator.getItemList('.//div[@id="summary"]/pre/text()')
    # coerce the first matching node (if any) to a plain string
    error_title = '%s' % title_nodes[0] if title_nodes else ''
    error_description = '%s' % description_nodes[0] if description_nodes else ''
    return (error_title, error_description)
def getinfo(url, html):
    """Return (page title, absolute URL of the next-chapter link).

    The next-chapter anchor is found by matching common Chinese
    "next chapter/page" link texts; when no such anchor exists the
    second element of the returned tuple is None.
    """
    page = BSXPathEvaluator(html)
    next_xpath = u"//a[contains(text(),'下章') or contains(text(),'下一章') or contains(text(),'下一页') or contains(text(),'下页') or contains(text(),'下一节')]"
    title = '' + page.getFirstItem("//title").string
    candidates = page.getItemList(next_xpath)
    if not candidates:
        return title, None
    # resolve the (possibly relative) href against the page URL
    next_url = urlparse.urljoin(url, candidates[0]['href'])
    return title, next_url
def get_error_apache(self, document):
    """Return the <title> text of an Apache error page.

    An empty string is returned when the document has no title text.
    """
    evaluator = BSXPathEvaluator(document)
    titles = evaluator.getItemList('.//title/text()')
    return '%s' % titles[0] if titles else ''
def parse_document(document): BSXdocument = BSXPathEvaluator(document) XPath_table = './/*[@id="main"]/p[2]/table' XPath_table_body = '%s/tbody' % (XPath_table) XPath_table_header = '%s/tr[1]' % (XPath_table_body) XPath_table_lines = '%s/tr' % (XPath_table_body) rows = BSXdocument.getItemList(XPath_table_lines)[1:] for row_counter in xrange(len(rows)): row = rows[row_counter] # print row # print "======" rowDoc = BSXPathEvaluator('%s'%row) XPath_table_row = '/' XPath_table_row_cell_category = '%s/td[%d]/text()' % (XPath_table_row, 1) cell_category = rowDoc.getFirstItem(XPath_table_row_cell_category) XPath_table_row_cell_type = '%s/td[%d]/text()' % (XPath_table_row, 2) cell_type = rowDoc.getFirstItem(XPath_table_row_cell_type) XPath_table_row_cell_time = '%s/td[%d]/text()' % (XPath_table_row, 3) cell_time = rowDoc.getFirstItem(XPath_table_row_cell_time) XPath_table_row_cell_level = '%s/td[%d]/text()' % (XPath_table_row, 4) cell_level = rowDoc.getFirstItem(XPath_table_row_cell_level) XPath_table_row_cell_message = '%s/td[%d]/text()' % (XPath_table_row, 5) cell_message = rowDoc.getFirstItem(XPath_table_row_cell_message) print "======", row_counter, "======" print "Category:",cell_category print "Type:",cell_type print "Time:",cell_time print "Level:",cell_level print "Message:",cell_message return rows
def parse_catalog(catalog_url, parser):
    """Fetch a novel catalog page and extract chapter titles and URLs.

    Downloads *catalog_url*, decodes it with the parser's site encoding,
    then walks the nodes matched by parser.vol_and_chapter_xpath:
    <a> nodes yield chapter URLs/titles, non-anchor nodes are volume
    headers.  Scanning stops at the first VIP volume marker.

    Returns a dict with "chapter_url_list" and "chapter_title_list".
    """
    fetch_result = urlfetch.fetch(catalog_url, allow_truncated=True)
    html = fetch_result.content.decode(parser.site_coding, "ignore")
    # convert HTML entities while parsing
    document = BSXPathEvaluator(html, convertEntities=BeautifulSoup.HTML_ENTITIES)
    parse_result = {}
    vol_list = document.getItemList(parser.vol_and_chapter_xpath)
    chapter_url_list = []
    chapter_title_list = []
    if parser.url_remove_prefix_re:
        # precompile once -- the pattern is reused for every chapter below
        url_remove_prefix_re = re.compile(parser.url_remove_prefix_re)
    for i in vol_list:
        if i.name != "a":
            # non-anchor node: a volume header -- check whether it is the VIP volume
            if not parser.vol_vip_string or unicode(i).find(parser.vol_vip_string) == -1:
                chapter_url_list.append("")  # the DB list cannot store None
                chapter_title_list.append(get_all_contents(i))
            else:
                chapter_url_list.append("")  # the DB list cannot store None
                chapter_title_list.append(parser.vol_vip_string)
                break  # everything after the VIP marker is paid content
        else:
            url = i["href"]
            if parser.url_remove_prefix_re:
                url = url_remove_prefix_re.sub("", url)
            chapter_url_list.append(url)
            chapter_title_list.append(get_all_contents(i))
    put_into_dict(parse_result, "chapter_url_list", chapter_url_list)
    put_into_dict(parse_result, "chapter_title_list", chapter_title_list)
    return parse_result
def parse_document(document):
    """Parse a brokerage log table into per-row field tuples.

    Rows come from the table under the element with id "main" (header row
    excluded).  Each row's 5th cell (the log message) is classified by its
    ' - action=...' / ' - triggered ' marker and split into date, time,
    dataset, site, weight and threshold fields.

    Returns a list of 16-tuples:
    (date, time, category, dataset, site, reason, weight, weight_val,
     weight_0..weight_5, treshold_current, treshold_expected).
    """
    BSXdocument = BSXPathEvaluator(document)
    XPath_table = './/*[@id="main"]/p[2]/table'
    XPath_table_body = '%s/tbody' % (XPath_table)
    XPath_table_header = '%s/tr[1]' % (XPath_table_body)
    XPath_table_lines = '%s/tr' % (XPath_table_body)
    rows = BSXdocument.getItemList(XPath_table_lines)[1:]  # [1:] drops the header row
    records = []
    for row_counter in xrange(len(rows)):
        record = ()
        SHIFT=0
        row = rows[row_counter]
        # cells are re-queried from the whole document by absolute row index
        XPath_table_row = '%s/tr[%d]' % (XPath_table_body, row_counter+1)
        XPath_table_row_cell_category = '%s/td[%d]/text()' % (XPath_table_row, 1)
        cell_category = BSXdocument.getItemList(XPath_table_row_cell_category)
        if len(cell_category)>0: cell_category = cell_category[0]
        XPath_table_row_cell_type = '%s/td[%d]/text()' % (XPath_table_row, 2)
        cell_type = BSXdocument.getItemList(XPath_table_row_cell_type)
        if len(cell_type)>0: cell_type = cell_type[0]
        XPath_table_row_cell_time = '%s/td[%d]/text()' % (XPath_table_row, 3)
        cell_time = BSXdocument.getItemList(XPath_table_row_cell_time)
        if len(cell_time)>0: cell_time = cell_time[0]
        XPath_table_row_cell_level = '%s/td[%d]/text()' % (XPath_table_row, 4)
        cell_level = BSXdocument.getItemList(XPath_table_row_cell_level)
        if len(cell_level)>0: cell_level = cell_level[0]
        XPath_table_row_cell_message = '%s/td[%d]/text()' % (XPath_table_row, 5)
        cell_message = BSXdocument.getItemList(XPath_table_row_cell_message)
        if len(cell_message)>0: cell_message = cell_message[0]
        # defaults used when the branches below do not set a field
        message_category=""
        message_date=""
        message_time=""
        message_dataset=""
        message_site="no.site"
        message_reason="no.reason"
        message_weight="no.weight"
        message_weight_val=0
        message_weight_0=0
        message_weight_1=0
        message_weight_2=0
        message_weight_3=0
        message_weight_4=0
        message_weight_5=0
        message_treshold="no.treshold"
        message_treshold_current=0
        message_treshold_expected=0
        ## SKIPPED
        if is_this_category(cell_message, ' - action=SKIPPED '):
            # collapse whitespace runs to ';' and split the message into fields
            # NOTE(review): the first replace() argument was garbled in the
            # original source -- presumably a non-breaking space; confirm.
            tmp_message=re.sub(r'\s+', ';', str( cell_message.replace(' ', ' ') ) ).split(';')
            message_category="SKIPPED"
            message_date=tmp_message[0]
            message_time=tmp_message[1]
            message_dataset=tmp_message[5].split('=')[1]
            message_reason=tmp_message[4].split('=')[1]
            # known SKIPPED reasons: TOO_MANY_T2_REPLICAS, TOO_MANY_T2_SUBSCRIPTIONS
            if message_reason=="TOO_MANY_T2_REPLICAS":
                try:
                    # field 13 looks like '(current=expected)': strip ( ) > and split on '='
                    message_treshold_current=re.sub(r"[)(>]", '', str(tmp_message[13]) ).split('=')[0]
                    message_treshold_expected=re.sub(r"[)(>]", '', str(tmp_message[13]) ).split('=')[1]
                except:
                    message_treshold_current=-1
                    message_treshold_expected=-1
            elif message_reason=="TOO_MANY_T2_SUBSCRIPTIONS":
                try:
                    # same format but in field 12 for this reason
                    message_treshold_current=re.sub(r"[)(>]", '', str(tmp_message[12]) ).split('=')[0]
                    message_treshold_expected=re.sub(r"[)]", '', str(tmp_message[12]) ).split('=')[1]
                except:
                    message_treshold_current=-1
                    message_treshold_expected=-1
        ## triggered
        if is_this_category(cell_message, ' - triggered '):
            tmp_message=re.sub(r'\s+', ';', str( cell_message.replace(' ', ' ') ) ).split(';')
            message_category="triggered"
            message_date=tmp_message[0]
            message_time=tmp_message[1]
            message_dataset=tmp_message[6]
        ## UNSELECTEDT2
        if is_this_category(cell_message, ' - action=UNSELECTEDT2 '):
            tmp_message=re.sub(r'\s+', ';', str( cell_message.replace(' ', ' ') ) ).split(';')
            message_category="UNSELECTED"
            message_date=tmp_message[0]
            message_time=tmp_message[1]
            try:
                message_dataset=tmp_message[6].split('=')[1]
            except IndexError:
                # fall back to scanning every field for the 'dataset=' prefix
                dataset_field = ""
                for tmp_item in tmp_message:
                    if re.search('^dataset=', tmp_item):
                        message_dataset=tmp_item.split('=')[1]
                        break
            message_site=tmp_message[4].split('=')[1]
            message_weight=tmp_message[5].split('=')[1]
            if message_weight == WEIGHT_NA_STRING:
                message_weight=message_weight_0=message_weight_1=message_weight_2=message_weight_3=message_weight_4=message_weight_5=WEIGHT_NA_VALUE
                message_weight_val=WEIGHT_NA_VALUE
            else:
                # weight string has the form '(w0+w1/w2)*w3/w4/w5': split on the operators
                message_weight_params=re.sub(r"[()]", '', re.sub(r"[+/*]", ';', str(message_weight) ) ).split(';')
                message_weight_0=message_weight_params[0]
                message_weight_1=message_weight_params[1]
                message_weight_2=message_weight_params[2]
                message_weight_3=message_weight_params[3]
                message_weight_4=message_weight_params[4]
                message_weight_5=message_weight_params[5]
                try:
                    message_weight_val=(float(message_weight_0)+float(message_weight_1)/float(message_weight_2))*float(message_weight_3)/float(message_weight_4)/float(message_weight_5)
                except:
                    message_weight_val=-1
        ## SELECTEDT1
        if is_this_category(cell_message, ' - action=SELECTEDT1 '):
            tmp_message=re.sub(r'\s+', ';', str( cell_message.replace(' ', ' ') ) ).split(';')
            message_category="SELECTEDT1"
            message_date=tmp_message[0]
            message_time=tmp_message[1]
            try:
                message_dataset=tmp_message[6].split('=')[1]
            except IndexError:
                dataset_field = ""
                for tmp_item in tmp_message:
                    if re.search('^dataset=', tmp_item):
                        message_dataset=tmp_item.split('=')[1]
                        break
            message_site=tmp_message[4].split('=')[1]
            # T1 selections carry a fixed weight rather than a computed one
            message_weight=message_weight_0=message_weight_1=message_weight_2=message_weight_3=message_weight_4=message_weight_5=message_weight_val=WEIGHT_T1_VALUE
        ## SELECTEDT2
        if is_this_category(cell_message, ' - action=SELECTEDT2 '):
            tmp_message=re.sub(r'\s+', ';', str( cell_message.replace(' ', ' ') ) ).split(';')
            message_category="SELECTEDT2"
            message_date=tmp_message[0]
            message_time=tmp_message[1]
            try:
                message_dataset=tmp_message[6].split('=')[1]
            except IndexError:
                dataset_field = ""
                for tmp_item in tmp_message:
                    if re.search('^dataset=', tmp_item):
                        message_dataset=tmp_item.split('=')[1]
                        break
            message_site=tmp_message[4].split('=')[1]
            message_weight=tmp_message[5].split('=')[1]
            # same '(w0+w1/w2)*w3/w4/w5' decomposition as UNSELECTEDT2
            message_weight_params=re.sub(r"[()]", '', re.sub(r"[+/*]", ';', str(message_weight) ) ).split(';')
            message_weight_0=message_weight_params[0]
            message_weight_1=message_weight_params[1]
            message_weight_2=message_weight_params[2]
            message_weight_3=message_weight_params[3]
            message_weight_4=message_weight_params[4]
            message_weight_5=message_weight_params[5]
            try:
                message_weight_val=(float(message_weight_0)+float(message_weight_1)/float(message_weight_2))*float(message_weight_3)/float(message_weight_4)/float(message_weight_5)
            except:
                message_weight_val=-1
        ## SELECTEDT2_T1MOU
        if is_this_category(cell_message, ' - action=SELECTEDT2_T1MOU '):
            tmp_message=re.sub(r'\s+', ';', str( cell_message.replace(' ', ' ') ) ).split(';')
            message_category="SELECTEDT2_T1MOU"
            message_date=tmp_message[0]
            message_time=tmp_message[1]
            try:
                message_dataset=tmp_message[6].split('=')[1]
            except IndexError:
                dataset_field = ""
                for tmp_item in tmp_message:
                    if re.search('^dataset=', tmp_item):
                        message_dataset=tmp_item.split('=')[1]
                        break
            message_site=tmp_message[4].split('=')[1]
            # fixed weight for T2 selections under the T1 MoU share
            message_weight=message_weight_0=message_weight_1=message_weight_2=message_weight_3=message_weight_4=message_weight_5=message_weight_val=WEIGHT_T2_T1MOU_VALUE
        ## SELECTEDT2_T2MOU
        if is_this_category(cell_message, ' - action=SELECTEDT2_T2MOU '):
            tmp_message=re.sub(r'\s+', ';', str( cell_message.replace(' ', ' ') ) ).split(';')
            message_category="SELECTEDT2_T2MOU"
            message_date=tmp_message[0]
            message_time=tmp_message[1]
            try:
                message_dataset=tmp_message[6].split('=')[1]
            except IndexError:
                dataset_field = ""
                for tmp_item in tmp_message:
                    if re.search('^dataset=', tmp_item):
                        message_dataset=tmp_item.split('=')[1]
                        break
            message_site=tmp_message[4].split('=')[1]
            # fixed weight for T2 selections under the T2 MoU share
            message_weight=message_weight_0=message_weight_1=message_weight_2=message_weight_3=message_weight_4=message_weight_5=message_weight_val=WEIGHT_T2_T2MOU_VALUE
        record = (message_date, message_time, message_category, message_dataset, message_site, \
            message_reason, message_weight, \
            message_weight_val, message_weight_0, message_weight_1, \
            message_weight_2, message_weight_3, \
            message_weight_4, message_weight_5, \
            message_treshold_current, message_treshold_expected )
        records.append(record)
    return records
def parse_document(document):
    """Parse the PanDA brokerage log table into aggregated daily records.

    Walks the table rows under the element with id "main" (newest first),
    classifies each message cell into a category -- A (use site), B (use
    cloud), C (choose), D (skip), E (exclude) -- and aggregates new
    entries per (date, category, site, user DN).  Processing stops once
    the stored last-updated timestamp is reached.

    Returns (processed_rows, records, exist_records, in_buf_records):
    new rows to insert, ids already present in the DB, and keys already
    buffered in *records*, respectively.
    """
    BSXdocument = BSXPathEvaluator(document)
    XPath_table = './/*[@id="main"]/p[2]/table'
    XPath_table_body = '%s/tbody' % (XPath_table)
    XPath_table_header = '%s/tr[1]' % (XPath_table_body)
    XPath_table_lines = '%s/tr' % (XPath_table_body)
    rows = BSXdocument.getItemList(XPath_table_lines)[1:]  # [1:] drops the header row
    # site -> cloud mapping, read from the cached PanDA queues dump
    fjson = open('panda_queues.json','r')
    data = fjson.read()
    dic = json.loads(data)
    fjson.close()
    records = []
    ex_record = []       # (dn, jobset, site) tuples already excluded in this run
    exist_records = []
    in_buf_records = []
    maxId = db.get_max_id()
    last_time = db.get_last_updated_time()
    if last_time is None:
        # first run: seed the bookmark, then re-read it
        db.first_last_updated_time()
        last_time = db.get_last_updated_time()
    this_time = None
    skip_time = None
    set_last = None
    this_year = datetime.date.today().year  # log lines carry no year; inferred here
    if maxId is None:
        maxId = 0
    processed_rows = 0
    for row_counter in xrange(len(rows)):
        record = ()
        ex_rec = ()
        SHIFT=0
        row = rows[row_counter]
        # re-parse the single row so cells can be addressed relative to it
        rowDoc = BSXPathEvaluator('%s'%row)
        XPath_table_row = '/'
        XPath_table_row_cell_time = '%s/td[%d]/text()' % (XPath_table_row, 3)
        cell_time = rowDoc.getFirstItem(XPath_table_row_cell_time)
        XPath_table_row_cell_message = '%s/td[%d]/text()' % (XPath_table_row, 5)
        cell_message = rowDoc.getFirstItem(XPath_table_row_cell_message)
        # per-row defaults, overwritten by the classification branches below
        message_category="no.category"
        message_date = ""
        message_time = ""
        message_dn = ""
        message_jobset ="no.jobset"
        message_jobdef = "no.jobdef"
        message_action = ""
        message_site="no.site"
        message_reason="no.reason"
        message_weight="no.weight"
        message_datetime = str(cell_time).split(' ')
        message_date = message_datetime[0].strip()
        message_time = message_datetime[1].strip()
        # Skip the leading uncompleted minute
        log_year = get_log_year(this_year, message_date, message_time)
        this_time = "%s-%s %s"%(log_year, message_date, message_time)
        if skip_time is None or skip_time == this_time:
            skip_time = this_time
            continue
        # set the last updated time when skip done ( records arrive in time DESC )
        if set_last is None:
            # saved to the DB only when everything below succeeds
            set_last = this_time
        # Break when we reach the last processed time
        if (last_time is not None) and (this_time <= last_time):
            break
        processed_rows += 1
        # message layout: 'DN=... : jobset=... jobdef=... : action...'
        tmp_message = str(cell_message.replace(' ', ' ')).split(' : ')
        message_dn = tmp_message[0].split('=')[1].replace("\\\'","").strip().replace(' ','_')
        tmp_job = tmp_message[1].split(' ')
        if len(tmp_job) > 1:
            message_jobset = tmp_job[0].split('=')[1].strip()
            message_jobdef = tmp_job[1].split('=')[1].strip()
        else:
            # only one of jobset/jobdef is present
            if is_this_category(tmp_job[0],'jobset'):
                message_jobset = tmp_job[0].split('=')[1].strip()
            if is_this_category(tmp_job[0],'jobdef'):
                message_jobdef = tmp_job[0].split('=')[1].strip()
        ## skip
        if is_this_category(cell_message, ' action=skip '):
            message_category = "D"
            message_skip = tmp_message[2].split(' ')
            message_action = message_skip[0].split('=')[1].strip()
            message_site = message_skip[1].split('=')[1].strip()
            message_reason = message_skip[2].split('=')[1].strip()
            if re.search('=',message_skip[4]):
                message_weight = message_skip[4].split('=')[1].strip()
            else:
                # multi-word reason: rejoin the remaining fields
                message_reason = '_'.join(message_skip[3:]).strip('_')
        # exclude : added 2011-10-26
        elif is_this_category(cell_message, ' action=exclude '):
            message_category = "E"
            message_skip = tmp_message[2].split(' ')
            message_action = message_skip[0].split('=')[1].strip()
            message_site = message_skip[1].split('=')[1].strip()
            message_reason = message_skip[2].split('=')[1].strip()
            if re.search('=',message_skip[4]):
                message_weight = message_skip[4].split('=')[1].strip()
            else:
                message_reason = '_'.join(message_skip[3:]).strip('_')
            site_name,cloud = get_sitecloud_name(dic,message_site)
            if is_excluded(ex_record,message_dn,message_jobset,site_name):
                message_category = "D" # demote: already excluded by another jobdef of the same jobset
            else:
                ex_rec = (message_dn, message_jobset, site_name)
                ex_record.insert(0, ex_rec)
        ## choose
        elif is_this_category(cell_message, ' action=choose '):
            message_category = "C"
            message_choose = tmp_message[2].split(' ')
            message_action = message_choose[0].split('=')[1].strip()
            message_site = message_choose[1].split('=')[1].strip()
            message_reason = message_choose[2].split('=')[1].strip()
            if re.search('=',message_choose[5]):
                message_weight = message_choose[5].split('=')[1].strip()
            else:
                message_reason = '_'.join(message_choose[3:]).strip('_')
        ## action=use: added 2011-10-26
        elif is_this_category(cell_message, ' action=use '):
            message_choose = tmp_message[2].split(' ')
            message_action = message_choose[0].split('=')[1].strip()
            message_site = message_choose[1].split('=')[1].strip()
            message_reason = '_'.join(message_choose[3:]).strip('_')
            if is_this_category(message_reason, 'site'):
                message_category = "A"
            if is_this_category(message_reason, 'cloud'):
                message_category = "B"
        ## use site or cloud (legacy message form without 'action=')
        elif is_this_category(cell_message, ' use '):
            message_use = tmp_message[2].split(' ')
            message_action = message_use[0].strip()
            message_site = message_use[1].strip()
            message_reason = '_'.join(message_use[3:]).strip('_')
            if is_this_category(message_reason, 'site'):
                message_category = "A"
            if is_this_category(message_reason, 'cloud'):
                message_category = "B"
        ## other actions
        elif is_this_category(cell_message, ' action='):
            message_buf = tmp_message[2].split(' ')
            message_action = message_buf[0].split('=')[1].strip()
            print "WARNING: action=%s is not processed!"%message_action
        ## append the row to the record list it belongs to
        if message_category in ['A','B','C','E']:
            logDate = str("%s-%s"%(log_year, message_date))
            rec_idx = None
            site_name,cloud = get_sitecloud_name(dic,message_site)
            dailyLogId = db.is_exist_item(logDate, message_category, site_name, message_dn)
            if dailyLogId is None:
                rec_idx = is_in_buf(records, logDate, message_category, site_name, message_dn)
            if dailyLogId is not None:
                exist_records.append([dailyLogId])
            elif rec_idx is not None:
                record = (logDate, message_category, site_name, message_dn)
                in_buf_records.append(record)
            else:
                maxId += 1
                count = 1
                record = (maxId, logDate, message_category, site_name, \
                    cloud, message_dn, count)
                records.append(record)
        if DEBUG==1:
            print "========="
            print "DEBUG:",message_category,": ",row
            print "========="
    db.set_last_updated_time(set_last)  # persist the bookmark when all rows are done
    if (this_time is not None) and not (this_time <= last_time):
        print "Error: === NOT Reach the last updated time (%s -> %s) ==="%(this_time,last_time)
    return (processed_rows,records, exist_records, in_buf_records)
def test():
    """Run the BSXPath test suite and print a per-expression report.

    Test data files come either from a web directory listing (options.web)
    or from a local directory (options.path / DEFAULT_TESTDIR).  Each file
    supplies an HTML document, a context expression and a list of XPath
    expressions with expected results; every expression is evaluated and
    the result, timing and pass/fail judgement are printed.
    """
    global document,options,DEFAULT_TESTDIR,url_data
    def nodesStr(nodes):
        # Render an evaluation result (node list, node, string or scalar)
        # as a printable string.
        def tagstr(node):
            try:
                strs=['<'+node.name]
                i=node.get('id')
                c=node.get('class')
                if i: strs.append('id='+i)
                if c: strs.append('class='+c)
                return escapeStr(' '.join(strs)+'>')
            except:
                # not a tag -- fall back to its text form
                return escapeStr(unicode(node))
        if isinstance(nodes,list):
            return ' '.join([tagstr(node) for node in nodes])
        elif getattr(nodes,'nodeType',None) or isinstance(nodes,basestring):
            return escapeStr(unicode(nodes))
        else:
            return nodes
    if options.web:
        # collect test-file names from the remote directory listing
        fp=urllib2.urlopen(url_data)
        dirdoc=BSXPathEvaluator(fp.read())
        files=map(lambda node:node.get('href'),dirdoc.getItemList('//li/a[@href!="../"]'))
    else:
        if options.path:
            testdir=options.path
        else:
            testdir=DEFAULT_TESTDIR
        files=os.listdir(testdir)
    # optional filters: comma-separated test names / test numbers
    tnames=','.join(options.names).split(',') if options.names else None
    tnumbers=','.join(options.numbers).split(',') if options.numbers else None
    for name in files:
        if tnames:
            fname=re.sub(r'\..*$','',name)  # strip the file extension
            if not fname in tnames: continue
        target=url_data+'/'+name if options.web else os.path.join(testdir,name)
        data=parseTestData(target,options.web)
        print '[%s]\n%s\n' % (name,data.comment)
        document=BSXPathEvaluator(data.html)
        context=document.evaluate(data.contextExpr,document,None,XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,None).snapshotItem(0)
        tests=data.tests
        cnt=0
        for test in tests:
            cnt=cnt+1
            if tnumbers:
                if not str(cnt) in tnumbers: continue
            print u'No.%d' % cnt
            expr=test.expr
            print u'expr : %s' % (expr)
            (nodes,time,resultType)=document.applyXPath(context,expr)
            print u'time : %d.%06d sec' % (time.seconds,time.microseconds)
            print u'result: %s' % nodesStr(nodes)
            print u'expect: %s' % (test.data)
            judge=testNodes(nodes,test.data)
            print u'judge : %s (%s)' % (judge.status,judge.detail)
            print u''
        print u''
def test():
    """Run the BSXPath test suite and print a per-expression report.

    Test data files come either from a web directory listing (options.web)
    or from a local directory (options.path / DEFAULT_TESTDIR).  Each file
    supplies an HTML document, a context expression and a list of XPath
    expressions with expected results; every expression is evaluated and
    the result, timing and pass/fail judgement are printed.
    """
    global document, options, DEFAULT_TESTDIR, url_data

    def nodesStr(nodes):
        # Render an evaluation result (node list, node, string or scalar)
        # as a printable string.
        def tagstr(node):
            try:
                strs = ['<' + node.name]
                i = node.get('id')
                c = node.get('class')
                if i:
                    strs.append('id=' + i)
                if c:
                    strs.append('class=' + c)
                return escapeStr(' '.join(strs) + '>')
            except:
                # not a tag -- fall back to its text form
                return escapeStr(unicode(node))
        if isinstance(nodes, list):
            return ' '.join([tagstr(node) for node in nodes])
        elif getattr(nodes, 'nodeType', None) or isinstance(nodes, basestring):
            return escapeStr(unicode(nodes))
        else:
            return nodes
    if options.web:
        # collect test-file names from the remote directory listing
        fp = urllib2.urlopen(url_data)
        dirdoc = BSXPathEvaluator(fp.read())
        files = map(lambda node: node.get('href'),
                    dirdoc.getItemList('//li/a[@href!="../"]'))
    else:
        if options.path:
            testdir = options.path
        else:
            testdir = DEFAULT_TESTDIR
        files = os.listdir(testdir)
    # optional filters: comma-separated test names / test numbers
    tnames = ','.join(options.names).split(',') if options.names else None
    tnumbers = ','.join(
        options.numbers).split(',') if options.numbers else None
    for name in files:
        if tnames:
            fname = re.sub(r'\..*$', '', name)  # strip the file extension
            if not fname in tnames:
                continue
        target = url_data + '/' + name if options.web else os.path.join(
            testdir, name)
        data = parseTestData(target, options.web)
        print '[%s]\n%s\n' % (name, data.comment)
        document = BSXPathEvaluator(data.html)
        context = document.evaluate(data.contextExpr, document, None,
                                    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
                                    None).snapshotItem(0)
        tests = data.tests
        cnt = 0
        for test in tests:
            cnt = cnt + 1
            if tnumbers:
                if not str(cnt) in tnumbers:
                    continue
            print u'No.%d' % cnt
            expr = test.expr
            print u'expr : %s' % (expr)
            (nodes, time, resultType) = document.applyXPath(context, expr)
            print u'time : %d.%06d sec' % (time.seconds, time.microseconds)
            print u'result: %s' % nodesStr(nodes)
            print u'expect: %s' % (test.data)
            judge = testNodes(nodes, test.data)
            print u'judge : %s (%s)' % (judge.status, judge.detail)
            print u''
        print u''