def readUrlList(filename):
    """Parse a url-list file into a dict of Entry objects keyed by URL.

    File format (as implemented by the parsing rules below):
      * lines whose first non-space character is '#' are comments,
      * a line with NO leading space starts a new entry and is its title,
      * indented lines starting with http(s):// set the entry's URL,
      * other indented lines are "name: value" fields,
      * blank lines terminate an entry, a '__END__' line stops parsing.

    Duplicate URLs are reported via u.sendmail and the offending entry is
    dropped; parsing then continues with the next entry.

    Returns: dict mapping url (str) -> Entry.
    """
    debug_flag = debug_readUrlList  # NOTE(review): unused locally — possibly inspected by debug(); confirm before removing
    print(f"reading url list from file {filename}")  # fixed: f-string had no placeholder
    file_lines = u.readFile(filename)

    # Entries keyed by URL.
    entries = dict()

    # Pre-compiled line classifiers (raw strings avoid invalid \s escapes).
    p_url = re.compile(r'^https?://')
    p_empty = re.compile(r'^\s*$')
    p_comment = re.compile(r'^\s*#')
    p_end = re.compile(r'^__END__$')

    ######################################################################
    ## Read all lines, adding entries to dictionary:
    entry_no = 1
    line_no = 0
    entries_started = False

    def _fresh_entry():
        # Build a blank Entry whose synthetic name records the current
        # entry counter and input line number (overwritten by a title line).
        e = Entry()
        e.url = None
        e.debug = DEBUG_MODE
        e.dinfo = DEBUG_INFO
        e.name = 'entry' + str(entry_no) + '_line' + str(line_no)
        e.fields['name'] = e.name
        return e

    entry = _fresh_entry()
    skip_until_empty_lines = False

    for file_line in file_lines:
        line_no = line_no + 1
        debug("LINE" + str(line_no) + ": " + file_line)

        ########################################
        ## Skip comment lines:
        if p_comment.match(file_line):
            continue

        ########################################
        ## Empty lines (and the __END__ sentinel) delimit entries:
        if p_empty.match(file_line) or p_end.match(file_line):
            url = entry.url
            if skip_until_empty_lines:
                debug("IGNORING lines after error")
                continue
            # Ignore empty-lines before 1st entry:
            if p_empty.match(file_line) and not entries_started:
                debug("IGNORING empty-lines before 1st entry")
                continue
            if p_end.match(file_line):
                break
            if url is None:
                # Entry block had no url line: silently dropped
                # (the original diagnostic/exit is commented out upstream).
                continue
            if url in entries:
                full_error = "Entry already defined for url <{}> in entry <{}> ending at line {}".format(
                    url, str(entry_no), str(line_no))
                u.sendmail(entry, [SEND_TO], full_error, [], category, period,
                           "ERROR: Duplicate url", runid)
                # Discard the duplicate entry and skip its remaining lines.
                # Bug fix: the stale entry used to survive here, leaking its
                # url/fields into the next entry.
                skip_until_empty_lines = True
                entry = _fresh_entry()
                continue

            # Per-entry debug/dinfo switches accept "true" or "enabled",
            # case-insensitively.  Bug fix: the original compared the bound
            # method `.lower` itself, which never matched anything.
            dbg = entry.get('debug')
            if dbg and dbg.lower() in ("true", "enabled"):
                entry.debug = True
            din = entry.get('dinfo')
            if din and din.lower() in ("true", "enabled"):
                entry.dinfo = True

            debug("Adding entry#" + str(entry_no))
            entries[url] = entry
            entry_no = entry_no + 1
            entry = _fresh_entry()
            continue

        skip_until_empty_lines = False

        ########################################
        ## Detect title lines: (No spaces before line)
        if file_line.find(" ") != 0:
            entry.fields['name'] = file_line
            entry.name = file_line
            entries_started = True
            continue

        file_line = file_line.lstrip()
        entries_started = True

        ########################################
        ## Detect url lines:
        if p_url.match(file_line):
            entry.url = file_line
            continue

        ########################################
        ## Treat other lines as "name: value" fields
        ## (the value may itself contain ':').
        elements = file_line.split(":")
        name = elements[0]
        value = ":".join(elements[1:])
        entry.fields[name] = value

    return entries
def readUrlList(filename):
    """Parse a url-list file and return a dict of Entry objects keyed by URL.

    Lines starting with '#' are comments; a line with no leading space
    starts a new entry (its title); indented http(s):// lines set the
    entry URL; other indented lines are "name: value" fields; blank
    lines end an entry and a '__END__' line stops parsing.

    Exits the process on a duplicate URL.
    """
    debug_flag = debug_readUrlList  # NOTE(review): unused locally — possibly inspected by debug(); confirm before removing
    file_lines = u.readFile(filename)
    # Entries keyed by URL.
    entries = dict()
    # Pre-compiled line classifiers (raw strings avoid invalid \s escapes).
    p_url = re.compile(r'^https?://')
    p_empty = re.compile(r'^\s*$')
    p_comment = re.compile(r'^\s*#')
    p_end = re.compile(r'^__END__$')
    ######################################################################
    ## Read all lines, adding entries to dictionary:
    entry_no = 1
    line_no = 0
    entries_started = False
    entry = Entry()
    entry.url = None
    entry.name = 'entry' + str(entry_no) + '_line' + str(line_no)
    entry.fields['name'] = entry.name
    entry.debug = DEBUG_MODE
    entry.dinfo = DEBUG_INFO
    for file_line in file_lines:
        line_no = line_no + 1
        debug("LINE" + str(line_no) + ": " + file_line)
        ########################################
        ## Skip comment lines:
        if p_comment.match(file_line):
            continue
        ########################################
        ## Empty lines (or __END__) delimit entries:
        if p_empty.match(file_line) or p_end.match(file_line):
            url = entry.url
            # Ignore empty-lines before 1st entry:
            if p_empty.match(file_line) and not entries_started:
                debug("IGNORING empty-lines before 1st entry")
                continue
            if p_end.match(file_line):
                break
            if url is None:
                # Entry block had no url line: silently dropped.
                continue
            if url in entries:
                # Parenthesized single-argument print keeps the py2 output
                # identical and is also valid py3.
                print("Entry already defined for url <" + url + "> in entry" + str(entry_no) + " ending at line " + str(line_no))
                exit(-1)
            # Per-entry debug/dinfo switches accept "true"/"enabled",
            # case-insensitively.  Bug fix: the original compared the
            # bound method `.lower` instead of calling it, so these
            # switches could never activate.
            dbg = entry.get('debug')
            if dbg and dbg.lower() in ("true", "enabled"):
                entry.debug = True
            din = entry.get('dinfo')
            if din and din.lower() in ("true", "enabled"):
                entry.dinfo = True
            debug("Adding entry#" + str(entry_no))
            entries[url] = entry
            entry_no = entry_no + 1
            entry = Entry()
            entry.url = None
            entry.debug = DEBUG_MODE
            entry.dinfo = DEBUG_INFO
            entry.name = 'entry' + str(entry_no) + '_line' + str(line_no)
            entry.fields['name'] = entry.name
            continue
        ########################################
        ## Detect title lines: (No spaces before line)
        if file_line.find(" ") != 0:
            entry.fields['name'] = file_line
            entry.name = file_line
            entries_started = True
            continue
        file_line = file_line.lstrip()
        entries_started = True
        ########################################
        ## Detect url lines:
        if p_url.match(file_line):
            entry.url = file_line
            continue
        ########################################
        ## Treat other lines as "name: value" fields
        ## (the value may itself contain ':'):
        elements = file_line.split(":")
        name = elements[0]
        value = ":".join(elements[1:])
        entry.fields[name] = value
    return entries
def readUrlList(filename):
    """Read a url-list file; return a dict of Entry objects keyed by URL.

    Format handled below: '#'-prefixed lines are comments; an unindented
    line opens a new entry and is its title; indented http(s):// lines
    set the entry's URL; other indented lines are "name: value" fields;
    blank lines close an entry and '__END__' ends the file.

    Exits the process on a duplicate URL.
    """
    debug_flag = debug_readUrlList  # NOTE(review): unused locally — possibly inspected by debug(); confirm before removing
    file_lines = u.readFile(filename)
    # Entries keyed by URL.
    entries = dict()
    # Pre-compiled line classifiers (raw strings avoid invalid \s escapes).
    p_url = re.compile(r'^https?://')
    p_empty = re.compile(r'^\s*$')
    p_comment = re.compile(r'^\s*#')
    p_end = re.compile(r'^__END__$')
    ######################################################################
    ## Read all lines, adding entries to dictionary:
    entry_no = 1
    line_no = 0
    entries_started = False
    entry = Entry()
    entry.url = None
    entry.name = 'entry' + str(entry_no) + '_line' + str(line_no)
    entry.fields['name'] = entry.name
    entry.debug = DEBUG_MODE
    entry.dinfo = DEBUG_INFO
    for file_line in file_lines:
        line_no = line_no + 1
        debug("LINE" + str(line_no) + ": " + file_line)
        ########################################
        ## Skip comment lines:
        if p_comment.match(file_line):
            continue
        ########################################
        ## Empty lines (or __END__) delimit entries:
        if p_empty.match(file_line) or p_end.match(file_line):
            url = entry.url
            # Ignore empty-lines before 1st entry:
            if p_empty.match(file_line) and not entries_started:
                debug("IGNORING empty-lines before 1st entry")
                continue
            if p_end.match(file_line):
                break
            if url is None:
                # Entry block had no url line: silently dropped.
                continue
            if url in entries:
                # Parenthesized single-argument print keeps the py2 output
                # identical and is also valid py3.
                print("Entry already defined for url <" + url + "> in entry" + str(entry_no) + " ending at line " + str(line_no))
                exit(-1)
            # Per-entry debug/dinfo switches accept "true"/"enabled",
            # case-insensitively.  Bug fix: the original compared the
            # bound method `.lower` instead of calling it, so these
            # switches could never activate.
            dbg = entry.get('debug')
            if dbg and dbg.lower() in ("true", "enabled"):
                entry.debug = True
            din = entry.get('dinfo')
            if din and din.lower() in ("true", "enabled"):
                entry.dinfo = True
            debug("Adding entry#" + str(entry_no))
            entries[url] = entry
            entry_no = entry_no + 1
            entry = Entry()
            entry.url = None
            entry.debug = DEBUG_MODE
            entry.dinfo = DEBUG_INFO
            entry.name = 'entry' + str(entry_no) + '_line' + str(line_no)
            entry.fields['name'] = entry.name
            continue
        ########################################
        ## Detect title lines: (No spaces before line)
        if file_line.find(" ") != 0:
            entry.fields['name'] = file_line
            entry.name = file_line
            entries_started = True
            continue
        file_line = file_line.lstrip()
        entries_started = True
        ########################################
        ## Detect url lines:
        if p_url.match(file_line):
            entry.url = file_line
            continue
        ########################################
        ## Treat other lines as "name: value" fields
        ## (the value may itself contain ':'):
        elements = file_line.split(":")
        name = elements[0]
        value = ":".join(elements[1:])
        entry.fields[name] = value
    return entries