Example #1
def readUrlList(filename):
    debug_flag = debug_readUrlList
    print(f"reading url list from file {filename}")

    file_lines = u.readFile(filename)

    # Dictionary of entries, keyed by URL
    entries = dict()

    url_match = '^https?://'
    p_url = re.compile(url_match)

    empty_match = r'^\s*$'
    p_empty = re.compile(empty_match)

    comment_match = r'^\s*#'
    p_comment = re.compile(comment_match)

    end_match = '^__END__$'
    p_end = re.compile(end_match)

    ######################################################################
    ## Read all lines, adding entries to dictionary:

    entry_no = 1
    line_no = 0
    entries_started = False

    entry = Entry()
    entry.url = None
    entry.name = 'entry' + str(entry_no) + '_line' + str(line_no)
    entry.fields['name'] = 'entry' + str(entry_no) + '_line' + str(line_no)
    entry.debug = DEBUG_MODE
    entry.dinfo = DEBUG_INFO

    skip_until_empty_lines = False
    for file_line in file_lines:
        line_no = line_no + 1
        debug("LINE" + str(line_no) + ": " + file_line)

        ########################################
        ## Skip comment lines:
        if p_comment.match(file_line):
            continue

        ########################################
        ## Empty lines delimit entries:
        if (p_empty.match(file_line) or p_end.match(file_line)):
            url = entry.url
            #print("END OF ENTRY")

            if skip_until_empty_lines:
                debug("IGNORING lines after error")
                continue

            # Ignore if empty-line before 1st entry:
            if (p_empty.match(file_line) and (not entries_started)):
                debug("IGNORING empty-lines before 1st entry")
                continue

            if p_end.match(file_line):
                break

            if url is None:
                continue
                #print("No url defined for entry"+str(entry_no)+" ending at line "+str(line_no))
                #exit(-1)

            if (url in entries):
                full_error = "Entry already defined for url <{}> in entry <{}> ending at line {}".format(
                    url, str(entry_no), str(line_no))
                u.sendmail(entry, [SEND_TO], full_error, [], category, period,
                           "ERROR: Duplicate url", runid)

                # skip rest of entry lines:
                skip_until_empty_lines = True
                continue
                #exit(-1)

            if (entry.get('debug')
                    and ((entry.get('debug').lower() == "true") or
                         (entry.get('debug').lower() == "enabled"))):
                entry.debug = True

            if (entry.get('dinfo')
                    and ((entry.get('dinfo').lower() == "true") or
                         (entry.get('dinfo').lower() == "enabled"))):
                entry.dinfo = True

            debug("Adding entry#" + str(entry_no))
            entries[url] = entry
            entry_no = entry_no + 1

            entry = Entry()
            entry.url = None
            entry.debug = DEBUG_MODE
            entry.dinfo = DEBUG_INFO
            entry.name = 'entry' + str(entry_no) + '_line' + str(line_no)
            entry.fields['name'] = 'entry' + str(entry_no) + '_line' + str(
                line_no)
            continue

        skip_until_empty_lines = False

        ########################################
        ## Detect title lines: (No spaces before line)
        if (file_line.find(" ") != 0):
            entry.fields['name'] = file_line
            entry.name = file_line
            entries_started = True
            continue

        file_line = file_line.lstrip()
        entries_started = True

        ########################################
        ## Detect url lines:
        if (p_url.match(file_line)):
            entry.url = file_line
            continue

        ########################################
        ## Treat other lines:
        elements = file_line.split(":")
        name = elements[0]
        value = ":".join(elements[1:])
        entry.fields[name] = value

    return entries
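
A minimal sketch of the list-file format this parser appears to expect, inferred from the matching logic above (the file name and the field names shown are hypothetical, not taken from the original source): lines beginning with # are comments, an unindented line starts a new entry and becomes its name, indented lines carry either the entry's URL or colon-separated key: value fields, a blank line closes the entry, and __END__ stops reading.

# urls.txt -- hypothetical example input; entry names and fields are illustrative only
Example status page
    https://example.com/status
    owner: ops-team
    priority: high

Second service
    https://example.org/health

__END__

Reading it would then be a single call such as entries = readUrlList("urls.txt"), returning a dictionary of Entry objects keyed by each entry's URL.
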
Example #2
def readUrlList(filename):
    debug_flag=debug_readUrlList

    file_lines = u.readFile(filename)

    # Dictionary of entries, keyed by URL
    entries=dict()

    url_match='^https?://'
    p_url = re.compile(url_match)

    empty_match=r'^\s*$'
    p_empty = re.compile(empty_match)

    comment_match=r'^\s*#'
    p_comment = re.compile(comment_match)

    end_match='^__END__$'
    p_end = re.compile(end_match)

    ######################################################################
    ## Read all lines, adding entries to dictionary:

    entry_no=1
    line_no=0
    entries_started=False

    entry = Entry()
    entry.url=None
    entry.name='entry'+str(entry_no)+'_line'+str(line_no)
    entry.fields['name']='entry'+str(entry_no)+'_line'+str(line_no)
    entry.debug=DEBUG_MODE
    entry.dinfo=DEBUG_INFO

    for file_line in file_lines:
        line_no = line_no+1
        debug("LINE"+str(line_no)+": "+file_line)

        ########################################
        ## Skip comment lines:
        if p_comment.match(file_line):
            continue

        ########################################
        ## Empty lines delimit entries:
        if (p_empty.match(file_line) or p_end.match(file_line)):
            url = entry.url
            #print "END OF ENTRY"

            # Ignore if empty-line before 1st entry:
            if (p_empty.match(file_line) and (not entries_started)):
                debug("IGNORING empty-lines before 1st entry")
                continue

            if p_end.match(file_line):
                break

            if url is None:
                continue
                #print "No url defined for entry"+str(entry_no)+" ending at line "+str(line_no)
                #exit(-1)

            if (url in entries):
                print("Entry already defined for url <"+url+"> in entry"+str(entry_no)+" ending at line "+str(line_no))
                exit(-1)

            if (entry.get('debug') and ((entry.get('debug').lower() == "true") or (entry.get('debug').lower() == "enabled"))):
                entry.debug=True

            if (entry.get('dinfo') and ((entry.get('dinfo').lower() == "true") or (entry.get('dinfo').lower() == "enabled"))):
                entry.dinfo=True

            debug("Adding entry#"+str(entry_no))
            entries[url]=entry
            entry_no = entry_no+1

            entry = Entry()
            entry.url=None
            entry.debug=DEBUG_MODE
            entry.dinfo=DEBUG_INFO
            entry.name='entry'+str(entry_no)+'_line'+str(line_no)
            entry.fields['name']='entry'+str(entry_no)+'_line'+str(line_no)
            continue

        ########################################
        ## Detect title lines: (No spaces before line)
        if (file_line.find(" ") != 0): 
            entry.fields['name']=file_line
            entry.name=file_line
            entries_started=True
            continue

        file_line=file_line.lstrip()
        entries_started=True

        ########################################
        ## Detect url lines:
        if (p_url.match(file_line)):
            entry.url=file_line
            continue

        ########################################
        ## Treat other lines:
        elements = file_line.split(":")
        name = elements[0]
        value = ":".join(elements[1:])
        entry.fields[name]=value

    return entries
Example #3
def readUrlList(filename):
    debug_flag = debug_readUrlList

    file_lines = u.readFile(filename)

    # Dictionary of entries, keyed by URL
    entries = dict()

    url_match = '^https?://'
    p_url = re.compile(url_match)

    empty_match = r'^\s*$'
    p_empty = re.compile(empty_match)

    comment_match = r'^\s*#'
    p_comment = re.compile(comment_match)

    end_match = '^__END__$'
    p_end = re.compile(end_match)

    ######################################################################
    ## Read all lines, adding entries to dictionary:

    entry_no = 1
    line_no = 0
    entries_started = False

    entry = Entry()
    entry.url = None
    entry.name = 'entry' + str(entry_no) + '_line' + str(line_no)
    entry.fields['name'] = 'entry' + str(entry_no) + '_line' + str(line_no)
    entry.debug = DEBUG_MODE
    entry.dinfo = DEBUG_INFO

    for file_line in file_lines:
        line_no = line_no + 1
        debug("LINE" + str(line_no) + ": " + file_line)

        ########################################
        ## Skip comment lines:
        if p_comment.match(file_line):
            continue

        ########################################
        ## Empty lines delimit entries:
        if (p_empty.match(file_line) or p_end.match(file_line)):
            url = entry.url
            #print "END OF ENTRY"

            # Ignore if empty-line before 1st entry:
            if (p_empty.match(file_line) and (not entries_started)):
                debug("IGNORING empty-lines before 1st entry")
                continue

            if p_end.match(file_line):
                break

            if url is None:
                continue
                #print "No url defined for entry"+str(entry_no)+" ending at line "+str(line_no)
                #exit(-1)

            if (url in entries):
                print("Entry already defined for url <" + url + "> in entry" + str(
                    entry_no) + " ending at line " + str(line_no))
                exit(-1)

            if (entry.get('debug')
                    and ((entry.get('debug').lower() == "true") or
                         (entry.get('debug').lower() == "enabled"))):
                entry.debug = True

            if (entry.get('dinfo')
                    and ((entry.get('dinfo').lower() == "true") or
                         (entry.get('dinfo').lower() == "enabled"))):
                entry.dinfo = True

            debug("Adding entry#" + str(entry_no))
            entries[url] = entry
            entry_no = entry_no + 1

            entry = Entry()
            entry.url = None
            entry.debug = DEBUG_MODE
            entry.dinfo = DEBUG_INFO
            entry.name = 'entry' + str(entry_no) + '_line' + str(line_no)
            entry.fields['name'] = 'entry' + str(entry_no) + '_line' + str(
                line_no)
            continue

        ########################################
        ## Detect title lines: (No spaces before line)
        if (file_line.find(" ") != 0):
            entry.fields['name'] = file_line
            entry.name = file_line
            entries_started = True
            continue

        file_line = file_line.lstrip()
        entries_started = True

        ########################################
        ## Detect url lines:
        if (p_url.match(file_line)):
            entry.url = file_line
            continue

        ########################################
        ## Treat other lines:
        elements = file_line.split(":")
        name = elements[0]
        value = ":".join(elements[1:])
        entry.fields[name] = value

    return entries