Example #1
def get_rar_file_index(subject, last_index):

    # place *.vol###+##.par2 at the end
    rar_match = sre.search("\".+?\.vol\d+\+\d+\.par2\"", subject)
    if rar_match != None:
        last_index -= 1
        return last_index

    # place par2 header first
    rar_match = sre.search("\".+?\.par2\"", subject)
    if rar_match != None:
        return 0

    # place *.part##.rar files simply in order of the ##
    rar_match = sre.search("\".+?\.part(\d+).rar\"", subject)
    if rar_match != None:
        return int(rar_match.group(1)) + 1

    # place *.rar files before *.r##
    rar_match = sre.search("\".+?\.rar\"", subject)
    if rar_match != None:
        return 1

    # place *.r## files simply in order of the ##
    rar_match = sre.search("\".+?\.r(\d+)\"", subject)
    if rar_match != None:
        return int(rar_match.group(1)) + 2

    # place anything else at the end
    last_index -= 1
    return last_index
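
A minimal usage sketch for the index function above (the subject strings are invented, `get_rar_file_index` is assumed to be in scope, and `import re as sre` stands in for the long-gone `sre` module, which was simply the internal name of `re`):

import re as sre  # `sre` was the Py2-era internal alias of `re`

# hypothetical NZB subject lines
subjects = [
    '"archive.vol000+01.par2" (1/5)',
    '"archive.part02.rar" (1/50)',
    '"archive.par2" (1/1)',
    '"archive.part01.rar" (1/50)',
]
# pass a large last_index so the "end" slots sort after every numbered part
subjects.sort(key=lambda s: get_rar_file_index(s, 100))
print(subjects)  # par2 header first, then part01, part02, then the .vol par2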
Example #2
 def FindSuffix(self):
     """Identify any known suffixes, mark off
     as syllables and possible stresses.
     
     Syllables are stored in a class-wide compiled RE. We identify them and
     list them backwards so as to "cut off" the last first. We consult a
     global-to-module list of those that force stress on previous syllable.
     """
     self.numSuffixes = 0
     self.forceStress = 0
     resultslist = []
     for f in self.suffixes.finditer(self.wd):
         resultslist.append((f.group(), f.start()))
     if not resultslist: return
     # make sure *end* of word is in list! otherwise, 'DESP erate'
     if resultslist[-1][1] + len(resultslist[-1][0]) < len(self.wd):
         return
     resultslist.reverse()
     for res in resultslist:
         # if no vowel left before, false suffix ('singing')
         # n.b.: will choke on 'quest' etc! put in dictionary, I guess
         if not sre.search('[aeiouy]', self.wd[:res[1]]): break
         if res[0] == 'ing' and self.wd[res[1]-1] == self.wd[res[1]-2]:
             self.sylBounds.append(res[1] - 1)	# freq special case
         else: self.sylBounds.append(res[1])	# sorted later
         self.wd = self.wd[:res[1]]
         self.numSuffixes += 1
         if res[0] in STRESSSUFFIX:
             self.forceStress = 0 - len(self.sylBounds)
         if res[0] in MULTISUFFIX:
             # tricky bit! it *happens* that secondary division in all these
             # comes after its first character; NOT inevitable!
             # also does not allow for 3-syl: 'ically' (which are reliable!)
             self.sylBounds.append(res[1]+1)
             self.numSuffixes += 1
Example #3
 def DivideCV(self):
     """Divide the word among C and V groups to fill the sylBounds list.
     
     Here, and here alone, we need to catch e-with-grave-accent to count it
     as not only a vowel but syllabic ('an aged man' vs. 'aged beef'). Other
     special characters might be useful to recognize, but won't make the 
     same syllabic difference.
     """
     unicodeVowels = u"[ae\N{LATIN SMALL LETTER E WITH GRAVE}iouy]+"
     uniConsonants = u"[^ae\N{LATIN SMALL LETTER E WITH GRAVE}iouy]+"
     firstvowel = sre.search(unicodeVowels, self.wd).start()
     for v in sre.finditer(unicodeVowels, self.wd):
         lastvowel = v.end()		# replaced for each group, last sticks
         disyllabicvowels = self.sylvowels.search(v.group())
         if disyllabicvowels:
             self.sylBounds.append(v.start() + disyllabicvowels.start() + 1)
     for cc in sre.finditer(uniConsonants, self.wd):
         if cc.start() < firstvowel or cc.end() >= lastvowel: continue
         numcons = len(cc.group())
         if numcons < 3: pos = cc.end() - 1	# before single C or betw. 2
         elif numcons > 3: pos = cc.end() - 2	# before penult C
         else:		# 3 consonants, divide 1/2 or 2/1?
             cg = cc.group()		# our CCC cluster
             if cg[-3] == cg[-2] or self.splitLeftPairs.search(cg):
                 pos = cc.end() - 2			# divide 1/2
             else: pos = cc.end() - 1		# divide 2/1
         if not self.wd[pos-1].isalpha() and not self.wd[pos].isalpha():
             self.sylBounds.append(pos-1)
         else: self.sylBounds.append(pos)
Example #4
def mv_nl(fname):
    m = sre.search("(.+)\sNomad", fname)
    if m:
        print ">%s< ==> [%s]**************" % (fname, m.group(1))
        print os.rename(fname, m.group(1))
    else:
        print "NOM>%s<" % fname
Example #5
 def Preliminaries(self):
     apostrophe = self.wd.find("\'", -2)	# just at end of word ('twas)
     if apostrophe != -1:		# poss.; check if syllabic and remove 
         if self.wd[-1] != '\'' and self.wd[-1] in 'se' and self.wd[-2] in SIBILANTS:
             self.sylBounds.append(apostrophe)
         self.wd = self.wd[:apostrophe]	# cut off ' or 's until last stage
     # cut final s/d from plurals/pasts if not syllabic
     self.isPast = self.isPlural = False			# defaults used also for suffixes
     if sre.search(r"[^s]s\b", self.wd): self.isPlural = True	# terminal single s (DUMB!)
     if sre.search(r"ed\b", self.wd): self.isPast = True		# terminal 'ed'
     if self.isPast or self.isPlural: self.wd = self.wd[:-1]
     # final-syl test turns out to do better work *after* suffixes cut off
     self.FindSuffix()
     # if final syllable is l/r+e, reverse letters for processing as syllable
     if len(self.wd) > 3 and self.liquidterm.search(self.wd):
         self.wd = self.wd[:-2] + self.wd[-1] + self.wd[-2]
Example #6
def do_screens(fobj, emitter, regexp=DP_SCREEN_LS_REGEXP):
    while True:
        line = fobj.readline()
        if len(line) == 0:
            break
        m = sre.search(regexp, line)  # use the regexp argument rather than the module constant
        emitter(m, line)
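
A sketch of driving `do_screens` (with the parameter fix above); the pattern and the emitter are invented stand-ins, since DP_SCREEN_LS_REGEXP itself is not shown in this snippet:

import re as sre
from StringIO import StringIO  # Python 2; use io.StringIO on Python 3

DEMO_REGEXP = r"^(\S+)\s+(\d+)"  # hypothetical stand-in for DP_SCREEN_LS_REGEXP

def print_emitter(m, line):
    # the emitter receives the match object (possibly None) plus the raw line
    if m:
        print(m.groups())

do_screens(StringIO("pts/0 1234\npts/1 5678\n"), print_emitter, DEMO_REGEXP)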
Example #7
    def _verify_with_par2(self, par2_file):
        try:
            # if not os.path.exists(self.status_filepath):
            cmd = '"' + os.path.join(self.par2exe_directory, 'par2.exe') + '"'
            args = ' v -q "' + par2_file + '"'
            print cmd, args, self.status_filepath
            cmd = '"' + cmd + args + ' > "' + self.status_filepath + '""'
            cmd = cmd.encode('utf8')  # encode() returns a new string; the result must be kept
            os.system(cmd)
            #print par2exe.readlines()
            #from subprocess import *
            #par2exe = Popen(["c:/temp/par2.exe"], stdout=PIPE, stderr=STDOUT, stdin=PIPE)
            #par2exe.wait()
            status_file = open(self.status_filepath)
            lines = status_file.readlines()
            status_file.close()
            par2_targets = dict()
            for line in lines:
                # Target: "foo.rar" - found.
                # Target: "bar.rar" - missing.
                # Target: "baz.rar" - damaged.
                par2_target_match = sre.search("Target: \"(.+?)\" - (\S+)\.", line)
                if par2_target_match:
                    par2_targets[par2_target_match.group(1).lower()] = par2_target_match.group(2)
            return par2_targets

        except:
            traceback.print_exc()
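
The Target-line parsing is easy to exercise on its own, using the sample lines from the comments (a sketch; `import re as sre` emulates the old module name):

import re as sre

samples = [
    'Target: "foo.rar" - found.',
    'Target: "bar.rar" - missing.',
    'Target: "baz.rar" - damaged.',
]
par2_targets = {}
for line in samples:
    m = sre.search("Target: \"(.+?)\" - (\S+)\.", line)
    if m:
        par2_targets[m.group(1).lower()] = m.group(2)
print(par2_targets)  # {'foo.rar': 'found', 'bar.rar': 'missing', 'baz.rar': 'damaged'}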
Example #8
def getattrval(body, attr):
    body = sre.sub("([^\>]*)\>([^\000]*)", "\\1", body)
    if sre.search(attr + "=(\"|'|)([^\\1\ \>]*)\\1", body) != None:
        delim = sre.sub("[^\>]* " + attr + "=(\"|'|)([^\\1\ \>]*)\\1([^\>]*)",
                        "\\1", body)
        exp = "[^\>]* " + attr + "=(\\" + delim + ")([^"
        if delim == "": exp += "\ "
        else: exp += delim
        exp += "\>]*)\\" + delim + "([^\>]*)"
        return sre.sub(exp, "\\2", body)
    else:
        return ""
Example #9
    def sanitize(self):
        # sanitize the query to guard against non-selects
        if not sre.search('^\s*select[^;]*$',self['query'],sre.I):
            usage()
            print "ERROR: Queries must be select statements without ';'s"
            sys.exit(3)


        if not sre.search('^[a-zA-Z][\w_]*$', self['table'],sre.I):
            usage()
            print "ERROR: TABLE must be 1 word consisting of alphanumeric + '_'"
            sys.exit(3)

        if not sre.search('^([a-zA-Z][\w_]*|)$', self['procname'],sre.I):
            usage()
            print "ERROR: proc-name must be 1 word consisting of alphanumeric + '_'"
            sys.exit(3)

        if not sre.search(
                'order\s+by\s+([a-zA-Z][\w_]*[.]|)(?P<tn>tail_num)\s*,\s*([a-zA-Z][\w_]*[.]|)(?P<mt>measurement_time)',
                self['query'], sre.I):
            usage()
            print "ERROR: Queries must have an order by tail_num, measurement_time"
            sys.exit(3)
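
The first guard can be exercised directly (a sketch; the queries are invented):

import re as sre

ok = "SELECT * FROM flights ORDER BY tail_num, measurement_time"
bad = "SELECT 1; DROP TABLE flights"
print(bool(sre.search('^\s*select[^;]*$', ok, sre.I)))   # True
print(bool(sre.search('^\s*select[^;]*$', bad, sre.I)))  # False: the ';' breaks the [^;]* run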
Example #10
 def SpecialCodes(self):
     """Encode character-combinations so as to trick DivideCV.
     
     The combinations are contained in regexes compiled in the class's 
     __init__. Encoding (*not* to be confused with Unicode functions!) is
     done by small functions outside of (and preceding) the class.
     
     The combinations in Paul Holzer's original code have been supplemented
     and tweaked in various ways. For example, the original test for [iy]V
     is poor; 'avionics' defeats it; so we leave that to a new
     disyllabic-vowel test.
     
     The messy encoding-and-sometimes-decoding of nonsyllabic final 'e' 
     after a C seems the best that can be done, though I hope not. 
     """
     if sre.search(r"[^aeiouy]e\b", self.wd):  # nonsyllabic final e after C
         if (not self.isPlural or self.wd[-2] not in SIBILANTS) and (not self.isPast or self.wd[-2] not in "dt"):
             self.wd = self.wd[:-1] + encode(self.wd[-1])
         if not sre.search(r"[aeiouy]", self.wd):  # any vowel left??
             self.wd = self.wd[:-1] + "e"  # undo the encoding
     self.wd = self.CiVcomb.sub(handleCiV, self.wd)
     self.wd = self.CCpair.sub(handleCC, self.wd)
     self.wd = self.VyVcomb.sub(handleVyV, self.wd)
Example #11
def get_next_image_dir():
    dnbase = "./imgs"
    lst = glob.glob(dnbase + '-[0-9][0-9][0-9][0-9]')
    if (lst):
        lst.sort()
        last = lst[len(lst) - 1]
        m = sre.search(sre.compile(dnbase + "-(\d+)$"), last)
        nlast = int(m.group(1))
        fno = nlast + 1
    else:
        fno = 1
    dirname = (dnbase + "-%04d") % (fno)
    #     printf("dirname=%s\n", dirname);
    os.mkdir(dirname)
    return dirname
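
Note that `sre.search(sre.compile(...), last)` works because the module-level functions accept an already-compiled pattern and use it unchanged; compiling once, outside the call, is the more conventional spelling (a sketch):

import re as sre

dir_re = sre.compile(r"imgs-(\d+)$")  # compile once, reuse
m = dir_re.search("./imgs-0007")
print(int(m.group(1)))  # -> 7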
Example #12
    def SSLSpoofCheck(self, host):
        spoof = None
        if self.spoof_ssl_config.has_key(self.path):
            spoof = self.spoof_ssl_config[self.path]
        else:
            for i in self.spoof_ssl_config:
                if sre.search(i, self.path):
                    self.log.debug('SSLSpoofCheck %s matched %s' % (i, host))
                    spoof = self.spoof_ssl_config[i]
                    break

        if spoof == None:
            self.log.debug('SSLSpoofCheck no matches, using DEFAULT')
            spoof = self.spoof_ssl_config['DEFAULT']

        self.log.debug('SSLSpoofCheck for %s: %s' % (host, spoof))
        return spoof
Example #13
    def _ProcessRedirects(self):
        self.log.debug('Entering _ProcessRedirects')
        newurl = None
        for target in self.http_redirect_table:
            match = sre.search(target, self.url)
            if match:
                self.log.debug('Matched %s on %s' % (target, self.url))
                newurl = match.expand(self.http_redirect_table[target])
                self.log.debug('  expanded %s to %s' %
                               (self.http_redirect_table[target], newurl))
                break

        if not newurl:
            self.log.debug('No matches on %s' % self.url)
            self.spoof_url = self.url
        else:
            self.spoof_url = newurl
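
The heart of the rewrite is `match.expand`, which substitutes the matched URL's groups into the table's replacement template. A self-contained sketch with an invented one-entry table:

import re as sre

# hypothetical redirect table: pattern -> replacement template
http_redirect_table = {r"http://old\.example/(.*)": r"http://new.example/\1"}
url = "http://old.example/path?q=1"
for target in http_redirect_table:
    match = sre.search(target, url)
    if match:
        print(match.expand(http_redirect_table[target]))
        # -> http://new.example/path?q=1
        break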
Example #14
def html_format(src):
    out = []
    while src != "":
        m = sre.search(pat_token, src)
        if m is None:
            break
        out.append(src[: m.start()])
        token = src[m.start() : m.end()]
        src = src[m.end() :]
        if sre.match(pat_paragraph, token):
            out.append(P())
        elif sre.match(pat_uri, token):
            out.append(link(token, token))
        elif sre.match(pat_tabulate, token):
            qtriples, src = parse_tabulate(src)
            tabulate(out, qtriples)
        elif sre.match(pat_slink, token):
            contents = token[1:-1].split()
            if 0 == len(contents):
                pass  # XXX error message?
            else:
                # XXX security screen target and caption
                # (caption should not look like a URL itself)
                # XXX url encoding
                # XXX nofollow
                if contents[0].startswith("http:"):
                    target = contents[0]
                    if 1 == len(contents):
                        caption = contents[0]
                    else:
                        caption = " ".join(contents[1:])
                else:
                    caption = " ".join(contents)
                    target = "/page/" + caption
            out.append(link(target, caption))
        elif sre.match(pat_escape, token):
            out.append(token[1])
        else:
            raise "Bug"
    out.append(src)
    return out
Example #15
def getpage(url,dheaders=1,redir=0,realpage=0,poststring="",exceptions=0):

	# function to recurse and try getpage() again with new values
	def recurse(exceptions):

		sock.close()
		exceptions+=1

		if exceptions<=6: return getpage(url,dheaders,redir,realpage,poststring,exceptions)
		else:
			print "Too many recursions, skipping..."
			return


	global usecookies,urllogfile,debug,ignorefileext
	if not checkserver(servername(url)): return

	if url.find("#")!=-1: url=url[:url.find("#")]

	# file extensions that need to be ignored code
	fileext=sre.sub(".*(http\://[^/]*/).*","\\1",url)
	if url==fileext: fileext="None"
	else: fileext=sre.sub("^.*\/[^/]*\.([^\&\#\?\/]*)[^/]*$","\\1",url)
	if ignorefileext.count(","+fileext+",")!=0: return

	try:

		sock=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
		sock.connect((servername(url,False),80))

		workurl=pagename(url)
		theurl=url
		if redir!=1: theurl=workurl

		qrytype="GET"
		if poststring!="": qrytype="POST"
		out=(qrytype+" "+theurl+" HTTP/1.1\n"
		     "Host: "+servername(url,False)+"\n"
		     "Connection: close\n")
		if usecookies:
			global cookies
			out+="Cookie: "+cookies+"\n"
		if poststring!="":
			out+="Content-Type: application/x-www-form-urlencoded\n"
			out+="Content-Length: "+str(len(poststring))
			out+="\n\n"+poststring+"\n"
		out+="\r\n\r\n"
		sock.send(out)

		# get response type and log the page
		response=sock.recv(12)[-3:]
		fp=open("logs/"+urllogfile,"a")
		fp.write(url+": "+response+" "+str(realpage)+"\n")
		if poststring!="": fp.write(indent+"POST: "+poststring+"\n")
		fp.close()


		# at 404 response, close connection and fail
		if response=="404" or response=="500":
			sock.close()
			return

		# at 30[1237] response types, recurse new page
		if sre.search("30[1237]",response):
			while 1:
				chunk=""
				byte=sock.recv(1)
				while byte!="\r":
					chunk+=byte
					byte=sock.recv(1)
				sock.recv(1)
				if chunk.lower()[:9]=="location:":
					location=chunk.lower()[9:].strip()
					if location=="http://"+servername(url,False)+url: location="/"
					locpage=fullpath(location,url)
					sock.close()
# 					if url[len(url)-2:]=="" and locpage[len(locpage)-4:]=="": break
					redir=1
					if locpage!=url:
						redir=0
						if pagename(sre.sub("\\\\(\"|\')","\\1",locpage))==pagename(url):
							print "QUOTE REDIR"
							return
					print "OLD:",url
					print "NEW:",chunk.lower()
					print "REDIR:",locpage
					return getpage(locpage,redir=redir,realpage=realpage)
			if realpage==1:
				sock.close()
				return

		elif realpage==1:
			sock.close()
			return url

		# get headers, ignoring certain HTTP headers
		headers=""
		type=0
		while 1:
			chunk=""
			byte=sock.recv(1)
			if byte=="\r":
				sock.recv(1)
				break
			while byte!="\r":
				chunk+=byte
				byte=sock.recv(1)
			sock.recv(1)

			if chunk.lower()[:11]!="set-cookie:" and chunk.lower()[:5]!="date:" and chunk.lower()[:15]!="content-length:" and chunk.lower()[:11]!="keep-alive:" and chunk.lower()[:18]!="transfer-encoding:" and chunk.lower()[:11]!="connection:":
				headers+=chunk

#			if chunk.lower()[:15]=="content-length:":
#				type=1
#				conlen=int(chunk[16:])

			if chunk.lower()[:26]=="transfer-encoding: chunked": type=2

		# no special type specified, just get the page
		if type==0:
			body=""
			while 1:
				chunk=sock.recv(200)
				body+=chunk
				if chunk=="": break


		# set it up if it does have a type
#		else:
#			byte=sock.recv(1)
#			if byte=="\r": sock.recv(1)
#			else:
#				while 1:
#					i=-1
#					while byte!="\r":
#						i+=1
#						byte=sock.recv(1)
#					nbytes=sock.recv(3)
#					if nbytes=="\n\r\n": break

#		# content-length
#		if type==1:
#			body=""
#			for i in range(conlen):
#				chunk=sock.recv(1)
#				body+=chunk

		# transfer-encoding: chunked
		if type==2:
			body=""
			chunksize=""
			while chunksize!=0:
				byte=""
				chunk=""
				while byte!="\r":
					chunk+=byte
					byte=sock.recv(1)
				sock.recv(1)
				chunksize=int(chunk,16)
				wchunksz=chunksize
				while wchunksz>=1:
					subchunk=sock.recv(wchunksz)
					body+=subchunk
					wchunksz-=len(subchunk)
				sock.recv(2)

		# clean up and return
		sock.close()
		if dheaders!=1: headers=""

		return [headers,body,urlfix(url)]

	# catch socket errors, such as "connection reset by peer" - tries again until it gives up and goes on to the next page
	except socket.error:
		print "Socket Error, Recursing..."
		return recurse(exceptions)
Example #16
def main(argv):
    # Main program.  Takes string containing arguments a la unix command line
    p = Params()
    p.define_defaults()
    p.read_params(argv)
    p.read_config()
    p.parse_param_opts()

    p.sanitize()

    # connect
    my = MySQLdb.connect(**p['con_opts'])

    # create cursor to contain data about tail_nums to parse
    mycur = my.cursor()

    # create procedure name or get it from user
    if p['procname'] == "":
        rn = random.randint(0, 1000000)
        procname = 'P%s' % rn
    else:
        procname = p['procname']

    # string to store all cmds issued to DB
    all_cmds = ""

    # if dropping procedure
    if p['drop_proc']:
        cmd = 'DROP PROCEDURE IF EXISTS %s;\n' % procname
        all_cmds += cmd
        if not p['debug']:
            mycur.execute(cmd)

    # start working on procedure string
    ps = procedure_string

    # if add drop table
    if p['drop']:
        ps = ps.replace('<<DROP>>', 'DROP TABLE IF EXISTS <<TABLE>>;')
    else:
        ps = ps.replace('<<DROP>>', '')

    # sub table and first select
    ps = ps.replace('<<TABLE>>', p['table'])
    ps = ps.replace('<<SELECTASC>>', p['query'])

    # turn ASC to DESC
    mtch = sre.search(
        'order\s+by\s+([a-zA-Z][\w_]*[.]|)(?P<tn>tail_num)\s*,\s*([a-zA-Z][\w_]*[.]|)(?P<mt>measurement_time)',
        p['query'], sre.I)
    tn_pos = mtch.span('tn')
    mt_pos = mtch.span('mt')
    qry = p['query'][:tn_pos[0]] + 'tail_num DESC' + p['query'][
        tn_pos[1]:mt_pos[0]] + 'measurement_time DESC'

    # sub second select
    ps = ps.replace('<<SELECTDESC>>', qry)

    # finally substitute procname
    ps = ps.replace('<<PROCNAME>>', procname)

    # if not debug execute, otherwise just print
    if not p['debug']:
        mycur.execute(ps)

    # add to all cmds string
    all_cmds += ps

    # if do no all call
    if not p['no_call']:
        cmd = 'call %s;\n' % procname
        all_cmds += cmd
        if not p['debug']:
            mycur.execute(cmd)

    # if leave_proc
    if not (p['leave_proc'] or p['no_call']):
        cmd = 'DROP PROCEDURE %s;\n' % procname
        all_cmds += cmd
        if not p['debug']:
            mycur.execute(cmd)

    if p['debug']:
        print all_cmds
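
The span-splice that flips ASC to DESC can be seen in isolation; this sketch reuses the same regex on an invented query:

import re as sre

qry_re = ('order\s+by\s+([a-zA-Z][\w_]*[.]|)(?P<tn>tail_num)'
          '\s*,\s*([a-zA-Z][\w_]*[.]|)(?P<mt>measurement_time)')
q = "SELECT * FROM flights ORDER BY tail_num, measurement_time"  # hypothetical
m = sre.search(qry_re, q, sre.I)
tn_pos = m.span('tn')
mt_pos = m.span('mt')
print(q[:tn_pos[0]] + 'tail_num DESC' + q[tn_pos[1]:mt_pos[0]] + 'measurement_time DESC')
# -> SELECT * FROM flights ORDER BY tail_num DESC, measurement_time DESC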
Example #17
    def fromString(source):
        import sre
        result = {}
        insertionorder = []
        fail = False
        originalsource = source  # preserve original in case of broken header
        headervalueRE_sX = "^([^: ]+[^:]*):( ?)((.|\n)+)"  # TODO: This could be optimised

        lines = source.split("\r\n")
        I = 0
        headerLines = []
        valid = False
        for I in xrange(len(lines)):
            if lines[I] != "":
                headerLines.append(lines[I])
            else:
                # The divider cannot be the last line
                valid = not (I == len(lines) - 1)
                break

        if not valid:
            body = originalsource
            fail = True
        else:
            bodyLines = lines[I + 1:]
            body = "\r\n".join(bodyLines)
            key = None
            for line in headerLines:
                match = sre.search(headervalueRE_sX, line)
                if match:
                    (key, spaces, value, X) = match.groups()
                    if value == " " and not spaces:
                        value = ""
                    try:
                        result[key].append(value)
                    except KeyError:
                        result[key] = value
                    except AttributeError:
                        result[key] = [result[key], value]
                    insertionorder.append(key)

                else:
                    if key:
                        #                       value = line.strip() # Strictly speaking, surely we should be doing this??? (Breaks tests though if we do...)
                        value = line
                        if isinstance(result[key], list):
                            # Append to last item in the list
                            result[key][len(result[key]) - 1] += "\r\n" + value
                        else:
                            result[key] += "\r\n" + value
                    else:
                        #                    print "NOMATCH!NOMATCH!NOMATCH!NOMATCH!NOMATCH!NOMATCH!"
                        fail = True
                        break
        if not fail:
            result["__BODY__"] = body
        else:
            result["__BODY__"] = originalsource
        md = MimeDict(**result)
        md.insertionorder = insertionorder
        md.invalidSource = fail
        return md
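
The header regex at the top of `fromString` can be tried on its own (a sketch; the fourth group exists only so the `(.|\n)+` alternation can span folded lines, hence the throwaway `X`):

import re as sre

headervalueRE_sX = "^([^: ]+[^:]*):( ?)((.|\n)+)"
match = sre.search(headervalueRE_sX, "Content-Type: text/plain")
(key, spaces, value, X) = match.groups()
print((key, value))  # -> ('Content-Type', 'text/plain')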
Example #18
def treepages(url,level):

	global treeglob,urlfields,postfields,treedurls,levels,server,vulnlogfile,scanlimit,ignorefileext
	print ">>>>>>>>",level,"<<<<<<<<"

	print " ---> "+url

	pageinfo=getpage(url)
	if listempty(pageinfo): return

	body=pageinfo[1].lower()

	print "AA"

	# select/option, textarea
	# check for forms
	bodyarr=sre.split("<form",body)
	for i in range(len(bodyarr)):

		frmsect=bodyarr[i][:bodyarr[i].find(">")]
		frmbody=bodyarr[i][bodyarr[i].find(">"):][:bodyarr[i].find("</form>")]

		actionurl=getattrval(frmsect,"action")
		if actionurl=="" or actionurl==frmsect or actionurl=="\"\"": actionurl=pageinfo[2]
		if actionurl.count(";")>0: actionurl=actionurl[actionurl.find(";")+1:]
		if actionurl[:11].lower()=="javascript:": continue
		actionurl=fullpath(actionurl,pageinfo[2])

		print "ACTION:",actionurl

		# get the input variables
		poststring=""
		inputarr=sre.sub("(.*?)\<input([^\>]*)\>(.*?)","\\2|ZZaaXXaaZZ|",frmbody).split("|ZZaaXXaaZZ|")
		for j in range(len(inputarr)):

			name=getattrval(inputarr[j],"name")
			if name==inputarr[j] or name=="" or name=="\"\"": continue

			value=getattrval(inputarr[j],"value")
			if value==inputarr[j] or value=="" or value=="\"\"": value=""

			if poststring!="": poststring+="&"
			poststring+=name+"="+value

		# get select/option tags
		selectarr=sre.sub("(.*?)\<select([^\>]*)\>(.*?)","\\2|ZZaaXXaaZZ|",frmbody).split("|ZZaaXXaaZZ|")
		for j in range(len(selectarr)):

			name=getattrval(selectarr[j],"name")
			if name==selectarr[j] or name=="" or name=="\"\"": continue

			value=sre.sub("(.*?)\<option([^\>]*)value=(\"|'|)([^\\3\ ]*)\\3([^\>]*)\>(.*?)","\\2",selectarr[j])
			if value==selectarr[j] or value=="" or value=="\"\"": value=""

			if poststring!="": poststring+="&"
			poststring+=name+"="+value
			print "sel/opt: "+name+"="+value

		if poststring=="": continue

		if sre.search("method=([\'\"]|)post([\'\"]|)",frmsect[:frmsect.find(">")].lower())==None:
			if actionurl.find("?")!=-1: actionurl+="&"
			else: actionurl+="?"
			actionurl+=poststring
			body+='<a href="'+actionurl+'">'
			print 'GETT <a href="'+actionurl+'">'
			continue

		# determine if it needs to be scanned, and if so, scan it
		postscan=0
		postvars=poststring.split("&")
		if postfields.has_key(actionurl):
			for j in range(len(postvars)):
				postvars[j]=postvars[j][:postvars[j].find("=")]
				if postfields[actionurl].count(postvars[j])==0:
					postfields[actionurl].append(postvars[j])
					postscan=1
		else:
			for j in range(len(postvars)): postvars[j]=postvars[j][:postvars[j].find("=")]
			postfields[actionurl]=postvars
			postscan=1

		if postscan==1:
			vulns=checkvars(actionurl,poststring)
			if not listempty(vulns): dispvulns(vulns,actionurl)

	print "BB"

	# check for urls in "href" tags
	# ? # part of 3? (src|href|location|window.open)= and http://
	urlreg="(\'|\")(?!javascript:)(([^\>]+?)(?!\.("+ignorefileext.replace(",","|")+"))(.{3,8}?)(|\?([^\>]+?)))"
	urlarr=sre.sub("(?s)(?i)(.+?)((src|href)=|location([\ ]*)=([\ ]*)|window\.open\()"+urlreg+"\\6","\\7|ZZaaXXaaZZ|",body).split("|ZZaaXXaaZZ|")
	del urlarr[len(urlarr)-1]
	urlarr.append(sre.sub("(?s)(?i)(.+?)(src|href)="+urlreg+"\\3","\\4|ZZaaXXaaZZ|",body).split("|ZZaaXXaaZZ|"))
	del urlarr[len(urlarr)-1]
	for i in range(len(urlarr)):

		theurl=fullpath(urlarr[i],pageinfo[2])
		if not checkserver(servername(theurl)): continue

	# determine if it needs to be scanned and/or treed, and if so, scan and/or tree it
		getscan=0
		if theurl.count("?")!=0:
			nqurl=theurl[:theurl.find("?")]
			query=theurl[theurl.find("?")+1:]
			query=sre.sub("\&amp\;","\&",query)
			qryvars=query.split("&")
			if urlfields.has_key(nqurl):
				for j in range(len(qryvars)):
					qryvars[j]=qryvars[j][:qryvars[j].find("=")]
					if urlfields[nqurl].count(qryvars[j])==0:
						urlfields[nqurl].append(qryvars[j])
						getscan=1
			else:
				for j in range(len(qryvars)): qryvars[j]=qryvars[j][:qryvars[j].find("=")]
				urlfields[nqurl]=qryvars
				getscan=1
		else:
			if urlfields.has_key(theurl)==False: urlfields[theurl]=[]
			nqurl=theurl

		if getscan==1:
			vulns=checkvars(theurl)
			if not listempty(vulns): dispvulns(vulns,theurl)
		tree=treeglob
		if treedurls.has_key(nqurl):
			if treedurls[nqurl].count(theurl)==0 and len(treedurls[nqurl])<=scanlimit:
				treedurls[nqurl].append(theurl)
			else: tree=0

		else: treedurls[nqurl]=[theurl]
		if tree==1 and level<levels:
			realurl=getpage(theurl,realpage=1)
			if theurl!=realurl and realurl!=None:
				body+=' href="'+realurl+'" '
			print "treeee"
			try: treepages(theurl,level+1)
			except KeyboardInterrupt:
				treeglob=0
				print "TREEGLOB CHANGED TO ZERO"
				treepages(theurl,level+1)
Example #19
        os.chdir("..")
    os.chdir("..")

sys.stderr.write("Must check %d directories\n" % (total_dirs))
for chip in chip_list:
    #pyraf.iraf.cd(chip)
    if chip not in cand_list:
        sys.stderr.write("No candidate on %s\n" % (chip))
        continue
    for field in field_list:
        if field not in cand_list[chip]:
            sys.stderr.write("%s/%s failed to complete.\n" % (chip, field))
            continue
        if cand_list[chip][field] == "no_candidates":
            continue
        if sre.search('checked', cand_list[chip][field]):
            continue
        else:
            #print cand_list[chip][field]
            sys.stderr.write("Checking candidates in %s %s\n" % (field, chip))
            pyraf.iraf.cd(chip + "/" + field)
            result = discands(read_cands(cand_list[chip][field]))
            if result > -1:
                sys.stderr.write("%d objects marked as real\n" % (result))
                os.rename(cand_list[chip][field],
                          cand_list[chip][field] + ".checked")
            pyraf.iraf.cd("../..")
            if result == -2:
                sys.stderr.write("Removing lock file and exiting.\n")
                os.unlink('MOPconf.lock')
                sys.exit()
Example #20
# FIXME: this is basically test_re.py, with a few minor changes

import sys
sys.path=['.']+sys.path

from test_support import verbose, TestFailed
import sre
import sys, os, string, traceback

# Misc tests from Tim Peters' re.doc

if verbose:
    print 'Running tests on sre.search and sre.match'

try:
    assert sre.search('x*', 'axx').span(0) == (0, 0)
    assert sre.search('x*', 'axx').span() == (0, 0)
    assert sre.search('x+', 'axx').span(0) == (1, 3)
    assert sre.search('x+', 'axx').span() == (1, 3)
    assert sre.search('x', 'aaa') == None
except:
    raise TestFailed, "sre.search"

try:
    assert sre.match('a*', 'xxx').span(0) == (0, 0)
    assert sre.match('a*', 'xxx').span() == (0, 0)
    assert sre.match('x*', 'xxxa').span(0) == (0, 3)
    assert sre.match('x*', 'xxxa').span() == (0, 3)
    assert sre.match('a+', 'xxx') == None
except:
    raise TestFailed, "sre.search"
Example #21
    def __init__(self, server, port, username, password, num_threads, nzb_source, nzb_directory, par2exe_directory, common_prefix = "", progress_update = None):
        self.server = server
        self.port = port
        self.username = username
        self.password = password
        self.num_threads = num_threads

        self.par2exe_directory = par2exe_directory
        self.progress_update = progress_update

        self.common_prefix = common_prefix
        self.rar_filepath = None
        self.sorted_filenames = list()
        self.downloaded_files = dict()
        self.download_queue = Queue.Queue(0)
        self.decode_queue = Queue.Queue(0)
        self.cancelled = False
        self.finished_files = 0

        self.threads = list()

        # note on calls to _update_progress: a call is made before the task begins and after the task completes;
        # this allows the consumer to cancel during the task

        nzb_string = ""
        nzb_files = None
        nzb_filepath = ""
        try:
            title = self.common_prefix
            if title == "":
                if nzb_source[:7] == "file://" and os.path.exists(urllib.url2pathname(nzb_source)):
                    nzb_filepath = urllib.url2pathname(nzb_source)
                title = "NZB"
            else:
                parts = title.split('.')
                if len(parts) > 1:
                    ext = parts[-1].lower()
                    if ext == "par2" or ext == "nzb" or ext == "nfo":
                        title = '.'.join(parts[:-1])

            if nzb_filepath == "":
                nzb_filepath = os.path.join(nzb_directory, title) + ".nzb"

            print "NZB filepath: " + nzb_filepath

            if nzb_source.startswith("<?xml"):
                nzb_string = nzb_source
            elif os.path.exists(nzb_filepath) and os.path.isfile(nzb_filepath):
                nzb_file = open(nzb_filepath, "r")
                nzb_string = string.join(nzb_file.readlines(), "")
                nzb_file.close()
                #nzb_filepath = possible_nzb_filepath
            else:
                nzb_url = nzb_source
                if self._update_progress("Downloading " + title, STATUS_INITIALIZING, os.path.basename(nzb_url)): return
                nzb_string = string.join(urllib.urlopen(nzb_url).readlines(), "")
                if self._update_progress("Downloading " + title, STATUS_INITIALIZING, os.path.basename(nzb_url)): return

            if self._update_progress("Parsing " + title, STATUS_INITIALIZING, title): return
            nzb_files = nzb_parser.parse(nzb_string)
            sort_nzb_rar_files(nzb_files)

            for nzb_file in nzb_files:
                filename = sre.search("\"(.*?)\"", nzb_file.subject).group(1)
                filename = filename.encode('utf8').lower()
                self.sorted_filenames.append(filename)

            # a common prefix from the file list is preferred
            better_common_prefix = os.path.commonprefix(self.sorted_filenames).rstrip(". ")
            if better_common_prefix != "":
                self.common_prefix = better_common_prefix

            if self.common_prefix == "":
                self.common_prefix = self.sorted_filenames[0]

            parts = self.common_prefix.split('.')
            print parts
            if len(parts) > 1:
                ext = parts[-1].lower()
                if ext == "par2" or ext == "nzb" or ext == "nfo":
                    self.common_prefix = '.'.join(parts[:-1])

            print "Common prefix: " + self.common_prefix

            self.download_directory = os.path.join(nzb_directory, self.common_prefix)
            self.status_filepath = os.path.join(self.download_directory, self.common_prefix + ".status")

            if self._update_progress("Parsing " + title, STATUS_INITIALIZING, title): return

            # make sure the download directory exists
            try: os.makedirs(self.download_directory)
            except: pass

            nzb_filepath = os.path.join(nzb_directory, self.common_prefix + ".nzb" )
            nzb_filepath = nzb_filepath.encode('utf8')
            #print nzb_filepath
            #if os.path.exists(nzb_filepath) and os.path.isdir(nzb_filepath):
            #    shutil.rmtree(nzb_filepath) # remove the directory containing the nzb; it is rewritten below
            if not os.path.exists(nzb_filepath) or os.path.getsize(nzb_filepath) != len(nzb_string):
                nzb = open(nzb_filepath, "w+b")
                nzb.write(nzb_string)
                nzb.close()

            # run par2 if we already have the .par2 file
            par2_file = os.path.join(self.download_directory, self.common_prefix + ".par2")
            print "PAR2 file: " + par2_file
            par2_targets = None
            if os.path.exists(par2_file):
                if self._update_progress("Verifying with PAR2", STATUS_INITIALIZING, os.path.basename(par2_file)): return
                par2_targets = self._verify_with_par2(par2_file)
                if self._update_progress("Verifying with PAR2", STATUS_INITIALIZING, os.path.basename(par2_file)): return

                #for target in par2_targets:
                #    print "\t" + target + ": " + par2_targets[target]

            nested_nzbs = list()

            for nzb_file in nzb_files:
                nzb_file.filename = sre.search("\"(.*?)\"", nzb_file.subject.lower()).group(1)
                nzb_file.filename = nzb_file.filename.encode('utf8')
                nzb_file.filepath = os.path.join(self.download_directory, nzb_file.filename)
                nzb_file.filepath = nzb_file.filepath.encode('utf8')

                #print filepath
                # create an empty file if it doesn't exist
                if not os.path.exists(nzb_file.filepath):
                    open(nzb_file.filepath, "w+b").close()

                if not self.rar_filepath:
                    rar_match = sre.search("(.+?)\.part(\d+).rar", nzb_file.filename)
                    if rar_match and int(rar_match.group(2)) == 1:
                        self.rar_filepath = nzb_file.filepath
                        self.rar_filename = os.path.basename(nzb_file.filepath)
                    else:
                        rar_match = sre.search("(.+?)\.rar", nzb_file.filename)
                        if rar_match:
                            self.rar_filepath = nzb_file.filepath
                            self.rar_filename = os.path.basename(nzb_file.filepath)
                    if self.rar_filepath:
                        print "First RAR file is " + self.rar_filename

                if os.path.splitext(nzb_file.filepath)[1] == ".nzb":
                    nested_nzbs.append(nzb_file.filepath)

                self.downloaded_files[nzb_file.filename] = multipart_file(nzb_file.filename, nzb_file.filepath, nzb_file)

                nzb_file.finished = False

                # skip non-PAR2 files if par2 validated it
                if par2_targets and par2_targets.has_key(nzb_file.filename) and par2_targets[nzb_file.filename] == "found":
                    print "PAR2 verified " + nzb_file.filename + ": skipping"
                    self.finished_files += 1
                    self.downloaded_files[nzb_file.filename].finished = True
                    nzb_file.finished = True
                    continue

                # sort segments in ascending order by article number
                nzb_file.segments.sort(key=lambda obj: obj.number)

            # if no RAR file and no nested NZBs, abort
            if not self.rar_filepath:
                if len(nested_nzbs) == 0:
                    raise Exception("nothing to do: NZB did not have a RAR file or any nested NZBs")
                self.rar_filepath = self.rar_filename = ""

            # check if first RAR is already finished
            if par2_targets and par2_targets.has_key(self.rar_filename) and par2_targets[self.rar_filename] == "found":
                if self._update_progress("First RAR is ready.", STATUS_READY, self.rar_filepath): return

            if self._update_progress("Starting " + `self.num_threads` + " download threads", STATUS_INITIALIZING): return

            # queue first segment of each file to get each file's total size
            #for nzb_file in nzb_files:
                # skip non-PAR2 files if par2 validated it
                #if nzb_file.finished: continue
                #self.download_queue.put([nzb_file.filename, nzb_file, nzb_file.segments[0]], timeout=1)

            # queue the rest of the segments in order
            for nzb_file in nzb_files:

                # skip non-PAR2 files if par2 validated it
                if nzb_file.finished: continue

                if self._update_progress("Queueing file", STATUS_INITIALIZING, nzb_file.filename): return
                for nzb_segment in nzb_file.segments[0:]:
                    self.download_queue.put([nzb_file.filename, nzb_file, nzb_segment], timeout=1)

            # start download threads
            for i in range(self.num_threads):
                thread = threading.Thread(name=`i`, target=self._download_thread)
                thread.start()
                self.threads.append(thread)

            if self._update_progress("Starting " + `self.num_threads` + " download threads", STATUS_INITIALIZING): return

            # decode parts as they are downloaded
            # begins streaming when the first RAR is finished
            self._decode_loop()

            # if no RAR file was found, try the nested NZBs that were downloaded
            if self.rar_filepath == "":
                if self._update_progress("No RAR files found.", STATUS_INITIALIZING): return
                for nested_nzb_filepath in nested_nzbs:
                    if self._update_progress("Trying nested NZB: " + os.path.basename(nested_nzb_filepath), STATUS_INITIALIZING): return
                    nzb_stream(self.server, self.port, self.username, self.password,
                               self.num_threads,
                               urllib.pathname2url(nested_nzb_filepath),
                               nzb_directory,
                               par2exe_directory,
                               "",
                               self.progress_update)

        except:
            traceback.print_exc()
            self._update_progress("Error parsing NZB", STATUS_FAILED, self.common_prefix)

        # cancel all threads before returning
        self.cancelled = True
        for thread in self.threads:
            if thread.isAlive():
                print "Cancelled thread " + thread.getName()
                thread.join()
Exemplo n.º 33
0
def main(argv):
    # Main program.  Takes string containing arguments a la unix command line
    p = Params()
    p.define_defaults()
    p.read_params(argv)
    p.read_config()
    p.parse_param_opts()

    p.sanitize()

    # connect
    my = MySQLdb.connect(**p['con_opts'])

    # create cursor to contain data about tail_nums to parse
    mycur = my.cursor()

    # create procedure name or get it from user
    if p['procname']=="":
        rn = random.randint(0,1000000)
        procname = 'P%s' % rn
    else:
        procname = p['procname']

    # string to store all cmds issued to DB
    all_cmds = ""

    # if dropping procedure
    if p['drop_proc']:
        cmd = 'DROP PROCEDURE IF EXISTS %s;\n' % procname
        all_cmds += cmd
        if not p['debug']:
            mycur.execute(cmd)

    # start working on procedure string
    ps = procedure_string

    # optionally include a DROP TABLE statement
    if p['drop']:
        ps = ps.replace('<<DROP>>','DROP TABLE IF EXISTS <<TABLE>>;')
    else:
        ps = ps.replace('<<DROP>>','')        

    # sub table and first select
    ps = ps.replace('<<TABLE>>',p['table'])
    ps = ps.replace('<<SELECTASC>>',p['query'])

    # turn the ordering from ASC to DESC (assumes the query orders by tail_num, measurement_time)
    mtch = sre.search('order\s+by\s+([a-zA-Z][\w_]*[.]|)(?P<tn>tail_num)\s*,\s*([a-zA-Z][\w_]*[.]|)(?P<mt>measurement_time)',p['query'],sre.I)
    tn_pos = mtch.span('tn')
    mt_pos = mtch.span('mt')
    qry = p['query'][:tn_pos[0]] + 'tail_num DESC' +  p['query'][tn_pos[1]:mt_pos[0]] + 'measurement_time DESC'

    # sub second select
    ps = ps.replace('<<SELECTDESC>>',qry)

    # finally substitute procname
    ps = ps.replace('<<PROCNAME>>',procname)

    # if not debug execute, otherwise just print
    if not p['debug']:
        mycur.execute(ps)

    # add to all cmds string
    all_cmds += ps

    # call the procedure unless no_call is set
    if not p['no_call']:
        cmd = 'call %s;\n' % procname
        all_cmds += cmd
        if not p['debug']:
            mycur.execute(cmd)
        
    # drop the procedure unless leave_proc (or no_call) is set
    if not (p['leave_proc'] or p['no_call']):
        cmd = 'DROP PROCEDURE %s;\n' % procname
        all_cmds += cmd
        if not p['debug']:
            mycur.execute(cmd)

    if p['debug']:
        print all_cmds
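
# A standalone sketch of the ASC-to-DESC rewrite performed above (the
# sample query is illustrative; 'sre' is the deprecated internal alias
# of 're'):
import re
q = "select * from obs order by tail_num, measurement_time"
m = re.search(r'order\s+by\s+([a-zA-Z][\w_]*[.]|)(?P<tn>tail_num)\s*,\s*'
              r'([a-zA-Z][\w_]*[.]|)(?P<mt>measurement_time)', q, re.I)
tn0, tn1 = m.span('tn')
mt0 = m.start('mt')
print q[:tn0] + 'tail_num DESC' + q[tn1:mt0] + 'measurement_time DESC'
# -> select * from obs order by tail_num DESC, measurement_time DESC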
Example No. 34
0
import sys, sre

if len(sys.argv) != 2:
    print 'Usage: depfilter.py NODE'
    sys.exit(1)

top = sys.argv[1]

# Read in dot file

lines = sys.stdin.readlines()

graph = {}

for arc in lines[1:-1]:
    match = sre.search('"(.*)" -> "(.*)"', arc)
    n1, n2 = match.group(1), match.group(2)
    if not graph.has_key(n1):
        graph[n1] = []
    graph[n1].append(n2)

# Create subset of 'graph' rooted at 'top'

subgraph = {}

def add_deps(node):
    if graph.has_key(node) and not subgraph.has_key(node):
        subgraph[node] = graph[node]
        for n in graph[node]:
            add_deps(n)
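
# The example is truncated here; a plausible continuation (assumed, not
# part of the original script) would walk from 'top' and re-emit the
# filtered graph in dot syntax:
add_deps(top)
print 'digraph deps {'
for n1 in subgraph:
    for n2 in subgraph[n1]:
        print '"%s" -> "%s"' % (n1, n2)
print '}'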
Example No. 35
0
def treepages(url, level):

    global treeglob, urlfields, postfields, treedurls, levels, server, vulnlogfile, scanlimit, ignorefileext
    print ">>>>>>>>", level, "<<<<<<<<"

    print " ---> " + url

    pageinfo = getpage(url)
    if listempty(pageinfo): return

    body = pageinfo[1].lower()

    print "AA"

    # select/option, textarea
    # check for forms
    bodyarr = sre.split("<form", body)
    for i in range(len(bodyarr)):

        frmsect = bodyarr[i][:bodyarr[i].find(">")]
        frmbody = bodyarr[i][bodyarr[i].find(">"):][:bodyarr[i].find("</form>")]

        actionurl = getattrval(frmsect, "action")
        if actionurl == "" or actionurl == frmsect or actionurl == "\"\"":
            actionurl = pageinfo[2]
        if actionurl.count(";") > 0:
            actionurl = actionurl[actionurl.find(";") + 1:]
        if actionurl[:11].lower() == "javascript:": continue
        actionurl = fullpath(actionurl, pageinfo[2])

        print "ACTION:", actionurl

        # get the input variables
        poststring = ""
        inputarr = sre.sub("(.*?)\<input([^\>]*)\>(.*?)", "\\2|ZZaaXXaaZZ|",
                           frmbody).split("|ZZaaXXaaZZ|")
        for j in range(len(inputarr)):

            name = getattrval(inputarr[j], "name")
            if name == inputarr[j] or name == "" or name == "\"\"": continue

            value = getattrval(inputarr[j], "value")
            if value == inputarr[j] or value == "" or value == "\"\"":
                value = ""

            if poststring != "": poststring += "&"
            poststring += name + "=" + value

        # get select/option tags
        selectarr = sre.sub("(.*?)\<select([^\>]*)\>(.*?)", "\\2|ZZaaXXaaZZ|",
                            frmbody).split("|ZZaaXXaaZZ|")
        for j in range(len(selectarr)):

            name = getattrval(selectarr[j], "name")
            if name == selectarr[j] or name == "" or name == "\"\"": continue

            value = sre.sub(
                "(.*?)\<option([^\>]*)value=(\"|'|)([^\\3\ ]*)\\3([^\>]*)\>(.*?)",
                "\\2", selectarr[j])
            if value == selectarr[j] or value == "" or value == "\"\"":
                value = ""

            if poststring != "": poststring += "&"
            poststring += name + "=" + value
            print "sel/opt: " + name + "=" + value

        if poststring == "": continue

        if sre.search("method=([\'\"]|)post([\'\"]|)",
                      frmsect[:frmsect.find(">")].lower()) == None:
            if actionurl.find("?") != -1: actionurl += "&"
            else: actionurl += "?"
            actionurl += poststring
            body += '<a href="' + actionurl + '">'
            print 'GETT <a href="' + actionurl + '">'
            continue

        # determine if it needs to be scanned, and if so, scan it
        postscan = 0
        postvars = poststring.split("&")
        if postfields.has_key(actionurl):
            for j in range(len(postvars)):
                postvars[j] = postvars[j][:postvars[j].find("=")]
                if postfields[actionurl].count(postvars[j]) == 0:
                    postfields[actionurl].append(postvars[j])
                    postscan = 1
        else:
            for j in range(len(postvars)):
                postvars[j] = postvars[j][:postvars[j].find("=")]
            postfields[actionurl] = postvars
            postscan = 1

        if postscan == 1:
            vulns = checkvars(actionurl, poststring)
            if not listempty(vulns): dispvulns(vulns, actionurl)

    print "BB"

    # check for urls in "href" tags
    # ? # part of 3? (src|href|location|window.open)= and http://
    urlreg = "(\'|\")(?!javascript:)(([^\>]+?)(?!\.(" + ignorefileext.replace(
        ",", "|") + "))(.{3,8}?)(|\?([^\>]+?)))"
    urlarr = sre.sub(
        "(?s)(?i)(.+?)((src|href)=|location([\ ]*)=([\ ]*)|window\.open\()" +
        urlreg + "\\6", "\\7|ZZaaXXaaZZ|", body).split("|ZZaaXXaaZZ|")
    del urlarr[-1]  # drop the trailing remainder left by the split
    urlarr.extend(
        sre.sub("(?s)(?i)(.+?)(src|href)=" + urlreg + "\\3", "\\4|ZZaaXXaaZZ|",
                body).split("|ZZaaXXaaZZ|"))
    del urlarr[-1]  # drop the trailing remainder left by the second split
    for i in range(len(urlarr)):

        theurl = fullpath(urlarr[i], pageinfo[2])
        if not checkserver(servername(theurl)): continue

        # determine if it needs scanned and/or treed, and if so, scan and/or tree it
        getscan = 0
        if theurl.count("?") != 0:
            nqurl = theurl[:theurl.find("?")]
            query = theurl[theurl.find("?") + 1:]
            query = sre.sub("\&amp\;", "\&", query)
            qryvars = query.split("&")
            if urlfields.has_key(nqurl):
                for j in range(len(qryvars)):
                    qryvars[j] = qryvars[j][:qryvars[j].find("=")]
                    if urlfields[nqurl].count(qryvars[j]) == 0:
                        urlfields[nqurl].append(qryvars[j])
                        getscan = 1
            else:
                for j in range(len(qryvars)):
                    qryvars[j] = qryvars[j][:qryvars[j].find("=")]
                urlfields[nqurl] = qryvars
                getscan = 1
        else:
            if not urlfields.has_key(theurl): urlfields[theurl] = []
            nqurl = theurl

        if getscan == 1:
            vulns = checkvars(theurl)
            if not listempty(vulns): dispvulns(vulns, theurl)
        tree = treeglob
        if treedurls.has_key(nqurl):
            if treedurls[nqurl].count(theurl) == 0 and len(
                    treedurls[nqurl]) <= scanlimit:
                treedurls[nqurl].append(theurl)
            else:
                tree = 0

        else:
            treedurls[nqurl] = [theurl]
        if tree == 1 and level < levels:
            realurl = getpage(theurl, realpage=1)
            if theurl != realurl and realurl != None:
                body += ' href="' + realurl + '" '
            print "treeee"
            try:
                treepages(theurl, level + 1)
            except KeyboardInterrupt:
                treeglob = 0
                print "TREEGLOB CHANGED TO ZERO"
                treepages(theurl, level + 1)
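
# A minimal standalone demo of the sentinel-substitution trick used above
# to pull tag attributes out of HTML (markup is illustrative; 'sre' is the
# deprecated internal alias of 're'):
import re
html = '<input name="a" value="1"><input name="b" value="2">'
parts = re.sub(r"(.*?)<input([^>]*)>(.*?)", r"\2|ZZaaXXaaZZ|",
               html).split("|ZZaaXXaaZZ|")
print parts[:-1]  # attribute strings of each <input> tag
# -> [' name="a" value="1"', ' name="b" value="2"']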
Example No. 36
0
   def fromString(source):
      import sre
      result = {}
      insertionorder = []
      fail = False
      originalsource = source # preserve original in case of broken header
      headervalueRE_sX = "^([^: ]+[^:]*):( ?)((.|\n)+)" # TODO: This could be optimised

      lines = source.split("\r\n")
      I = 0
      headerLines = []
      valid = False
      for I in xrange(len(lines)):
          if lines[I] != "":
             headerLines.append(lines[I])
          else:
             # The divider cannot be the last line
             valid = not (I == len(lines)-1)
             break

      if not valid:
          body = originalsource
          fail = True
      else:
          bodyLines = lines[I+1:]
          body = "\r\n".join(bodyLines)
          key = None
          for line in headerLines:
             match = sre.search(headervalueRE_sX, line)
             if match:
                (key, spaces,value,X) = match.groups()
                if value == " " and not spaces:
                   value = ""
                try:
                   result[key].append(value)
                except KeyError:            
                   result[key] = value
                except AttributeError:
                   result[key] = [ result[key], value ]
                insertionorder.append(key)

             else:
                if key:
#                       value = line.strip() # Strictly speaking, surely we should be doing this??? (Breaks tests though if we do...)
                       value = line
                       if isinstance(result[key], list):
                          # Append to last item in the list
                          result[key][len(result[key])-1] += "\r\n" + value
                       else:
                          result[key] += "\r\n" + value
                else:
#                    print "NOMATCH!NOMATCH!NOMATCH!NOMATCH!NOMATCH!NOMATCH!"
                    fail = True
                    break
      if not fail:
          result["__BODY__"]=body
      else:
          result["__BODY__"]=originalsource
      md = MimeDict(**result)
      md.insertionorder = insertionorder
      md.invalidSource = fail
      return md
Example No. 37
0
    def fromString(source):
        import sre
        result = {}
        insertionorder = []
        fail = False
        originalsource = source  # preserve original in case of broken header
        # The leading space in the headervalue RE prevents a continuation line
        # being treated like a key: value line.
        headervalueRE_s = "^([^: ]+[^:]*):( ?)([^\r]+)\r\n"  # TODO: This could be optimised
        continuationHeaderRE_s = "^( +[^\r\n]*)\r\n"
        match = sre.search(headervalueRE_s, source)

        # Search for header lines
        inHeader = True
        key = None

        while True:  # We break out this loop when matching fails for any reason
            if match:
                (key, spaces, value) = match.groups()
                if value == " " and not spaces:  # Empty header
                    value = ""
                try:
                    result[key].append(value)
                except KeyError:
                    result[key] = value
                except AttributeError:
                    result[key] = [result[key], value]
                insertionorder.append(key)

            if not match and key:
                # We have already matched a line. This may be continuation.
                match = sre.search(continuationHeaderRE_s, source)
                if not match: break
                (value, ) = match.groups()
                if isinstance(result[key], list):
                    # Append to last item in the list
                    result[key][len(result[key]) - 1] += "\r\n" + value
                else:
                    result[key] += "\r\n" + value

            if not match: break

            source = source[match.end():]
            match = sre.search(headervalueRE_s, source)

        # End of header lines. Start of source should be "\r\n"
        #
        # If it isn't, the header is invalid, and the entire original
        # source becomes the __BODY__, and all keys aside from that removed.
        #
        if source[:2] == "\r\n":
            source = source[2:]
        else:
            source = originalsource
            result = {}
            insertionorder = []
            fail = True
        result["__BODY__"] = source
        md = MimeDict(**result)
        md.insertionorder = insertionorder
        md.invalidSource = fail
        return md
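
# A small illustration of the wire format this parser consumes and what it
# is expected to produce (assumed behaviour; MimeDict is a dict subclass
# defined elsewhere in the project):
#
#   msg = "Subject: hello\r\nX-Folded: one\r\n two\r\n\r\nbody text"
#   d = MimeDict.fromString(msg)
#   # d["Subject"]  == "hello"
#   # d["X-Folded"] == "one\r\n two"
#   # d["__BODY__"] == "body text"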
Example No. 38
0
        interval_bytes[Apos] += sbytes
        interval_bytes[Bpos] += ebytes
        interval_pkts[Apos] += spkts
        interval_pkts[Bpos] += epkts

        for i in xrange(Apos+1,Bpos):
            active_sessions[i] += interval
            interval_bytes[i] += ibytes
            interval_pkts[i] += ipkts

        if debug > 1:
            print "->span fracstart<",frac_start,">fracend<",frac_end,">"


infile = file(args[0])
mobj = sre.search(sre.compile("\.bz2$"), args[0])
if mobj:
    infile.close()
    infile = bz2.BZ2File(args[0])


if not infile:
    print "error opening ",args[0]
    sys.exit(0)

buffer = infile.readline()
if not buffer:
    print "error reading from ",args[0]
    usage(sys.argv[0])

mobj = sre.match(sre.compile('first-record ([\d\.]+)'), buffer)
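
# A standalone sketch of the transparent-bz2 reopen idiom used above (the
# helper name is illustrative, not from the original script):
import bz2, re

def open_maybe_bz2(path):
    f = file(path)  # plain file object by default (Python 2 builtin)
    if re.search(r'\.bz2$', path):
        f.close()
        f = bz2.BZ2File(path)
    return f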
Example No. 40
0
   def fromString(source):
      import sre
      result = {}
      insertionorder = []
      fail = False
      originalsource = source # preserve original in case of broken header
      # The leading space in the headervalue RE prevents a continuation line
      # being treated like a key: value line.
      headervalueRE_s = "^([^: ]+[^:]*):( ?)([^\r]+)\r\n" # TODO: This could be optimised
      continuationHeaderRE_s = "^( +[^\r\n]*)\r\n"
      match = sre.search(headervalueRE_s,source)

      # Search for header lines
      inHeader = True
      key = None

      while True: # We break out this loop when matching fails for any reason
         if match:
            (key, spaces,value) = match.groups()
            if value == " " and not spaces: # Empty header
                  value = ""
            try:
               result[key].append(value)
            except KeyError:            
               result[key] = value
            except AttributeError:
               result[key] = [ result[key], value ]
            insertionorder.append(key)

         if not match and key:
            # We have already matched a line. This may be continuation.
            match = sre.search(continuationHeaderRE_s, source)
            if not match:  break
            (value,) = match.groups()
            if isinstance(result[key], list):
               # Append to last item in the list
               result[key][len(result[key])-1] += "\r\n" + value
            else:
               result[key] += "\r\n" + value

         if not match:  break

         source = source[match.end():]
         match = sre.search(headervalueRE_s,source)

      # End of header lines. Start of source should be "\r\n"
      #
      # If it isn't, the header is invalid, and the entire original
      # source becomes the __BODY__, and all keys aside from that removed.
      #
      if source[:2]=="\r\n":
         source = source[2:]
      else:
         source = originalsource
         result = {}
         insertionorder = []
         fail = True
      result["__BODY__"]=source
      md = MimeDict(**result)
      md.insertionorder = insertionorder
      md.invalidSource = fail 
      return md
Example No. 41
0
File: MOPconf.py Project: OSSOS/MOP
        os.chdir("..")
    os.chdir("..")

sys.stderr.write("Must check %d directories\n"  % ( total_dirs))
for chip in chip_list:
    #pyraf.iraf.cd(chip)
    if chip not in cand_list:
        sys.stderr.write("No candidate on %s\n" % ( chip))
        continue
    for field in field_list:
        if field not in cand_list[chip]:
            sys.stderr.write("%s/%s failed to complete.\n"  % ( chip,field) )
            continue
        if cand_list[chip][field]=="no_candidates":
            continue
        if sre.search('checked',cand_list[chip][field]): 
            continue
        else:
            #print cand_list[chip][field]
            sys.stderr.write("Checking candidates in %s %s\n" % ( field , chip))
            pyraf.iraf.cd(chip+"/"+field)
            result=discands(read_cands(cand_list[chip][field]))
            if result > -1: 
                sys.stderr.write("%d objects marked as real\n" % ( result))
                os.rename(cand_list[chip][field],cand_list[chip][field]+".checked")
            pyraf.iraf.cd("../..")
            if result==-2:
                sys.stderr.write("Removing lock file and exiting.\n")
                os.unlink('MOPconf.lock')
                sys.exit()
Example No. 42
0
    def __init__(self,
                 server,
                 port,
                 username,
                 password,
                 num_threads,
                 nzb_source,
                 nzb_directory,
                 par2exe_directory,
                 common_prefix="",
                 progress_update=None):
        self.server = server
        self.port = port
        self.username = username
        self.password = password
        self.num_threads = num_threads

        self.par2exe_directory = par2exe_directory
        self.progress_update = progress_update

        self.common_prefix = common_prefix
        self.rar_filepath = None
        self.sorted_filenames = list()
        self.downloaded_files = dict()
        self.download_queue = Queue.Queue(0)
        self.decode_queue = Queue.Queue(0)
        self.cancelled = False
        self.finished_files = 0

        self.threads = list()

        # note on calls to _update_progress: a call is made before the task begins and after the task completes;
        # this allows the consumer to cancel during the task
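        #
        # A minimal sketch of a compatible consumer callback (hypothetical
        # code; the signature is inferred from the calls in this class):
        #
        #     def progress_update(message, status, detail=""):
        #         print message, status, detail
        #         return False   # a true result requests cancellation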

        nzb_string = ""
        nzb_files = None
        nzb_filepath = ""
        try:
            title = self.common_prefix
            if title == "":
                if nzb_source[:7] == "file://" and os.path.exists(
                        urllib.url2pathname(nzb_source)):
                    nzb_filepath = urllib.url2pathname(nzb_source)
                title = "NZB"
            else:
                parts = title.split('.')
                if len(parts) > 1:
                    ext = parts[-1].lower()
                    if ext == "par2" or ext == "nzb" or ext == "nfo":
                        title = '.'.join(parts[:-1])

            if nzb_filepath == "":
                nzb_filepath = os.path.join(nzb_directory, title) + ".nzb"

            print "NZB filepath: " + nzb_filepath

            if nzb_source.startswith("<?xml"):
                nzb_string = nzb_source
            elif os.path.exists(nzb_filepath) and os.path.isfile(nzb_filepath):
                nzb_file = open(nzb_filepath, "r")
                nzb_string = string.join(nzb_file.readlines(), "")
                nzb_file.close()
                #nzb_filepath = possible_nzb_filepath
            else:
                nzb_url = nzb_source
                if self._update_progress("Downloading " + title,
                                         STATUS_INITIALIZING,
                                         os.path.basename(nzb_url)):
                    return
                nzb_string = string.join(
                    urllib.urlopen(nzb_url).readlines(), "")
                if self._update_progress("Downloading " + title,
                                         STATUS_INITIALIZING,
                                         os.path.basename(nzb_url)):
                    return

            if self._update_progress("Parsing " + title, STATUS_INITIALIZING,
                                     title):
                return
            nzb_files = nzb_parser.parse(nzb_string)
            sort_nzb_rar_files(nzb_files)

            for nzb_file in nzb_files:
                filename = sre.search("\"(.*?)\"", nzb_file.subject).group(1)
                filename = filename.encode('utf8').lower()
                self.sorted_filenames.append(filename)

            # a common prefix from the file list is preferred
            better_common_prefix = os.path.commonprefix(
                self.sorted_filenames).rstrip(". ")
            if better_common_prefix != "":
                self.common_prefix = better_common_prefix

            if self.common_prefix == "":
                self.common_prefix = self.sorted_filenames[0]

            parts = self.common_prefix.split('.')
            print parts
            if len(parts) > 1:
                ext = parts[-1].lower()
                if ext == "par2" or ext == "nzb" or ext == "nfo":
                    self.common_prefix = '.'.join(parts[:-1])

            print "Common prefix: " + self.common_prefix

            self.download_directory = os.path.join(nzb_directory,
                                                   self.common_prefix)
            self.status_filepath = os.path.join(self.download_directory,
                                                self.common_prefix + ".status")

            if self._update_progress("Parsing " + title, STATUS_INITIALIZING,
                                     title):
                return

            # make sure the download directory exists
            try:
                os.makedirs(self.download_directory)
            except:
                pass

            nzb_filepath = os.path.join(nzb_directory,
                                        self.common_prefix + ".nzb")
            nzb_filepath = nzb_filepath.encode('utf8')
            #print nzb_filepath
            #if os.path.exists(nzb_filepath) and os.path.isdir(nzb_filepath):
            #    shutil.rmtree(nzb_filepath) # remove the directory containing the nzb; it is rewritten below
            if not os.path.exists(nzb_filepath) or os.path.getsize(
                    nzb_filepath) != len(nzb_string):
                nzb = open(nzb_filepath, "w+b")
                nzb.write(nzb_string)
                nzb.close()

            # run par2 if we already have the .par2 file
            par2_file = os.path.join(self.download_directory,
                                     self.common_prefix + ".par2")
            print "PAR2 file: " + par2_file
            par2_targets = None
            if os.path.exists(par2_file):
                if self._update_progress("Verifying with PAR2",
                                         STATUS_INITIALIZING,
                                         os.path.basename(par2_file)):
                    return
                par2_targets = self._verify_with_par2(par2_file)
                if self._update_progress("Verifying with PAR2",
                                         STATUS_INITIALIZING,
                                         os.path.basename(par2_file)):
                    return

                #for target in par2_targets:
                #    print "\t" + target + ": " + par2_targets[target]

            nested_nzbs = list()

            for nzb_file in nzb_files:
                nzb_file.filename = sre.search(
                    "\"(.*?)\"", nzb_file.subject.lower()).group(1)
                nzb_file.filename = nzb_file.filename.encode('utf8')
                nzb_file.filepath = os.path.join(self.download_directory,
                                                 nzb_file.filename)
                nzb_file.filepath = nzb_file.filepath.encode('utf8')

                #print filepath
                # create an empty file if it doesn't exist
                if not os.path.exists(nzb_file.filepath):
                    open(nzb_file.filepath, "w+b").close()

                if not self.rar_filepath:
                    rar_match = sre.search("(.+?)\.part(\d+).rar",
                                           nzb_file.filename)
                    if rar_match and int(rar_match.group(2)) == 1:
                        self.rar_filepath = nzb_file.filepath
                        self.rar_filename = os.path.basename(nzb_file.filepath)
                    else:
                        rar_match = sre.search("(.+?)\.rar", nzb_file.filename)
                        if rar_match:
                            self.rar_filepath = nzb_file.filepath
                            self.rar_filename = os.path.basename(
                                nzb_file.filepath)
                    if self.rar_filepath:
                        print "First RAR file is " + self.rar_filename

                if os.path.splitext(nzb_file.filepath)[1] == ".nzb":
                    nested_nzbs.append(nzb_file.filepath)

                self.downloaded_files[nzb_file.filename] = multipart_file(
                    nzb_file.filename, nzb_file.filepath, nzb_file)

                nzb_file.finished = False

                # skip non-PAR2 files if par2 validated it
                if par2_targets and par2_targets.has_key(
                        nzb_file.filename) and par2_targets[
                            nzb_file.filename] == "found":
                    print "PAR2 verified " + nzb_file.filename + ": skipping"
                    self.finished_files += 1
                    self.downloaded_files[nzb_file.filename].finished = True
                    nzb_file.finished = True
                    continue

                # sort segments in ascending order by article number
                nzb_file.segments.sort(key=lambda obj: obj.number)

            # if no RAR file and no nested NZBs, abort
            if not self.rar_filepath:
                if len(nested_nzbs) == 0:
                    raise Exception(
                        "nothing to do: NZB did not have a RAR file or any nested NZBs"
                    )
                self.rar_filepath = self.rar_filename = ""

            # check if first RAR is already finished
            if par2_targets and par2_targets.has_key(
                    self.rar_filename) and par2_targets[
                        self.rar_filename] == "found":
                if self._update_progress("First RAR is ready.", STATUS_READY,
                                         self.rar_filepath):
                    return

            if self._update_progress(
                    "Starting " + str(self.num_threads) + " download threads",
                    STATUS_INITIALIZING):
                return

            # queue first segment of each file to get each file's total size
            #for nzb_file in nzb_files:
            # skip non-PAR2 files if par2 validated it
            #if nzb_file.finished: continue
            #self.download_queue.put([nzb_file.filename, nzb_file, nzb_file.segments[0]], timeout=1)

            # queue the rest of the segments in order
            for nzb_file in nzb_files:

                # skip non-PAR2 files if par2 validated it
                if nzb_file.finished: continue

                if self._update_progress("Queueing file", STATUS_INITIALIZING,
                                         nzb_file.filename):
                    return
                for nzb_segment in nzb_file.segments[0:]:
                    self.download_queue.put(
                        [nzb_file.filename, nzb_file, nzb_segment], timeout=1)

            # start download threads
            for i in range(self.num_threads):
                thread = threading.Thread(name=str(i),
                                          target=self._download_thread)
                thread.start()
                self.threads.append(thread)

            if self._update_progress(
                    "Starting " + str(self.num_threads) + " download threads",
                    STATUS_INITIALIZING):
                return
Example No. 43
0
def yenc_decode(encoded_lines):

    # check for start tag
    first_line = 0
    for line in encoded_lines:
        if line[:7] == "=ybegin":
            break
        first_line += 1

    if first_line == len(encoded_lines):
        raise Exception("ybegin line not found")

    file_size = None

    # =ybegin part=2 total=66 line=128 size=50000000
    ybegin_match = sre.search("size=(\d+)", encoded_lines[first_line][7:])
    if ybegin_match == None:
        raise Exception("ybegin line is malformed")
    else:
        file_size = int(ybegin_match.group(1))

    decoded_buffer = ""
    part_number = None
    part_begin = None
    part_end = None
    part_size = None
    for line in encoded_lines[first_line+1:]:

        if line[:6] == "=ypart":
            ypart_match = sre.search("begin=(\d+) end=(\d+)", line[6:])
            if ypart_match == None:
                raise Exception("ypart line is malformed")
            else:
                part_begin = int(ypart_match.group(1))
                part_end = int(ypart_match.group(2))
            continue

        elif line[:5] == "=yend":
            yend_match = sre.search("size=(\d+) part=(\d+) pcrc32=([0-9a-zA-Z]{8})", line[5:])
            if yend_match == None:
                raise Exception("yend line is malformed")
            else:
                part_size = int(yend_match.group(1))
                part_number = int(yend_match.group(2))
                pcrc32 = int(yend_match.group(3), 16)
                if (crc32(decoded_buffer) & 0xffffffff) != pcrc32:
                    raise Exception("CRC32 checksum failed", crc32(decoded_buffer) & 0xffffffff, pcrc32)
            break

        i = 0
        end = len(line)
        while i < end:
            byte = line[i]

            # end of line
            if byte in "\r\n":
                break

            # escape byte
            if byte == '=':
                i += 1
                decoded_buffer += decode_escape_table[ord(line[i])]

            # normal byte
            else:
                decoded_buffer += decode_table[ord(byte)]

            i += 1

    if part_size != None and part_size != len(decoded_buffer):
        print "Warning: yend size attribute does not equal buffer length"

    return decoded_buffer, part_number, part_begin, part_end, file_size
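
# yenc_decode assumes two module-level lookup tables. A minimal sketch of
# how they are typically built for yEnc, where a raw byte is recovered as
# (encoded - 42) mod 256 and '='-escaped bytes are shifted by a further 64:
decode_table = [chr((i - 42) % 256) for i in range(256)]
decode_escape_table = [chr((i - 42 - 64) % 256) for i in range(256)]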