Example #1
from __future__ import print_function  # Python 2: enables print(..., file=...)
import string
import sys

import pandas as pd


def make_build_alignment(c_alns, mc_alns, hmc_alns, strand, threshold, max_assignments):
    # translation tables for methylation
    C_trans_table = string.maketrans("C", "C")
    mC_trans_table = string.maketrans("C", "E")
    hmC_trans_table = string.maketrans("C", "O")

    C_table = collect_assignments(c_alns, strand, threshold, max_assignments, C_trans_table)
    mC_table = collect_assignments(mc_alns, strand, threshold, max_assignments, mC_trans_table)
    hmC_table = collect_assignments(hmc_alns, strand, threshold, max_assignments, hmC_trans_table)

    nb_c_assignments = C_table.shape[0] if C_table is not None else "None"
    nb_mc_assignments = mC_table.shape[0] if mC_table is not None else "None"
    nb_hmc_assignments = hmC_table.shape[0] if hmC_table is not None else "None"

    print("[buildAlignments] NOTICE: I found {C} C-assignments, {mC} mC-assignments, and {hmC} hmC-assignments "
          "for strand {strand}"
          "".format(C=nb_c_assignments, mC=nb_mc_assignments, hmC=nb_hmc_assignments, strand=strand),
          file=sys.stderr)
    tables = []

    for table in (C_table, mC_table, hmC_table):
        if table is None:
            continue
        else:
            tables.append(table)

    return pd.concat(tables)
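As an aside, the same three tables under Python 3 would come from str.maketrans, which returns a mapping for str.translate (a sketch, with E and O marking 5-mC and 5-hmC as in the snippet above):

C_trans_table = str.maketrans("C", "C")    # identity, kept for symmetry
mC_trans_table = str.maketrans("C", "E")   # mark 5-methylcytosine
hmC_trans_table = str.maketrans("C", "O")  # mark 5-hydroxymethylcytosine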
Example #2
def __rhymeQuotient(comment):
     """
     Calculate the "rhyminess" of a comment: how many line-ending words
     are similar to other line-ending words.
     """
     #strip punctuation and numbers
     comment = comment.translate(string.maketrans("",""), string.punctuation)
     comment = comment.translate(string.maketrans("",""), string.digits)
     
     lines = comment.split('\n')
     
     lastWords = []
     for line in lines:
         words = line.split()
         if len(words) >= 1:
             lastWords.append(words[-1])
             
     # can't have rhymes if we have fewer than two lines
     if len(lastWords) < 2:
         return 0
     
     #print lastWords
     # now score each word by similarity with a following word
     for i in range(len(lastWords)):
         best = 0
         for j in range(i+1,len(lastWords)):
             best = max(best, TimesComments.__sharedLetters(lastWords[i],lastWords[j]))
         lastWords[i] = best
         
     lastWords = map(lambda x: 5 if x >= 2 else x, lastWords)
     return sum(lastWords)/(len(lastWords)-1)
Example #3
def unlisted_words(sample, reference):
    # Clean and split both strings: strip punctuation, lowercase, then split
    # on whitespace/newlines (Python 2 translate signature: table, deletechars).
    s = sample.translate(string.maketrans("", ""), string.punctuation).lower().split()
    r = reference.translate(string.maketrans("", ""), string.punctuation).lower().split()
    # Keep the sample words missing from the reference; set() removes duplicates.
    return list(set(w for w in s if w not in r))
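For comparison, a minimal Python 3 sketch of the same cleanup (not part of the original snippet): str.maketrans takes a third argument of characters to delete, so no (table, deletechars) pair is needed.

import string

DELETE_PUNCT = str.maketrans("", "", string.punctuation)  # third argument = characters to delete

def unlisted_words_py3(sample, reference):
    s = sample.translate(DELETE_PUNCT).lower().split()
    r = set(reference.translate(DELETE_PUNCT).lower().split())
    return list({w for w in s if w not in r})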
Example #4
def checkreserveduser(user):
   if os.path.isdir(os.path.join(config.datadir, user)):
     return
   if user.lower() != user:
     errorexit('Username should be lowercase.')
   # user is known to be lowercase past this point
   normalized = user.lower()
   normalizedi = normalized.translate(string.maketrans('013456789', 'oieasbtbg'))
   if normalized != normalizedi and os.path.isdir(
         os.path.join(config.datadir, normalizedi)):
     errorexit('Username is reserved.')
   normalizedl = normalized.translate(string.maketrans('013456789', 'oleasbtbg'))
   if normalizedl != normalized and os.path.isdir(
         os.path.join(config.datadir, normalizedl)):
     errorexit('Username is reserved.')
   with open(os.path.join(progpath, 'bad-words.txt')) as f:
     badwords = f.read().splitlines()
   if any(word in badwords for word in [normalized, normalizedi, normalizedl]):
     errorexit('Username is reserved.')
   return
Example #5
def solve_partial(w, h, pattern, answer, fix_num, solve_num, reset_loop_count=RESET_LOOP_COUNT):
    trans_str_wall = answer[:fix_num]
    trans_table_wall = string.maketrans(trans_str_wall, 
                                        '=' * len(trans_str_wall))
    trans_str_asta = answer[fix_num + solve_num:-1].replace('=', '')
    trans_table_asta = string.maketrans(trans_str_asta,
                                        '*' * len(trans_str_asta))
    pattern_rep = pattern.translate(trans_table_wall)
    pattern_rep = pattern_rep.translate(trans_table_asta)
    answer_rep = answer.translate(trans_table_wall)
    answer_rep = answer_rep.translate(trans_table_asta)

    ####### debug #######
    print '--------- pattern_rep'
    print_pattern(w, h, pattern_rep)
    print '--------- answer_rep'
    print_pattern(w, h, answer_rep)
    ####### debug #######

    move = solve_all(w, h, pattern_rep, answer_rep, reset_loop_count)

    ####### debug #######
    if move:
        pattern_work = create_pattern(w, h, pattern, move)
        print '--------- succeeded'
        print_pattern(w, h, pattern_work)
    else:
        print '--------- not succeeded'
    ####### debug #######
    return move
Example #6
def make_table(do_encrypt=True):
    original = ''.join([chr(i) for i in range(0, 128)])
    encrypted = ''.join([chr(i * 2 % 127) for i in range(0, 128)])
    if do_encrypt:
        return string.maketrans(original, encrypted)
    else:
        return string.maketrans(encrypted, original)
Example #7
from string import letters, digits, maketrans  # Python 2: string.letters no longer exists in Python 3

def build_sample_ids_transtable():
    """Build translation table for sample ids being MIENS compliant"""
    all_chars = "".join([chr(i) for i in range(128)])
    valid_sample_id_chars = letters + digits + "."
    non_valid_sample_id_chars = all_chars.translate(maketrans("", ""), valid_sample_id_chars)
    trans_table = maketrans(non_valid_sample_id_chars, "." * len(non_valid_sample_id_chars))
    return trans_table
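A rough Python 3 counterpart of the sample-id table, assuming the same 7-bit ASCII universe; here str.maketrans maps the invalid characters to '.' directly:

import string

def build_sample_ids_transtable_py3():
    valid = string.ascii_letters + string.digits + "."
    invalid = "".join(chr(i) for i in range(128) if chr(i) not in valid)
    return str.maketrans(invalid, "." * len(invalid))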
Example #8
def _translator(frm='', to='', delete='', keep=None):
    """generate a translator which can be called on a string, substituting
    ``frm`` with ``to``, deleting the characters in ``delete`` and keeping
    only those in ``keep``. This function is only for Python 2, because
    ``maketrans`` returns a dict in Python 3.

    examples:

    >>> trans = _translator('1234567890', '!@#$%^&*()')
    >>> trans('a1b2c3d4e5f6g7')
    'a!b@c#d$e%f^g&'
    >>> trans = _translator('1234567890', '!@#$%^&*()', '123')
    >>> trans('a1b2c3d4e5f6g7')
    'abcd$e%f^g&'
    >>> trans = _translator('1234567890', '!@#$%^&*()', '123', '345')
    >>> trans('a1b2c3d4e5f6g7')
    '$%'
    >>> trans = _translator('1234567890', '!@#$%^&*()', '123', '345ab')
    >>> trans('a1b2c3d4e5f6g7')
    'ab$%'
    """
    if len(to) == 1:
        to = to * len(frm)

    table = string.maketrans(frm, to)
    if keep is not None:
        all_chars = string.maketrans('', '')
        delete = all_chars.translate(all_chars,
                                     keep.translate(all_chars, delete))

    def translate(s):
        return s.translate(table, delete)

    return translate
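A Python 3 rendition of the same factory (a sketch restricted to the 8-bit range, since Python 3 strings have no finite alphabet to enumerate): the keep logic becomes "delete everything outside keep", and the deletions ride along as str.maketrans's third argument.

def _translator_py3(frm='', to='', delete='', keep=None):
    if len(to) == 1:
        to = to * len(frm)
    if keep is not None:
        keep_set = set(keep) - set(delete)
        # delete every 8-bit character outside keep
        delete = ''.join(c for c in map(chr, range(256)) if c not in keep_set)
    table = str.maketrans(frm, to, delete)  # the third argument maps its chars to None
    return lambda s: s.translate(table)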
Example #9
def generateHashtagSpecificVocabulary(self):
		wordsMappedToHashtags = {}

		for tweet in self.trainSet:
			words = []
			hashtags = []
			for word in tweet.split():
				if word.startswith('#') and len(word) > 2:
					word = word.lower().translate(string.maketrans("",""), string.punctuation) # remove punctuation
					hashtags.append(word)
					words.append(word)
				else:
					if '@' in word:
						continue
					if word in self.stopWords:
						continue
					word = word.lower().translate(string.maketrans("",""), string.punctuation) # remove punctuation
					words.append(word)

			for hashtag in hashtags:
				if hashtag not in wordsMappedToHashtags:
					wordsMappedToHashtags[hashtag] = {}
				for word in words:
					if word not in wordsMappedToHashtags[hashtag]:
						wordsMappedToHashtags[hashtag][word] = 1.0
					else:
						wordsMappedToHashtags[hashtag][word] += 1.0

		return wordsMappedToHashtags
Example #10
def scrape_links_and_wordlistify(links, lower=False, verbose=1):
    import nltk
    import requests
    import string
    raw = ''
    wordlist = {}
    for site in links:
        try:
            if verbose == 1:
                print '[+] fetching data from: ', site
            if site.startswith('http://pastebin.com/'):
                raw = requests.get(site.replace('http://pastebin.com/', 'http://pastebin.com/raw.php?i=')).content
            else:
                raw = requests.get(site).content
            if not lower:
                l = string.translate(nltk.clean_html(raw), string.maketrans(string.punctuation, ' ' * 32)).split()
                freq_an(l, wordlist)
            else:
                l = string.lower(nltk.clean_html(raw))
                l = string.translate(l, string.maketrans(string.punctuation, ' ' * 32)).split()
                freq_an(l, wordlist)
        except:
            if verbose == 1:
                print '[-] Skipping url: ', site
    return wordlist
Example #11
def generateCounts(self):
		wordCounts = {}
		hashtagCounts = {}

		for tweet in self.trainSet:
			hashtags = []
			for word in tweet.split():
				if word.startswith('#') and len(word) > 2:
					word = word.lower().translate(string.maketrans("",""), string.punctuation) # remove punctuation
					hashtags.append(word)
					if word not in wordCounts:
						wordCounts[word] = 1
					else:
						wordCounts[word] += 1
				else:
					if '@' in word:
						continue
					if word in self.stopWords:
						continue
					word = word.lower().translate(string.maketrans("",""), string.punctuation) # remove punctuation
					if word not in wordCounts:
						wordCounts[word] = 1
					else:
						wordCounts[word] += 1

			for hashtag in hashtags:
				if hashtag not in hashtagCounts:
					hashtagCounts[hashtag] = 1.0
				else:
					hashtagCounts[hashtag] += 1.0

		return wordCounts, hashtagCounts
Example #12
def ROT13(test_string):
    """Return an encrypted string offset by 13.

    Encryption does not impact spaces or special characters."""

    shifted_cipher_lower = string.ascii_lowercase[13:] + string.ascii_lowercase[:13]
    shifted_cipher_upper = string.ascii_uppercase[13:] + string.ascii_uppercase[:13]
    transtable_lower = string.maketrans(string.ascii_lowercase, shifted_cipher_lower)
    transtable_upper = string.maketrans(string.ascii_uppercase, shifted_cipher_upper)

    encrypted_text = []
    for letter in test_string:
        if letter.islower():
            encrypted_text.append(letter.translate(transtable_lower))
        else:
            encrypted_text.append(letter.translate(transtable_upper))

    # join once, outside the loop, so an empty input returns ''
    return "".join(encrypted_text)
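In Python 3 the same cipher fits in one table, since maketrans is a static method on str (a sketch):

import string

ROT13_TABLE = str.maketrans(
    string.ascii_lowercase + string.ascii_uppercase,
    string.ascii_lowercase[13:] + string.ascii_lowercase[:13] +
    string.ascii_uppercase[13:] + string.ascii_uppercase[:13])

def rot13_py3(text):
    return text.translate(ROT13_TABLE)  # rot13_py3("Hello") -> "Uryyb"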
Example #13
def __init__(self):
     self.currentTroll = random.randint(0, 10)
     self.trollCounter = 0
     self.trolls = [
         #aradia
         { 'prefix': '', 'replace': lambda x: x.translate(string.maketrans('oo', '00')) },
         #terezi
         { 'prefix': '', 'replace': lambda x: x.upper().translate(string.maketrans('AIE', '413')) },
         #tavros
         { 'prefix': '', 'replace': lambda x: x.title().swapcase() },
         #sollux
         { 'prefix': '', 'replace': lambda x: x.replace('s', '2').replace('S', '2').replace('i', 'ii').replace('I', 'II') },
         #karkat
         { 'prefix': '', 'replace': lambda x: x.upper() },
         #nepeta
         { 'prefix': ':33 <', 'replace': lambda x: x.replace('ee', '33').replace('EE', '33') },
         #kanaya
         { 'prefix': '', 'replace': lambda x: x.capitalize() },
         #vriska
         { 'prefix': '', 'replace': lambda x: x.translate(string.maketrans('bB', '88')).replace('ate', '8') },
         #equius
         { 'prefix': 'D -->', 'replace': lambda x: x.translate(string.maketrans('xX', '%%')) },
         #gamzee TODO need a full func
         #eridan
         { 'prefix': '', 'replace': lambda x: x.replace('w', 'ww').replace('v', 'vv').replace('W', 'WW').replace('V', 'VV') },
         #feferi
         { 'prefix': '', 'replace': lambda x: x.replace('h', ')(').replace('H', ')(').replace('E', '-E') },
     ]
Example #14
def tm():

    import sys
    import nltk
    import string

    input_file_name = raw_input("Please enter the input file name: ")
    input_path = raw_input("Please enter the input path: ")
    output_file_name = raw_input("Please enter the output file name: ")
    print '\nPlease note that the above entered filename would be used as',
    print 'a prefix for the entire set of documents to be generated.\n'
    output_path = raw_input("Please enter the output path: ")

    with open (input_path + '\\' + input_file_name + '.txt','r') as f:

        para = []
        data = f.read()
        selected=0
        notselect=0
        sentences = data.split("\n\n")

        print "Total # of paragraphs",len(sentences)

        for x in xrange(len(sentences)):
            cond = sentences[x].endswith(".")
            if cond:
                cnt = sentences[x].count(".")
            else:
                cnt= sentences[x].count(".")+1

            if cnt >5:
                #print "paragraph ",x+1,"is selected"
                selected+=1
                sentences[x] = '@'+sentences[x].lower()
                sentences[x] = sentences[x].translate(string.maketrans("",""),string.digits)
                sentences[x] = sentences[x].translate(string.maketrans("",""),string.punctuation)
                tokens = nltk.word_tokenize(sentences[x])
                lemma  = nltk.WordNetLemmatizer()
                porter = nltk.PorterStemmer()

                afewwords = [lemma.lemmatize(i) for i in tokens]
                afewwords = [porter.stem(i) for i in afewwords]  # stem the lemmatized tokens

                sentences[x] = ' '.join(afewwords)
                para.append(sentences[x])

                filename = output_path + '\\' + output_file_name + str(selected) + '.txt'
                w = open(filename,'w')
                w.write(''.join(para))
                w.close()
                para = []
            else:
                #print "paragraph ",x+1,"is not selected"
                notselect+=1
            #print "cnt - ", cnt
        #print"\n"

        print "# of paragraphs selected",selected
        print "# of paragraphs not selected",notselect
    f.close()
Example #15
def __flatten(dic, prefix):
     '''
     Recursively pass through a dict and flatten its "internal" dicts.
     '''
     results = {}
     if dic is not None:
         try:
             for key in dic.keys():
                 if type(dic[key]) in [float, int]:
                     results["%s.%s" % (
                         prefix,
                         str(key).translate(string.maketrans(".", "_"))
                     )] = dic[key]
                 elif type(dic[key]) in [dict]:
                     results.update(
                         GraphiteUploaderPlugin.__flatten(
                             dic[key],
                             "%s.%s" % (
                                 prefix,
                                 key.translate(string.maketrans(".", "_"))
                             )
                         )
                     )
         except AttributeError:
             pass
     return results
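A usage sketch with hypothetical input, to show why dots inside keys become underscores (Graphite reads '.' as a path separator):

# hypothetical call:
#   GraphiteUploaderPlugin.__flatten({"cpu": {"user": 1.5}, "load.avg": 3.0}, "stats")
# would yield:
#   {"stats.cpu.user": 1.5, "stats.load_avg": 3.0}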
Example #16
def checkData(self, row):
     jobTime = row[4]
     jobTerm = row[5]
     w1={'full time':1,'part time':-1,'':0}    #this dictionary corresponds to time feature
     w2={'permanent':0,'contract':1,'':-1}     #this dictionary corresponds to term feature
     
     if jobTime == '' or jobTerm == '':
         s=row[2].lower()          
         s=s.translate(string.maketrans("‘’‚“”„†‡‰‹›!“#$%&‘()™*+,-�./0123456789:;<=>?@[\]_`{|}~–—΅Ά£¤¥¦§¨©�«¬®―°±²³΄µ¶·ΈΉΊ»Ό½ΎΏήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ�€Άβ—�ο�'","                                                                                                                                "))
         if jobTime=='':
             if ('full time' in s and 'part time' in s) or ('full time' not in s and 'part time' not in s):
                 word1=''
             else:
                 if 'full time' in s:      #searching full time in description
                     word1='full time'
                 else:
                     word1='part time'
         else:
             word1=jobTime.translate(string.maketrans("_"," ")) #removing underscore from time feature value
             
         if jobTerm=='':
             if ('permanent' in s and 'contract' in s) or ('permanent' not in s and 'contract' not in s):
                 word2=''
             else:
                 if 'permanent' in s:      #searching permanent in description
                     word2='permanent'
                 else:
                     word2='contract'
         else: word2=jobTerm.translate(string.maketrans("_"," "))   #removing underscore from term feature value
     
     else:
         word1=jobTime.translate(string.maketrans("_"," "))
         word2=jobTerm.translate(string.maketrans("_"," "))
         
     return [word1,w1[word1],word2,w2[word2]]
Example #17
def get_kmer_densities(path, kmer):
    mC_trans = string.maketrans("C", "E")
    hmC_trans = string.maketrans("C", "O")
    c_density = KmerHdpDistribution(path, kmer)
    mc_density = KmerHdpDistribution(path, kmer.translate(mC_trans))
    hmc_density = KmerHdpDistribution(path, kmer.translate(hmC_trans))
    return c_density, mc_density, hmc_density
Example #18
def listShows(path, forBackLog=False):

	if not forBackLog:
		trans = string.maketrans(' ', '.')
	else:
		trans = string.maketrans(' ', '_')

	shows = {}
	try:
		for show_name in os.listdir(path):
			if os.path.isdir(os.path.join(path, show_name)):
				sanitized_show_name = show_name.translate(trans, '\'().!').lower()
				shows[sanitized_show_name] = show_name
	except:
		log.error('Unable to find ' + path)
		sys.exit()

	items = config['rss.mapping'].split(',')
	if len(items) > 0:
		for item in items:
			i = item.split('=')
			if len(i) > 1:
				local = i[0].strip()
				dist = i[1].strip().translate(trans, '\'().!').lower()
				shows[dist] = local	
				log.debug('Extra TV shows mapping : %s => %s' % (dist, local) )

	return shows
Example #19
def build_anagrams():
    '''
    build a python dict of sorted-word and the set of words
    Using brown corpus and a file containing words from multiple corpora
    strip all punctuations, retain hyphens, replace underscores with space
    '''
    punctuations_replace = '#"(){}[]<>.+,/:;=?@_|~-'
    punctuations_remove = '!$\'%&\\*^`'
    # 1. Brown Corpus
    for word in nltk.corpus.brown.words():
        w = str(word)
        # polish just the way tokens were
        w_list = w.translate(string.maketrans(punctuations_replace,' '*len(punctuations_replace)), punctuations_remove).strip().lower().split()
        for each_w in w_list:
            # add the word to redis with key as a sorted word
            wam[''.join(sorted(each_w))].add(each_w)
    # 2. Wordnet
    for word in nltk.wordnet.wordnet.words():
        w = str(word)
        # polish just the way tokens were
        w_list = w.translate(string.maketrans(punctuations_replace,' '*len(punctuations_replace)), punctuations_remove).strip().lower().split()
        for each_w in w_list:
            # add the word to redis with key as a sorted word
            wam[''.join(sorted(each_w))].add(each_w)
    # 3. Other corpora
    with open(BIG_WL, 'r') as f:
        for line in f:
            w = str(line).strip()
            # polish just the way tokens were
            w_list = w.translate(string.maketrans(punctuations_replace,' '*len(punctuations_replace)), punctuations_remove).strip().lower().split()
            for each_w in w_list:
                # add the word to redis with key as a sorted word
                wam[''.join(sorted(each_w))].add(each_w)
Example #20
def test_the_reverse_complement(self):
     """Check obj.reverse_complement() method."""
     mapping = ""
     for example1 in self._examples:
         if isinstance(example1, MutableSeq):
             continue
         try:
             comp = example1.reverse_complement()
         except ValueError as e:
             self.assertEqual(str(e), "Proteins do not have complements!")
             continue
         str1 = str(example1)
         # This only does the unambiguous cases
         if any(("U" in str1, "u" in str1, example1.alphabet == generic_rna)):
             mapping = maketrans("ACGUacgu", "UGCAugca")
         elif any(("T" in str1, "t" in str1, example1.alphabet == generic_dna,
                  example1.alphabet == generic_nucleotide)):
             mapping = maketrans("ACGTacgt", "TGCAtgca")
         elif "A" not in str1 and "a" not in str1:
             mapping = maketrans("CGcg", "GCgc")
         else:
             # TODO - look at alphabet?
             continue
         self.assertEqual(str1.translate(mapping)[::-1], str(comp))
         self.assertEqual(comp.alphabet, example1.alphabet)
Example #21
def buildGoodSet(goodChars=string.printable, badChar='?'):
    """Build a translation table that turns all characters not in goodChars
    to badChar"""
    allChars=string.maketrans("", "")
    badchars=string.translate(allChars, allChars, goodChars)
    rv=string.maketrans(badchars, badChar * len(badchars))
    return rv
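Python 3 has no 256-character identity table to subtract from, so the usual trick is a dict with a __missing__ fallback; str.translate consults it per code point (a sketch, not part of the original):

import string

class _FallbackTable(dict):
    """Any code point not mapped explicitly translates to the bad character."""
    def __init__(self, good_chars, bad_char):
        super().__init__((ord(c), c) for c in good_chars)
        self._bad = bad_char
    def __missing__(self, key):
        return self._bad

def build_good_set_py3(good_chars=string.printable, bad_char='?'):
    return _FallbackTable(good_chars, bad_char)

# "héllo".translate(build_good_set_py3()) -> 'h?llo'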
Example #22
def html_for_url_node(node):
    if not re.match("javascript:", node["url"]):
        linkURL = sanitize(node["url"])
        keysURL = linkURL
        ktrspaces = "                            "
        ktrtable = string.maketrans("!@#$%^&*()_+-=`~;:'\",<.>/?\\|", ktrspaces)
        keysURL = str(keysURL).translate(ktrtable, "").lower()
        keysURL = str(keysURL).translate(None, "!@#$%^&*()_+-=`~;:'\",<.>/?\\|").lower()
        #
        tags = sanitize(node["name"])
        # tags= node['name'] Check for UTF-8 etc...
        # print "TAGS: ",tags
        # tags = tags.translate(None,'!@#$%^&*()_+-=`~;:\'",<.>/?\\|')
        # tags.translate(None,'!@#$%^&*()_+-=`~;:\'",<.>/?\\|')
        # trtable =                          '                              '
        trspaces = "                            "
        trtable = string.maketrans("!@#$%^&*()_+-=`~;:'\",<.>/?\\|", trspaces)
        # tags = str(tags).translate(trtable,'!@#$%^&*()_+-=`~;:\'",<.>/?\\|')
        tags = str(tags).translate(trtable, "").lower()
        tags = str(tags).translate(None, "!@#$%^&*()_+-=`~;:'\",<.>/?\\|").lower()
        #
        allTags = tags + " " + keysURL
        print "# '", sanitize(node["url"]), "'", allTags
        # print '# \'',sanitize(node['url']),'\'', tags
        return '<dt><a href="%s">%s</a>\n' % (sanitize(node["url"]), sanitize(node["name"]))
    else:
        return ""
Example #23
def reverseComplement(seq, rna=False):
    if rna:
        complements = string.maketrans("AUCGN", "UAGCN")
        return convertToRNA(seq).translate(complements)[::-1]
    else:
        complements = string.maketrans("ATCGN", "TAGCN")
        return convertToDNA(seq).translate(complements)[::-1]
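The Python 3 form, where the table becomes a module-level constant built by str.maketrans (a sketch for the DNA branch only):

DNA_COMPLEMENT = str.maketrans("ATCGN", "TAGCN")

def reverse_complement_py3(seq):
    return seq.translate(DNA_COMPLEMENT)[::-1]  # "ATCG" -> "CGAT"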
Example #24
def hashword(plaintext):
	"""
	Munge a plaintext word into something else. Hopefully, the result
	will have some mnemonic value.
	"""
	# get a list of random bytes. A byte will be randomly picked from
	# this list when needed.
	rb = getrandomlist()
	# 0.25 chance of case being swapped
	if rb[rb[0]] < 64:
		plaintext = string.swapcase(plaintext)
	# 0.50 chance of vowels being translated one of two ways.
	if rb[rb[2]] > 127:
		plaintext = string.translate(plaintext, 
			string.maketrans('aeiou AEIOU', '@3!0& 4#10%'))
	else:
		plaintext = string.translate(plaintext, 
			string.maketrans('aeiou AEIOU', '^#1$~ $3!0&'))
	# 0.4 chance of some additional consonant translation
	if rb[rb[4]] < 102:
		plaintext = string.translate(plaintext, 
			string.maketrans('cglt CGLT', '(<1+ (<1+'))
	# if word is short, add some digits
	if len(plaintext) < 5:
		plaintext = plaintext + `rb[5]`
	# 0.2 chance of some more digits appended
	if rb[rb[3]] < 51:
		plaintext = plaintext + `rb[205]`
	return plaintext
Example #25
def url_sign( uri_path, params, client_id, signing_key ):
    signing_key = signing_key.translate(string.maketrans('-_', '+/'))
    padding_factor = ( 4 - len( signing_key ) % 4 ) % 4
    signing_key += "=" * padding_factor
    binary_key = base64.b64decode(unicode(signing_key).translate(dict(zip(map(ord, u'-_'), u'+/'))))

    # construct URI for signing
    uri_path_params = uri_path + '?'
    first = True
    for k in params.keys():
        if not first:
            uri_path_params += '&'
        else:
            first = False
        uri_path_params = "%(base)s%(key)s=%(value)s" % {
                                                        'base':uri_path_params,
                                                        'key':k,
                                                        'value':urllib.quote_plus(str(params[k]))
                                                        }
    uri_path_params += '&client=' + client_id

    # Sign
    digest = hmac.new(binary_key, uri_path_params, hashlib.sha1).digest()
    digest = base64.b64encode( digest )
    digest = digest.translate(string.maketrans('+/', '-_'))
    return "%s&sig=%s" % ( uri_path_params, digest.rstrip('=') )
Example #27
def translate(read, ref, match):
    read = read[::-1]
    read = read.translate(string.maketrans('ACGTacgt', 'TGCAtgca'))
    ref = ref[::-1]
    ref = ref.translate(string.maketrans('ACGTacgtRYKMBVDH', 'TGCAtgcaYRMKVBHD'))
    match = match[::-1]
    return (read, ref, match)
Example #28
def retrieve_access_token(self):
  output.speak(_("Please wait while an access token is retrieved from Twitter."), True)
  httpd = BaseHTTPServer.HTTPServer(('127.0.0.1', 8080), Handler)
  twitterDataOrig = str(self.config['oauth']['twitterData'])
  trans = maketrans("-_~", "+/=")
  twitterDataTrans = twitterDataOrig.translate(trans)
  twitterData = b64decode(twitterDataTrans)
  twitterData = literal_eval(twitterData)
  tw = Twython(twitterData[0], twitterData[1], auth_endpoint='authorize')
  try:
   auth = tw.get_authentication_tokens("http://127.0.0.1:8080")
  except SSLError:
   output.speak(_("Sorry, we can't connect to Twitter. You may want to adjust your firewall or antivirus software appropriately"), True)
  webbrowser.open_new_tab(auth['auth_url'])
  global logged, verifier
  logged = False
  while logged == False:
   httpd.handle_request()
  self.auth_handler = Twython(twitterData[0], twitterData[1], auth['oauth_token'], auth['oauth_token_secret'])
  token = self.auth_handler.get_authorized_tokens(verifier)
  output.speak(_("Retrieved access token from Twitter."), True)
  httpd.server_close()
  data = [token['oauth_token'], token['oauth_token_secret']]
  eData = dumps(data)
  trans = maketrans("+/=", "-_~")
  eData = b64encode(eData)
  eData = eData.translate(trans)
  self.config['oauth']['userData'] = eData
  self.login()
  del (httpd, auth, tw, token, logged, verifier, twitterData, twitterDataOrig, data, eData, self.auth_handler)
Example #29
def get_encrypt_decrypt_tables():
    orig_table = string.maketrans(b'', b'')
    orig_table = [orig_table[i: i + 1] for i in range(len(orig_table))]
    orig_table_decrypt = string.maketrans(b''.join(orig_table), string.maketrans(b'', b''))

    encrypt_table, decrypt_table = get_table(bytes(PASSWORD))
    return encrypt_table, decrypt_table
Example #30
def translator(frm='', to='', delete='', keep=None):
	"""
	@written by Chris Perkins
	@copied by Qingluan

	A closure factory for common string substitutions:
	substitute ``frm`` with ``to``, delete ``delete``, keep only ``keep``.
	"""
	import string

	if len(to) == 1:
		to = to * len(frm)
	trans = string.maketrans(frm, to)

	if keep is not None:
		all_chars = string.maketrans('', '')  # the full 256-character table
		delete = all_chars.translate(all_chars, keep.translate(all_chars, delete))

	def translate(s):
		return s.translate(trans, delete)

	return translate
Example #31
def remove_punctuation(text):
    result = text.translate(string.maketrans("", ""), string.punctuation)
    print(result)
    return result
Example #32
#!/usr/bin/env python
import sys
import string
from sklearn.feature_extraction import stop_words

stops = set(stop_words.ENGLISH_STOP_WORDS)
stops.update('0', '1', '2', '3', '4', '5', '6', '7', '8', '9') # add digits to stopwords (brute force way)
stops.update(['thou', 'shall', 'thee']) # notice, update needs a formal "set", or else it will add each single character...

# get all lines from stdin
for line in sys.stdin:
    # remove leading and trailing whitespace and convert to lowercase
    line = line.strip().lower()

    # remove punctuation
    # (I wonder if it would be best done on the full line here, vs. on 'words' after the split)?
    line = line.translate(string.maketrans("",""), string.punctuation)
    
    # split the line into words; splits on any whitespace
    words = line.split()

    # output tuples (word, 1) in tab-delimited format, but only if they're not a stopword!
    for word in words:
        if word not in stops: # omit stopwords
            if len(word) > 3: # keep only words longer than 3 characters
                print '%s\t%s' % (word, "1")
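The only Python 3 change this mapper needs is the translate call; the deletion table can be built once, outside the loop (a sketch):

import string

PUNCT_TABLE = str.maketrans('', '', string.punctuation)  # third argument: characters to delete

def clean_line_py3(line):
    return line.strip().lower().translate(PUNCT_TABLE)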
Example #33
reg_decl_with_pe=re.compile("<(![^-\[]|\?)")
reg_subst_pe_search=re.compile(">|%")

reg_cond_sect=re.compile("<!\\[|\\]\\]>")
reg_litval_stop=re.compile("%|&#")

# RFC 1766 language codes

reg_lang_code=re.compile("([a-zA-Z][a-zA-Z]|[iIxX]-([a-zA-Z])+)(-[a-zA-Z])*")

# Mapping regexps to error codes
# NB: 3900 is reported directly from _get_name

reg2code={
    reg_name.pattern : 3900, reg_ver.pattern : 3901,
    reg_enc_name.pattern : 3902, reg_std_alone.pattern : 3903,
    reg_hex_digits.pattern : 3905,
    reg_digits.pattern : 3906, reg_pe_ref.pattern : 3907,
    reg_attr_type.pattern : 3908, reg_attr_def.pattern : 3909,
    reg_nmtoken.pattern : 3910}
    
# Some useful variables

predef_ents={"lt":"&#60;","gt":"&#62;","amp":"&#38;","apos":"&#39;",
             "quot":'&#34;'}

# Translation tables

ws_trans=string.maketrans("\r\t\n","   ")  # Whitespace normalization
id_trans=string.maketrans("","")           # Identity transform 
Example #34
    InvalidResourceSpecification,
    )
from .defaults import (
    GET,
    POST,
    DELETE,
    Status,
    DupeOption,
    UuidOption,
    JobDefFormat,
    JOB_RUN_TIMEOUT,
    JOB_RUN_INTERVAL,
    )

_JOB_ID_CHARS = ascii_letters + digits
_JOB_ID_TRANS_TAB = maketrans(_JOB_ID_CHARS, '#' * len(_JOB_ID_CHARS))
_JOB_ID_TEMPLATE = '########-####-####-####-############'
_RUNDECK_RESP_FORMATS = ('xml',)  # TODO: yaml and json; note the trailing comma, a one-element tuple needs it
_EXECUTION_COMPLETED = (Status.FAILED, Status.SUCCEEDED, Status.ABORTED)
_EXECUTION_PENDING = (Status.RUNNING,)


def is_job_id(job_id):
    """Checks if a Job ID "looks" like a UUID. It does not check if it exists as a job in Rundeck.
        And of course, a Rundeck Job ID does not have to be a "UUID". Any unique string will do
        so be prepared for false negatives if you customize your job ids.

    :Parameters:
        job_id : str
            a Rundeck Job ID
Example #35
def rot(message, shift=3):
    "Employs the Ceasar Cipher to encode/decode messages."
    alphabet = ascii_lowercase
    shifted_alphabet = alphabet[shift:] + alphabet[:shift]
    table = maketrans(alphabet, shifted_alphabet)
    return message.lower().translate(table)
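Decoding reuses the same function with the complementary shift (usage sketch):

ciphertext = rot('attack at dawn')         # 'dwwdfn dw gdzq'
plaintext = rot(ciphertext, shift=26 - 3)  # back to 'attack at dawn'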
Example #36
def process(lst):
    prccd_item_list = []
    for tweet in lst:
        #		print "[original]", tweet
        #                print(tweet)

        # Normalizing utf8 formatting
        tweet = tweet.decode("unicode-escape").encode("utf8").decode("utf8")
        #tweet = tweet.encode("utf-8")
        tweet = tweet.encode("ascii", "ignore")
        tweet = tweet.strip(' \t\n\r')

        # 1. Lowercasing
        tweet = tweet.lower()
        #		print "[lowercase]", tweet

        # Word-Level
        tweet = re.sub(' +', ' ',
                       tweet)  # replace multiple spaces with a single space

        # 2. Normalizing digits
        tweet_words = tweet.strip('\r').split(' ')
        for word in [word for word in tweet_words if word.isdigit()]:
            tweet = tweet.replace(word, "D" * len(word))
#		print "[digits]", tweet

# 3. Normalizing URLs
        tweet_words = tweet.strip('\r').split(' ')
        for word in [
                word for word in tweet_words
                if '/' in word or '.' in word and len(word) > 3
        ]:
            tweet = tweet.replace(word, "httpAddress")
#		print "[URLs]", tweet

# 4. Normalizing username
        tweet_words = tweet.strip('\r').split(' ')
        for word in [
                word for word in tweet_words
                if word[0] == '@' and len(word) > 1
        ]:
            tweet = tweet.replace(word, "usrId")
#		print "[usrename]", tweet

# 5. Removing special Characters
        punc = '@$%^&*()_+-={}[]:"|\'\~`<>/,'
        trans = string.maketrans(punc, ' ' * len(punc))
        tweet = tweet.translate(trans)
        #		print "[punc]", tweet

        # 6. Normalizing +2 elongated char
        tweet = re.sub(r"(.)\1\1+", r'\1\1', tweet.decode('utf-8'))
        #		print "[elong]", tweet

        # 7. tokenization using tweetNLP
        tweet = ' '.join(twokenize.simpleTokenize(tweet))
        #		print "[token]", tweet

        #8. fix \n char
        tweet = tweet.replace('\n', ' ')

        prccd_item_list.append(tweet.strip())
#		print "[processed]", tweet.replace('\n', ' ')
    return prccd_item_list
Example #37
def get_transtable():
    transtable = maketrans(ORIGIN_ALPHABET, CUSTOMER_ALPHABET)
    return transtable
Example #38
        self.typereso = typereso
        self.type_descriptions = []
        self.signature = kwargs.pop('signature', None)
        for td in type_descriptions:
            self.type_descriptions.extend(td)
        for td in self.type_descriptions:
            td.finish_signature(self.nin, self.nout)
        if kwargs:
            raise ValueError('unknown kwargs %r' % str(kwargs))


# String-handling utilities to avoid locale-dependence.

import string
if sys.version_info[0] < 3:
    UPPER_TABLE = string.maketrans(string.ascii_lowercase,
                                   string.ascii_uppercase)
else:
    UPPER_TABLE = bytes.maketrans(bytes(string.ascii_lowercase, "ascii"),
                                  bytes(string.ascii_uppercase, "ascii"))


def english_upper(s):
    """ Apply English case rules to convert ASCII strings to all upper case.

    This is an internal utility function to replace calls to str.upper() such
    that we can avoid changing behavior with changing locales. In particular,
    Turkish has distinct dotted and dotless variants of the Latin letter "I" in
    both lowercase and uppercase. Thus, "i".upper() != "I" in a "tr" locale.

    Parameters
    ----------
Example #39
import logging
import hashlib
import hmac
import bcrypt
from time import time
from struct import pack, unpack
from base64 import b64decode
from string import maketrans

usersafe_encoding = maketrans('-$%', 'OIl')

class auth:
    def __init__(self, reqdata):
        self.reqdata = reqdata

    # First try if it is a valid token
    # Failure may just indicate that we were passed a password
    def auth_token(self):
        try:
            token = b64decode(self.password.translate(usersafe_encoding) + '=======')
        except:
            logging.debug('Could not decode token (maybe not a token?)')
            return False

        jid = self.username + '@' + self.domain

        if len(token) != 23:
            logging.debug('Token is too short: %d != 23 (maybe not a token?)' % len(token))
            return False

        (version, mac, header) = unpack('> B 16s 6s', token)
Example #40
def get_reverse_transtable():
    transtable = maketrans(CUSTOMER_ALPHABET, ORIGIN_ALPHABET)
    return transtable
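All of the examples above are Python 2. For reference, a summary sketch of the Python 3 replacements, where the right API depends on whether you hold str or bytes:

text_table  = str.maketrans("abc", "xyz")       # for str
bytes_table = bytes.maketrans(b"abc", b"xyz")   # for bytes / bytearray
delete_tab  = str.maketrans("", "", "aeiou")    # third argument deletes

"cab".translate(text_table)        # -> 'zxy'
b"cab".translate(bytes_table)      # -> b'zxy'
"education".translate(delete_tab)  # -> 'dctn'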