def trainonone(self, pa, ntimes):
    for k in range(ntimes):
        for wordvec in pa.getallwordvecs():
            # Each item is a (num, vector) pair; feed the vector through
            # one optimisation step and track the loss.
            _, loss = self.session.run([self.train, self.loss],
                                       feed_dict={self.inputs: wordvec[1]})
        print("Loss incurred:", loss)
    # Persist the trained weights so they can be restored later.
    self.saver = tf.train.Saver()
    self.saver.save(self.session, 'embedding.chk')
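# `trainonone` assumes the constructor already built a TF1-style graph
# exposing `self.inputs`, `self.loss`, `self.train`, and `self.session`.
# The real constructor is not shown here; what follows is a minimal sketch
# of one plausible single-layer autoencoder satisfying that contract (the
# layer sizes, activation, and optimiser are assumptions, not this
# project's actual code, and the placeholder shape depends on the vector
# format ProbArray yields).
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

class TrainModelSketch(object):
    def __init__(self, maxnum, newdims, lr):
        # One co-occurrence vector per word; `maxnum` is assumed to be
        # the input dimensionality.
        self.inputs = tf.placeholder(tf.float32, [1, maxnum])
        encoded = tf.layers.dense(self.inputs, newdims, activation=tf.nn.sigmoid)
        decoded = tf.layers.dense(encoded, maxnum)
        # Reconstruction loss drives the embedding.
        self.loss = tf.reduce_mean(tf.square(decoded - self.inputs))
        self.train = tf.train.GradientDescentOptimizer(lr).minimize(self.loss)
        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())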
def multi_pass(user, pswd, times=1000, hasher=SHA512()):
    '''
    Iteratively hash @pswd mixed with @user and the round number, @times
    times, as a simple key-stretching measure.

    AUTHORS:
    v0.2.0+ --> pydsigner
    '''
    for i in range(times):
        pswd = hasher.encode(pswd + user + str(i))
    return pswd
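# `hasher` can be any object with an `encode(text) -> str` method; the
# default `SHA512()` is presumably such a wrapper. A minimal compatible
# stand-in built on hashlib might look like this (the class and method
# here are assumptions matching the call site, not a known library API):
import hashlib

class SHA512(object):
    def encode(self, text):
        # A hex digest keeps the result a plain str, so it can be fed
        # back into the next round's concatenation.
        return hashlib.sha512(text.encode('utf-8')).hexdigest()

# Example: stretch a password over 1000 chained rounds.
digest = multi_pass('alice', 'hunter2', hasher=SHA512())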
def __init__(self, pool):
    self.pool = pool
    self._eps_tool = EPSTool()
    gradient_bin = array(encode('B'), range(0, 256))
    # The input gradient is 256 pixels wide and 1 pixel high; we don't
    # need more data than that and simply scale it up on display.
    self._eps_tool.set_image_data(gradient_bin.tostring(), (256, 1))
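# For reference, the gradient buffer above is just the byte values 0..255
# in order; a standalone illustration (using `tobytes()`, the Python 3
# name for `tostring()`):
from array import array

grad = array('B', range(256))   # typecode 'B': unsigned 8-bit integers
data = grad.tobytes()           # 256 bytes, one per gradient step
assert len(data) == 256 and data[:3] == b'\x00\x01\x02'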
def factors(n):
    '''
    Returns a set of every factor of @n (including 1 and @n).

    AUTHORS:
    v0.4.9+ --> pydsigner
    '''
    # Scan divisors up to sqrt(n); each small divisor x pairs with the
    # cofactor n // x, so both halves of every pair are covered.
    small = {x for x in range(1, int(n ** .5) + 1) if not n % x}
    return small | {n // x for x in small}
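# A few spot checks of the divisor logic (sorted for a stable comparison):
assert sorted(factors(12)) == [1, 2, 3, 4, 6, 12]
assert sorted(factors(7)) == [1, 7]             # primes: just 1 and n
assert sorted(factors(16)) == [1, 2, 4, 8, 16]  # perfect squares counted once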
def pascals_triangle(depth):
    '''
    Calculate Pascal's triangle to @depth places. Returns a list of lists.

    >>> pascals_triangle(5)
    [[1], [1, 1], [1, 2, 1], [1, 3, 3, 1], [1, 4, 6, 4, 1]]

    AUTHORS:
    v0.4.7+ --> pydsigner
    '''
    res = []
    for i in range(depth):
        row = [1]
        for c in range(1, i + 1):
            # Each interior entry is the sum of the two entries above it;
            # the slice also yields the trailing 1 when c runs off the end.
            row.append(sum(res[-1][c - 1:c + 1]))
        res.append(row)
    return res
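# Row i of the result holds the binomial coefficients C(i, 0)..C(i, i),
# which can be cross-checked against math.comb (Python 3.8+):
import math

rows = pascals_triangle(6)
assert rows[5] == [math.comb(5, k) for k in range(6)]   # [1, 5, 10, 10, 5, 1]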
def nth(self, n):
    """
    Get the n-th iteration of this L-System.

    This is just a convenience method based on how L-Systems are used.

    :param int n: Number of the iteration; 0 means the initial state
        (the axiom).
    :raises: LSystemOverflow if the requested iteration cannot be
        generated.
    """
    gen = iter(self)
    # Extract the 0th iteration (the axiom).
    current = next(gen)
    # Continue to extract iterations until the n-th one is reached.
    for i in range(n):
        current = next(gen)
    return current
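# `nth` only relies on the object being iterable, with iteration yielding
# the axiom first and then each successive rewrite. A minimal sketch of a
# class honouring that contract, shown on Lindenmayer's algae system (this
# layout is an assumption for illustration, not the module's actual class):
class LSystemSketch(object):
    def __init__(self, axiom, rules):
        self.axiom = axiom
        self.rules = rules

    def __iter__(self):
        current = self.axiom
        while True:
            yield current
            # Rewrite every symbol in parallel, as L-Systems require.
            current = ''.join(self.rules.get(ch, ch) for ch in current)

    def nth(self, n):
        gen = iter(self)
        current = next(gen)        # the 0th iteration (axiom)
        for i in range(n):
            current = next(gen)
        return current

algae = LSystemSketch('A', {'A': 'AB', 'B': 'A'})
assert algae.nth(0) == 'A'
assert algae.nth(3) == 'ABAAB'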
def __init__(self, *args):
    '''
    SortedDict([(key_0, value_0), (key_1, value_1)..., (key_n, value_n)])
    SortedDict([key_0, key_1..., key_n], [value_0, value_1..., value_n])
    SortedDict([key_0, key_1..., key_n], default)
    SortedDict((key_0, value_0), (key_1, value_1)..., (key_n, value_n))
    SortedDict()
    '''
    self.data = []
    self.keydict = {}
    if len(args) == 1:
        for key, value in args[0]:
            self._add(key, value)
    elif len(args) == 2:
        if hasattr(args[1], '__len__') and len(args[1]) == len(args[0]):
            for i in range(len(args[0])):
                self._add(args[0][i], args[1][i])
        else:
            for key in args[0]:
                self._add(key, args[1])
    else:
        for key, value in args:
            self._add(key, value)
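# The call shapes the constructor accepts, one per docstring line
# (assuming `_add` inserts a key/value pair; values are illustrative):
a = SortedDict([('x', 1), ('y', 2)])   # a list of (key, value) pairs
b = SortedDict(['x', 'y'], [1, 2])     # parallel key and value sequences
c = SortedDict(['x', 'y'], 0)          # keys sharing one default value
d = SortedDict(('x', 1), ('y', 2))     # pairs as positional arguments
e = SortedDict()                       # empty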
import os
import re
import time
import pickle

def train(folder):
    '''
    Function to train the autoencoder.
    '''
    t = time.time()
    lr_decay = 0.95
    pa = ProbArray()
    # Word frequencies, used to filter out low-frequency words.
    freq = {}
    filepaths = [os.path.join(folder, x) for x in os.listdir(folder)]
    rgx = re.compile(r"([\w][\w']*\w)")

    # First pass: count the frequency of every token.
    print("Pre-processing (cleaning garbage words)")
    for filepath in filepaths:
        with open(filepath) as f:
            text = f.read().lower()
        tokens = re.findall(rgx, text)
        for token in tokens:
            freq[token] = freq.get(token, 0) + 1

    # Sort the (token, freq) pairs and prune words with freq < min_count.
    tokenFreq = sorted(freq.items(), key=lambda x: x[1])
    garbageWords = []
    for item in tokenFreq:
        if item[1] < min_count:
            garbageWords.append(item[0])

    print("Generating co-occurrence matrix")
    for filepath in filepaths:
        with open(filepath) as f:
            text = f.read().lower()
        words = re.findall(rgx, text)
        N = len(words)
        # Left-pad with blanks so every word has a full context window.
        temp = [' '] * (N + contextSize)
        temp[contextSize:(contextSize + N)] = words
        words = temp
        for i in range(contextSize, (contextSize + N)):
            # Filter out garbage words:
            # if words[i] not in garbageWords:
            # Include the context size specified by the user.
            for j in range(i - contextSize, i):
                if words[i] != ' ' and words[j] != ' ':
                    pa.addcontext(words[j], words[i])
                    pa.addcontext(words[i], words[j])
    print("Co-occurrence matrix generated")

    print("Starting training")
    tm = TrainModel(maxnum, newdims, lr)
    pa.freeze()
    tm.trainonone(pa, ntimes)
    # lr /= float(1 + k * lr_decay)

    wordembeddings = {}
    for num, wordvec in pa.getallwordvecs():
        word = pa.wordnumrelation.getWord(num)
        if backend == 'tf':
            embedding = tm.getoutput(wordvec, './embedding.chk')
        else:
            embedding = tm.getoutput(wordvec)
        wordembeddings[word] = embedding

    print("Training process done, dumping embeddings into persistent storage!")
    with open('./embeddings.pickle', "wb") as outfile:
        pickle.dump(wordembeddings, outfile)
    print("Training completed! Embedding done.")
    print("time is %f" % (time.time() - t))
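# The padding-and-window step above is the heart of the co-occurrence
# pass: the word list is left-padded with `contextSize` blanks so every
# position has a full left window, and each word is then paired
# symmetrically with the `contextSize` words before it. A standalone
# illustration of that logic on a toy sentence, collecting the pairs in a
# Counter instead of ProbArray:
import re
from collections import Counter

contextSize = 2
words = re.findall(r"([\w][\w']*\w)", "the quick brown fox")

# Left-pad so every position has a full window of `contextSize` neighbours.
padded = [' '] * contextSize + words

pairs = Counter()
for i in range(contextSize, contextSize + len(words)):
    for j in range(i - contextSize, i):
        if padded[i] != ' ' and padded[j] != ' ':
            pairs[(padded[j], padded[i])] += 1
            pairs[(padded[i], padded[j])] += 1

assert pairs[('quick', 'fox')] == 1    # within the 2-word window
assert ('the', 'fox') not in pairs     # outside the window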