def minimumTotal3(self, triangle):
    """Return the smallest top-to-bottom path sum through ``triangle``.

    The rows are folded bottom-up: every cell absorbs the cheaper of the two
    cells directly beneath it, so the answer ends up in ``triangle[0][0]``.

    Note that ``triangle`` is overwritten in place (every row except the last
    one is modified). Returns ``None`` when ``triangle`` is empty.
    """
    if not triangle:
        return
    # Pair each row with the row below it, starting from the bottom.
    for upper, lower in zip(reversed(triangle[:-1]), reversed(triangle)):
        for col in range(len(upper)):
            upper[col] += min(lower[col], lower[col + 1])
    return triangle[0][0]
def minimumTotal(self, triangle):
    """Return the smallest top-to-bottom path sum through ``triangle``.

    Uses a single rolling row of partial sums that is collapsed upward one
    triangle row at a time.

    The rolling row *is* ``triangle[-1]`` (no copy is made), so the last row
    of the input is overwritten. Returns ``None`` for an empty triangle.
    """
    if not triangle:
        return
    best = triangle[-1]  # alias, not a copy: the last row is reused as scratch
    for row in reversed(triangle[:-1]):
        for col, value in enumerate(row):
            best[col] = min(best[col], best[col + 1]) + value
    return best[0]
def main(true, pred):
    """Print the confusion matrix and the overall accuracy of ``pred``.

    The matrix comes from ``confuse(true, pred)`` (defined elsewhere;
    presumably a square matrix indexable as ``a[i][j]`` -- confirm against
    that helper). Accuracy is the sum of the matrix diagonal divided by
    ``len(true)``, printed as a percentage.

    Args:
        true: ground-truth labels; only its length is used here.
        pred: predicted labels, forwarded to ``confuse``.

    Raises:
        ZeroDivisionError: if ``true`` is empty.
    """
    matrix = confuse(true, pred)
    print(matrix)
    # Walk the diagonal directly instead of scanning every (i, j) pair, and
    # avoid shadowing the builtin ``sum`` with a local accumulator.
    correct = 0.0
    for i in range(len(matrix)):
        correct += matrix[i][i]
    print("Accuracy : ", (correct / len(true)) * 100)
def multiple_correlation():
    """Plot a heatmap of multiple-correlation coefficients against 'PM 2.5'.

    For every ordered pair (i, j) of predictor columns the coefficient

        R = sqrt((r_yi^2 + r_yj^2 - 2 * r_yi * r_yj * r_ij) / (1 - r_ij^2))

    is computed, where ``r_yi`` / ``r_yj`` are the Pearson correlations of
    the target with each predictor and ``r_ij`` is the correlation between
    the two predictors. Each row of coefficients is printed, the largest
    coefficient and its position (via ``index_2d``) are printed, and the
    matrix is shown as a seaborn heatmap.

    The CSV is read once and the target/predictor correlations are computed
    once per predictor; the values are the same as recomputing them inside
    the nested loop.

    NOTE(review): for i == j the denominator ``1 - r_ij^2`` is zero or close
    to it; that behaviour is left exactly as it was -- confirm whether the
    diagonal should be skipped.
    """
    csv_path = 'Data/Original-Data/Original_Combine.csv'
    predictor_names = ['T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'VM', 'V']

    frame = pd.read_csv(csv_path, usecols=['PM 2.5'] + predictor_names)
    # Keep single-column DataFrames (double brackets) so pearsonr receives
    # the same kind of object as before.
    A = frame[['PM 2.5']]
    coerr = [frame[[name]] for name in predictor_names]

    # Correlation of the target with each predictor is loop-invariant.
    target_r = [pearsonr(A, predictor)[0] for predictor in coerr]

    myfinalcorr = []
    for i in range(len(coerr)):
        mycorr = []
        for j in range(len(coerr)):
            pair_r = pearsonr(coerr[i], coerr[j])[0]
            corr = math.sqrt(
                (math.pow(target_r[i], 2) + math.pow(target_r[j], 2)
                 - (2 * target_r[i] * target_r[j] * pair_r))
                / (1 - math.pow(pair_r, 2)))
            mycorr.append(corr)
        print(mycorr)
        myfinalcorr.append(mycorr)

    # Explicit scan (rather than max()) so NaN entries are skipped exactly
    # as before: a comparison against NaN is always False.
    a = 0.0
    for row in myfinalcorr:
        for value in row:
            if a < value:
                a = value
    print("Max Correlation and Position: ", a, index_2d(myfinalcorr, a))

    tick_marks = np.arange(len(predictor_names))
    sns.heatmap(myfinalcorr, square=True)
    plt.yticks(tick_marks, predictor_names, rotation=45)
    plt.xticks(tick_marks, predictor_names, ha='left', rotation=45)
    plt.show()
def minimumTotal_2(self, triangle):
    """Return the smallest top-to-bottom path sum through ``triangle``.

    Top-down variant: each cell accumulates the cheapest way of reaching it
    from the row above, and the answer is the minimum of the final row.

    ``triangle`` is overwritten in place (every row except the first).
    Returns ``None`` for an empty triangle.
    """
    if not triangle:
        return
    for above, row in zip(triangle, triangle[1:]):
        last_col = len(row) - 1
        for col in range(len(row)):
            if col == 0:
                # Left edge: only reachable from the cell directly above.
                row[col] += above[col]
            elif col == last_col:
                # Right edge: only reachable from the upper-left cell.
                row[col] += above[col - 1]
            else:
                row[col] += min(above[col - 1], above[col])
    return min(triangle[-1])
def predict_sequences_multiple(model, data, window_size, prediction_len):
    """Predict several independent runs of ``prediction_len`` steps each.

    One run is started from every ``prediction_len``-th window of ``data``
    (``len(data) // prediction_len`` runs in total). Inside a run the model
    is fed its own output: the oldest row of the window is dropped and the
    newest prediction is inserted at row index ``window_size - 1``.

    Args:
        model: object with ``predict(batch)`` whose result is indexed
            ``[0, 0]``.
        data: indexable collection of windows; each window is indexed as
            ``window[newaxis, :, :]``, i.e. it is 2-D.
        window_size: number of rows in one window.
        prediction_len: steps per run, and the stride between run starts.

    Returns:
        A list of runs, each a list of ``prediction_len`` predictions.
    """
    sequences = []
    for run in range(len(data) // prediction_len):
        window = data[run * prediction_len]
        outputs = []
        for _ in range(prediction_len):
            outputs.append(model.predict(window[newaxis, :, :])[0, 0])
            # Slide the window forward by one step.
            window = np.insert(window[1:], [window_size - 1], outputs[-1], axis=0)
        sequences.append(outputs)
    return sequences
def _show_sorted_listing(title, entries):
    """Show ``entries`` (one per line) in a Tk window and block until closed."""
    root = tk.Tk()
    root.title(title)
    # Every entry is preceded by a newline, matching the original layout.
    listing = "".join("\n" + str(entry) for entry in entries)
    tk.Label(root, text=listing).pack()
    tk.Button(root, text='Ok', command=root.destroy).pack()
    root.mainloop()


def sort_type1(request, file_path):
    """Display the contents of ``file_path`` sorted according to ``request``.

    Args:
        request: one of
            ``"file size"``     - regular files only, full paths, largest first;
            ``"last modified"`` - all entries as paths, oldest first;
            ``"Name"``          - all entry names, alphabetical.
            Any other value does nothing.
        file_path: directory to list.

    Changes from the previous version:
      * The function no longer declares ``global sorted`` and rebinds it to a
        list. That replaced the builtin ``sorted`` for the whole module, so
        every later sort request raised ``TypeError``.
      * The window titles now match the requested ordering (all three were
        'Sort by Size').
      * The directory is listed before the window is created, so a bad path
        no longer leaves an empty Tk window behind.
    """
    if request == "file size":
        candidates = (os1.path.join(file_path, name)
                      for name in os1.listdir(file_path))
        files = [path for path in candidates if os1.path.isfile(path)]
        files.sort(key=os1.path.getsize, reverse=True)
        _show_sorted_listing('Sort by Size', files)
    elif request == "last modified":
        entries = sorted(Path(file_path).iterdir(), key=os.path.getmtime)
        _show_sorted_listing('Sort by Last Modified', entries)
    elif request == "Name":
        _show_sorted_listing('Sort by Name', sorted(os1.listdir(file_path)))
def minimumTotal_1(self, triangle):
    """Return the smallest top-to-bottom path sum through ``triangle``.

    Top-down variant that fills a separate table of the same shape, so the
    input is left untouched. Returns ``None`` for an empty triangle.
    """
    if not triangle:
        return
    table = [[0] * len(row) for row in triangle]
    table[0][0] = triangle[0][0]
    for depth in range(1, len(triangle)):
        costs = triangle[depth]
        above = table[depth - 1]
        current = table[depth]
        last_col = len(costs) - 1
        for col, cost in enumerate(costs):
            if col == 0:
                reach = above[col]          # left edge
            elif col == last_col:
                reach = above[col - 1]      # right edge
            else:
                reach = min(above[col - 1], above[col])
            current[col] = reach + cost
    return min(table[-1])
def upload():
    """Import rows from an uploaded CSV into the ``Store`` table.

    Reads the ``data_file`` upload as windows-1252 CSV, drops its last two
    rows, and inserts one ``Store`` row per remaining line. ``colMap`` (the
    ``col`` entry of the parsed form) maps destination column names to CSV
    column names. Cells are stored as strings; NaN cells become ``None``.
    When the ``custom_label`` destination has a value, ``store_id`` is set
    to the part of it before the first ``-``.

    Returns:
        A JSON response: ``"OK"`` with status 200 on success, or
        ``"No file"`` with status 400 when the upload is empty.

    Changes from the previous version:
      * An empty upload returns immediately instead of falling through into
        CSV parsing (which could never produce the "No file" response).
      * The scoped session is rolled back on error and always removed.
      * ``store_id`` is derived from the string form of the cell, so a
        numeric label no longer raises ``AttributeError``.
    """
    uploadFile = request.files['data_file']
    if not uploadFile:
        return app.response_class(response=json.dumps("No file"),
                                  status=400,
                                  mimetype='application/json')

    colMap = parse_multi_form(request.form)['col']
    df = pd.read_csv(uploadFile, encoding='windows-1252')
    df.drop(df.tail(2).index, inplace=True)
    data = df.to_dict()

    Session = scoped_session(session_factory)
    session = Session()
    try:
        for i in range(len(df.index)):
            valueMapping = {}
            for col in colMap:
                cell = data[colMap[col]][i]
                # NaN is the only value that does not compare equal to itself.
                present = cell == cell
                valueMapping[col] = str(cell) if present else None
                if col == 'custom_label' and present:
                    valueMapping['store_id'] = str(cell).split("-")[0]
            session.execute(insert(Store).values(valueMapping))
            # Committed per row, as before: rows inserted ahead of a failure
            # stay in the table.
            session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        Session.remove()

    return app.response_class(response=json.dumps("OK"),
                              status=200,
                              mimetype='application/json')
def count_for_cates(trainText):
    """Select feature words by mutual information and write them to disk.

    Each line of ``trainText`` is ``"<label> <text>"``. For every word the
    per-class occurrence counts are collected, and ``docCount[c]`` holds the
    total number of words seen in class ``c``. A mutual-information score is
    then computed for every (word, class) pair, and for each class the
    ``5 * num`` best-scoring words are added to the feature set.

    Output: ``featureFile.csv`` with two rows -- the per-class counts, then
    the selected feature words (set order, i.e. unspecified).

    Relies on module-level ``lables``, ``num``, ``doc_dict``, ``lable2id``
    and ``mutual_info``.

    Changes from the previous version:
      * The output file is opened in a ``with`` block so it is always
        flushed and closed.
      * When fewer than ``5 * num`` words exist, all of them are taken
        instead of raising ``IndexError``.
      * The file is written only after all scores are computed, so a failure
        no longer leaves a half-written file.
    """
    docCount = [0] * len(lables)
    wordCount = collections.defaultdict(doc_dict)

    for line in trainText:
        lable, text = line.strip().split(' ', 1)
        index = lable2id(lable)
        for word in text.split(' '):
            wordCount[word][index] += 1
            docCount[index] += 1

    # Mutual information of every word with every class.
    miDict = collections.defaultdict(doc_dict)
    N = sum(docCount)
    for k, vs in wordCount.items():
        word_total = sum(vs)
        for i in range(len(vs)):
            N11 = vs[i]                    # word present, class i
            N10 = word_total - N11         # word present, other classes
            N01 = docCount[i] - N11        # word absent, class i
            N00 = N - N11 - N10 - N01      # word absent, other classes
            mi = (mutual_info(N, N11, N10 + N11, N01 + N11)
                  + mutual_info(N, N10, N10 + N11, N00 + N10)
                  + mutual_info(N, N01, N01 + N11, N01 + N00)
                  + mutual_info(N, N00, N00 + N10, N00 + N01))
            miDict[k][i] = mi

    fWords = set()
    per_class = 5 * num
    for i in range(len(docCount)):
        ranked = sorted(miDict.items(),
                        key=lambda item, i=i: item[1][i],
                        reverse=True)
        # Slicing tolerates a vocabulary smaller than per_class.
        fWords.update(word for word, _ in ranked[:per_class])

    with open('featureFile.csv', 'w') as f2:
        writer2 = csv.writer(f2)
        writer2.writerow(docCount)
        writer2.writerow(fWords)
def test_dictionary_access():
    """Exercise ``tint`` objects as dictionary keys.

    Covers lookup by the original key object, by a plain int and by a fresh
    ``tint`` of equal value, membership in ``dict.keys()``, and checks that
    the keys stored in the dictionary are still tainted ``tint`` instances.
    """
    reset_comparisons()
    assert len(Comparisons) == 0

    first_key = tint(10)
    second_key = tint(123)
    third_key = tint(109324)
    first_value = random.sample(range(100), 10)
    second_value = random.sample(range(100), 10)
    third_value = random.sample(range(100), 10)
    table = {
        first_key: first_value,
        second_key: second_value,
        third_key: third_value,
    }

    # 'in' operator against a list of plain ints
    plain_ints = [1, 2, 3]
    assert tint(2) in plain_ints  # works

    # Lookup by key object, by raw int and by an equal tint.
    assert table[first_key]
    assert table[second_key]
    assert table[10]
    assert table[tint(10)]

    # dict.keys() is a set-like view ('dict_keys').
    keys_view = table.keys
    assert first_key in keys_view()
    assert second_key in keys_view()
    assert third_key in keys_view()
    assert 10 in keys_view()
    assert tint(10) in keys_view()
    assert tint(123) in keys_view()
    assert tint(109324) in keys_view()

    # tint has to be hashable, and stored keys must keep their taint.
    for stored_key in table.keys():
        assert isinstance(stored_key, tint)
        assert stored_key.has_taint()

    for stored_key, _ in table.items():
        assert isinstance(stored_key, tint)
        assert stored_key.has_taint()
def create_connection(self, params):
    """Create a Guacamole user and a connection that user may read.

    Any existing Guacamole user with the same username is deleted first.
    Then, on the ``'guacamole'`` database, this creates the connection, the
    user (password stored as SHA-256 of ``password + HEX(salt)``, with the
    salt hex upper-cased), a READ permission linking the two, and the
    ``hostname`` / ``port`` / ``password`` connection parameters.

    Args:
        params: mapping with the keys ``guacamole_username``,
            ``guacamole_password``, ``guacamole_connection_name``,
            ``guacamole_protocol``, ``vnc_password``, ``container_ip`` and
            ``container_port``.

    Returns:
        ``"ok"`` on success; otherwise the caught exception object (it is
        returned, not raised, so existing callers keep working).

    Raises:
        KeyError: if a required key is missing from ``params`` (the lookups
            happen before the ``try`` block).

    Changes from the previous version:
      * The salt comes from ``os.urandom`` instead of the non-cryptographic
        ``random`` module.
      * The codec name typo ``'utf-8)'`` is corrected to ``'utf-8'``.
      * Failures are logged with a traceback before being returned.

    NOTE(review): the writes are not wrapped in a transaction, so a failure
    part-way through can leave some rows behind -- confirm whether that is
    acceptable.
    """
    import os  # local import: needed only for the salt

    self.logger.info('Attempting to run create_connection_guacamole')
    guacamole_username = params["guacamole_username"]
    guacamole_password = params["guacamole_password"]
    guacamole_connection_name = params["guacamole_connection_name"]
    guacamole_protocol = params["guacamole_protocol"]
    vnc_password = params["vnc_password"]
    container_ip = params["container_ip"]
    container_port = params["container_port"]

    try:
        salt = bytearray(os.urandom(32))
        salt_hex = ''.join('{:02X}'.format(x) for x in salt)
        password_hash = hashlib.sha256(
            (guacamole_password + salt_hex).encode('utf-8')).digest()

        # delete guacamole user
        GuacamoleUser.objects.using('guacamole').filter(
            username=guacamole_username).delete()

        # create new connection
        connection = GuacamoleConnection.objects.using('guacamole').create(
            connection_name=guacamole_connection_name,
            protocol=guacamole_protocol,
            failover_only=0)
        user = GuacamoleUser.objects.using('guacamole').create(
            username=guacamole_username,
            password_salt=salt,
            password_hash=password_hash,
            password_date=timezone.now(),
            disabled=0,
            expired=0)
        GuacamoleConnectionPermission.objects.using('guacamole').create(
            user=user, connection=connection, permission='READ')

        connection_parameters = (
            ("hostname", container_ip),
            ("port", container_port),
            ("password", vnc_password),
        )
        for parameter_name, parameter_value in connection_parameters:
            GuacamoleConnectionParameter.objects.using('guacamole').create(
                connection=connection,
                parameter_name=parameter_name,
                parameter_value=parameter_value)
        return "ok"
    except Exception as e:
        self.logger.exception('create_connection_guacamole failed')
        return e
def is_prime(num):
    """Return True when ``num`` is prime, using odd trial division.

    2 is handled on its own; anything below 2 or even is rejected; the
    remaining candidates are divided by the odd numbers from 3 up to just
    past the square root of ``num``.
    """
    if num == 2:
        return True
    if num < 2 or num % 2 == 0:
        return False
    upper = int(num ** 0.5) + 2
    return all(num % divisor != 0 for divisor in range(3, upper, 2))
def count_for_cates(trainText):
    """Select the file categories most relevant to each label and save them.

    Each item of ``trainText`` is indexed as ``line[0]`` (label) and
    ``line[1]`` (file category). The function counts how often each category
    occurs per label, computes a mutual-information score for every
    (category, label) pair, and for each label adds the 30 best-scoring
    categories to the feature set.

    Output: ``Rec_featureFile.csv`` with two rows -- the per-label counts,
    then the selected categories (set order, i.e. unspecified).

    Relies on module-level ``lables``, ``doc_dict``, ``lable2id`` and
    ``mutual_info``.

    Changes from the previous version:
      * The output file is opened in a ``with`` block so it is always
        flushed and closed.
      * When fewer than 30 categories exist, all of them are taken instead
        of raising ``IndexError``.
      * The file is written only after all scores are computed.
    """
    top_per_label = 30

    pplCount = [0] * len(lables)
    cateCount = collections.defaultdict(doc_dict)

    for line in trainText:
        lable = line[0]
        fileCategory = line[1]
        index = lable2id(lable)
        # occurrences of this category under this label
        cateCount[fileCategory][index] += 1
        # total occurrences under this label
        pplCount[index] += 1

    # Mutual information: how relevant each category is to each label.
    miDict = collections.defaultdict(doc_dict)
    N = sum(pplCount)
    for k, vs in cateCount.items():
        category_total = sum(vs)
        for i in range(len(vs)):
            N11 = vs[i]                     # category present, label i
            N10 = category_total - N11      # category present, other labels
            N01 = pplCount[i] - N11         # category absent, label i
            N00 = N - N11 - N10 - N01       # category absent, other labels
            mi = (mutual_info(N, N11, N10 + N11, N01 + N11)
                  + mutual_info(N, N10, N10 + N11, N00 + N10)
                  + mutual_info(N, N01, N01 + N11, N01 + N00)
                  + mutual_info(N, N00, N00 + N10, N00 + N01))
            miDict[k][i] = mi

    fWords = set()
    for i in range(len(pplCount)):
        ranked = sorted(miDict.items(),
                        key=lambda item, i=i: item[1][i],
                        reverse=True)
        # Slicing tolerates fewer than top_per_label categories.
        fWords.update(category for category, _ in ranked[:top_per_label])

    with open('Rec_featureFile.csv', 'w') as f2:
        writer2 = csv.writer(f2)
        writer2.writerow(pplCount)
        writer2.writerow(fWords)
def plot_results_multiple(predicted_data, true_data, prediction_len):
    """Plot the true series together with each predicted run.

    Run number ``i`` is shifted right by ``i * prediction_len`` points
    (padded with ``None``) so that it starts where it was predicted from.
    Each run must be a list, because it is concatenated to the padding.
    Blocks in ``plt.show()`` until the window is closed.
    """
    figure = plt.figure(facecolor='white')
    axes = figure.add_subplot(111)
    axes.plot(true_data, label='True Data')
    print('yo')
    for run_index, run in enumerate(predicted_data):
        # Leading Nones push the run to its correct x position.
        offset = [None] * (run_index * prediction_len)
        plt.plot(offset + run, label='Prediction')
        plt.legend()
    plt.show()
def predict_sequence_full(model, data, window_size):
    """Predict ``len(data)`` steps, feeding every prediction back in.

    Starts from ``data[0]``; after each step the oldest row of the window is
    dropped and the new prediction is inserted at row index
    ``window_size - 1``. Only the first window of ``data`` is ever read.

    Returns:
        A list with one prediction (``model.predict(...)[0, 0]``) per step.
    """
    window = data[0]
    outputs = []
    for _ in range(len(data)):
        outputs.append(model.predict(window[newaxis, :, :])[0, 0])
        window = np.insert(window[1:], [window_size - 1], outputs[-1], axis=0)
    return outputs
def correction_score(self, words_count, old_sentence, new_sentence):
    """Return the log of the product of per-word correction probabilities.

    For each position in ``new_sentence``:
      * if the new word is a key of ``words_count`` the factor is 0.95;
      * otherwise the factor is
        ``0.05 / (words_count[old_sentence[i]] - 1)``.

    NOTE(review): the original comment described the first case as "the word
    is the same as in the original sentence", but the code tests membership
    in ``words_count`` -- confirm which one is intended.

    Raises:
        ZeroDivisionError: if the looked-up count is 1.
        ValueError: if the product reaches 0 (``math.log`` of zero).
    """
    probability = 1
    for position, word in enumerate(new_sentence):
        if word in words_count:
            factor = 0.95
        else:
            factor = 0.05 / (words_count[old_sentence[position]] - 1)
        probability *= factor
    return math.log(probability)
def gradient_descent_2(alpha, x, y, numIterations):
    """Fit a two-parameter linear model by batch gradient descent.

    Args:
        alpha: learning rate.
        x: design matrix; ``x.shape[0]`` is the sample count and it is
            multiplied with a length-2 parameter vector.
        y: target values, one per sample.
        numIterations: number of update steps.

    Returns:
        The parameter vector after ``numIterations`` updates (starts as
        ones). The cost ``sum(loss**2) / (2 * m)`` is printed at every step.
    """
    sample_count = x.shape[0]
    weights = np.ones(2)
    features_t = x.transpose()
    for step in range(numIterations):
        residual = np.dot(x, weights) - y
        cost = np.sum(residual ** 2) / (2 * sample_count)
        print("iter %s | J: %.3f" % (step, cost))
        slope = np.dot(features_t, residual) / sample_count
        weights = weights - alpha * slope
    return weights
def post_process(self, before):
    """Add a ``timestamp`` to list entries that only carry a ``datetime``.

    If ``before['return']`` is a list, every dict in it that has a
    ``'datetime'`` key but no ``'timestamp'`` key gets
    ``timestamp = float(createTimeStamp(entry['datetime']))``.

    The input is modified in place and the same object is returned.
    """
    after = before
    if 'return' not in after:
        return after
    entries = after['return']
    if not isinstance(entries, list):
        return after
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        if 'datetime' in entry and 'timestamp' not in entry:
            entry['timestamp'] = float(createTimeStamp(entry['datetime']))
    return after
def __init__(self, root, train=True, transform=None, target_transform=None, download=False, classes=None):
    """Load the dataset through the parent class and keep only ``classes``.

    After the parent constructor runs, the train split (when ``self.train``
    is set) or the test split (otherwise) is filtered down to the samples
    whose label is in ``classes``. The data is stored as a NumPy array and
    the labels as a plain list.

    ``classes`` must be an indexable container: it is indexed with ``[0]``
    and ``[-1]`` and used for ``in`` tests, so the default ``None`` does not
    work.

    NOTE(review): for the train split, the integers
    ``500 * classes[0] .. 500 * classes[-1] - 1`` are placed in front of
    both the data and the labels before the real samples. That is kept
    exactly as it was, but it looks like leftover experimental code --
    confirm it is intended.
    """
    super(iCIFAR10, self).__init__(root,
                                   train=train,
                                   classes=classes,
                                   transform=transform,
                                   target_transform=target_transform,
                                   download=download)

    # Select subset of classes
    if self.train:
        prefix = range(500 * classes[0], 500 * classes[-1])
        train_data = list(prefix)
        train_labels = list(prefix)

        kept = [idx for idx in range(len(self.train_data))
                if self.train_labels[idx] in classes]
        train_data.extend(self.train_data[idx] for idx in kept)
        train_labels.extend(self.train_labels[idx] for idx in kept)

        self.train_data = np.array(train_data)
        self.train_labels = train_labels
    else:
        kept = [idx for idx in range(len(self.test_data))
                if self.test_labels[idx] in classes]
        test_data = [self.test_data[idx] for idx in kept]
        test_labels = [self.test_labels[idx] for idx in kept]

        self.test_data = np.array(test_data)
        self.test_labels = test_labels
def maxTurbulenceSize(self, A):
    """Return the length of the longest turbulent subarray of ``A``.

    A subarray is turbulent when the comparison sign between neighbouring
    elements flips at every step (``a < b > c < d ...`` or the mirror
    image). A single element counts as length 1, and the result is also 1
    for an empty list.

    The scan keeps ``anchor``, the start of the current turbulent run. A run
    ends at index ``i`` when ``i`` is the last index or when the signs
    around ``i`` do not alternate; equal neighbours restart the run at ``i``.

    The Python-2-only builtin ``cmp`` (and ``xrange``) are replaced by a
    local three-way comparison and ``range``, so the method runs on Python 3.
    """
    def sign(left, right):
        # -1, 0 or 1: the same contract as the removed builtin cmp().
        return (left > right) - (left < right)

    N = len(A)
    ans = 1
    anchor = 0
    for i in range(1, N):
        c = sign(A[i - 1], A[i])
        if c == 0:
            anchor = i
        elif i == N - 1 or c * sign(A[i], A[i + 1]) != -1:
            ans = max(ans, i - anchor + 1)
            anchor = i
    return ans
def train(self):
    """Accumulate unigram and bigram counts over ``self.sentences``.

    Every sentence is wrapped in ``'<s>'`` / ``'</s>'`` markers **in place**
    (so training twice wraps it twice). Each token adds one to its unigram
    count and to ``self.total``; each adjacent pair adds one to its bigram
    count.
    """
    for sentence in self.sentences:
        sentence.insert(0, '<s>')
        sentence.append('</s>')
        for current, following in zip(sentence, sentence[1:]):
            self.laplaceUnigramCounts[current] += 1
            self.laplaceBigramCounts[(current, following)] += 1
            self.total += 1
        # The pair loop never visits the final token as 'current'.
        self.total += 1
        self.laplaceUnigramCounts[sentence[-1]] += 1
def cpf_funcional():
    """Generate a random CPF number formatted as ``XXX.XXX.XXX-XX``.

    Nine random digits are drawn, then the two check digits are appended.
    Each check digit is ``11 - (weighted_sum % 11)``, replaced by 0 when
    that value is 10 or 11. The weights run downward to 2, starting at 10
    for the first check digit and at 11 for the second.
    """
    def check_digit(current_digits, top_weight):
        weighted = sum(digit * weight
                       for digit, weight
                       in zip(current_digits, range(top_weight, 1, -1)))
        candidate = 11 - weighted % 11
        return 0 if candidate >= 10 else candidate

    digits = [random.randrange(10) for _ in range(9)]
    # first check digit, computed over the nine base digits
    digits.append(check_digit(digits, 10))
    # second check digit, computed over the base digits plus the first one
    digits.append(check_digit(digits, 11))
    return "%d%d%d.%d%d%d.%d%d%d-%d%d" % tuple(digits)
def write_reverse_binary(self, input_file_path: str, output_file_path: str, buffer_size: int):
    """
    Write the bytes of the input file to the output file in reverse order.

    The input is read chunk by chunk from the last chunk to the first, and
    each chunk is reversed before being written, so memory use is bounded by
    one chunk.

    :param input_file_path: the input file path
    :param output_file_path: the output file path
    :param buffer_size: chunk size as a power-of-two exponent; the actual
        chunk is ``1 << buffer_size`` bytes. Validated first by
        ``__check_buffer_size``.
    """
    self.__check_buffer_size(buffer_size)
    chunk_bytes = 1 << buffer_size
    with open(input_file_path, 'rb') as source, \
            open(output_file_path, 'wb') as sink:
        source.seek(0, os.SEEK_END)
        total_bytes = source.tell()
        # Chunk offsets are aligned to the start of the file, so only the
        # final chunk (the first one written) can be short.
        chunk_starts = range(0, total_bytes, chunk_bytes)
        for start in reversed(chunk_starts):
            source.seek(start, os.SEEK_SET)
            chunk = source.read(chunk_bytes)
            sink.write(chunk[::-1])
def predict(featureFile, modelFile, testText):
    """Classify test records with the trained model and count correct ones.

    Each item of ``testText`` is indexed as ``line[0]`` (true label) and
    ``line[1]`` (file category). Scoring starts from the log prior of every
    label (taken from the counts in ``featureFile``); when the category is
    one of the selected features, the log of its per-label model score is
    added. The label with the highest total is the prediction (ties go to
    the lowest index).

    Args:
        featureFile: passed to ``load_feature_words``, which yields the
            per-label counts and the feature set.
        modelFile: passed to ``load_model``, which yields the score table
            indexed as ``scores[category][label_index]``.
        testText: iterable of test records.

    Returns:
        ``(rCount, totCount)``: the number of correct predictions and the
        number of records evaluated. The previous version computed both and
        then discarded them, returning ``None``.

    Raises:
        ValueError: if a label count or a model score is 0 (``math.log``).
    """
    pplCount, features = load_feature_words(featureFile)
    pplCount = list(map(int, pplCount))
    grand_total = sum(pplCount)  # hoisted: was recomputed for every label
    pplScores = [math.log(count * 1.0 / grand_total) for count in pplCount]
    scores = load_model(modelFile)

    rCount = 0
    totCount = 0
    for line in testText:
        lable = line[0]
        fileCategory = line[1]
        index = lable2id(lable)

        preValues = list(pplScores)
        if fileCategory in features:
            for i in range(len(preValues)):
                preValues[i] += math.log(scores[fileCategory][i])

        pIndex = preValues.index(max(preValues))
        if pIndex == index:
            rCount += 1
        totCount += 1
    return rCount, totCount
def _generate_every_dimension_combination(self):
    """Generates every combination of dimension values.

    Example: for two dimensions, with two possible values each:
      [(UK, MALE), (UK, FEMALE), (US, MALE), (US, FEMALE)]

    Returns:
      An iterator over every combination of dimension values. When
      dimensions are defined but none of them has any value, it instead
      yields one empty list per dimension.
    """
    # Count the values across all dimensions (flattened).
    total_values = sum(
        1 for _ in itertools.chain.from_iterable(self.dimension_values.values()))

    # Edge case: dimensions were defined but the report is empty.
    if self.dimension_values.keys() and total_values == 0:
        return ([] for _ in range(len(self.dimension_values)))

    # Normal case:
    return itertools.product(*self.dimension_values.values())
def train_bayes(trainText, featureFile):
    '''
    Train the naive Bayes model on (label, file category) records.

    Each item of ``trainText`` is indexed as ``line[0]`` (label) and
    ``line[1]`` (file category). Only categories present in the feature set
    loaded from ``featureFile`` are counted. For every counted category the
    Laplace-smoothed score per label,

        (count[label] + 1) / (total_for_label + number_of_counted_categories)

    is written to ``Rec_modelFile.csv`` as one row ``[category, scores]``.

    The file is now opened in a ``with`` block, so the rows are flushed and
    the handle is closed even if writing fails part-way.
    '''
    pplCount, features = load_feature_words(featureFile)
    cateCount = collections.defaultdict(doc_dict)
    tCount = [0] * len(pplCount)

    for line in trainText:
        lable = line[0]
        fileCategory = line[1]
        index = lable2id(lable)
        if fileCategory in features:
            tCount[index] += 1
            cateCount[fileCategory][index] += 1

    vocabulary_size = len(cateCount)
    with open('Rec_modelFile.csv', 'w') as csvfile:
        outModel = csv.writer(csvfile)
        # Laplace Smoothing
        for k, v in cateCount.items():
            scores = [(v[i] + 1) * 1.0 / (tCount[i] + vocabulary_size)
                      for i in range(len(v))]
            outModel.writerow([k, scores])
def predict(featureFile, modelFile, testText):
    '''
    Predict the class label of each document; every item of ``testText`` is
    one document given as space-separated words.

    Scoring starts from the log prior of every class (taken from the counts
    in ``featureFile``) and adds the log model score of each word that is a
    selected feature. The class with the highest total wins (ties go to the
    lowest index).

    Returns a list with one label per document, formatted as ``'C<index>'``.
    '''
    docCounts, features = load_feature_words(featureFile)
    docCounts = list(map(int, docCounts))
    priors = [math.log(count * 1.0 / sum(docCounts)) for count in docCounts]
    scores = load_model(modelFile)

    predicted = []
    for line in testText:
        totals = list(priors)
        for word in line.strip().split(' '):
            if word not in features:
                continue
            for cls in range(len(totals)):
                totals[cls] += math.log(scores[word][cls])
        winner = totals.index(max(totals))
        predicted.append('C' + str(winner))
    return predicted
def train_bayes(trainText, featureFile):
    '''
    Train the naive Bayes model on labelled text.

    Each line of ``trainText`` is ``"<label> <text>"``. Only words present
    in the feature set loaded from ``featureFile`` are counted. For every
    counted word the Laplace-smoothed score per class,

        (count[class] + 1) / (feature_words_in_class + number_of_counted_words)

    is written to ``modelFile.csv`` as one row ``[word, scores]``.

    The file is now opened in a ``with`` block, so the rows are flushed and
    the handle is closed even if writing fails part-way.
    '''
    docCounts, features = load_feature_words(featureFile)
    wordCount = collections.defaultdict(doc_dict)
    tCount = [0] * len(docCounts)

    for line in trainText:
        lable, text = line.strip().split(' ', 1)
        index = lable2id(lable)
        for word in text.split(' '):
            if word in features:
                tCount[index] += 1
                wordCount[word][index] += 1

    vocabulary_size = len(wordCount)
    with open('modelFile.csv', 'w') as csvfile:
        outModel = csv.writer(csvfile)
        # Laplace Smoothing
        for k, v in wordCount.items():
            scores = [(v[i] + 1) * 1.0 / (tCount[i] + vocabulary_size)
                      for i in range(len(v))]
            outModel.writerow([k, scores])
def lable2id(lable):
    """Return the index of ``lable`` in the module-level ``lables`` list.

    The first matching position wins. Returns ``None`` when the label is
    not present.
    """
    for position, known in enumerate(lables):
        if lable == known:
            return position
    return None