def svoExecute(fpathL1, fpathR1, fpathL2, fpathR2):
    """Estimate the camera pose between two consecutive stereo frames.

    The previous stereo pair (``fpathL1``/``fpathR1``) is matched left-to-right
    and triangulated; the previous left image is then matched against the next
    left image (``fpathL2``), and the merged 3D/2D correspondences are handed
    to ``PnP.camPose``.

    Returns:
        A tuple ``(rot, trans, x3dprev_final)`` where ``rot`` and ``trans``
        come from ``PnP.camPose`` and ``x3dprev_final`` is the first value
        returned by ``idxMerge``.
    """
    # Initialize: load both stereo pairs (the next right image is loaded but
    # not used further here).
    imLprev, imRprev = openImg.getImgs(fpathL1, fpathR1)
    imLnext, imRnext = openImg.getImgs(fpathL2, fpathR2)
    size = [2, 2]

    # Key points of the previous stereo pair.
    kpLprev, descLprev = features.getFeatures(imLprev, size)
    kpRprev, descRprev = features.getFeatures(imRprev, size)

    # Stereo (left/right) correspondences of the previous frame.
    stereo = features.getCorres(descLprev, descRprev, kpLprev, kpRprev)
    corLprev_St, corRprev_St, matchKeptL_St, _matchKeptR_St = stereo

    # Triangulate; inputs must be of the form 2 x N.
    x3dprev, pErrPrev, matchKept_3d = triangulation.triangulate(
        corLprev_St, corRprev_St)

    # Temporal (previous-left/next-left) correspondences.
    kpLnext, descLnext = features.getFeatures(imLnext, size)
    temporal = features.getCorres(descLprev, descLnext, kpLprev, kpLnext)
    _corLprev_T, corLnext_T, matchKeptPrev_T, matchKeptNext_T = temporal

    x3dprev_final, x2dnext_final = idxMerge(
        matchKeptL_St, matchKept_3d, x3dprev,
        corLnext_T, matchKeptPrev_T, matchKeptNext_T)

    # Updated camera pose.
    rot, trans = PnP.camPose(x3dprev_final, x2dnext_final, pErrPrev)
    return (rot, trans, x3dprev_final)
def imageToImage(images, paths, keypoint_type, descriptor_type,
                 score_fun = lambda i,s,u : numpy.mean(s)) :
    """ Compare every image with every other image, generating a few
        different scores

        input: images [List of nparrays] all the images (not read by this
                      function; features are extracted from `paths`)
               paths [List of Strings] paths of all the images
               keypoint_type [String] e.g "SURF" or "ORB" etc
               descriptor_type [String] e.g "SURF" or "ORB" etc
               score_fun [(list(int), list(int/float), list(float)) -> float]
                         The score function takes a list of indices, a list
                         of scores (distance between two descriptors), a list
                         of scores (uniqueness of best/second best score) and
                         returns a floating point number.

        output: whatever `matchDescriptors` returns -- per the original
                notes, a list of (boolean, score) where the boolean is true
                if the images were of the same person and false if not
    """
    # Extract features one path at a time; each result is an
    # (indices, keypoints, descriptors) triple.
    per_image = []
    for path in paths :
        per_image.append(f.getFeatures([path], keypoint_type, descriptor_type))

    # Regroup the triples column-wise; only the descriptors are needed.
    _indices, _keypoints, descriptors = zip(*per_image)

    # Return the scores labeled with a boolean to indicate if they are of
    # the same set
    return matchDescriptors(descriptors, paths, descriptor_type, score_fun)
def main():
    """Run a live preview that highlights the trained logo in camera frames.

    Features are extracted once from ``logo_train.png``; every captured frame
    is then passed to ``features.detectFeatures`` and, when a region is
    returned, its rotated bounding box is drawn. An FPS estimate is refreshed
    every 100 frames. The loop ends when no frame can be read or Escape
    (key code 27) is pressed.

    Raises:
        FileNotFoundError: if ``logo_train.png`` cannot be read.
    """
    video_src = -1
    cam = cv2.VideoCapture(video_src)
    try:
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

        # get train features
        img = cv2.imread('logo_train.png')
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError("could not read 'logo_train.png'")
        train_features = features.getFeatures(img)

        cur_time = timeit.default_timer()
        frame_number = 0
        scan_fps = 0
        while True:
            frame_got, frame = cam.read()
            if not frame_got:
                break

            frame_number += 1
            if frame_number % 100 == 0:
                now = timeit.default_timer()
                scan_fps = 1 / ((now - cur_time) / 100)
                cur_time = now

            region = features.detectFeatures(frame, train_features)

            cv2.putText(frame, f'FPS {scan_fps:.3f}', org=(0, 50),
                        fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL,
                        fontScale=1, color=(0, 0, 255))

            if region is not None:
                box = cv2.boxPoints(region)
                # np.int0 was an alias of np.intp and is gone in NumPy 2.0.
                box = np.intp(box)
                cv2.drawContours(frame, [box], 0, (0, 255, 0), 2)

            cv2.imshow("Preview", frame)
            if cv2.waitKey(10) == 27:
                break
    finally:
        # Always give the camera and the preview window back.
        cam.release()
        cv2.destroyAllWindows()
def match(paths, options = None) :
    """ Build a threshold-driven matcher based on a pruned, clustered
        weight matrix.

        input: paths   passed on to `features.getFeatures`
               options [dict or None] optional settings; recognised keys are
                       "prune_fun", "prune_limit", "descriptor_type",
                       "verbose", "split_limit" and "cluster_prune_limit".
                       The dict is also forwarded to `features.getFeatures`.

        output: a function `threshold -> (matches, ratios, scores)`; all
                three are empty lists when nothing matches.
    """
    # A fresh dict per call: the options are forwarded to other modules, so a
    # shared default instance could leak state between calls.
    if options is None :
        options = {}

    # Get parameters
    prune_fun = options.get("prune_fun", weightMatrix.pruneThreshold)
    prune_limit = options.get("prune_limit", 2.5)
    descriptor_type = options.get("descriptor_type", "SIFT")
    verbose = options.get("verbose", False)
    split_limit = options.get("split_limit", 50)
    cluster_prune_limit = options.get("cluster_prune_limit", 1.5)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, options)

    # Calculate weight matrix
    weights = weightMatrix.init(ds, descriptor_type)

    # Get cluster weights
    cluster_weights = prune_fun(weights, prune_limit)

    # Cluster graph
    partitions = cluster(cluster_weights, indices,
                         split_limit = split_limit,
                         prune_limit = cluster_prune_limit,
                         verbose = verbose)
    if verbose :
        print("%i partitions" % len(set(partitions)))

    def match_fun(threshold) :
        match_data = list(getPartitionMatches(partitions, cluster_weights,
                                              weights, indices, threshold))
        if not match_data :
            return [], [], []
        match_ind, ratios, scores = zip(*match_data)
        # Get positions
        matches = [getMatchPosition(m_i, m_j, ks) for (m_i, m_j) in match_ind]
        return matches, ratios, scores

    return match_fun
def match(paths, options = None) :
    """ Build a threshold-driven matcher between the features of image 0 and
        image 1.

        input: paths   passed on to `features.getFeatures`
               options [dict or None] optional settings; recognised keys are
                       "descriptor_type" and "use_ball_tree". The dict is
                       also forwarded to `features.getFeatures`.

        output: a function `threshold -> (matches, ratios, scores)` keeping
                only matches whose uniqueness ratio is below the threshold;
                all three are empty lists when nothing passes.
    """
    # A fresh dict per call: the options are forwarded to other modules, so a
    # shared default instance could leak state between calls.
    if options is None :
        options = {}

    descriptor_type = options.get("descriptor_type", "SIFT")
    use_ball_tree = options.get("use_ball_tree", False)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, options)

    # Select the features of each image once.
    # assumes `indices`, `ks` and `ds` support boolean-mask indexing
    # (numpy arrays) -- TODO confirm
    in_first = indices == 0
    in_second = indices == 1
    ds_first, ds_second = ds[in_first], ds[in_second]

    # Match descriptors of image 0 against those of image 1
    matcher = features.ballMatch if use_ball_tree else features.bfMatch
    ii, ss, uu = matcher(descriptor_type, ds_first, ds_second)

    # Get all positions
    pos_im1 = features.getPositions(ks[in_first])
    pos_im2 = features.getPositions(ks[in_second])

    # Define a function that given a threshold returns a set of matches
    def match_fun(threshold) :
        match_data = [(numpy.array((pos_im1[i], pos_im2[j])), uu[i], ss[i])
                      for i, j in enumerate(ii) if uu[i] < threshold]
        if not match_data :
            return [], [], []
        matches, ratios, scores = zip(*match_data)
        return matches, ratios, scores

    return match_fun
def match(paths, options = None) :
    """ Match feature points by clustering a pruned weight matrix.

        input: paths   passed on to `features.getFeatures`
               options [dict or None] optional settings; recognised keys are
                       "prune_fun", "prune_limit", "min_edges",
                       "min_coherence", "keypoint_type", "descriptor_type",
                       "verbose", "split_limit" and "cluster_prune_limit".

        output: list with one `getMatchPosition` result per match found by
                `getPartitionMatches`.
    """
    if options is None :
        options = {}

    # Get parameters
    # NOTE(review): other matchers in this code base spell this
    # `weightMatrix.pruneThreshold`; confirm `pruneTreshold` exists in the
    # weightMatrix module this file is used with.
    prune_fun = options.get("prune_fun", weightMatrix.pruneTreshold)
    prune_limit = options.get("prune_limit", 3)
    min_edges = options.get("min_edges", 1)
    min_coherence = options.get("min_coherence", -1.0)
    keypoint_type = options.get("keypoint_type", "ORB")
    descriptor_type = options.get("descriptor_type", "BRIEF")
    verbose = options.get("verbose", False)
    split_limit = options.get("split_limit", 999999)
    cluster_prune_limit = options.get("cluster_prune_limit", 1.5)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths,
                                           keypoint_type = keypoint_type,
                                           descriptor_type = descriptor_type)

    # Calculate weight matrix (hamming distances)
    weights = weightMatrix.init(ds, descriptor_type)

    # Get cluster weights
    cluster_weights = prune_fun(weights, prune_limit)

    # Cluster graph
    partitions = cluster(cluster_weights, indices,
                         split_limit = split_limit,
                         prune_limit = cluster_prune_limit,
                         verbose = verbose)
    if verbose :
        print("%i partitions" % len(set(partitions)))

    # Get matches
    matches = getPartitionMatches(partitions, cluster_weights, indices,
                                  min_edges, min_coherence)

    # Find their positions
    return [getMatchPosition(m_i, m_j, ks) for (m_i, m_j) in matches]
def predict(doc2vec, data, output, mlp=None):
    """ Answer Reranking with rank ~ cosine(q_i, a_i)^(-1)

    For every (question, comments) pair in `data`, score each comment with
    `predictAux`, rank the comments by descending score, and write one
    tab-separated line per comment to `output`, in the original comment
    order: question id, comment id, rank (1 = best), score, prediction.

    data : zip(questions, commentsL) ... see 'constructData'
    """
    # `with` guarantees the file is closed even if scoring raises midway.
    with open(output, 'w') as out:
        for q, cl in data:
            scores = []
            q_w = preprocessor(q[1])
            q_v = doc2vec.infer_vector(q_w)
            ac_v = getAverageCV(doc2vec, cl)
            for j, c in enumerate(cl):
                c_w = preprocessor(c[1])
                c_v = doc2vec.infer_vector(c_w)
                f_v = getFeatures(doc2vec, q_w, c_w,
                                  { 'qid' : q[0], 'cid' : c[0], 'rank' : j })
                f_v.extend([cosine(q_v, c_v),
                            cosine(q_v, ac_v),
                            cosine(c_v, ac_v)])
                score, pred = predictAux(q_v, c_v, ac_v, f_v, mlp)
                # [score, comment position, rank (filled in below), label]
                scores.append([score, j, 0, pred])

            # Assign ranks by descending score. The entries are shared with
            # `scores`, which stays in comment order, so no second sort is
            # needed to restore that order.
            ranked = sorted(scores, key=lambda entry: entry[0], reverse=True)
            for rank, entry in enumerate(ranked, start=1):
                entry[2] = rank

            for entry in scores:
                out.write('\t'.join([
                    q[0], cl[entry[1]][0], str(entry[2]), str(entry[0]),
                    entry[3]
                ]))
                out.write('\n')
def match_speed(paths, options = None) :
    """ Build a threshold-driven matcher on top of a ball tree over all
        descriptors.

        input: paths   passed on to `features.getFeatures`
               options [dict or None] optional settings; recognised keys are
                       "keypoint_type", "descriptor_type", "leaf_size",
                       "radius_size", "dist_threshold" and
                       "shuffle_keypoints".

        output: a function `ratio_threshold -> (positions, s, u)` keeping the
                precomputed matches whose `u` is below the threshold; all
                three are empty lists when nothing passes.
    """
    if options is None :
        options = {}

    keypoint_type = options.get("keypoint_type", "SIFT")
    descriptor_type = options.get("descriptor_type", "SIFT")
    leaf_size = options.get("leaf_size", 2)
    radius_size = options.get("radius_size", 300)
    dist_threshold = options.get("dist_threshold", 100)
    shuffle_keypoints = options.get("shuffle_keypoints", False)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, keypoint_type,
                                           descriptor_type, shuffle_keypoints)

    # Construct ball tree
    bt = BallTree(ds, leaf_size=leaf_size)

    # Filter nodes
    ns = filterNodes(bt, radius_size)

    # Get matches (computed once; the returned function only filters them)
    match_data = list(getMatches(bt, ns, indices, ks, ds, dist_threshold))

    def match_fun(ratio_threshold) :
        matches = [(pos, s, u) for pos, s, u in match_data
                   if u < ratio_threshold]
        if not matches :
            return [], [], []
        # Materialise the result: on Python 3 a bare zip() is a one-shot
        # iterator, unlike the tuple returned on the empty branch.
        return tuple(zip(*matches))

    return match_fun
def getFeatures(paths, filter_features, options) :
    """ Retrieves features and filters them

        input: paths, options    passed on to `features.getFeatures`
               filter_features   positions of the feature points to drop;
                                 when empty, nothing is filtered

        output: (indices, ks, ds) as returned by `features.getFeatures`,
                with the listed positions removed from all three
    """
    feature_points = features.getFeatures(paths, options)

    # numpy.size treats [], () and empty arrays alike, whereas `== []` only
    # recognised an empty list (an empty tuple would have masked everything).
    if numpy.size(filter_features) == 0 :
        return feature_points

    # Keep-mask: True everywhere except at the filtered positions. The
    # builtin `bool` is used because `numpy.bool` is missing on
    # NumPy 1.24-1.26.
    keep = numpy.ones(len(feature_points[0]), dtype = bool)
    keep[filter_features] = False

    indices = feature_points[0][keep]
    ks = feature_points[1][keep]
    ds = feature_points[2][keep]
    return indices, ks, ds
def getCorrespondences(paths, homography, keypoint, descriptor, distance_threshold) :
    """ Count the keypoint pairs between image 0 and image 1 that agree with
        a homography.

        Every keypoint position of image 0 is paired with every keypoint
        position of image 1; a pair counts when
        `matchDistance(p1, p2, homography)` is at most `distance_threshold`.
        The label of the first path is written to stdout as a progress
        marker (no newline).

        output: [int] number of acceptable pairs
    """
    import sys

    # Progress marker without a trailing newline. The original relied on the
    # Python 2 `print ...,` form, which prints a newline on Python 3.
    sys.stdout.write("%s " % features.getLabel(paths[0]))

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, {
        "keypoint_type" : keypoint,
        "descriptor_type" : descriptor
    })

    # Get all positions
    pos_im1 = features.getPositions(ks[indices == 0])
    pos_im2 = features.getPositions(ks[indices == 1])

    # For all possible combinations, check if the match is acceptable
    correspondences = sum(
        1 for (p1, p2) in itertools.product(pos_im1, pos_im2)
        if matchDistance(p1, p2, homography) <= distance_threshold)
    return correspondences
def scoreImages(paths, cluster_edges = 3, score_edges = 40, size = 36,
                withGeometry = True, withCertainty = True,
                cluster_prune = weightMatrix.pruneHighest,
                score_prune = weightMatrix.pruneThreshold,
                normalize = True, score_type = scoreWeights) :
    """ Given paths to two images, the images are scored based on how well
        their traits match

        The descriptors are turned into a weight matrix, pruned twice (for
        scoring and for clustering), clustered with `louvain.cluster` and
        scored per partition. The summed score is optionally weighted by a
        geometric multiplier (`withGeometry`) and a certainty factor
        (`withCertainty`).

        output: [float] the final score, or 0.0 when `features.getFeatures`
                returns no descriptors
    """
    default_val = 0.0

    # Get features
    indices, keypoints, descriptors = features.getFeatures(paths, size=size)

    # Identity test: `== None` on a numpy array compares elementwise and
    # cannot be used as a condition.
    if descriptors is None :
        print(paths)
        print("No descriptors found. Returning score %i" % default_val)
        return default_val

    # Get weights
    full_weights = weightMatrix.init(descriptors)
    score_weights = score_prune(full_weights, score_edges)
    cluster_weights = cluster_prune(score_weights, cluster_edges)

    # Cluster graph
    partitions = louvain.cluster(cluster_weights)

    # Match the traits
    scores, partition_indices = score_type(
        score_weights, partitions, indices,
        scoring = lambda m,c : m + c if c > 0 else 0.0,
        normalize = normalize)

    # Get the geometric multiplier
    geom_multiplier = geometryMultiplier(partitions, partition_indices,
                                         numpy.array(indices), keypoints)

    # Get the certainty factor
    certainty_factor = certaintyFactor(len(partition_indices)) if withCertainty else 1.0

    if withGeometry :
        score_sum = sum(s*m for s,m in zip(geom_multiplier, scores))
    else :
        score_sum = sum(scores)

    # Get final score by multiplying certainty
    final_score = score_sum * certainty_factor

    # Get labels and print
    [l1, l2] = [features.getLabel(p) for p in paths]
    print("Score: %0.4f for %s and %s (clusters: %i)" % (final_score,l1,l2, len(partition_indices)))
    return final_score
def match(paths, options = None) :
    """ Build a threshold-driven matcher between the features of image 0 and
        image 1 using `features.bfMatch`.

        input: paths   passed on to `features.getFeatures`
               options [dict or None] forwarded to `features.getFeatures`

        output: a function `threshold -> (matches, s_values, u_values)`
                keeping the matches whose `u` is below the threshold; all
                three are empty lists when nothing passes.
                NOTE(review): the filtered quantity `u` is returned third
                here; confirm that callers expect this order.
    """
    # A fresh dict per call: the options are forwarded to another module, so
    # a shared default instance could leak state between calls.
    if options is None :
        options = {}

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, options)

    # Select the features of each image once.
    in_first = indices == 0
    in_second = indices == 1

    # Match descriptors; each item unpacks as ((i, j), s, u)
    match_data = features.bfMatch(ds[in_first], ds[in_second])

    # Get all positions
    pos_im1 = features.getPositions(ks[in_first])
    pos_im2 = features.getPositions(ks[in_second])

    # Define a function that given a threshold returns a set of matches
    def match_fun(threshold) :
        kept = [(numpy.array((pos_im1[i], pos_im2[j])), s, u)
                for (i, j), s, u in match_data if u < threshold]
        if not kept :
            return [], [], []
        matches, s_values, u_values = zip(*kept)
        return matches, s_values, u_values

    return match_fun
def match_radius(paths, options = None) :
    """ Build a threshold-driven matcher that boosts the ratio of matches
        lying in crowded descriptor neighbourhoods.

        input: paths   passed on to `features.getFeatures`
               options [dict or None] optional settings; recognised keys are
                       "leaf_size", "radius_size", "ratio_boost" and
                       "group_limit". The dict is also forwarded to
                       `features.getFeatures`.

        output: a function `ratio_threshold -> (positions, s, u, group_size)`
                keeping the precomputed matches whose (possibly boosted) `u`
                is below the threshold; all four are empty lists when nothing
                passes.
    """
    # A fresh dict per call: the options are forwarded to another module, so
    # a shared default instance could leak state between calls.
    if options is None :
        options = {}

    leaf_size = options.get("leaf_size", 10)
    radius_size = options.get("radius_size", 300)
    ratio_boost = options.get("ratio_boost", 1.0)
    group_limit = options.get("group_limit", 5)

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, options)

    # Construct ball tree
    bt = BallTree(ds, leaf_size=leaf_size)

    # Query function for ball tree
    def query_all() :
        max_index = indices.max()
        for i, descriptor in enumerate(ds) :
            # Features carrying the highest image index are never queried.
            if indices[i] >= max_index :
                continue

            # Number of descriptors within the radius of this one.
            # NOTE(review): a 1-D query point is passed to query_radius here;
            # confirm the BallTree in use accepts that.
            neighbours = numpy.array(bt.query_radius(descriptor, r=radius_size)[0])
            group_size = len(neighbours)

            # Get unique match (the index pair is not needed here)
            for (_src, _dst), m, s, u in query_unique(bt, i, descriptor, indices, ks) :
                if group_size >= group_limit :
                    yield m, s, u*ratio_boost, group_size
                else :
                    yield m, s, u, group_size

    # Get matches (computed once; the returned function only filters them)
    match_data = list(query_all())

    def match_fun(ratio_threshold) :
        matches = [(pos, s, u, g) for pos, s, u, g in match_data
                   if u < ratio_threshold]
        if not matches :
            return [], [], [], []
        # Materialise the result: on Python 3 a bare zip() is a one-shot
        # iterator, unlike the tuple returned on the empty branch.
        return tuple(zip(*matches))

    return match_fun
def trainNN(doc2vec, data):
    """ Train MLP

    When `data` is given, one training row is built per (question, comment)
    pair -- normalised question vector, normalised comment vector, average
    comment vector and extra features -- and the rows are cached in
    'out/trainNN.npz'. When `data` is None the cached rows are loaded
    instead. Returns the fitted classifier.
    """
    mlp = MLPClassifier(
        solver=param['solver'],
        hidden_layer_sizes=param['hidden'],
        activation=param['activation'],
        learning_rate='adaptive',
        early_stopping=False,
        random_state=1,
        max_iter=1000,
        verbose=True,
    )

    if data is None:
        # Reuse the rows cached by an earlier run.
        cached = np.load('out/trainNN.npz')
        X = cached['x']
        Y = cached['y']
    else:
        X, Y = [], []
        for question, comments in data:
            q_w = preprocessor(question[1])
            q_v = doc2vec.infer_vector(q_w)
            q_v /= norm(q_v)
            ac_v = getAverageCV(doc2vec, comments)
            for position, comment in enumerate(comments):
                c_w = preprocessor(comment[1])
                c_v = doc2vec.infer_vector(c_w)
                c_v /= norm(c_v)
                meta = {'qid': question[0], 'cid': comment[0], 'rank': position}
                f_v = getFeatures(doc2vec, q_w, c_w, meta)
                f_v.extend([
                    cosine(q_v, c_v),
                    cosine(q_v, ac_v),
                    cosine(c_v, ac_v),
                ])
                # Row layout: question | comment | average comment | features
                vectors = np.append(q_v, c_v)
                context = np.append(ac_v, f_v)
                X.append(np.append(vectors, context))
                Y.append(transformLabel(comment[2]))
        np.savez('out/trainNN.npz', x=X, y=Y)

    mlp.fit(X, Y)
    return mlp
def cnn(self, algo, max_words=3000, feats=False, chi2=False):
    """Train and evaluate a small 1-D convolutional classifier.

    The input texts are tokenised into a binary bag-of-words matrix,
    optionally widened with extra feature columns, split 80/20 into
    train/test sets, and fed to an Embedding -> Conv1D -> MaxPooling1D ->
    Flatten -> Dense model. Accuracy and a classification report for the
    held-out split are printed.

    Args:
        algo: passed on to ``self.splitData``.
        max_words: vocabulary size handed to the tokenizer.
        feats: when true, append the columns from ``getFeatures`` /
            ``getFeatureschi2`` to the bag-of-words matrix.
        chi2: selects ``getFeatureschi2`` instead of ``getFeatures``
            (only read when ``feats`` is true).
    """
    print('type == ', algo, feats, 'chi2=', str(chi2), max_words)
    thedata, emb_size = self.splitData(self.input_data, algo)
    if self.clas in [5, 8, 9]:
        # NOTE(review): `testset` is not used afterwards, but this branch
        # does replace `emb_size` -- confirm that is intended.
        testset = rc.set_input_data(None, (self.clas * 10 + 1), clas=self.clas)
        testset, emb_size = self.splitData(testset, algo)
    print('input len:', len(thedata))

    tokenizer = Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(thedata)
    self.dictionary = tokenizer.word_index
    vocab_size = len(tokenizer.word_index) + 1

    # Kept as a plain list of sequences: wrapping ragged rows in np.asarray
    # fails on current NumPy, and sequences_to_matrix only needs to iterate.
    allWordIndices = [self.convert_text_to_index_array(text)
                      for text in thedata]

    mode = "binary"
    print('mode', mode)
    train_x = tokenizer.sequences_to_matrix(allWordIndices, mode=mode)

    if feats:
        if chi2:
            featus = getFeatureschi2(self.corpus, clas=self.clas)
        else:
            featus = getFeatures(self.corpus, clas=self.clas)
        print('Stats::', featus.shape)
        print('Stats::', train_x.shape)
        train_x = np.hstack((train_x, featus))
        print('Stats::', featus.shape)
        print('Stats::', train_x.shape)

    train_y = [self.c[label] for label in self.output_data]
    train_y = keras.utils.to_categorical(train_y, self.nb_classes)
    X_train, X_test, Y_train, Y_test = train_test_split(
        train_x, train_y, test_size=0.2, shuffle=True)
    input_size = len(train_x[0])

    # Single hyper-parameter configuration. The model code below used to
    # reference names (nb, a, l, o, b, epoch, vs) that were never bound.
    validation_split = 0.1
    batch_size = 200
    nb_filters = 50
    nb_epoch = 5
    activation = 'relu'
    optimizer = 'adam'
    loss = 'mse'

    model = Sequential()
    print('emb_size', emb_size)
    model.add(Embedding(vocab_size, emb_size, input_length=input_size))
    model.add(Conv1D(nb_filters, activation=activation,
                     kernel_size=self.nb_classes,
                     input_shape=(input_size, 1)))
    model.add(MaxPooling1D(self.nb_classes))
    model.add(Flatten())
    model.add(Dense(self.nb_classes, activation='sigmoid'))
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch,
              verbose=1, validation_split=validation_split)

    print('evaluation')
    score = model.evaluate(X_test, Y_test, verbose=0)
    print('acc', score[1])
    Yt_test = np.argmax(Y_test, axis=1)  # Convert one-hot to index
    # Sequential.predict_classes no longer exists in current Keras; argmax
    # over the per-class outputs yields the class indices.
    y_pred = np.argmax(model.predict(X_test), axis=1)
    print(classification_report(Yt_test, y_pred, digits=4))
    print('done')
df_train = preprocessing.prep_trainset(df_train)  # Training set preprocessing
df_descr = preprocessing.prep_descr(df_descr, df_train)  # Product descriptions preprocessing
df_attr = preprocessing.prep_attr(df_attr, df_train)  # Product attributes preprocessing

# Optional checkpoints for the preprocessed frames:
# df_train.to_pickle('df_train_prep.pkl')
# df_descr.to_pickle('df_descr_prep.pkl')
# df_attr.to_pickle('df_attr_prep_new1.pkl')
# df_train = pd.read_pickle('df_train_prep.pkl')
# df_descr = pd.read_pickle('df_descr_prep.pkl')
# df_attr = pd.read_pickle('df_attr_prep.pkl')

# Phase 2 Feature engineering
# df_train, df_similarities, df_fuzzy = features.feature_engineering(df_train, df_descr, df_attr)
df_train, df_train2 = features.getFeatures(df_train, df_descr, df_attr)  # df_train2 stored for modelling phase
df_train, df_similarities = features.similarityMetrics(df_train)
# The modelling phase below consumes df_fuzzy, so it has to be produced here.
# NOTE(review): this step was commented out; if df_fuzzy is meant to come
# from the pickle checkpoint instead, enable that line and drop this one.
df_fuzzy = features.fuzzy(df_train)

# Optional checkpoints for the engineered features:
# df_train.to_pickle('df_train_feat.pkl')
# df_similarities.to_pickle('df_similarities_feat.pkl')
# df_fuzzy.to_pickle('df_fuzzy_feat.pkl')
# df_train = pd.read_pickle('df_train_feat.pkl')
# df_similarities = pd.read_pickle('df_similarities_feat.pkl')
# df_fuzzy = pd.read_pickle('df_fuzzy_feat.pkl')

# Phase 3 Modelling
modeling.run(df_train2, df_similarities, df_fuzzy)
import pdb
import sys

import numpy as np

import features
import openImg
import triangulation

# Scratch script: triangulate the stereo correspondences of the first image
# pair(s) and drop into the debugger to inspect the resulting 3D points.
leftpaths, rightpaths = openImg.getFilenames()

x3dChunks = []
for m in range(1):
    # get features
    imgL, imgR = openImg.getImgs(leftpaths[m], rightpaths[m])
    kp1, desc1 = features.getFeatures(imgL, [2, 2])
    kp2, desc2 = features.getFeatures(imgR, [2, 2])
    leftCorres, rightCorres, leftCorresidx, rightCorresidx = features.getCorres(
        desc1, desc2, kp1, kp2)
    x3dSave, perrFin, idxSave = triangulation.triangulate(leftCorres, rightCorres)
    # Collect per-pair results and join them once after the loop instead of
    # re-concatenating on every iteration.
    x3dChunks.append(x3dSave)
    print(m)

# A single chunk is used as-is; several are joined along axis 1.
x3d = x3dChunks[0] if len(x3dChunks) == 1 else np.concatenate(x3dChunks, axis=1)

# Deliberate breakpoint: inspect x3d and the intermediates interactively.
pdb.set_trace()
x3d = x3d.transpose()
# Load the flight-delays data set, skipping malformed rows.
# `on_bad_lines='skip'` replaces `error_bad_lines=False`, which no longer
# exists in pandas 2.x.
rawdata = pd.read_csv('../kaggle_datasets/flight-delays/flights.csv',
                      encoding='latin-1', on_bad_lines='skip')
name = "flight-delays"
rawdata.info()
rawdata.isnull().sum()

# In[86]:

drop = "AIRLINE_DELAY"  # target column
encoded = rawdata.copy(deep=True)
#encoded = encoded.fillna(0)
#encoded.dropna(axis=0, inplace=True)

# Replace every object-typed column by its integer category codes.
for column in encoded:
    if encoded[column].dtype == "object":
        encoded[column] = encoded[column].astype('category').cat.codes

# Cap the working set at 25000 randomly sampled rows.
if encoded.shape[0] > 25000:
    encoded = encoded.sample(n=25000, axis=0)
encoded.info()

# In[87]:

Y = encoded[drop]
X = encoded.drop([drop], axis=1)
Y = Y.to_numpy()
X = X.to_numpy()
vec = features.getFeatures(X, Y, name)
features.serialize(name, vec)
vec
(x_train, y_train), (x_test, y_test), preproc = text.texts_from_array( x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test, class_names=listclasses, preprocess_mode='bert', maxlen=200, max_features=15000) if feat: if chi2: featus = getFeatureschi2(corpus, clas=clas) else: featus = getFeatures(corpus, clas=clas) featus = featus.tolist() [ x_train[0][x].tolist().extend(featus[x]) for x in range(0, split - 1) ] [ x_train[1][x].tolist().extend(featus[x]) for x in range(0, split - 1) ] [ x_test[0][x - split].tolist().extend(featus[x]) for x in range(split, len(featus)) ] [
import features
import cv2
import params

# Scratch script: extract features from the first left/right frame and drop
# into the debugger to inspect the correspondences.
img1 = cv2.imread(params.IMGLOCATION + '/image_02/data/0000000000.png', 0)
img2 = cv2.imread(params.IMGLOCATION + '/image_03/data/0000000000.png', 0)
# Same frame as img1, read with the default flags. The path needs the same
# leading '/' as the two reads above, otherwise the directory and file names
# run together and imread returns None.
imgout = cv2.imread(params.IMGLOCATION + '/image_02/data/0000000000.png')

size = [3, 3]
kp1, desc1 = features.getFeatures(img1, size)
kp2, desc2 = features.getFeatures(img2, size)
leftCorres, rightCorres, leftCorresidx, rightCorresidx = features.getCorres(
    desc1, desc2, kp1, kp2)

#img3 = cv2.drawMatchesKnn(img1,kp1,img2,kp2,corres,imgout,flags=2)
#cv2.imshow("Display",img3)
#cv2.waitKey(0)

# View features
import pdb
pdb.set_trace()

#kpimg = cv2.drawKeypoints(img1,kp1,imgout)
#cv2.imshow("Display Window",kpimg)
#cv2.waitKey(0)
'''
Label the blobs using a previously trained model

Gary Bishop July 2018
'''

import pandas as pd
import Args
import pickle
from features import getFeatures

args = Args.Parse(inblobs='output.blobs.bz2',
                  outblobs='output.labeled.bz2',
                  model='models/LR1.pkl')

data = pd.read_pickle(args.inblobs)

# NOTE: unpickling executes arbitrary code -- only load model and blob files
# from a trusted source.
with open(args.model, 'rb') as model_file:
    model = pickle.load(model_file)

features = getFeatures(data)
labels = model.predict(features)

# Item assignment creates the column when it does not exist yet; attribute
# assignment would only set a Python attribute that is not a column.
data['isdot'] = labels
data.to_pickle(args.outblobs)
from pydriller import RepositoryMining, GitRepository import datetime from splclassifier import SPLClassifier #from manualcommits import getManualResultsKconfig, getMakeFileResultsManual, getAMFileResultsManual from features import getLinuxF, getFeatures from getCommitsLinux import getLinuxCommits from getCommits import getListCommits import re dt1 = datetime.datetime(2017, 3, 8, 0, 0, 0) dt2 = datetime.datetime(2017, 12, 31, 0, 0, 0) #listaCommitsLinux = getLinuxCommits() features = getFeatures() arq = open('saida_rc_errados.csv','w') listaCommits = getListCommits() ''' fileKind = sys.argv[1] if(fileKind == 'makefile'): print("SOU MAKEFILE") listaCommits = getListCommits() features = getFeatures() arq = open('automated-rc-soletta-retest.csv','w') elif(fileKind == 'kconfig'): print("SOU KCONFIG") arq = open('automated-results-kconfig-uclibc.csv','w') else:
def match(paths, options = None) :
    """ Build a threshold-driven matcher that only keeps keypoint matches
        between corresponding spatial clusters of the two images.

        The keypoint positions of each image are clustered with
        `isodata.cluster`; clusters are linked through the number of
        keypoint matches they share, and only the matches between linked
        clusters are kept.

        input: paths   image paths, passed on to `features.getFeatures`
               options [dict or None] optional settings; recognised keys are
                       "k_init", "max_iterations", "min_partitions_size",
                       "max_sd", "min_distance", "verbose" and
                       "descriptor_type". The dict is also forwarded to
                       `features.getFeatures`.

        output: a function `threshold -> (matches, ratios, scores)` keeping
                the matches whose ratio is below the threshold; all three
                are empty lists when nothing passes.
    """
    # A fresh dict per call: the options are forwarded to another module, so
    # a shared default instance could leak state between calls.
    if options is None :
        options = {}

    # Get matches in usual format
    def matchFromIndex(i,j) :
        return (features.getPosition(ks[indices == 0][i]),
                features.getPosition(ks[indices == 1][j]))

    # Get options
    k_init = options.get("k_init", 50)
    max_iterations = options.get("max_iterations", 20)
    # NOTE(review): the option key is plural ("min_partitions_size") while
    # the isodata argument is singular -- confirm which spelling callers use.
    min_partition_size = options.get("min_partitions_size", 10)
    max_sd = options.get("max_sd", 40)
    min_distance = options.get("min_distance", 25)
    verbose = options.get("verbose", False)
    descriptor_type = options.get("descriptor_type", "SIFT")

    # Get all feature points
    indices, ks, ds = features.getFeatures(paths, options)

    # Get positions
    positions = numpy.array(features.getPositions(ks))

    # Get matches
    match_points = getMatchPoints(indices, ks, ds, descriptor_type = descriptor_type)
    if len(match_points) == 0 :
        # The parentheses matter: without them a (function, [], []) tuple
        # is returned instead of a callable.
        return lambda t : ([], [], [])

    # Partition with isodata
    isodata_args = dict(k_init=k_init,
                        max_iterations=max_iterations,
                        min_partition_size=min_partition_size,
                        max_sd=max_sd,
                        min_distance=min_distance)
    part_1 = isodata.cluster(positions[indices==0], **isodata_args)
    part_2 = isodata.cluster(positions[indices==1], **isodata_args)

    # Show the clusters; the images are only needed for this display
    if verbose :
        images = [features.loadImage(path) for path in paths]
        display.showTwoPartitions(part_1, part_2, indices, images, positions)

    # Get a matrix of the matches so that part_corr_{i,j} is equal to the
    # amount of matches between partition i and j
    part_corr = getLinkMat(part_1, part_2, match_points)

    # For each partition figure out which partitions correspond
    partition_links = [getPartitionLinks(row) for row in part_corr]

    # Get all keypoint matches from the matching clusters
    match_set = []
    for i, links in enumerate(partition_links) :
        for (j, _s) in links :
            match_set.extend(getPartitionMatches(match_points, part_1 == i, part_2 == j))

    # Define a function that given a threshold returns a set of matches
    def match_fun(threshold) :
        match_data = [(matchFromIndex(i,j), u, 0)
                      for ((i,j),u) in match_set if u < threshold]
        if not match_data :
            return [], [], []
        matches, ratios, scores = zip(*match_data)
        return matches, ratios, scores

    return match_fun
def hello():
    """Render index.html with the result of getFeatures() as `features`."""
    return render_template("index.html", features=getFeatures())
def detection(tweet):
    """Classify a tweet as subjective-positive, subjective-negative or not
    subjective.

    The tweet is cleaned and tokenised twice: POS tags plus word tokens feed
    the subjectivity model, word tokens alone feed the polarity model. The
    polarity model is only consulted when the subjectivity model predicts 1.

    Returns:
        "pos" or "neg" for a subjective tweet, "not" otherwise.
    """
    subj = dict()
    polr = dict()

    polr['raw_text'] = tweet
    subj['raw_text'] = tweet

    subj['postext'] = clean.cleanTextPos(tweet)
    subj['text'] = clean.cleanText(tweet)
    polr['text'] = clean.cleanText(tweet)

    # Terms as PosTags in Subjectivity
    subj['posterms'] = freeling.getPOS(subj['postext'])
    subj['terms'] = tokens.getTokens(subj['text'])  # may be tokens.getTokens(text, stopwords)

    # Terms as word Tokens in polarity
    polr['terms'] = tokens.getTokens(polr['text'])  # may be tokens.getTokens(text, stopwords)

    # Select terms: words by default (TODO: complete with words or POS tags)
    subj['features'] = features.getFeatures(subj['posterms'] + subj['terms'], 'unigrams')
    polr['features'] = features.getFeatures(polr['terms'], 'uni+bigrams')

    subj['vectormodel'] = vector_model.getModel(subTerms, subj['features'], 'tf')
    polr['vectormodel'] = vector_model.getModel(polTerms, polr['features'], 'tf')
    polr['dictvectormodel'] = dict(zip(polTerms, polr['vectormodel']))

    subjectPrediction = subjectSVMmodel.predict(subj['vectormodel'])
    if showDetails:
        # %-formatting prints the same text on Python 2 and 3, where the old
        # print statement is a syntax error.
        print("subjectPrediction:  %s" % (subjectPrediction,))

    if subjectPrediction != 1:
        return "not"

    polarityPrediction = polarityBayesModel.classify(polr['dictvectormodel'])
    resp = "pos" if polarityPrediction == 1 else "neg"
    if showDetails:
        print("polarityPrediction:  %s" % (polarityPrediction,))
    return resp
run_features_100_15 = timeit.timeit( 'getFeatures(splitAll(selectData(100, 15)))', setup='from __main__ import getFeatures, splitAll, selectData', number=1) run_features_100_20 = timeit.timeit( 'getFeatures(splitAll(selectData(100, 20)))', setup='from __main__ import getFeatures, splitAll, selectData', number=1) run_features_100_25 = timeit.timeit( 'getFeatures(splitAll(selectData(100, 25)))', setup='from __main__ import getFeatures, splitAll, selectData', number=1) # ========================================================================================= # Memory for feature extraction a) mem_features_20_20 = memUsage(getFeatures(splitAll(selectData(20, 20)))) mem_features_50_20 = memUsage(getFeatures(splitAll(selectData(50, 20)))) mem_features_70_20 = memUsage(getFeatures(splitAll(selectData(70, 20)))) # Memory for feature extraction b) mem_features_100_10 = memUsage(getFeatures(splitAll(selectData(100, 10)))) mem_features_100_15 = memUsage(getFeatures(splitAll(selectData(100, 15)))) mem_features_100_20 = memUsage(getFeatures(splitAll(selectData(100, 20)))) mem_features_100_25 = memUsage(getFeatures(splitAll(selectData(100, 25)))) # ========================================================================================= run_features_a = [ run_features_20_20, run_features_50_20, run_features_70_20, run_features_100_20 ] run_features_b = [ run_features_100_10, run_features_100_15, run_features_100_20,