def draw(self, renderer):
    ### FIXME this should be made faster (C++ Module? How to deal with C++ linkage problems?)
    ### Sticking the vtkPoints objects in a cache would help somewhat but not on the first view.
    ### - Jack
    if not self.drawn:
        vtk_points = vtkPoints()
        points = self.visualiser.getQuantityPoints(self.quantityName, dynamic=self.dynamic)
        nPoints = len(points)
        vtk_points.SetNumberOfPoints(nPoints)
        setPoint = vtkPoints.SetPoint
        for i in xrange(nPoints):
            z = points[i] * self.zScale + self.offset
            setPoint(vtk_points, i, self.visualiser.xPoints[i], self.visualiser.yPoints[i], z)
        polyData = vtkPolyData()
        polyData.SetPoints(vtk_points)
        polyData.SetPolys(self.visualiser.vtk_cells)
        mapper = vtkPolyDataMapper()
        mapper.SetInput(polyData)
        setValue = vtkFloatArray.SetValue
        if hasattr(self.colour[0], '__call__'):
            scalars = self.colour[0](self.visualiser.getQuantityDict())
            nScalars = len(scalars)
            vtk_scalars = vtkFloatArray()
            vtk_scalars.SetNumberOfValues(nScalars)
            for i in xrange(nScalars):
                setValue(vtk_scalars, i, scalars[i])
            polyData.GetPointData().SetScalars(vtk_scalars)
            mapper.SetScalarRange(self.colour[1:3])
        mapper.Update()
        self.actor.SetMapper(mapper)
    Feature.draw(self, renderer)
def get_score(self, msg):
    Feature.extract(msg)
    score = 0.0
    for (f, w) in self.feature_weight.items():
        if f in msg.feature:
            score += msg.feature[f] * w
    return score
def init_train_data(fnames, topics):
    print ('[ init_train_data ] =================')
    # amap
    #   key   : aid
    #   value : attr[0] preference, attr[1] aid, attr[2] aname
    train_rank = []
    for QID in range(len(topics)):
        fname = fnames[QID]
        topic = topics[QID]
        amap = filter_data(fname)
        fea = Feature(topic)
        ext_aids = ZC.get_raw_rank(topic, EXT_TRAIN_A_SIZE)
        print '[ init_train_data ] amap_1 size = %d ' % (len(amap))
        for tid in ext_aids:
            if not (tid in amap):
                amap[tid] = (0, tid, '')
        print '[ init_train_data ] amap_2 size = %d ' % (len(amap))
        for tid in amap:
            fv = fea.get_feature_vector(tid)
            # print ('[ init_train_data ] %d get feature vector ok.' % (tid))
            train_rank.append((int(amap[tid][0]), reform_vector(fv), QID))
        print '[ init_train_data ] topic : %s ok , train_rank_size = %d' % (topic, len(train_rank))
    ZC.dump_cache()
    with open('train_rank.dat', 'w') as f:
        pprint.pprint(train_rank, f)
    return train_rank
def Initialize(credentials=None, opt_url=None):
    """Initialize the EE library.

    If this hasn't been called by the time any object constructor is used,
    it will be called then. If this is called a second time with a different
    URL, this doesn't do an un-initialization of e.g. the previously loaded
    Algorithms, but will overwrite them and let them point at alternate servers.

    Args:
      credentials: OAuth2 credentials.
      opt_url: The base url for the EarthEngine REST API to connect to.
    """
    data.initialize(credentials, (opt_url + '/api' if opt_url else None), opt_url)
    # Initialize the dynamically loaded functions on the objects that want them.
    ApiFunction.initialize()
    Element.initialize()
    Image.initialize()
    Feature.initialize()
    Collection.initialize()
    ImageCollection.initialize()
    FeatureCollection.initialize()
    Filter.initialize()
    Geometry.initialize()
    List.initialize()
    Number.initialize()
    String.initialize()
    Date.initialize()
    Dictionary.initialize()
    _InitializeGeneratedClasses()
    _InitializeUnboundMethods()
def dryer_data2(*feature_names):
    # data[area][genus][(feature_values)] = language_count
    data = {}
    # Languages covered by all of the requested features
    languages = set()
    g = Genealogy()
    feature = Feature(feature_names[0])
    for language in feature.languages():
        languages.add(language.code)
    for feature_name in feature_names:
        feature = Feature(feature_name)
        this_set = set()
        for language in feature.languages():
            this_set.add(language.code)
        languages &= this_set
    for language_code in languages:
        language = g.find_language_by_code(language_code)
        area = language.area
        genus = language.genus.name
        value = ','.join(v['description'] for v in sorted(language.features.values()))
        data.setdefault(area, {})
        data[area].setdefault(genus, {})
        data[area][genus].setdefault(value, 0)
        data[area][genus][value] += 1
    return data
def Initialize(credentials="persistent", opt_url=None):
    """Initialize the EE library.

    If this hasn't been called by the time any object constructor is used,
    it will be called then. If this is called a second time with a different
    URL, this doesn't do an un-initialization of e.g. the previously loaded
    Algorithms, but will overwrite them and let them point at alternate servers.

    Args:
      credentials: OAuth2 credentials.  'persistent' (default) means use
        credentials already stored in the filesystem, or raise an explanatory
        exception guiding the user to create those credentials.
      opt_url: The base url for the EarthEngine REST API to connect to.
    """
    if credentials == "persistent":
        credentials = _GetPersistentCredentials()
    data.initialize(credentials, (opt_url + "/api" if opt_url else None), opt_url)
    # Initialize the dynamically loaded functions on the objects that want them.
    ApiFunction.initialize()
    Element.initialize()
    Image.initialize()
    Feature.initialize()
    Collection.initialize()
    ImageCollection.initialize()
    FeatureCollection.initialize()
    Filter.initialize()
    Geometry.initialize()
    List.initialize()
    Number.initialize()
    String.initialize()
    Date.initialize()
    Dictionary.initialize()
    Terrain.initialize()
    _InitializeGeneratedClasses()
    _InitializeUnboundMethods()
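# A minimal usage sketch (an illustration, not part of the library source above): with
# the 'persistent' default, a client session typically needs only the calls below,
# assuming credentials were previously stored on disk (e.g. via the
# `earthengine authenticate` command-line flow).
import ee

ee.Initialize()                       # picks up the persistent credentials
dem = ee.Image('USGS/SRTMGL1_003')    # the asset id here is just an illustrative example
print(dem.getInfo()['type'])          # 'Image'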
def extract(self, data_set_name, part_num=1, part_id=0):
    """
    Extract the feature from the original data set.
    :param data_set_name: name of data set
    :param part_num: number of partitions of data
    :param part_id: partition ID which will be extracted
    :return:
    """
    # load data set from disk
    data = pd.read_csv('%s/%s.csv' % (self.config.get('DEFAULT', 'source_pt'), data_set_name)).fillna(value="")
    begin_id = int(1. * len(data) / part_num * part_id)
    end_id = int(1. * len(data) / part_num * (part_id + 1))
    # set feature file path
    feature_pt = self.config.get('DEFAULT', 'feature_pt')
    if 1 == part_num:
        self.data_feature_fp = '%s/%s.%s.smat' % (feature_pt, self.feature_name, data_set_name)
    else:
        self.data_feature_fp = '%s/%s.%s.smat.%03d_%03d' % (feature_pt, self.feature_name, data_set_name, part_num, part_id)
    feature_file = open(self.data_feature_fp, 'w')
    feature_file.write('%d %d\n' % (end_id - begin_id, int(self.get_feature_num())))
    # extract feature
    for index, row in data[begin_id:end_id].iterrows():
        feature = self.extract_row(row)
        Feature.save_feature(feature, feature_file)
    feature_file.close()
    LogUtil.log('INFO', 'save features (%s, %s, %d, %d) done' % (self.feature_name, data_set_name, part_num, part_id))
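# The begin_id/end_id slicing above splits the len(data) rows into part_num contiguous,
# non-overlapping blocks. A standalone sketch of the same arithmetic, useful for sanity
# checking partition boundaries (the row counts below are hypothetical):
def partition_bounds(n_rows, part_num, part_id):
    begin_id = int(1. * n_rows / part_num * part_id)
    end_id = int(1. * n_rows / part_num * (part_id + 1))
    return begin_id, end_id

# partition_bounds(10, 3, 0) -> (0, 3), (10, 3, 1) -> (3, 6), (10, 3, 2) -> (6, 10):
# every row falls into exactly one partition, so the per-partition .smat files can be
# concatenated back into the full feature matrix.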
def __init__(self, **kwargs):
    u"""
    :param framefilter: selected frame numbers at which the histogram is computed
    :type: array
    """
    Feature.__init__(self, **kwargs)
def predict(test_dir, xpath):
    feature = Feature(test_dir + '/oracle.png', test_dir + '/test.html', xpath)
    feature.process()
    vector = feature.output_binary()
    print vector
    score = rank_bayes(vector)
    sorted_score = sorted(score.iteritems(), key=operator.itemgetter(1))
    return sorted_score
def test_can_enable_two_features(self):
    feature2 = Feature("second_test_feature")
    request = fake_request()
    self.feature.enable(request)
    feature2.enable(request)
    self.assertTrue(self.feature.is_enabled(request))
    self.assertTrue(feature2.is_enabled(request))
def _weight_feature(self, msg):
    Feature.extract(msg)
    score = 0.0
    for i in range(len(self.feature_name)):
        f = self.feature_name[i]
        w = self.w[i]
        if f in msg.feature:
            score += msg.feature[f] * w
    return score
def init_rerank_data(aids, topic):
    QID = 1
    fea = Feature(topic)
    rerank_data = []
    for tid in aids:
        fv = fea.get_feature_vector(tid)
        print ('[ init_rerank_data ] %d get feature vector ok.' % (tid))
        rerank_data.append((tid, reform_vector(fv), QID))
    return rerank_data
def set_enabled(request):
    f = Feature(request.POST['name'])
    enabled = request.POST['enabled'] == 'True'
    if enabled:
        f.enable(request)
    else:
        f.disable(request)
    return redirect("/feature/")
def _test_polynomial(self, polynomial):
    data = range(10)
    data = [float(d) for d in data]
    targets = [d ** polynomial for d in data]
    data = [[d] for d in data]
    feature = Feature(Objective.MINIMIZE, log_level=logging.WARN)
    feature.optimize(data, targets)
    assert feature.polynomial == polynomial, "{0} != {1}".format(feature.polynomial, polynomial)
def __init__(self, biodb, step_size=5, levels=[], name_hier=[]):
    Feature.__init__(self, biodb=biodb)
    self.step_size = step_size
    if levels == []:
        levels = [None] * 3
    self.levels = levels
    if name_hier == []:
        name_hier = [""] * 3
    self.name_hier = name_hier
    self.links = []
def __init__(self, biodb, step_size=5, levels=[], name_hier=[], parent_hier=[]):
    Feature.__init__(self, biodb=biodb)
    self.step_size = step_size
    if parent_hier == []:
        parent_hier = [None] * 3
    self.parent_hier = parent_hier
    if levels == []:
        levels = [None] * 3
    self.levels = levels
    self.links = []
def showResults(request):
    global QUERY
    global RET_ANS
    query = request.GET['query']
    query = query.encode('UTF-8')
    if query == QUERY:
        return JsonResponse(RET_ANS, safe=False)
    else:
        QUERY = query
        # words = jieba.cut_for_search(query)  # search-mode word segmentation
        ch_q = jieba.cut(query)  # precise-mode word segmentation
        kw_ch = [i for i in ch_q]
        tag_obj = TagDict.objects.filter(tag_ch__in=kw_ch)
        cujiansuo = sum([tag.tag_class for tag in tag_obj], [])
        kw_en = [tag.tag_en for tag in tag_obj]  # collected keywords
        cujiansuo_res = sorted(set(cujiansuo), key=cujiansuo.index)
        qa_obj = QuestionAnswer.objects.filter(id__in=cujiansuo_res)
        print len(qa_obj)
        kw_en_len = len(kw_en)
        count_en = [0] * kw_en_len
        res_list = []  # final list to return
        kw = kw_en
        for item in qa_obj:
            q = item.question.lower()
            a = item.answer.lower()
            for i in range(kw_en_len):
                k = kw_en[i]
                if k in q or k in a:
                    count_en[i] += 1
            if Is_rela(q, kw_en):
                item_t = [item.id, item.question, ret_em(kw, item.answer), item.answer]
                res_list.append(item_t)
        D = len(res_list)
        Idf = []
        if not D == 0:
            Idf = [abs(math.log(D / float(t + 1))) for t in count_en]
        theta1 = 1.0
        theta2 = 1.0
        theta3 = 1.0
        mmax = 0.0
        an_b = None
        for item in res_list:
            ans_sen = nltk.sent_tokenize(item[3])
            en_a = sum([nltk.word_tokenize(t) for t in ans_sen], [])
            en_q = nltk.word_tokenize(item[1])
            score_f = Feature(kw_ch, en_q, kw, en_a, Idf)
            score = theta1 * score_f.length_feature() + \
                    sum(map(lambda (x): x * theta2, score_f.word_feature())) + \
                    sum(map(lambda (x): x * theta3, score_f.tfidf()))
            if mmax < score:
                mmax = score
                an_b = item
        RET_ANS = res_list
        write_file(res_list, query)
        return JsonResponse(RET_ANS, safe=False)
def __init__(self, quantityName, zScale=1.0, offset=0.0, **kwargs):
    '''
    Parameters:
    quantityName: string - name of a quantity
    zScale: float - multiply point z-values by this
    offset: float - add this to point z-values
    '''
    Feature.__init__(self, **kwargs)
    self.quantityName = quantityName
    self.zScale = zScale
    self.offset = offset
def __init__(self, field, word_file):
    """
    Get the word list from the specified file.

    :param field: The field to which this feature belongs.
    :param word_file: The path to an alphabetized word list, one per line.
    :return: None
    """
    word_file = field.settings.resolve_path(word_file)
    with open(word_file, 'r') as f:
        words = f.readlines()
    words = [w.strip() for w in words if w.islower()]
    self._dict_words = words
    Feature.__init__(self, field)
def main():
    X = [[1, 2], [2, 3]]
    root = Feature('root')
    featureList = np.array([])
    for i in range(len(X[0])):
        feature = Feature('feature_%d' % i)
        root.transform('init', feature)
        featureList = np.append(featureList, feature)
    model = OneHotEncoder(n_values=[5, 8], sparse=True)
    model.fit(X)
    doWithOneHotEncoder(model, featureList)
    root.printTree()
def main():
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    root = Feature('root')
    featureList = np.array([])
    for i in range(len(X[0])):
        feature = Feature('feature_%d' % i)
        root.transform('init', feature)
        featureList = np.append(featureList, feature)
    model = PCA(n_components=1)
    model.fit(X)
    doWithPCA(model, featureList)
    root.printTree()
def msg2X(self, samples):
    '''
    Convert messages to data matrix format.
    X: A dict. See explanation of _G()
    '''
    X = {}
    for m in samples.values():
        Feature.extract(m)
        x = []
        for name in self.feature_name:
            x.append(m.feature[name])
        X[m.msg_id] = x
    return X
def transform(self, fp):
    found_feature = False
    for f in fp:
        if f.name == self.name:
            yield Feature.apply_config(f, feature_type=FeatureType.TARGET)
            found_feature = True
        else:
            yield Feature.apply_config(f, feature_type=FeatureType.PREDICTOR)
    assert found_feature, "Feature `{}` is not found in the FeaturePool".format(self.name)
def main():
    from sklearn.feature_selection import VarianceThreshold
    X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]
    root = Feature('root')
    featureList = np.array([])
    for i in range(len(X[0])):
        feature = Feature('feature_%d' % i)
        root.transform('init', feature)
        featureList = np.append(featureList, feature)
    model = VarianceThreshold()
    model.fit(X)
    doWithSelector(model, featureList)
    root.printTree()
def __init__(self):
    self.train_file = FILE_PATH + '/../data/conll.nonexp.train'
    self.test_file = FILE_PATH + '/../data/conll.nonexp.test'
    self.model_file = FILE_PATH + '/../data/conll.nonexp.model'
    self.predicted_file = FILE_PATH + '/../data/conll.nonexp.test.predicted'
    self.feat_handle = Feature()
class Trainer(object):
    def __init__(self):
        super(Trainer, self).__init__()
        self.tokenizer = Tokenizer()
        self.feature = Feature()

    # Train on the given text and class.
    def train(self, text, className):
        # increase class
        self.feature.increaseClass(className)
        # tokenize text
        tokens = self.tokenizer.tokenize(text)
        # increase token
        for token in tokens:
            self.feature.increaseToken(token, className)
def feature(self, subtree):
    rv = Feature(**dict(subtree))
    if rv.feature_elements is None:
        rv.feature_elements = []
    if 'background' not in rv:
        rv.background = None
    # Assign background to feature elements.
    for sc in rv.feature_elements:
        sc.background = rv.background
    rv.tags = frozenset(rv.tags)
    return rv
def Reset():
    """Reset the library. Useful for re-initializing to a different server."""
    data.reset()
    ApiFunction.reset()
    Image.reset()
    Feature.reset()
    Collection.reset()
    ImageCollection.reset()
    FeatureCollection.reset()
    Filter.reset()
    Geometry.reset()
    Number.reset()
    String.reset()
    _ResetGeneratedClasses()
    global Algorithms
    Algorithms = _AlgorithmsContainer()
def transform_single(self, f):
    st = feature_summary(f.data)
    return Feature.merge_instances(f, Feature(f.name, f.data, st))
def init_test_data(fname, topic):
    print ('[ init_test_data ] =================')
    QID = 1
    # amap
    #   key   : aid
    #   value : attr[0] preference, attr[1] aid, attr[2] aname
    amap = filter_data(fname)
    fea = Feature(topic)
    train_rank = []
    for tid in amap:
        aid = int(tid)
        fv = fea.get_feature_vector(aid)
        print ('[ init_test_data ] %d get feature vector ok.' % (aid))
        train_rank.append((aid, reform_vector(fv), QID))
    # ZC.dump_cache()
    return train_rank
def get_feature_by_feat(dict, feat):
    feat_dict = {}
    if feat in dict:
        feat_dict[dict[feat]] = 1
    return Feature("", len(dict), feat_dict)
class Warp():
    def __init__(self, image, feat_file, grid_height, grid_width, grid_dir, warp_dir, alpha=1, margin=200):
        self.alpha = alpha
        self.feat = Feature()  # feature object
        self.read_feature_points(feat_file, margin)
        self.grid = Grid(image, grid_height, grid_width, margin, grid_dir, warp_dir)
        self.image = image  # this should not change after global warping; setting it initially is easier
        self.grid.compute_salience()
        self.set_grid_info_to_feat()

    def warp(self):
        self.GlobalWarp()
        self.ContentWarp()

    def GlobalWarp(self):
        # find the homography
        src = np.zeros((self.feat.size(), 2))
        dest = np.zeros((self.feat.size(), 2))
        for i, feat_info in enumerate(self.feat.feat):
            src[i][0] = feat_info.col
            src[i][1] = feat_info.row
            dest[i][0] = feat_info.dest_col
            dest[i][1] = feat_info.dest_row
        H, _ = cv2.findHomography(src, dest, cv2.RANSAC)
        # apply global transform
        self.grid.GlobalWarp(H)
        # features need to be transformed as well
        for i, feat_info in enumerate(self.feat.feat):
            p = np.array([feat_info.col, feat_info.row, 1])
            p_prime = np.dot(H, p)
            p_prime /= p_prime[-1]
            p_prime = p_prime[:-1].round().astype('int')
            self.feat.feat[i].set_global(p_prime[1], p_prime[0])
        print ('global warp finished. ', end='', flush=True)

    def ContentWarp(self):
        self.compute_bilinear_interpolation()
        self.grid.compute_u_v()
        self.build_linear_system_and_solve()
        self.image = self.image.split('/')[-1]
        self.grid.show_grid('after transform', self.feat.feat, show=False, save=True, image=self.image)
        self.map_texture(self.image)

    def build_linear_system_and_solve(self):
        '''
        A: [w1 0  w2 0  w3 0  w4 0  0 0 ... 0]    X: [V_1x]
           [0  w1 0  w2 0  w3 0  w4 0 0 ... 0]       [V_1y]
            .                                        [V_2x]
            .                                        [V_2y]
            .         similarity transform           [V_3x]
            .                                        [V_3y]
            .                                        [V_4x]
            .                                        [V_4y]
            .                                         ...
        '''
        # A*x = B
        v_map = dict()     # the map from Xi to mesh coordinates
        mesh_map = dict()  # the map from mesh coordinates to Xi
        # construct map
        map_id = 0
        # x[i], x[i+1] hold the row and col coordinates respectively, for every even i
        for row in range(self.grid.global_mesh.shape[0]):
            for col in range(self.grid.global_mesh.shape[1]):
                v_map[map_id] = (row, col)
                mesh_map[(row, col)] = map_id
                map_id += 2
        # build data term and similarity transform term matrices
        A_simularity = np.zeros((self.grid.count() * 16, 2 * len(v_map)))
        B_simularity = np.zeros((self.grid.count() * 16, 1))
        A_data = np.zeros((2 * self.feat.size(), 2 * len(v_map)))
        B_data = np.zeros((2 * self.feat.size(), 1))
        for i, feat_info in enumerate(self.feat.feat):
            cell_row, cell_col = feat_info.grid_pos
            tl = feat_info.temporal_coeff
            # data term
            v1_x_pos = mesh_map[(cell_row, cell_col)]; v1_y_pos = v1_x_pos + 1
            v2_x_pos = mesh_map[(cell_row + 1, cell_col)]; v2_y_pos = v2_x_pos + 1
            v3_x_pos = mesh_map[(cell_row + 1, cell_col + 1)]; v3_y_pos = v3_x_pos + 1
            v4_x_pos = mesh_map[(cell_row, cell_col + 1)]; v4_y_pos = v4_x_pos + 1
            A_data[2*i][v1_x_pos] = tl * feat_info.interpolation_coeff[0]    # V1's coeff for x coordinate
            A_data[2*i][v2_x_pos] = tl * feat_info.interpolation_coeff[1]    # V2's coeff for x coordinate
            A_data[2*i][v3_x_pos] = tl * feat_info.interpolation_coeff[2]    # V3's coeff for x coordinate
            A_data[2*i][v4_x_pos] = tl * feat_info.interpolation_coeff[3]    # V4's coeff for x coordinate
            A_data[2*i+1][v1_y_pos] = tl * feat_info.interpolation_coeff[0]  # V1's coeff for y coordinate
            A_data[2*i+1][v2_y_pos] = tl * feat_info.interpolation_coeff[1]  # V2's coeff for y coordinate
            A_data[2*i+1][v3_y_pos] = tl * feat_info.interpolation_coeff[2]  # V3's coeff for y coordinate
            A_data[2*i+1][v4_y_pos] = tl * feat_info.interpolation_coeff[3]  # V4's coeff for y coordinate
            B_data[2*i] = tl * (np.array(feat_info.dest_row) + 0.5)    # to grid coordinate
            B_data[2*i+1] = tl * (np.array(feat_info.dest_col) + 0.5)  # to grid coordinate
        # similarity transform term for every grid cell
        for cell_row in range(self.grid.g_height):
            for cell_col in range(self.grid.g_width):
                Ws = self.grid.gridCell[cell_row][cell_col].salience
                v1_x_pos = mesh_map[(cell_row, cell_col)]; v1_y_pos = v1_x_pos + 1
                v2_x_pos = mesh_map[(cell_row + 1, cell_col)]; v2_y_pos = v2_x_pos + 1
                v3_x_pos = mesh_map[(cell_row + 1, cell_col + 1)]; v3_y_pos = v3_x_pos + 1
                v4_x_pos = mesh_map[(cell_row, cell_col + 1)]; v4_y_pos = v4_x_pos + 1
                index_offset = cell_row * self.grid.g_height + cell_col
                index_offset *= 16
                # first triangle
                u = self.grid.gridCell[cell_row][cell_col].u_v[0][0]
                v = self.grid.gridCell[cell_row][cell_col].u_v[0][1]
                A_simularity[index_offset+0][v1_x_pos] = Ws * (1)
                A_simularity[index_offset+0][v2_x_pos] = Ws * (u - 1)
                A_simularity[index_offset+0][v2_y_pos] = Ws * (v)
                A_simularity[index_offset+0][v3_x_pos] = Ws * (-u)
                A_simularity[index_offset+0][v3_y_pos] = Ws * (-v)
                B_simularity[index_offset+0] = 0
                A_simularity[index_offset+1][v1_y_pos] = Ws * (1)
                A_simularity[index_offset+1][v2_y_pos] = Ws * (u - 1)
                A_simularity[index_offset+1][v2_x_pos] = Ws * (-v)
                A_simularity[index_offset+1][v3_y_pos] = Ws * (-u)
                A_simularity[index_offset+1][v3_x_pos] = Ws * (v)
                B_simularity[index_offset+1] = 0
                # second triangle
                u = self.grid.gridCell[cell_row][cell_col].u_v[1][0]
                v = self.grid.gridCell[cell_row][cell_col].u_v[1][1]
                A_simularity[index_offset+2][v1_x_pos] = Ws * (1)
                A_simularity[index_offset+2][v4_x_pos] = Ws * (u - 1)
                A_simularity[index_offset+2][v4_y_pos] = Ws * (v)
                A_simularity[index_offset+2][v3_x_pos] = Ws * (-u)
                A_simularity[index_offset+2][v3_y_pos] = Ws * (-v)
                B_simularity[index_offset+2] = 0
                A_simularity[index_offset+3][v1_y_pos] = Ws * (1)
                A_simularity[index_offset+3][v4_y_pos] = Ws * (u - 1)
                A_simularity[index_offset+3][v4_x_pos] = Ws * (-v)
                A_simularity[index_offset+3][v3_y_pos] = Ws * (-u)
                A_simularity[index_offset+3][v3_x_pos] = Ws * (v)
                B_simularity[index_offset+3] = 0
                # third triangle
                u = self.grid.gridCell[cell_row][cell_col].u_v[2][0]
                v = self.grid.gridCell[cell_row][cell_col].u_v[2][1]
                A_simularity[index_offset+4][v2_x_pos] = Ws * (1)
                A_simularity[index_offset+4][v3_x_pos] = Ws * (u - 1)
                A_simularity[index_offset+4][v3_y_pos] = Ws * (v)
                A_simularity[index_offset+4][v4_x_pos] = Ws * (-u)
                A_simularity[index_offset+4][v4_y_pos] = Ws * (-v)
                B_simularity[index_offset+4] = 0
                A_simularity[index_offset+5][v2_y_pos] = Ws * (1)
                A_simularity[index_offset+5][v3_y_pos] = Ws * (u - 1)
                A_simularity[index_offset+5][v3_x_pos] = Ws * (-v)
                A_simularity[index_offset+5][v4_y_pos] = Ws * (-u)
                A_simularity[index_offset+5][v4_x_pos] = Ws * (v)
                B_simularity[index_offset+5] = 0
                # fourth triangle
                u = self.grid.gridCell[cell_row][cell_col].u_v[3][0]
                v = self.grid.gridCell[cell_row][cell_col].u_v[3][1]
                A_simularity[index_offset+6][v2_x_pos] = Ws * (1)
                A_simularity[index_offset+6][v1_x_pos] = Ws * (u - 1)
                A_simularity[index_offset+6][v1_y_pos] = Ws * (v)
                A_simularity[index_offset+6][v4_x_pos] = Ws * (-u)
                A_simularity[index_offset+6][v4_y_pos] = Ws * (-v)
                B_simularity[index_offset+6] = 0
                A_simularity[index_offset+7][v2_y_pos] = Ws * (1)
                A_simularity[index_offset+7][v1_y_pos] = Ws * (u - 1)
                A_simularity[index_offset+7][v1_x_pos] = Ws * (-v)
                A_simularity[index_offset+7][v4_y_pos] = Ws * (-u)
                A_simularity[index_offset+7][v4_x_pos] = Ws * (v)
                B_simularity[index_offset+7] = 0
                # fifth triangle
                u = self.grid.gridCell[cell_row][cell_col].u_v[4][0]
                v = self.grid.gridCell[cell_row][cell_col].u_v[4][1]
                A_simularity[index_offset+8][v3_x_pos] = Ws * (1)
                A_simularity[index_offset+8][v4_x_pos] = Ws * (u - 1)
                A_simularity[index_offset+8][v4_y_pos] = Ws * (v)
                A_simularity[index_offset+8][v1_x_pos] = Ws * (-u)
                A_simularity[index_offset+8][v1_y_pos] = Ws * (-v)
                B_simularity[index_offset+8] = 0
                A_simularity[index_offset+9][v3_y_pos] = Ws * (1)
                A_simularity[index_offset+9][v4_y_pos] = Ws * (u - 1)
                A_simularity[index_offset+9][v4_x_pos] = Ws * (-v)
                A_simularity[index_offset+9][v1_y_pos] = Ws * (-u)
                A_simularity[index_offset+9][v1_x_pos] = Ws * (v)
                B_simularity[index_offset+9] = 0
                # sixth triangle
                u = self.grid.gridCell[cell_row][cell_col].u_v[5][0]
                v = self.grid.gridCell[cell_row][cell_col].u_v[5][1]
                A_simularity[index_offset+10][v3_x_pos] = Ws * (1)
                A_simularity[index_offset+10][v2_x_pos] = Ws * (u - 1)
                A_simularity[index_offset+10][v2_y_pos] = Ws * (v)
                A_simularity[index_offset+10][v1_x_pos] = Ws * (-u)
                A_simularity[index_offset+10][v1_y_pos] = Ws * (-v)
                B_simularity[index_offset+10] = 0
                A_simularity[index_offset+11][v3_y_pos] = Ws * (1)
                A_simularity[index_offset+11][v2_y_pos] = Ws * (u - 1)
                A_simularity[index_offset+11][v2_x_pos] = Ws * (-v)
                A_simularity[index_offset+11][v1_y_pos] = Ws * (-u)
                A_simularity[index_offset+11][v1_x_pos] = Ws * (v)
                B_simularity[index_offset+11] = 0
                # seventh triangle
                u = self.grid.gridCell[cell_row][cell_col].u_v[6][0]
                v = self.grid.gridCell[cell_row][cell_col].u_v[6][1]
                A_simularity[index_offset+12][v4_x_pos] = Ws * (1)
                A_simularity[index_offset+12][v1_x_pos] = Ws * (u - 1)
                A_simularity[index_offset+12][v1_y_pos] = Ws * (v)
                A_simularity[index_offset+12][v2_x_pos] = Ws * (-u)
                A_simularity[index_offset+12][v2_y_pos] = Ws * (-v)
                B_simularity[index_offset+12] = 0
                A_simularity[index_offset+13][v4_y_pos] = Ws * (1)
                A_simularity[index_offset+13][v1_y_pos] = Ws * (u - 1)
                A_simularity[index_offset+13][v1_x_pos] = Ws * (-v)
                A_simularity[index_offset+13][v2_y_pos] = Ws * (-u)
                A_simularity[index_offset+13][v2_x_pos] = Ws * (v)
                B_simularity[index_offset+13] = 0
                # eighth triangle
                u = self.grid.gridCell[cell_row][cell_col].u_v[7][0]
                v = self.grid.gridCell[cell_row][cell_col].u_v[7][1]
                A_simularity[index_offset+14][v4_x_pos] = Ws * (1)
                A_simularity[index_offset+14][v3_x_pos] = Ws * (u - 1)
                A_simularity[index_offset+14][v3_y_pos] = Ws * (v)
                A_simularity[index_offset+14][v2_x_pos] = Ws * (-u)
                A_simularity[index_offset+14][v2_y_pos] = Ws * (-v)
                B_simularity[index_offset+14] = 0
                A_simularity[index_offset+15][v4_y_pos] = Ws * (1)
                A_simularity[index_offset+15][v3_y_pos] = Ws * (u - 1)
                A_simularity[index_offset+15][v3_x_pos] = Ws * (-v)
                A_simularity[index_offset+15][v2_y_pos] = Ws * (-u)
                A_simularity[index_offset+15][v2_x_pos] = Ws * (v)
                B_simularity[index_offset+15] = 0
        A_simularity *= self.alpha
        B_simularity *= self.alpha
        A = np.vstack((A_data, A_simularity[1:]))
        B = np.vstack((B_data, B_simularity[1:]))
        X, _, _, _ = np.linalg.lstsq(A, B, rcond=None)
        # round the solution
        X = np.array([round(x) for x in X.reshape(-1)]).reshape((-1, 1))
        # apply the result
        for i in range(X.shape[0]):
            if i % 2 != 0:
                continue
            mesh_row, mesh_col = v_map[i]
            self.grid.warpped_mesh[mesh_row][mesh_col] = np.array([X[i][0], X[i+1][0]])
        for cell_row in range(self.grid.g_height):
            for cell_col in range(self.grid.g_width):
                v1 = self.grid.warpped_mesh[cell_row][cell_col]
                v2 = self.grid.warpped_mesh[cell_row + 1][cell_col]
                v3 = self.grid.warpped_mesh[cell_row + 1][cell_col + 1]
                v4 = self.grid.warpped_mesh[cell_row][cell_col + 1]
                self.grid.gridCell[cell_row][cell_col].set_corners(v1, v2, v3, v4)
        print ('local warp finished.')
        return

    def map_texture(self, image):
        self.grid.map_texture(image)

    def compute_bilinear_interpolation(self):
        for i, feat_info in enumerate(self.feat.feat):
            corresponding_cell = self.grid.gridCell[feat_info.grid_pos[0]][feat_info.grid_pos[1]]
            self.feat.set_coefficients(i, corresponding_cell.compute_coeff(feat_info.global_pos))

    def read_feature_points(self, filename, margin):
        self.feat.read(filename, margin)

    def set_grid_info_to_feat(self):
        for i, feat_info in enumerate(self.feat.feat):
            self.feat.set_grid_position(i, self.grid.FeatToCellCoor(feat_info.pos))
def get_feature_by_list(list):
    feat_dict = {}
    for index, item in enumerate(list):
        if item != 0:
            feat_dict[index + 1] = item
    return Feature("", len(list), feat_dict)
class Numbers(BaseModel, self.Settings):
    stream = Feature(NumberStream, store=False)
    add1 = Feature(Add, needs=stream, store=False, rhs=1)
    add2 = Feature(Add, needs=stream, store=False, rhs=1)
    sumup = Feature(SumUp, needs=(add1, add2), store=True)
def feature(self, ref, path):
    '''Returns a Feature object corresponding to the passed ref and path'''
    return Feature(self, ref, path)
class NavDataset(data.Dataset):
    def __init__(self, json_dirs, tok, img_path, panoramic, args):
        # read all json files and create a list of query data
        self.json_dirs = json_dirs  # a list of json files
        self.tok = tok  # should be a lang, vision, action aware tokenizer ['VCLS', 'ACLS']
        self.mask_index = tok._convert_token_to_id(tok.mask_token)
        self.feature_store = Feature(img_path, panoramic)
        self.args = args
        self.data = []
        self.instr_refer = dict()  # instr_id : instr_encoding
        for json_dir in self.json_dirs:
            with open(json_dir) as f:
                current_trajs = json.load(f)
                for traj in current_trajs:
                    self.data += self.disentangle_path(traj)

    def __getitem__(self, index):
        # you must return data and label pair tensor
        query = self.data[index]
        output = self.getQuery(query)
        return {key: torch.tensor(value) for key, value in output.items()}

    def __len__(self):
        return len(self.data)

    def disentangle_path(self, traj):
        query = list()
        instr_id = traj['instr_id']
        instruction = traj['instr_encoding']
        self.instr_refer[instr_id] = instruction
        path = traj['path']
        actions = traj['teacher_actions']
        action_emds = traj['teacher_action_emd']
        for t in range(len(path)):
            scan = path[t][0]
            viewpoint = path[t][1]
            viewIndex = path[t][2]
            teacher_action = actions[t]
            absViewIndex, rel_heading, rel_elevation = action_emds[t]
            current_query = SingleQuery(instr_id, scan, viewpoint, viewIndex,
                                        teacher_action, absViewIndex, rel_heading, rel_elevation)
            if t <= len(path) - 2:
                next_scan = path[t + 1][0]
                next_viewpoint = path[t + 1][1]
                next_viewIndex = path[t + 1][2]
                next_teacher_action = actions[t + 1]
                next_absViewIndex, next_rel_heading, next_rel_elevation = action_emds[t + 1]
                next_query = SingleQuery(instr_id, next_scan, next_viewpoint, next_viewIndex,
                                         next_teacher_action, next_absViewIndex, next_rel_heading, next_rel_elevation)
            else:
                next_query = current_query
            current_query.next = next_query
            query.append(current_query)  # a list of (SASA)
        return query

    def getQuery(self, query):
        # prepare text tensor
        output = dict()
        text_seq = torch.LongTensor(self.instr_refer[query.instr_id])
        masked_text_seq, masked_text_label, attention_mask = mask_tokens(text_seq, self.tok, self.args)
        output['masked_text_seq'] = masked_text_seq
        output['masked_text_label'] = masked_text_label
        output['lang_attention_mask'] = attention_mask

        # prepare vision tensor
        scan, viewpoint, viewindex = query.scan, query.viewpoint, query.viewIndex
        feature_all, feature_1 = self.feature_store.rollout(scan, viewpoint, viewindex)
        feature_with_loc_all = np.concatenate((feature_all, _static_loc_embeddings[viewindex]), axis=-1)
        output['feature_all'] = feature_with_loc_all

        # prepare action
        if query.absViewIndex == -1:
            teacher_action_embedding = np.zeros(feature_all.shape[-1] + 128, np.float32)
        else:
            teacher_view = feature_all[query.absViewIndex, :]
            loc_embedding = np.zeros(128, np.float32)
            loc_embedding[0:32] = np.sin(query.rel_heading)
            loc_embedding[32:64] = np.cos(query.rel_heading)
            loc_embedding[64:96] = np.sin(query.rel_elevation)
            loc_embedding[96:] = np.cos(query.rel_elevation)
            teacher_action_embedding = np.concatenate((teacher_view, loc_embedding))
        output['teacher'] = query.teacher_action
        output['teacher_embedding'] = teacher_action_embedding

        # prepare next step info
        nscan, nviewpoint, nviewindex = query.next.scan, query.next.viewpoint, query.next.viewIndex
        nfeature_all, nfeature_1 = self.feature_store.rollout(nscan, nviewpoint, nviewindex)
        nfeature_with_loc_all = np.concatenate((nfeature_all, _static_loc_embeddings[nviewindex]), axis=-1)
        output['next_feature_all'] = nfeature_with_loc_all
        if query.next.absViewIndex == -1:
            nteacher_action_embedding = np.zeros(feature_all.shape[-1] + 128, np.float32)
        else:
            nteacher_view = nfeature_all[query.next.absViewIndex, :]
            nloc_embedding = np.zeros(128, np.float32)
            nloc_embedding[0:32] = np.sin(query.next.rel_heading)
            nloc_embedding[32:64] = np.cos(query.next.rel_heading)
            nloc_embedding[64:96] = np.sin(query.next.rel_elevation)
            nloc_embedding[96:] = np.cos(query.next.rel_elevation)
            nteacher_action_embedding = np.concatenate((nteacher_view, nloc_embedding))
        output['next_teacher'] = query.next.teacher_action
        output['next_teacher_embedding'] = nteacher_action_embedding

        # prepare random next step info
        prob = np.random.random()
        if prob <= 0.5:
            output['isnext'] = 1
            output['next_img'] = output['next_feature_all']
        else:
            output['isnext'] = 0
            candidates = list(range(36))
            candidates.remove(nviewindex)
            fake_nviewindex = np.random.choice(candidates)
            ffeature_all, ffeature_1 = self.feature_store.rollout(nscan, nviewpoint, fake_nviewindex)
            ffeature_with_loc_all = np.concatenate((ffeature_all, _static_loc_embeddings[fake_nviewindex]), axis=-1)
            output['next_img'] = ffeature_with_loc_all
        return output

    def random_word(self, text_seq):
        tokens = text_seq.copy()  # already be [cls t1 t2 sep]
        output_label = []
        for i, token in enumerate(tokens):
            if i == 0 or i == len(tokens) - 1:
                output_label.append(0)
                continue
            prob = np.random.random()
            if prob < 0.15:
                prob /= 0.15
                output_label.append(tokens[i])
                # 80% randomly change token to mask token
                if prob < 0.8:
                    tokens[i] = self.mask_index
                # 10% randomly change token to random token
                elif prob < 0.9:
                    tokens[i] = random.randrange(len(self.tok))
                # 10% randomly change token to current token
                else:
                    tokens[i] = tokens[i]  # just keep it
            else:
                tokens[i] = tokens[i]  # just keep it
                output_label.append(0)
        return tokens, output_label
from application import App
from feature import Feature

apps = [
    App('hyper', 'https://releases.hyper.is/download/win'),
    App('git', 'https://central.github.com/deployments/desktop/desktop/latest/win32'),
    App('atom', 'https://atom.io/download/windows_x64'),
    App('python3.6.2', 'https://www.python.org/ftp/python/3.6.2/python-3.6.2.exe', '/quiet PrependPath=1'),
    Feature('Microsoft-Windows-Subsystem-Linux')
]


def main():
    for i in range(len(apps)):
        if apps[i].download:
            apps[i].download()
    for i in range(len(apps)):
        apps[i].install()


if __name__ == '__main__':
    main()
class Timestamps(BaseModel, self.Settings):
    stream = Feature(TextStream, store=True)
    t1 = Feature(Timestamp, needs=stream, store=True)
    t2 = Feature(Timestamp, needs=stream, store=False)
    cat = Feature(Concatenate, needs=[t1, t2], store=False)
class D1(BaseModel, self.Settings):
    stream = Feature(TextStream, store=True)
    words = Feature(Tokenizer, needs=stream, store=False)
def __init__(self, column):
    Feature.__init__(self)
    self.column = column
def __init__(self, field, reverse=False):
    self.reverse = reverse
    Feature.__init__(self, field)
def __init__(self, field, string):
    self._string = string
    Feature.__init__(self, field)
class D2(D1):
    words = Feature(Tokenizer, needs=D1.stream, store=True)
def get_feature_by_feat_list(dict, feat_list):
    feat_dict = {}
    for feat in feat_list:
        if feat in dict:
            feat_dict[dict[feat]] = 1
    return Feature("", len(dict), feat_dict)
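# A minimal usage sketch of get_feature_by_feat_list above; the vocabulary dict and the
# feature strings are made-up examples, not values from the original project.
vocab = {'but': 1, 'because': 2, 'however': 3}
f = get_feature_by_feat_list(vocab, ['but', 'unknown', 'however'])
# feat_dict ends up as {1: 1, 3: 1}: a sparse one-hot encoding with len(vocab) dimensions,
# silently skipping features that are missing from the vocabulary.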
class D(BaseModel, self.Settings):
    stream = Feature(TextStream, store=True)
    opposite = Feature(Contrarion, needs=stream, store=True)
class ConvTextClassfication(object):
    def __init__(self, args):
        self.args = args
        self.data_reader = Data_reader()
        self.raw_train_data = self.data_reader.read_train_data()
        self.raw_test_data = self.data_reader.read_test_data()
        self.feature = Feature(args)
        self.train_data = []
        self.labels = []
        self.val_data = []
        self.val_labels = []
        self.test_data = []

    def process_data(self):
        self.train_data, self.labels = self.feature.extract_feature(self.raw_train_data)
        self.test_data = self.feature.extract_test_feature(self.raw_test_data)

    def partition_data(self):
        num = len(self.train_data)
        partition_point = num - int(num / 10.0)
        #print self.labels
        self.val_data = self.train_data[partition_point:]
        self.val_labels = self.labels[partition_point:]
        self.train_data = self.train_data[:partition_point]
        self.labels = self.labels[:partition_point]

    def _loss(self, logits, L2_loss, labels):
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='aaa')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='ppp')
        return cross_entropy_mean

    def _forward(self, batch_x):
        layers = []
        layers.append(tfnnutils.InputLayer())
        layers.append(tfnnutils.Conv2D('conv1', ksize=(self.args.feature, 7), kernels=1))
        layers.append(tfnnutils.MaxPool((1, 3)))
        layers.append(tfnnutils.Conv2D('conv2', ksize=(self.args.feature, 7), kernels=1))
        layers.append(tfnnutils.MaxPool((1, 3)))
        layers.append(tfnnutils.Conv2D('conv3', ksize=(self.args.feature, 3), kernels=1))
        layers.append(tfnnutils.Conv2D('conv4', ksize=(self.args.feature, 3), kernels=1))
        layers.append(tfnnutils.Conv2D('conv5', ksize=(self.args.feature, 3), kernels=1))
        layers.append(tfnnutils.Conv2D('conv6', ksize=(self.args.feature, 3), kernels=1))
        layers.append(tfnnutils.MaxPool((1, 3)))
        layers.append(tfnnutils.Flatten())
        layers.append(tfnnutils.FCLayer('FC1', 1024, act=tf.nn.relu))
        layers.append(tfnnutils.FCLayer('FC2', 1024, act=tf.nn.relu))
        layers.append(tfnnutils.FCLayer('FC3', 2, act=tf.nn.relu))
        L2_loss = 0.
        last_layer = None
        for i, layer in enumerate(layers):
            if hasattr(layer, 'L2_Loss'):
                L2_loss += layer.L2_Loss
            batch_x = layer.forward(last_layer, batch_x)
            last_layer = layer
        pred = tf.nn.softmax(batch_x)
        return pred, batch_x, L2_loss

    def build_model(self):
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        self.lr = tf.placeholder(tf.float32, shape=[])
        opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=0.9)
        self._x = tf.placeholder(tf.float32, shape=[self.args.BatchSize, self.args.feature, self.args.length, 1])
        self._y = tf.placeholder(tf.int32)
        x = self._x
        y = self._y
        pred, logits, L2_loss = self._forward(x)
        loss = self._loss(logits, L2_loss, y)
        grads = opt.compute_gradients(loss)
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)
        self.train_step = apply_gradient_op
        self.pred_step = pred
        self.loss_step = loss
        if self.args.test == 1 and self.args.load_model != '':
            print 'Restore the model from %s' % self.args.load_model
            saver = tf.train.Saver()
            saver.restore(self.sess, self.args.load_model)
            print 'Finish restoring the model'

    def get_batch(self, dataset, labels, index):
        #print 'start getting a batch'
        st = index * self.args.BatchSize
        ed = st + self.args.BatchSize
        if ed >= len(dataset):
            return None, None
        ret_x = np.zeros((self.args.BatchSize, self.args.feature, self.args.length), np.float32)
        ret_y = np.zeros((self.args.BatchSize, ), np.int32)
        ret_x = np.array(dataset[st:ed])
        ret_y = np.array(labels[st:ed])
        ret_x = ret_x.reshape(self.args.BatchSize, self.args.feature, self.args.length, 1)
        return ret_x, ret_y

    def get_batch_predict(self, dataset, index):
        st = index * self.args.BatchSize
        ed = st + self.args.BatchSize
        if ed >= len(dataset):
            return None, None
        ret_x = np.zeros((self.args.BatchSize, self.args.feature, self.args.length), np.float32)
        ret_x = np.array(dataset[st:ed])
        ret_y = np.zeros((self.args.BatchSize, ), np.int32)
        ret_x = ret_x.reshape(self.args.BatchSize, self.args.feature, self.args.length, 1)
        return ret_x, ret_y

    def evaluate(self, dataset, labels):
        batch_size = self.args.BatchSize
        total_loss = 0.
        total_err = 0.
        n_batch = 0
        print 'start evaluating'
        while True:
            prepared_x, prepared_y = self.get_batch(dataset, labels, n_batch)
            if prepared_x is None:
                break
            feed = {self._x: prepared_x, self._y: prepared_y}
            loss, preds = self.sess.run([self.loss_step, self.pred_step], feed_dict=feed)
            total_loss += np.mean(loss)
            for i in xrange(len(preds)):
                if np.argmax(preds[i]) != prepared_y[i]:
                    total_err += 1
            n_batch += 1
            if n_batch > 10:
                break
        loss = total_loss / n_batch
        err = total_err / (n_batch * batch_size)
        print 'evaluate: loss = %f err = %f' % (loss, err)
        return loss, err

    def predict(self, dataset):
        predictions = []
        n_batch = 0
        print 'starting predicting the test dataset'
        while True:
            prepared_x, prepared_y = self.get_batch_predict(dataset, n_batch)
            if prepared_x is None:
                break
            feed = {self._x: prepared_x, self._y: prepared_y}
            _, preds = self.sess.run([self.loss_step, self.pred_step], feed_dict=feed)
            predictions.extend(preds)
            n_batch += 1
        return predictions

    def save(self, dirname):
        try:
            os.makedirs(dirname)
        except:
            pass
        saver = tf.train.Saver()
        return saver.save(self.sess, os.path.join(dirname, "model1.ckpt"))

    def test(self):
        print 'starting test'
        predictions = self.predict(self.test_data)
        with open('ans', 'w') as f:
            for item in predictions:
                try:
                    f.write(item[1])
                except:
                    print item

    def train(self):
        lr = self.args.lr
        best_acc = 0.0
        for epoch in xrange(self.args.num_epoch):
            n_train_batch = 0
            print n_train_batch
            batch_size = self.args.BatchSize
            if epoch > 0 and epoch % 3 == 0:
                lr /= 2.0
            while True:
                prepared_x, prepared_y = self.get_batch(self.train_data, self.labels, n_train_batch)
                if prepared_x is None:
                    print 'miemiemie'
                    break
                feed = {self.lr: lr, self._x: prepared_x, self._y: prepared_y}
                _, loss = self.sess.run([self.train_step, self.loss_step], feed_dict=feed)
                if n_train_batch % 100 == 0:
                    print 'The iteration is %d train loss is: %f' % (n_train_batch, loss)
                if n_train_batch % 1000 == 0:
                    self.evaluate(self.val_data, self.val_labels)
                n_train_batch += 1
        print 'start saving the model'
        self.save(args.save_model)
        print 'finish saving the model'
class D1(BaseModel):
    stream = Feature(TextStream, store=True)
if __name__ == "__main__":
    data_path = "data/20_newsgroups"
    corpus = Corpus()
    corpus.load_index(data_path)
    # gen tokens, uncomment when the tokens need regenerating
    # corpus.gen_tokens(gnosisTokenizer())

    # calc features
    corpus_tokens = []
    corpus_labels = []
    for category in corpus.category_list:
        content = Tokenizer.load_category(category)
        if content:
            corpus_tokens.extend(content)
            corpus_labels.extend([corpus.category_list.index(category)] * len(content))
    feature = Feature()
    feature.make_vsm(corpus_tokens)
    # feature.print_vsm()
    # reduce feature, k==0 means auto detect
    # feature.reducex(corpus_labels, cate_list=corpus.category_list)
    feature.reduce_feature(corpus_labels, k=0)
    feature_id = "feature.txt"
    feature.store(feature_id)

    # classify
    # lib svm
    classifier = LibSvmClassifier(feature_id)
    y_actual, y_predict = classifier.do_classify()
    Classifier.predict_info("Lib SVM", y_actual, y_predict)

    # sklearn svm
    classifier = SvmClassifier(feature.feature_vec, feature.feature_label)
class Doc4(BaseModel):
    stream = Feature(TextStream, chunksize=10, store=False)
    smaller = Feature(TextStream, needs=stream, chunksize=3, store=True)
class Document(BaseModel, self.Settings):
    stream = Feature(TextStream, store=True)
    dam = Feature(Dam, needs=stream, store=False)
    words = Feature(Tokenizer, needs=dam, store=False)
    count = JSONFeature(WordCount, needs=words, store=False)
class D1(BaseModel):
    stream = Feature(TextStream, store=False)
    echo = Feature(Echo, needs=stream, store=True)
class D(BaseModel, self.Settings):
    stream = Feature(TextStream, store=True)
    copy = Feature(Counter, needs=stream, store=False)
    words = Feature(Tokenizer, needs=copy, store=False)
    count = JSONFeature(WordCount, needs=words, store=False)
class MultipleRoots(BaseModel):
    stream1 = Feature(TextStream, chunksize=3, store=False)
    stream2 = Feature(TextStream, chunksize=3, store=False)
    cat = Feature(EagerConcatenate, needs=[stream1, stream2], store=True)
def _Promote(arg, klass):
    """Wrap an argument in an object of the specified class.

    This is used to e.g.: promote numbers or strings to Images and arrays
    to Collections.

    Args:
      arg: The object to promote.
      klass: The expected type.

    Returns:
      The argument promoted if the class is recognized, otherwise the
      original argument.
    """
    if arg is None:
        return arg

    if klass == 'Image':
        return Image(arg)
    elif klass == 'Feature':
        if isinstance(arg, Collection):
            # TODO(user): Decide whether we want to leave this in. It can be
            # quite dangerous on large collections.
            return ApiFunction.call_(
                'Feature', ApiFunction.call_('Collection.geometry', arg))
        else:
            return Feature(arg)
    elif klass == 'Element':
        if isinstance(arg, Element):
            # Already an Element.
            return arg
        elif isinstance(arg, Geometry):
            # Geometries get promoted to Features.
            return Feature(arg)
        elif isinstance(arg, ComputedObject):
            # Try a cast.
            return Element(arg.func, arg.args, arg.varName)
        else:
            # No way to convert.
            raise EEException('Cannot convert %s to Element.' % arg)
    elif klass == 'Geometry':
        if isinstance(arg, Collection):
            return ApiFunction.call_('Collection.geometry', arg)
        else:
            return Geometry(arg)
    elif klass in ('FeatureCollection', 'Collection'):
        # For now Collection is synonymous with FeatureCollection.
        if isinstance(arg, Collection):
            return arg
        else:
            return FeatureCollection(arg)
    elif klass == 'ImageCollection':
        return ImageCollection(arg)
    elif klass == 'Filter':
        return Filter(arg)
    elif klass == 'Algorithm':
        if isinstance(arg, basestring):
            # An API function name.
            return ApiFunction.lookup(arg)
        elif callable(arg):
            # A native function that needs to be wrapped.
            args_count = len(inspect.getargspec(arg).args)
            return CustomFunction.create(arg, 'Object', ['Object'] * args_count)
        elif isinstance(arg, Encodable):
            # An ee.Function or a computed function like the return value of
            # Image.parseExpression().
            return arg
        else:
            raise EEException('Argument is not a function: %s' % arg)
    elif klass == 'Dictionary':
        if isinstance(arg, dict):
            return arg
        else:
            return Dictionary(arg)
    elif klass == 'String':
        if (types.isString(arg) or
                isinstance(arg, ComputedObject) or
                isinstance(arg, String)):
            return String(arg)
        else:
            return arg
    elif klass == 'List':
        return List(arg)
    elif klass in ('Number', 'Float', 'Long', 'Integer', 'Short', 'Byte'):
        return Number(arg)
    elif klass in globals():
        cls = globals()[klass]
        ctor = ApiFunction.lookupInternal(klass)
        # Handle dynamically created classes.
        if isinstance(arg, cls):
            # Return unchanged.
            return arg
        elif ctor:
            # The client-side constructor will call the server-side constructor.
            return cls(arg)
        elif isinstance(arg, basestring):
            if hasattr(cls, arg):
                # arg is the name of a method in klass.
                return getattr(cls, arg)()
            else:
                raise EEException('Unknown algorithm: %s.%s' % (klass, arg))
        else:
            # Client-side cast.
            return cls(arg)
    else:
        return arg
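# An illustrative sketch of what the promotion table above does (the concrete values
# are examples, not taken from the source):
#   _Promote(3, 'Number')                           -> Number(3)
#   _Promote(Geometry.Point(0, 0), 'Element')       -> Feature(...)  (Geometries become Features)
#   _Promote(some_feature_collection, 'Collection') -> returned unchanged
# Anything whose klass is not recognized is passed through untouched, which keeps the
# promotion step safe to apply to every argument of an API call.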
class D2(BaseModel, self.Settings):
    stream = Feature(TextStream, store=True)
    words = Feature(Tokenizer, needs=stream, store=False)
    count = JSONFeature(WordCount, needs=words, store=True)
    aggregate = JSONFeature(WordCountAggregator, needs=count, store=True)
class Split(BaseModel, self.Settings):
    stream = Feature(TextStream, store=False)
    uppercase = Feature(ToUpper, needs=stream, store=True)
    lowercase = Feature(ToLower, needs=stream, store=True)
    cat = Feature(Concatenate, needs=[uppercase, lowercase], store=False)
def _Promote(arg, klass):
    """Wrap an argument in an object of the specified class.

    This is used to e.g.: promote numbers or strings to Images and arrays
    to Collections.

    Args:
      arg: The object to promote.
      klass: The expected type.

    Returns:
      The argument promoted if the class is recognized, otherwise the
      original argument.
    """
    if arg is None:
        return arg

    if klass == 'Image':
        return Image(arg)
    elif klass == 'Feature':
        if isinstance(arg, Collection):
            # TODO(user): Decide whether we want to leave this in. It can be
            # quite dangerous on large collections.
            return ApiFunction.call_(
                'Feature', ApiFunction.call_('Collection.geometry', arg))
        else:
            return Feature(arg)
    elif klass in ('Element', 'EEObject'):
        # TODO(user): Remove EEObject once the server is updated.
        if isinstance(arg, Element):
            # Already an EEObject.
            return arg
        elif isinstance(arg, ComputedObject):
            # Try a cast.
            return Element(arg.func, arg.args)
        else:
            # No way to convert.
            raise EEException('Cannot convert %s to Element.' % arg)
    elif klass == 'Geometry':
        if isinstance(arg, Collection):
            return ApiFunction.call_('Collection.geometry', arg)
        else:
            return Geometry(arg)
    elif klass in ('FeatureCollection', 'Collection'):
        # For now Collection is synonymous with FeatureCollection.
        if isinstance(arg, Collection):
            return arg
        else:
            return FeatureCollection(arg)
    elif klass == 'ImageCollection':
        return ImageCollection(arg)
    elif klass == 'Filter':
        return Filter(arg)
    elif klass == 'Algorithm' and isinstance(arg, basestring):
        return ApiFunction.lookup(arg)
    elif klass == 'Date':
        if isinstance(arg, basestring):
            try:
                import dateutil.parser  # pylint: disable=g-import-not-at-top
            except ImportError:
                raise EEException(
                    'Conversion of strings to dates requires the dateutil library.')
            else:
                return dateutil.parser.parse(arg)
        elif isinstance(arg, numbers.Number):
            return datetime.datetime.fromtimestamp(arg / 1000)
        elif isinstance(arg, ComputedObject):
            # Bypass promotion of this and do it directly.
            func = ApiFunction.lookup('Date')
            return ComputedObject(func, func.promoteArgs(func.nameArgs([arg])))
        else:
            return arg
    elif klass == 'Dictionary':
        if klass not in globals():
            # No dictionary class defined.
            return arg
        cls = globals()[klass]
        if isinstance(arg, cls):
            return arg
        elif isinstance(arg, ComputedObject):
            return cls(arg)
        else:
            # Can't promote non-ComputedObjects up to Dictionary; no constructor.
            return arg
    elif klass == 'String':
        if (types.isString(arg) or
                isinstance(arg, ComputedObject) or
                isinstance(arg, String) or
                types.isVarOfType(arg, String)):
            return String(arg)
        else:
            return arg
    elif klass in globals():
        cls = globals()[klass]
        # Handle dynamically created classes.
        if isinstance(arg, cls):
            return arg
        elif isinstance(arg, basestring):
            if not hasattr(cls, arg):
                raise EEException('Unknown algorithm: %s.%s' % (klass, arg))
            return getattr(cls, arg)()
        else:
            return cls(arg)
    else:
        return arg
def processSQuADtrain(trainfile, destfile, useQuestionClassificationAPI, authcode):
    '''
    Generate "Question Gold Top3_Distractors Q_Coarse Q_Fine Gold_spaCy Candidate_Type" rows for the SQuAD train dataset.

    :param trainfile: SQuAD train dataset 'SQuAD/train-v2.0.json'
    :param destfile: destination file, saving the "Question Gold Top3_Distractors Q_Coarse Q_Fine Gold_spaCy Candidate_Type" columns as a tsv file
    :param useQuestionClassificationAPI: true/false, for details please refer to http://www.harishmadabushi.com/research/questionclassification/question-classification-api-documentation/
    :param authcode: authcode for the QuestionClassificationAPI, please contact http://www.harishmadabushi.com/research/questionclassification/question-classification-api-documentation/
    :return: None
    '''
    with open(trainfile) as f:
        data = json.load(f)
    with open(destfile, 'w') as fw:
        fw.write('\t'.join(['Question', 'Gold', 'Top3_Distractors', 'Q_Coarse', 'Q_Fine', 'Gold_spaCy', 'Candidate_Type']) + '\n')
        articleList = []
        for i, article in enumerate(data['data']):  # SQuAD train dataset num of articles = 442
            title = article['title']
            paragraphs = article['paragraphs']
            contextAll = ''
            QAList = []
            for paragraph in paragraphs:
                contextAll += paragraph['context'] + '\n'  # merge all paragraphs
            article = Article(title=title, contextAll=contextAll)
            article.nlp()  # named entity recognition
            article.entityBERTEmb = getEntityBERTEmb(article.entitySet)
            for paragraph in paragraphs:
                for qid, qa in enumerate(paragraph['qas']):
                    curQA = QA(question=qa['question'], isImpossible=qa['is_impossible'])
                    response = None
                    if useQuestionClassificationAPI.lower() == 'true':
                        response = questionClassificationAPI(curQA.question, authcode)
                    if response and response['status'] == 'Success':
                        curQA.questionCoarseType = response['major_type']
                        curQA.questionFineType = response['minor_type']
                    if curQA.isImpossible:
                        curQA.gold = None
                        curQA.goldStartIdx = None
                        curQA.goldEndIdx = None
                        curQA.goldNERType = None
                    else:
                        curQA.gold = qa['answers'][0]['text']
                        curQA.goldStartIdx = qa['answers'][0]['answer_start']
                        curQA.goldEndIdx = curQA.goldStartIdx + len(curQA.gold)
                        if curQA.gold in article.entityText:  # if gold exactly matches an entity
                            curQA.goldNERType = article.entityText[curQA.gold]
                        else:
                            curQA.goldNERType = None
                            for start in range(curQA.goldStartIdx, curQA.goldEndIdx):  # if gold contains part of an entity
                                if start in article.entityStartIdx:
                                    curQA.goldNERType = article.entityStartIdx[start]
                                    break
                    distractorCandidatesNERTypeSet, condition = getDistractorCandidatesNERTypeSet(
                        curQA.questionCoarseType, curQA.questionFineType, curQA.goldNERType)
                    for type in distractorCandidatesNERTypeSet:
                        curQA.distractorCandidates.update(article.entityDict[type])
                    QAList.append(curQA)
                    print('P' + str(i + 1) + 'Q' + str(qid + 1) + ': ' + curQA.question)
                    fw.write(curQA.question + '\t')
                    print('Gold:', curQA.gold)
                    fw.write((curQA.gold if curQA.gold else 'None') + '\t')
                    # print(curQA.distractorCandidates)
                    # t3 = time.time()
                    finallist = []
                    goldBERTEmb = getGoldBERTEmb(curQA.gold)
                    quesBERTEmb = getQuesBERTEmb(curQA.question)
                    for d in curQA.distractorCandidates:
                        if curQA.gold and (curQA.gold.lower() in d[0].lower() or d[0].lower() in curQA.gold.lower()):
                            continue
                        feature = Feature(embed, vocab, curQA.question, curQA.gold, d[0], curQA.goldStartIdx, d[1],
                                          article.posDict, article.entityBERTEmb, goldBERTEmb, quesBERTEmb)
                        score = feature.score
                        finallist.append([score, d[0]])
                    # t4 = time.time()
                    finallist.sort(reverse=True)
                    print('My candidate:', [c[1] for c in finallist[:3]])
                    fw.write(str([c[1] for c in finallist[:3]]) + '\t')
                    print('Ques API tag:', curQA.questionCoarseType, ',', curQA.questionFineType)
                    fw.write((curQA.questionCoarseType if curQA.questionCoarseType else 'None') + '\t' +
                             (curQA.questionFineType if curQA.questionFineType else 'None') + '\t')
                    print('Gold spaCy tag:', curQA.goldNERType)
                    fw.write((curQA.goldNERType if curQA.goldNERType else 'None') + '\t')
                    print('distractorCandidatesNERTypeSet', distractorCandidatesNERTypeSet)
                    fw.write(str(distractorCandidatesNERTypeSet) + '\t\n')
                    print('')
                    # print('t2-t1, t4-t3', t2-t1, t4-t3)
                    # print('WikiData Description:', wbSearchEntities(curQA.gold))
            article.QAList = QAList
            articleList.append(article)
def __init__(self, field, phrases):
    self._phrases = phrases
    Feature.__init__(self, field)
class Doc(BaseModel, self.Settings):
    stream = Feature(TextStream, store=True)
    final = Feature(TheLastWord, needs=stream, store=True)