def find_mentions(entities):
    """Group entity mentions under a unique, grounded entity identifier.

    Args:
        entities: iterable of entity structs, each carrying a ``kb_id``
            sequence of grounded KB identifiers ('-1' marks ungrounded).

    Returns:
        OrderedDict mapping a tuple of equivalent KB ids -> list of entity
        mentions. Ungrounded entities each get their own 'UNK:<n>' key.
    """
    # Collect the distinct kb_id groups; equivalence is decided by the
    # connected components of the graph built over them.
    distinct_ids = []
    for ent in entities:
        if ent.kb_id not in distinct_ids:
            distinct_ids.append(ent.kb_id)

    # mention-level data sets
    id_graph = to_graph(distinct_ids)
    components = connected_components(id_graph)

    unique_entities = OrderedDict()
    unk_counter = 0

    # One bucket per connected component, except pure '-1' components.
    for comp in components:
        key = tuple(comp)
        if key[0] == '-1':
            continue
        unique_entities[key] = []

    for ent in entities:
        if ent.kb_id[0] == '-1':
            # Ungrounded mention: treat it as its own singleton entity.
            unique_entities[('UNK:' + str(unk_counter),)] = [ent]
            unk_counter += 1
            continue
        # Attach the mention to every component sharing at least one KB id.
        for key in unique_entities.keys():
            if set(ent.kb_id) & set(key):
                unique_entities[key].append(ent)

    return unique_entities
def preprocess(raw_data, dataset):
    """Convert the raw molecule records into graph form and dump them to JSON.

    Args:
        raw_data: dict with 'train'/'valid'/'test' lists of molecule dicts,
            each holding a 'smiles' string and a 'prop' value.
        dataset: dataset name, forwarded to ``to_graph`` and used in the
            output filename 'molecules_<section>_<dataset>.json'.

    Side effects:
        Writes one JSON file per section; prints progress to stdout.
    """
    # NOTE(review): the original also computed mean/std of a module-level
    # `all_values` (with a redundant tolist()->np.array round-trip) but never
    # used them — that dead code is removed here.
    print('parsing smiles as graphs...')
    processed_data = {'train': [], 'valid': [], 'test': []}
    file_count = 0
    for section in ['train', 'valid', 'test']:
        for smiles, prop in ((mol['smiles'], mol['prop']) for mol in raw_data[section]):
            nodes, edges = to_graph(smiles, dataset)
            # Skip molecules that produce no edges (unparseable / trivial).
            if len(edges) <= 0:
                continue
            processed_data[section].append({
                # np.array(...).tolist() normalizes numpy scalars to plain
                # Python types so json.dump can serialize them.
                'targets': np.array(prop).tolist(),
                'graph': edges,
                'node_features': nodes,
            })
            if file_count % 2000 == 0:
                print('finished processing: %d' % file_count, end='\r')
            file_count += 1
        print('%s: 100 %% ' % (section))
        with open('molecules_%s_%s.json' % (section, dataset), 'w') as f:
            json.dump(processed_data[section], f)
def preprocess(raw_data, dataset):
    """Turn raw SMILES records into graph dicts and persist them as JSON.

    Args:
        raw_data: dict with 'train' and 'valid' lists of molecule dicts,
            each holding 'smiles' and 'QED' entries.
        dataset: dataset name used by ``to_graph`` and in output filenames.

    Side effects:
        Writes 'molecules_<section>_<dataset>.json' per section, and pickles
        the list of training SMILES via ``utils.dump``.
    """
    print('parsing smiles as graphs...')
    processed_data = {'train': [], 'valid': []}
    file_count = 0
    for section in ['train', 'valid']:
        all_smiles = []  # every SMILES string kept for this section
        pairs = [(mol['smiles'], mol['QED']) for mol in raw_data[section]]
        for smiles, qed in pairs:
            nodes, edges = to_graph(smiles, dataset)
            if len(edges) <= 0:
                # Molecule yielded no graph edges — drop it.
                continue
            record = {
                'targets': [[qed]],
                'graph': edges,
                'node_features': nodes,
                'smiles': smiles,
            }
            processed_data[section].append(record)
            all_smiles.append(smiles)
            if file_count % 2000 == 0:
                print('finished processing: %d' % file_count, end='\r')
            file_count += 1
        print('%s: 100 %% ' % (section))
        # save the dataset
        with open('molecules_%s_%s.json' % (section, dataset), 'w') as f:
            json.dump(processed_data[section], f)
        # save all molecules in the training dataset
        if section == 'train':
            utils.dump('smiles_%s.pkl' % dataset, all_smiles)
def preprocess(raw_data, dataset):
    """Parse SMILES records (with histograms) into graphs and dump to JSON.

    Args:
        raw_data: dict with 'train'/'valid'/'test' lists of molecule dicts,
            each holding 'smiles', 'QED' and 'hist' entries.
        dataset: dataset name used by ``utils.to_graph`` and in filenames.

    Side effects:
        Writes 'molecules_<section>_<dataset>.json' per section and prints
        per-section molecule counts.
    """
    print('Parsing smiles as graphs...')
    processed_data = {'train': [], 'valid': [], 'test': []}
    file_count = 0
    for section in ['train', 'valid', 'test']:
        all_smiles = []  # record all smiles in training dataset
        triples = [(mol['smiles'], mol['QED'], mol['hist'])
                   for mol in raw_data[section]]
        for smiles, qed, hist in triples:
            nodes, edges = utils.to_graph(smiles, dataset)
            if len(edges) <= 0:
                # Graph conversion produced nothing usable — skip it.
                print('Error. Molecule with len(edges) <= 0')
                continue
            processed_data[section].append({
                'targets': [[qed]],
                'graph': edges,
                'node_features': nodes,
                'smiles': smiles,
                'hist': hist,
            })
            all_smiles.append(smiles)
            if file_count % 1000 == 0:
                print('Finished processing: %d' % file_count, end='\r')
            file_count += 1
        print('%s: 100 %% ' % (section))
        with open('molecules_%s_%s.json' % (section, dataset), 'w') as f:
            json.dump(processed_data[section], f)
    print("Train molecules = " + str(len(processed_data['train'])))
    print("Valid molecules = " + str(len(processed_data['valid'])))
    print("Test molecules = " + str(len(processed_data['test'])))
def build_model(image, param1, param2, cycle4, cycle8, facet):
    """Build the CPLEX ILP model for piecewise-linear image fitting.

    Args:
        image: input image array the variables/constraints are derived from.
        param1, param2: weighting parameters forwarded to the constraint and
            objective builders respectively.
        cycle4, cycle8: flags forwarded to ``get_constraints`` controlling
            4-/8-cycle constraints.
        facet: attached to the multicut callback as ``_facet``.

    Returns:
        A configured ``cplex.Cplex`` minimization model with the multicut
        callback registered.
    """
    print("Building Cplex model...")

    # Fresh model, minimizing the objective.
    model = cplex.Cplex()
    model.objective.set_sense(model.objective.sense.minimize)

    # Derived data: discrete second derivative and the pixel graph.
    derivative = utils.get_derivative(image)
    graph = utils.to_graph(image)

    # Variables: base variables first, then the objective columns.
    base_vars = get_varibles(image)
    model.variables.add(names=base_vars)
    colnames, obj, types = get_obj(derivative, param2)
    model.variables.add(obj=obj, types=types, names=colnames)

    # Linear constraints.
    rows, senses, rhs = get_constraints(
        image, derivative, param1, cycle4=cycle4, cycle8=cycle8)
    model.linear_constraints.add(lin_expr=rows, senses=senses, rhs=rhs)

    # Parallel solve settings.
    model.parameters.parallel.set(-1)
    model.parameters.threads.set(32)

    # Register the multicut callback and hand it the data it needs.
    # (A cutremoveCallback variant existed but is not registered.)
    model.register_callback(multicutCallback)
    multicutCallback._graph = graph.copy()
    multicutCallback._names = model.variables.get_names()
    multicutCallback._facet = facet

    return model
def affine_regression(image):
    """Perform a parametric affine fitting per pixel.

    For every pixel, fit an affine plane to each of the four 2x2
    neighborhoods containing it (down-right, down-left, up-right, up-left)
    via ``fit`` and keep the parameters with the lowest MSE. The results are
    stored as node attributes on the pixel graph.

    Args:
        image: 2D image array; assumed at least 2x2 in both dimensions so
            every pixel has at least one valid 2x2 neighborhood.

    Returns:
        The graph from ``utils.to_graph(image)`` with per-node attributes
        'weight' (always 1), 'pixels' ([[i, j, image[i, j]]]) and
        'affine_params' (the best-fit parameters).
    """
    # build graph
    graph = utils.to_graph(image)
    affine_params = np.zeros((*image.shape, 3))
    print("Fitting affine parameters...")

    rows, cols = image.shape[0], image.shape[1]
    # The four 2x2 quadrants around (i, j), in the same order the original
    # copy-pasted branches tried them: (down,right), (down,left),
    # (up,right), (up,left). Order matters because ties keep the first fit
    # (strict '<' comparison below).
    quadrants = ((1, 1), (1, -1), (-1, 1), (-1, -1))

    for (i, j) in graph.nodes():
        best_mse = float("inf")
        best_param = None
        for di, dj in quadrants:
            ni, nj = i + di, j + dj
            # avoid out of bound
            if not (0 <= ni < rows and 0 <= nj < cols):
                continue
            # Point order within the quadrant matches the original:
            # self, vertical neighbor, horizontal neighbor, diagonal.
            X = [[i, j], [ni, j], [i, nj], [ni, nj]]
            y = [image[i, j], image[ni, j], image[i, nj], image[ni, nj]]
            # linear regression
            cur_param, cur_mse = fit(X, y)
            if cur_mse < best_mse:
                best_param, best_mse = cur_param, cur_mse
        # record best affine parameters (best_param is never None for
        # images of at least 2x2, since one quadrant is always in bounds)
        affine_params[i, j] = best_param

    # set attribute
    for (i, j) in graph.nodes():
        # number of nodes as weight
        graph.nodes[(i, j)]["weight"] = 1
        # coordinates and depth
        graph.nodes[(i, j)]["pixels"] = np.array([[i, j, image[i, j]]])
        # affine parameters
        graph.nodes[(i, j)]["affine_params"] = affine_params[i, j]

    return graph