def add_splitter(self, model_name, receptacle_id):
    splitter_id = self.global_ids.get_next_splitter_id()
    rids = []
    receptacle_count = SPLITTER_MODELS[model_name]
    for r_id in range(receptacle_count):
        rids.append(self.global_ids.get_next_receptacle_id())
    if DEBUG:
        print('Support.add_splitter() receptacle_count:{0}, rids:{1}'.format(receptacle_count, rids))

    sp = Splitter(model_name, splitter_id, rids)
    receptacle = self.get_receptacle_by_id(receptacle_id)
    receptacle.connect_load('SPLITTER', sp)
    self.full_receptacle_ids.append(receptacle_id)
    self.empty_receptacle_ids.remove(receptacle_id)

    r = Results()
    r.set_object_id(splitter_id)
    r.set_next_receptacle_id_from_list(rids)
    self.splitter_ids.append(splitter_id)

    del r_id
    for r_id in rids:
        r1 = sp.get_receptacle_by_id(r_id)
        if r1 is None:
            print('Support.add_splitter sp.get_receptacle_by_id returned None for r1. r_id:{0}, rids:{1}'.format(r_id, rids))
        self.empty_receptacle_ids.append(r_id)

    if DEBUG:
        print('Support.add_splitter(). Final empty_receptacle_ids:{0}'.format(self.empty_receptacle_ids))
        print('Support.add_splitter(). Final full_receptacle_ids:{0}'.format(self.full_receptacle_ids))
    return r
def split_patents(self, temp_dir, filename):
    self.logger.info("splitting files")
    xmls = get_files(temp_dir, ".xml")
    splitter = Splitter()
    for file in xmls:
        splitter.split_file(file, join(self.working_directory, self.patentDir, filename))
def main():
    # Set working directory to project folder
    os.chdir('../.')
    print(os.getcwd())

    # Create Logger File to track all changes
    logger = Logger(os)

    # Create a list of words for parser to ignore
    stop_words = []  # ['PSY', 'STAT']
    ninja = Splitter(stop_words)

    ans = input('Do you want to manually input lines? ')
    # Create Messenger Object to ask prompts
    if 'y' in ans or 'Y' in ans:
        messenger = Messenger()
        line = ''
        exit = False
        while not exit:
            line = messenger.collect_input()
            if line == 'quit' or line == 'exit':
                exit = True
            else:
                # Output would be collected here
                print(ninja.split_line(line))
    # else:
    print('Exiting code')
def hy_prediction(dataio, patient_info, patient_array, params):
    ### Task Setting
    task = params['task'][1]
    split_method = 'ratio'
    ratio = 0.8  # provide the ratio
    krun = 10  # run krun times then average the result

    ### Initialization
    acc = np.zeros(krun, dtype='float32')  # evaluation metric
    n_feature = dataio.feature.feature_len
    param_w = np.zeros([n_feature, krun], dtype='float32')  # weights parameter

    ### H&Y Reading
    feature = dataio.feature
    patient_info = feature.get_hy_stage(patient_info, patient_array)

    print('-----')
    # split = Splitter(task, patient_array, ratio, split_method, patient_info)
    split = Splitter(task, patient_array, ratio, split_method)
    for k in range(krun):
        ### Data Splitting
        train_data, test_data = split.get_splitter(k)
        ### Model Training
        hy_pred = HYPredictor(k, patient_info, train_data, params['result_path'])
        model, y_pred = hy_pred.train_model()
        param_w[:, k], _ = hy_pred.get_param()
        ### Evaluating
        hy_eval = Evaluator(model, test_data, patient_info, task, hy_pred)
        acc[k] = hy_eval.compute_accuracy()

    print('-----')
    print('Accuracy of the %s task: %f' % (task, np.sum(acc) / krun))

    ### Displaying Feature (selected by prediction model)
    feature = dataio.feature
    feature.get_pred_feature(param_w, krun, 'yh')
def pd_prediction(dataio, patient_info, patient_array, params):
    ### Task Setting
    task = params['task'][0]  # disease prediction
    split_method = 'cross-validation'
    kfold = 5  # 5-fold validation

    ### Initialization
    auc = np.zeros(kfold, dtype='float32')  # evaluation metrics
    ap = np.zeros(kfold, dtype='float32')
    n_feature = dataio.feature.feature_len
    param_w = np.zeros([n_feature, kfold], dtype='float32')

    print('-----')
    split = Splitter(task, patient_array, kfold, split_method)
    for k in range(kfold):  # each fold, k is the index of the test set
        ### Data Splitting
        train_data, test_data = split.get_splitter(k)
        ### Model Training
        pd_pred = PDPredictor(k, patient_info, train_data, params['result_path'])
        model, y_pred = pd_pred.train_model()
        param_w[:, k], _ = pd_pred.get_param()
        ### Evaluating
        pd_eval = Evaluator(model, test_data, patient_info, task, pd_pred)
        auc[k], ap[k] = pd_eval.compute_accuracy()

    print('-----')
    print('AUC of the %s task: %f' % (task, np.sum(auc) / kfold))
    print('Average Precision of the %s task: %f' % (task, np.sum(ap) / kfold))

    ### Displaying Feature (selected by prediction model)
    feature = dataio.feature
    feature.get_pred_feature(param_w, kfold, 'pd')
def moca_prediction(dataio, patient_info, patient_array, params):
    ### Task Setting
    task = params['task'][2]
    split_method = 'ratio'
    ratio = 0.8  # provide the ratio
    krun = 5  # run 5 times then average the result

    ### Initialization
    rmse = np.zeros(krun, dtype='float32')  # evaluation metric
    n_feature = dataio.feature.feature_len
    param_w = np.zeros([n_feature, krun], dtype='float32')

    ### MoCA Reading
    feature = dataio.feature
    patient_info = feature.get_moca_score(patient_info, patient_array)

    print('-----')
    split = Splitter(task, patient_array, ratio, split_method)
    for k in range(krun):
        ### Data Splitting
        train_data, test_data = split.get_splitter(k)
        ### Model Training
        moca_pred = MoCAPredictor(k, patient_info, train_data, params['result_path'])
        model, y_pred = moca_pred.train_model()
        param_w[:, k] = moca_pred.get_param()
        ### Evaluating
        pd_eval = Evaluator(model, test_data, patient_info, task, moca_pred)
        rmse[k] = pd_eval.compute_accuracy()

    print('-----')
    print('RMSE of the %s task: %f' % (task, np.sum(rmse) / krun))

    ### Displaying Feature (selected by prediction model)
    feature = dataio.feature
    feature.get_pred_feature(param_w, krun, 'moca')
def test_splitter_does_the_map(self):
    areas = read_kml_areas("areas.kml")
    city_spots = [
        {"name": "skytower", "type": "shopping mall", "lat": 17.019690, "lng": 51.094880},
        {"name": "Biedronka close to skytower", "type": "small shop", "lat": 17.018921, "lng": 51.097994},
        {"name": "Panorama Racławicka", "type": "historical building", "lat": 17.044462, "lng": 51.110171},
        {"name": "Galeria Dominikańska", "type": "shopping mall", "lat": 17.040685, "lng": 51.108244},
    ]
    splitter = Splitter()
    assigned = splitter.split_all_points(city_spots, areas)
    self.assertTrue(len(assigned) == 2)
    self.assertIn('Rynek 1', assigned)
    self.assertIn('Gajowice 1', assigned)
def visitTextFile(self, textfile):
    splitter = Splitter(textfile.filePath, len(self.workers))
    file_split_result = splitter.split()
    self.operations[textfile.id] = FilePartition(textfile.id, len(self.workers), file_split_result, textfile.filePath)
    self._set_collect_count(textfile)
def __init__(self, modelId, runNo, filter_dics, filename, _type, splitter_type):
    super(Tracker, self).__init__()
    self.modelId = modelId
    self.runNo = runNo
    self.run_dir = str(modelId) + '_' + str(runNo)
    self.filename = filename
    self.filters = self.prepare_filters(filter_dics)
    self.type = _type  # lazy or eager
    self.splitter = Splitter(splitter_type)
    self.track_data = {}
def __init__(self):
    self.splitter = Splitter()
    self.postagger = POSTagger()
    self.dicttagger = DictionaryTagger([
        '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
        '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
        '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
        '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
        '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml'
    ])
def process_text(text):
    splitter = Splitter()
    postagger = POSTagger()
    # Split the sentences to words
    splitted_sentences = splitter.split(text)
    # Do Parts of Speech Tagging on the words
    pos_tagged_sentences = postagger.pos_tag(splitted_sentences)
    dict_tagged_sentences = dicttagger.tag(pos_tagged_sentences)
    return sum_score(dict_tagged_sentences)
def OCRImage(self, imageUrl, splitLength=None):
    # ////////////////////////////////////////
    # Call ocr.space API, wait and return list of tweets
    ocrResult = self.ocr_api.ocr_url(imageUrl)
    if splitLength is not None:
        tweetsToSend = Splitter.forTweets(
            ocrResult, splitLength=splitLength
        )  # tweet length limits (280-8-15) user name and brackets
    else:
        tweetsToSend = Splitter.forTweets(ocrResult)
    if DEBUG:
        print(" Tweet chain length {}".format(len(tweetsToSend)))
    return tweetsToSend
def split_by_silence(self):
    '''
    Uses the Splitter class to split the audio by silence to get the timestamps
    and respective filenames of the split segments.

    Segments is a list of tuples (filename, (start_time, end_time)).
    '''
    if self.audio_extracted:
        splitter = Splitter(self.audio_file)
    else:
        raise Exception("ERROR: File has not been extracted from video yet.")
    splitter.run()
    self.segments = splitter.get_segments()
def __init__(self, dset_name, net_names, hard_labels, device, exp=None):
    if exp is None:
        exp = 0
        while osp.exists(osp.join(cfg.DATA_DIR, 'exp_' + str(exp))):
            exp += 1
    self.exp_dir = osp.join(cfg.DATA_DIR, 'exp_' + str(exp))
    self.num_exp = exp

    dset_dir = osp.join(self.exp_dir, dset_name)
    self.splitting_dir = osp.join(dset_dir, cfg.SPLITTING_DIR)
    self.feat_dir = osp.join(dset_dir, cfg.FEATURE_DIR)
    self.label_dir = osp.join(dset_dir, cfg.LABEL_DIR, 'hard' if hard_labels else 'soft')
    self.net_dir = osp.join(dset_dir, cfg.NET_DIR, 'hard' if hard_labels else 'soft')
    self.res_dir = osp.join(dset_dir, cfg.RESULT_DIR, 'hard' if hard_labels else 'soft')

    self.dset = cfg.DSETS[dset_name]
    self.splitter = Splitter(self.dset, self.splitting_dir)
    self.extractor = Extractor(self.dset, self.splitting_dir, self.feat_dir, net_names, device)
    self.augmenter = Augmenter(self.dset, self.splitting_dir, self.feat_dir, self.label_dir, net_names, hard_labels)
    self.trainer = Trainer(self.dset, self.label_dir, self.net_dir, self.res_dir, net_names, hard_labels, device)
def run_splitter(batch):
    dataset = Splitter(
        batch,
        annotations=args["--annotations"],
        labels=args["--labels"],
        overlap=args["--overlap"],
        duration=args["--duration"],
        output_directory=args["--output_directory"],
    )
    dataloader = torch.utils.data.DataLoader(
        dataset,
        # batch_size=batch_size,
        batch_size=1,
        shuffle=False,
        num_workers=args["--cores_per_node"],
        collate_fn=dataset.collate_fn,
    )

    start = timer()
    outputs = []
    for data in dataloader:
        for out in data:
            outputs.append(out)
    end = timer()
    print("DEBUG: end - start", end - start)
    return outputs
def __call__(self, _, *, audio_paths=[]):
    batch_size = len(audio_paths)
    if batch_size == 0:
        return []
    dataset = Splitter(
        audio_paths,
        annotations=args["--annotations"],
        labels=args["--labels"],
        overlap=args["--overlap"],
        duration=args["--duration"],
        output_directory=args["--output_directory"],
    )
    dataloader = torch.utils.data.DataLoader(
        dataset,
        # batch_size=batch_size,
        batch_size=1,
        shuffle=False,
        num_workers=args["--cores_per_node"],
        collate_fn=dataset.collate_fn,
    )

    start = timer()
    outputs = []
    for idx, data in enumerate(dataloader):
        for out in data:
            outputs.append(out)
    end = timer()
    print("DEBUG: end - start", end - start)
    return outputs
def build_tree(self, X, y, sample_weight, class_distribution, level):
    # Need node weight for counting feature importances and probability of classes
    node_weight = np.sum(class_distribution)

    # node has only one class
    if np.unique(y).shape[0] == 1:
        proba = class_distribution / node_weight
        return Tree(info=proba, is_leaf=True)

    # node has less than min_samples_split samples
    if y.shape[0] < self.min_samples_split:
        proba = class_distribution / node_weight
        return Tree(info=proba, is_leaf=True)

    # node has less than 2*min_samples_leaf samples, so children would have less than min_samples_leaf samples
    if y.shape[0] < 2 * self.min_samples_leaf:
        proba = class_distribution / node_weight
        return Tree(info=proba, is_leaf=True)

    # tree has max_depth depth
    if self.max_depth is not None:
        if level == self.max_depth:
            proba = class_distribution / node_weight
            return Tree(info=proba, is_leaf=True)

    splitter = Splitter()
    feature, threshold, split_pos, index = splitter.find_best_split(X, y, sample_weight,
                                                                    class_distribution, self.min_samples_leaf)
    if feature is None:
        proba = class_distribution / node_weight
        return Tree(info=proba, is_leaf=True)

    gain = splitter.impurity_gain
    left_distribution = splitter.left_distribution
    right_distribution = splitter.right_distribution
    self.feature_importances_[feature] += gain * (float(node_weight) / self.total_weight)

    X = X[index]
    y = y[index]
    sample_weight = sample_weight[index]
    left_tree = self.build_tree(X[0:split_pos], y[0:split_pos], sample_weight[0:split_pos],
                                left_distribution, level + 1)
    right_tree = self.build_tree(X[split_pos:], y[split_pos:], sample_weight[split_pos:],
                                 right_distribution, level + 1)
    return Tree(left_tree, right_tree, Predicate(feature, threshold), class_distribution)
def processQuestion(gloveModel, question, minLen=1, maxLen=3, useAPI=False, useSynonyms=False):
    tagger = POSTagger()
    pos = tagger.parse(question)

    # create splitter and generalizer
    splitter = Splitter()
    if question[-1] == '?' or question[-1] == '.':
        question = question[:-1]
    gen_question = splitter.generalize(question, pos)

    labels = []
    resultsExists = False
    if not useAPI:
        parts = list(splitter.split(gen_question, min=minLen, max=maxLen))
    else:
        resultsExists = True
        apiResult, _ = api.getBinaryRelations(question)
        parts = [rel.predicate for rel in apiResult if len(rel.predicate_positions_) > 1]

    for part in parts:
        if len(part.split()) > 1:
            labels.append(part.split()[0] +
                          ''.join(''.join([w[0].upper(), w[1:].lower()]) for w in part.split()[1:]))

    if useSynonyms:
        predicates = [max(part.split(), key=len) for part in parts]
        if predicates is not None and len(predicates) > 0:
            for predicate in predicates:
                for part in list(parts):
                    if predicate in part:
                        for syn in gloveModel.gloveModel.most_similar(predicate.lower()):
                            parts.append(part.replace(predicate, syn[0]))

    if len(parts) == 0:
        resultsExists = False
        parts = list(splitter.split(gen_question, min=minLen, max=maxLen))

    # create embedder part
    vectors = []
    for part in parts:
        vectors.append(gloveModel.getVector(part))

    return vectors, parts, pos, gen_question, labels, resultsExists
def __init__(self):
    self.sentences = []
    self.abbreviation = {}
    self.load_data()
    self.load_abbrv()
    self.normalizer = Normalizer()
    self.splitter = Splitter()
    self.corrector = Filter()
    self.lemmatizer = WordNetLemmatizer()
    self.missing_apostrophe_vocab = [
        'isnt', 'arent', 'wasnt', 'werent', 'wont', 'dont', 'didnt', 'doesnt',
        'couldnt', 'shouldnt', 'hasnt', 'havent', 'hadnt'
    ]
    self.tokenizer_mistake_vocab = [
        'isn', 'aren', 'wasn', 'weren', 'won', 'don', 'didn', 'doesn',
        'couldn', 'shouldn', 'hasn', 'haven', 'hadn'
    ]
    self._norm = joblib.load('model.crfsuite')
def assemble2(self):
    """
    Builder method: build a Chain of linked Components
    :return:
    """
    log.info('Assembling Chain: %s...' % self.chain_str)

    # Create linked list of input/filter/output (ETL Component) objects
    chain_str = self.chain_str
    split_comps = []
    while chain_str:
        chain_str = chain_str.strip()

        # Check and handle Splitter construct
        # e.g. input_xml_file |(transformer_xslt|output_file) (output_std) (transformer_xslt|output_std)
        if chain_str.startswith('('):
            etl_section_name, chain_str = chain_str.split(')', 1)
            etl_section_name = etl_section_name.strip('(')

            # Check for subchain (split at Filter level)
            if '|' in etl_section_name:
                # Have subchain: use Chain to assemble
                sub_chain = Chain(etl_section_name, self.config_dict)
                sub_chain.assemble2()
                child_comp = sub_chain.first_comp
            else:
                # Single component (Output) to split
                child_comp = factory.create_obj(self.config_dict, etl_section_name.strip())

            # Assemble Components (can be subchains) for Splitter later
            split_comps.append(child_comp)

            if '(' in chain_str:
                # Still components (subchains) to assemble for Splitter
                continue

        if len(split_comps) > 0:
            # Next component is Splitter with children
            etl_comp = Splitter(self.config_dict, split_comps)
            split_comps = []
        else:
            # "Normal" case: regular Components piped in Chain
            if '|' in chain_str:
                # More than one component in remaining Chain
                etl_section_name, chain_str = chain_str.split('|', 1)
            else:
                # Last element, we're done!
                etl_section_name = chain_str
                chain_str = None

            # Create the ETL component by name and properties
            etl_comp = factory.create_obj(self.config_dict, etl_section_name.strip())

        # Add component to end of Chain
        self.add(etl_comp)
def __init__(self, dataset: ds.Dataset):
    self.loader = Splitter(batch_size=32).get_all(dataset)
    self.dataset_name = str(dataset)
    self.features = []
    self.labels = []
    self.pos_class = dataset.pos_class
    print(f'Encoding: {self.dataset_name}')
    if not os.path.exists('data'):
        os.makedirs('data')
    if not os.path.exists('data/encoded/'):
        os.makedirs('data/encoded/')
    self.root_dir = 'data/encoded/'
def __init__(self, memoryFile):
    self.nCycles = 0  # Used to hold number of clock cycles spent executing instructions

    self.dataMemory = DataMemory(memoryFile)
    self.instructionMemory = InstructionMemory(memoryFile)
    self.registerFile = RegisterFile()
    self.alu = ALU()
    self.mainControl = MainControl()
    self.splitter = Splitter()
    self.signExtender = SignExtender()
    self.andGate = AndGate()
    self.breaker = Breaker()
    self.constant4 = Constant(4)
    # self.randomControl = RandomControl()
    self.pcMux1 = Mux()
    self.pcMux2 = Mux()
    self.regMux = Mux()
    self.aluMux = Mux()
    self.resultMux = Mux()
    self.luiMux = Mux()
    self.adder = Add()
    self.branchAdder = Add()
    self.jumpAddress = JMPAddress()
    self.shiftBranch = LeftShiftTwo()
    self.shiftJump = LeftShiftTwo()
    self.pc = PC(hex(0xbfc00000))  # hard coded "boot" address

    self.elements = [self.constant4, self.adder, self.instructionMemory, self.breaker,
                     self.splitter, self.shiftJump, self.mainControl, self.regMux,
                     self.signExtender, self.luiMux, self.registerFile, self.jumpAddress,
                     self.shiftBranch, self.branchAdder, self.aluMux, self.alu,
                     self.dataMemory, self.andGate, self.pcMux1, self.pcMux2,
                     self.resultMux, self.registerFile, self.pc]

    self._connectCPUElements()
class SentimentAnalyzingService(object):

    def __init__(self):
        self.splitter = Splitter()
        self.postagger = POSTagger()
        self.dicttagger = DictionaryTagger([
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml'
        ])

    def valueOf(self, sentiment):
        if sentiment == 'positive':
            return 1
        if sentiment == 'negative':
            return -1
        return 0

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        if not sentence_tokens:
            return acum_score
        else:
            current_token = sentence_tokens[0]
            tags = current_token[2]
            token_score = sum([self.valueOf(tag) for tag in tags])
            if previous_token is not None:
                previous_tags = previous_token[2]
                if 'inc' in previous_tags:
                    token_score *= 2.0
                elif 'dec' in previous_tags:
                    token_score /= 2.0
                elif 'inv' in previous_tags:
                    token_score *= -1.0
            return self.sentence_score(sentence_tokens[1:], current_token, acum_score + token_score)

    def sentiment_score(self, dictTaggedSentences):
        return sum([self.sentence_score(sentence, None, 0.0) for sentence in dictTaggedSentences])

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        sentences = self.splitter.splitParagraphToListOfSentences(textToBeAnalysed)
        pos_tagged_sentences = self.postagger.pos_tag(sentences)
        dict_tagged_sentences = self.dicttagger.tag(pos_tagged_sentences)
        score = self.sentiment_score(dict_tagged_sentences)
        return score
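# Hypothetical usage sketch for the service above (not from the original source).
# Assumptions: the class and its Splitter/POSTagger/DictionaryTagger dependencies are
# importable from a local module named basic_sentiment_analysis, and the dictionary
# YAML paths listed in __init__ exist on disk.
from basic_sentiment_analysis import SentimentAnalyzingService

service = SentimentAnalyzingService()
# Scores are summed per sentence: clearly positive text should score above 0, negative below 0.
print(service.performBasicSentimentAnalysis("What a wonderful, great movie!"))
print(service.performBasicSentimentAnalysis("This was a terrible, awful experience."))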
def attempt_dd_improvement(main_tour, best_length, other_tour, other_length):
    segments = Splitter(main_tour, other_tour).get_segments()
    positive_kmoves, negative_kmoves = segments_to_beneficial_kmoves(xy, segments, main_tour)

    # Now that we have independent kmoves, it can be very expensive to try all possible combinations.
    # So for efficiency's sake we try high-yield (supposedly) simple combinations:
    # 1. Try all beneficial kmoves at once. Quit if it succeeds (cannot get better).
    # 2. Try each beneficial kmove sequentially, starting from the highest gain.
    # 3. Exclude negative k-moves sequentially, starting from the highest loss.
    naive_gain = best_length - other_length
    if positive_kmoves:
        max_positive_gain = sum([x[0] for x in positive_kmoves])
        if max_positive_gain < naive_gain:
            print('MAX POSITIVE GAIN < NAIVE GAIN')
            main_tour = other_tour
            best_length = other_length
            return main_tour, best_length
        all_positive = combine_segment_array([x[1] for x in positive_kmoves])
        test_tour = perform_kmove(main_tour, all_positive)
        if len(test_tour) == len(main_tour):
            main_tour = test_tour
            print('Trying all {} positive kmoves together worked; gain: {} (naive gain: {})'.format(
                len(positive_kmoves), max_positive_gain, naive_gain))
            assert max_positive_gain >= naive_gain
            assert max_positive_gain > 0
            return main_tour, best_length - max_positive_gain

    # There may be cases where naive gain is more than decomposed gains:
    # decomposed gains currently only return moves that can be independently performed.
    # Infeasible moves that are improvements but can only be combined with other moves to become feasible
    # (a potentially computationally expensive search) will be excluded from the decomposed moves.
    dd_gain = 0  # gain due to decomposed kmoves.
    for k in positive_kmoves:
        print('    trying {}-opt move with gain {}'.format(len(k[1]['adds']), k[0]))
        test_tour = perform_kmove(main_tour, k[1])
        if len(test_tour) == len(main_tour):
            main_tour = test_tour
            best_length -= k[0]
            dd_gain += k[0]

    if naive_gain > dd_gain:
        print('naive_gain ({}) greater than dd_gain ({})'.format(naive_gain, dd_gain))
        main_tour = other_tour
        best_length = other_length
    if dd_gain > 0 and dd_gain > naive_gain:
        print('    dd gain {} greater than naive gain {}'.format(dd_gain, naive_gain))
    elif naive_gain > 0:
        print('NAIVE GAIN > 0 WITH NO DD POSITIVE GAIN')
        main_tour = other_tour
        best_length = other_length
    return main_tour, best_length
def perturbed_hill_climb(xy, tour):
    tries = 0
    success = 0
    best_length = tour_util.length(xy, tour)
    while True:
        new_tour, naive_new_length = two_opt.optimize(xy, tour_util.double_bridge(tour))  # double bridge
        # test_tour = tour[:]
        # random.shuffle(test_tour)
        # new_tour, naive_new_length = two_opt.optimize(xy, test_tour)  # random restart
        segments = Splitter(tour, new_tour).get_segments()
        kmoves = segments_to_beneficial_kmoves(xy, segments, tour)
        max_gain = 0
        if kmoves:
            max_gain = sum([k[0] for k in kmoves])
        naive_gain = best_length - naive_new_length

        # There may be cases where naive gain is more than decomposed gains:
        # decomposed gains currently only return moves that can be independently performed.
        # Infeasible moves that are improvements but can only be combined with other moves to become feasible
        # (a potentially computationally expensive search) will be excluded from the decomposed moves.
        dd_gain = 0  # gain due to decomposed kmoves.
        if kmoves:
            for k in kmoves:
                print('    trying {}-opt move with gain {}'.format(len(k[1]['adds']), k[0]))
                test_tour = perform_kmove(tour, k[1])
                if len(test_tour) == len(tour):
                    tour = test_tour
                    best_length -= k[0]
                    dd_gain += k[0]

        if naive_gain > dd_gain:
            print('naive_gain ({}) greater than dd_gain ({})'.format(naive_gain, dd_gain))
            tour = new_tour
            best_length = naive_new_length
        if naive_gain > 0 or dd_gain > 0:
            success += 1
        if dd_gain > 0 and dd_gain > naive_gain:
            print('    dd gain {} greater than naive gain {}'.format(dd_gain, naive_gain))

        tries += 1
        current_length = basic.tour_length(xy, tour)
        assert best_length == current_length
        if current_length <= TARGET_LENGTH:
            break
        print('current best: {} (iteration {}), improvement rate: {}'.format(best_length, tries, success / tries))
def assemble(self):
    """
    Builder method: build a Chain of linked Components
    :return:
    """
    log.info('Assembling Chain: %s...' % self.chain_str)

    # Create linked list of input/filter/output (ETL Component) objects
    chain_str_arr = self.chain_str.split('|')
    for etl_section_name in chain_str_arr:
        # Check for splitting outputs construct using '+'
        # TODO: may also construct combining Inputs or split to multiple sub-Chains
        # for now only Outputs supported for splitting
        if '+' in etl_section_name:
            section_names = etl_section_name.split('+')
            log.info('Splitting to: %s' % etl_section_name)
            child_comps = []
            for section_name in section_names:
                if '(' in section_name and ')' in section_name:
                    section_name = section_name.replace(',', '|')
                    section_name = section_name.strip('(')
                    section_name = section_name.strip(')')
                # Create the child ETL component by name and properties
                child_comp = factory.create_obj(self.config_dict, section_name.strip())
                child_comps.append(child_comp)
            etl_comp = Splitter(self.config_dict, child_comps)
        else:
            # Create the ETL component by name and properties
            etl_comp = factory.create_obj(self.config_dict, etl_section_name.strip())

        # Add component to end of Chain
        self.add(etl_comp)
def squishySplineIk(startLoc, endLoc):
    ikJoints = list()
    startJoint = pmc.createNode('joint')
    adv.alignObjects(startJoint, startLoc)
    endJoint = pmc.createNode('joint')
    adv.alignObjects(endJoint, endLoc)

    pmc.parent(endJoint, startJoint)
    startJoint.orientJoint('xzy', secondaryAxisOrient='zup')
    pmc.makeIdentity(endJoint, apply=True, jointOrient=True)

    Splitter.doSplit(startJoint, 10)
    ikJoints.append(startJoint)
    ikJoints.extend(reversed(startJoint.getChildren(ad=True, type='joint')))
    for i, ikj in enumerate(ikJoints):
        ikj.radius.set(2)
        ikj.rename('ikj_spine{0:02d}'.format(i))

    # Create second set of joints
    rigJoints = adv.makeDuplicateJoints(joints=ikJoints, search='ikj_', replace='local_rig_', connectBone=False)

    # HACK I haven't figured out how to create SDK nodes procedurally,
    # so making some dummy locs to make the curve I need
    a = pmc.createNode('transform')
    b = pmc.createNode('transform')

    pmc.setKeyframe(a.ty, t=0, value=2.5, inTangentType='flat', outTangentType='flat')
    pmc.setKeyframe(a.ty, t=10, value=0, inTangentType='flat', outTangentType='flat')
    pmc.keyTangent(a.ty, index=[0], inAngle=0)
    pmc.keyTangent(a.ty, index=[1], inAngle=-30)
    pmc.keyTangent(a.ty, index=[0], outAngle=0)
    pmc.keyTangent(a.ty, index=[1], outAngle=-30)
    animSquashCurve = a.ty.listConnections()[0]
    animSquashCurve.output.disconnect(a.ty)
    animSquashCurve.rename('squash_ramp')

    pmc.setKeyframe(a.tx, t=0, value=0, inTangentType='flat', outTangentType='flat')
    pmc.setKeyframe(a.tx, t=5, value=1, inTangentType='flat', outTangentType='flat')
    pmc.setKeyframe(a.tx, t=10, value=0, inTangentType='flat', outTangentType='flat')
    animTwistCurve = a.tx.listConnections()[0]
    animTwistCurve.output.disconnect(a.tx)
    animTwistCurve.rename('twist_ramp')

    pmc.delete(a, b)

    animControls = dict()
    animControls['lower_spine'] = adv.makeControlNode('ctl_lower_spine', targetObject=rigJoints[2], alignRotation=False)
    animControls['middle_spine'] = adv.makeControlNode('ctl_middle_spine')
    animControls['upper_spine'] = adv.makeControlNode('ctl_upper_spine', targetObject=rigJoints[-2], alignRotation=False)
    animControls['lower_spine'][0].rotateOrder.set(adv.ROO_YXZ)
    animControls['middle_spine'][0].rotateOrder.set(adv.ROO_YXZ)
    animControls['upper_spine'][0].rotateOrder.set(adv.ROO_YXZ)

    pmc.pointConstraint(animControls['lower_spine'][0], animControls['upper_spine'][0],
                        animControls['middle_spine'][1], mo=False)
    pmc.orientConstraint(animControls['lower_spine'][0], animControls['upper_spine'][0],
                         animControls['middle_spine'][1], mo=False)

    splineIk = pmc.ikHandle(sj=ikJoints[0], ee=ikJoints[-1], sol='ikSplineSolver', parentCurve=False,
                            createCurve=True, simplifyCurve=True, numSpans=2, rootOnCurve=False, n='sik_spine')
    splineIkHandle = splineIk[0]
    spline = splineIk[2]
    spline.rename('crv_spine')

    clusterJoints = list()
    clusterJoints.append(pmc.createNode('joint', n='clj_spine0'))
    pmc.parentConstraint(animControls['lower_spine'][0], clusterJoints[-1])
    clusterJoints.append(pmc.createNode('joint', n='clj_spine1'))
    pmc.parentConstraint(animControls['middle_spine'][0], clusterJoints[-1])
    clusterJoints.append(pmc.createNode('joint', n='clj_spine2'))
    pmc.parentConstraint(animControls['upper_spine'][0], clusterJoints[-1])

    pmc.skinCluster(clusterJoints, spline, maximumInfluences=3)
    pmc.parentConstraint(animControls['lower_spine'][0], ikJoints[0], maintainOffset=True)

    for clj in clusterJoints:
        clj.radius.set(3)

    splineIkHandle.dTwistControlEnable.set(1)
    splineIkHandle.dWorldUpType.set(4)
    splineIkHandle.dWorldUpAxis.set(0)
    splineIkHandle.dWorldUpVector.set([0.0, 0.0, 1.0])
    splineIkHandle.dWorldUpVectorEnd.set([0.0, 0.0, 1.0])
    animControls['lower_spine'][0].worldMatrix[0].connect(splineIkHandle.dWorldUpMatrix)
    animControls['upper_spine'][0].worldMatrix[0].connect(splineIkHandle.dWorldUpMatrixEnd)

    normalizeNode = stretchySplineIk(splineIkHandle, useScale=True, globalScaleAttr='ctl_main.size')

    sqrtScale = pmc.createNode('multiplyDivide', n='sqrt_spine_scale')
    sqrtScale.operation.set(3)
    sqrtScale.input2X.set(0.5)
    normalizeNode.outputX.connect(sqrtScale.input1X)

    invScale = pmc.createNode('multiplyDivide', n='div_spine_inverse_scale')
    invScale.operation.set(2)
    invScale.input1X.set(1.0)
    sqrtScale.outputX.connect(invScale.input2X)

    jointGroups = list()
    for i, jnt in enumerate(rigJoints):
        preTransform = adv.zeroOut(jnt, 'pre')
        jointGroups.append(preTransform)
        ikNode = adv.zeroOut(jnt, 'hlp_ik')
        pmc.pointConstraint(ikJoints[i], ikNode)
        pmc.orientConstraint(ikJoints[i], ikNode)
        twistNode = adv.zeroOut(jnt, 'hlp_twist')

        twistCache = pmc.createNode('frameCache', n='frm_{0}_twist'.format(jnt))
        animTwistCurve.output.connect(twistCache.stream)
        twistCache.varyTime.set(i)

        rotateMultiplier = pmc.createNode('multiplyDivide', n='mul_{0}_twist'.format(jnt.shortName()))
        twistCache.varying.connect(rotateMultiplier.input2X)
        animControls['middle_spine'][0].rotateY.connect(rotateMultiplier.input1X)
        rotateMultiplier.outputX.connect(twistNode.rotateX)

        volumeCache = pmc.createNode('frameCache', n='frm_{0}_volume'.format(jnt.shortName()))
        animSquashCurve.output.connect(volumeCache.stream)
        volumeCache.varyTime.set(i)

        pow_ = pmc.createNode('multiplyDivide', n='pow_{0}'.format(jnt.shortName()))
        pow_.operation.set(3)
        invScale.outputX.connect(pow_.input1X)
        volumeCache.varying.connect(pow_.input2X)
        pow_.outputX.connect(jnt.scaleY)
        pow_.outputX.connect(jnt.scaleZ)

    pmc.group(animControls['lower_spine'][1], animControls['upper_spine'][1],
              animControls['middle_spine'][1], n='grp_spine_anim')
    pmc.group(splineIkHandle, spline, n='grp_spine_rig_systems')
    pmc.group(clusterJoints, startJoint, n='grp_spine_system_joints')
    pmc.group(jointGroups, n='grp_spine_bind_joints')

    return rigJoints
""" This splitter attempts to maximize apparent total surplus """ from splitter import Splitter from splitter import Bid class SurplusMaximizer(Splitter): def score(self, bid, averages): return bid.amount - averages[bid.item] items = ["Room 1", "Room 2", "Room 3"] bids = [Bid("Room 1", "Joey", 10), Bid("Room 1", "Josh", 15), Bid("Room 2", "Joey", 5), Bid("Room 2", "Josh", 0)] s = Splitter() print s.split(items, ["Joey", "Josh"], bids)
from main import Main
from data_set import Data_Set
from dummy_master import Dummy_Master
from regressor import Regressor
from metrics import Metrics
from back_elimination import Back_Eliminations
from set_reader import Set_Reader
from splitter import Splitter
from plot import Plot
from process_data import Pre_Process_Data
# import visual-python

m = Main('init')
r = Regressor()
sp = Splitter()
mt = Metrics()
m.print()
be = Back_Eliminations()
pd = Pre_Process_Data()
sr = Set_Reader()
sr.read_files()
# sr.print_files_shapes()
train = sr.get_train()
test = sr.get_test()
ploter = Plot()
ploter.cut_survived(train, test)
# ploter.plot_set_survived(sr.get_train(), "Sex", "Survived")
# ploter.plot_set_survived(sr.get_train(), "Pclass", "Survived")
                 parameters['language'], parameters['path_to_fmridata'], input_path,
                 logger=logs, **kwargs)
logs.validate()

logs.info("Retrieve arguments for each model...")
kwargs_splitter = get_splitter_information(parameters)
kwargs_compression = get_compression_information(parameters)
kwargs_transformation = get_data_transformation_information(parameters)
kwargs_estimator_model = get_estimator_model_information(parameters)
logs.validate()

logs.info("Instantiations of the classes...")
splitter = Splitter(**kwargs_splitter)
compressor = Compressor(**kwargs_compression)
transformer = Transformer(**kwargs_transformation)
estimator_model = EstimatorModel(**kwargs_estimator_model)
logs.validate()

logs.info("Defining Pipeline flow...")
## Pipeline
splitter_cv_external = Task([splitter.split], name='splitter_cv_external')
compressor_external = Task([compressor.compress], input_dependencies=[splitter_cv_external],
                           name='compressor_external', flatten_inputs=[True])
transform_data_external = Task(
    [transformer.make_regressor, transformer.scale],
    input_dependencies=[splitter_cv_external, compressor_external],
class SplitterTest(TestCase):
    def setUp(self):
        self.s = Splitter()

    def test_returns_none_when_loot_is_undivisible_by_number_of_pirates(self):
        self.assertEqual(None, self.s.split([2, 3], 2))

    def test_returns_none_when_there_are_not_enough_gems(self):
        self.assertEqual(None, self.s.split([4, 2], 3))

    def test_returns_none_when_there_is_a_gem_greater_than_share(self):
        self.assertEqual(None, self.s.split([4, 2, 3], 3))

    def test_everybody_gets_the_same_kind_of_bucket_when_we_have_only_one_type_of_gem(self):
        self.assertEqual([[2], [2]], self.s.split([2, 2], 2))
        self.assertEqual([[2], [2], [2]], self.s.split([2, 2, 2], 3))
        self.assertEqual([[2, 2], [2, 2]], self.s.split([2, 2, 2, 2], 2))

    def test_everybody_gets_the_same_kind_of_bucket_when_we_have_the_same_set_of_gem_and_pirates(self):
        self.assertEqual([[3, 2], [3, 2]], self.s.split([3, 2, 3, 2], 2))
        self.assertEqual([[3, 2], [3, 2], [3, 2]], self.s.split([3, 2, 3, 2, 3, 2], 3))

    def test_everybody_gets_the_same_value_with_a_different_number_of_gems(self):
        self.assertEqual([[3], [2, 1]], self.s.split([1, 2, 3], 2))
        self.assertEqual([[3, 2, 2, 2], [3, 2, 2, 2], [3, 2, 2, 2]],
                         self.s.split([3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2], 3))
        # famoso caso da morte ("the famous death case")
        self.assertEqual([[7], [5, 2]], self.s.split([7, 5, 2], 2))

    def test_should_not_create_a_bucket_greater_than_share(self):
        self.assertEqual([[13, 1], [7, 7]], self.s.split([13, 7, 7, 1], 2))

    def test_should_rollback_when_the_first_decision_doesnt_fit(self):
        self.assertEqual([[7, 2, 2], [3, 3, 3, 2]], self.s.split([7, 3, 3, 3, 2, 2, 2], 2))

    def test_should_rollback_when_the_second_decision_doesnt_fit_also(self):
        self.assertEqual([[7, 2, 2, 2], [3, 3, 3, 2, 2]], self.s.split([7, 3, 3, 3, 2, 2, 2, 2, 2], 2))
def setUp(self):
    self.s = Splitter()
import os

import pandas as pd

from splitter import Splitter


def merge_all():
    for i, fname in enumerate(os.listdir('output')):
        if i == 0:
            df = pd.read_csv('output/' + fname)
        else:
            df = pd.merge(df, pd.read_csv('output/' + fname), how='outer', on='datetime')
    df.to_csv('health_care.csv')


if __name__ == "__main__":
    print("Convert apple health care xml to csv.")
    s = Splitter()
    s.get_body_mass()
    s.get_burned_energy()
    s.get_heart_rate()
    s.get_stand_time()
    s.get_step_count()
    s.get_walking_distance()

    print("Merge all csv.")
    merge_all()

    print("Done.")
return item """ def select_item(self, splitter, user): max_item = self.actions[0] max_val = np.random.beta( max_item.successes + self.alpha, max_item.count - max_item.successes + self.beta) for item in self.actions[1:]: if not (user in splitter.train_set.keys() and item.item in splitter.train_set[user])\ or self.follow_back(splitter, item.item, user): val = np.random.beta(item.successes + self.alpha, item.count - item.successes + self.beta) if val > max_val: max_item = item max_val = val self.removed = 1 return max_item if __name__ == "__main__": from splitter import Splitter from plot import plot_results_graph import matplotlib.pyplot as plt spl = Splitter("../data/movieLens_binary_mini.dat", separator=" ") bandit = UCBBandit(spl, "mini", param=0, criteria="mean") print(len(bandit.actions)) plot_results_graph("mini", "eps") plt.show()