def run(review):
    """Run the full entity -> attribute -> polarity prediction pipeline.

    For each entity index 0-5 whose model fires on the review, runs the
    attribute models for that entity; for each attribute that fires, runs
    its polarity model.  Builds a newline-terminated "attr : polarity"
    report, prints it, and returns it (empty string when nothing fires).

    NOTE(review): relies on module-level predict_model/get_attr and the
    Support helper; model paths are hard-coded relative file names.
    """
    review = str(review)
    lines = []
    for ent_index in range(6):
        # Guard clause instead of nesting: skip entities that don't fire.
        if predict_model("ent/model_ent_" + str(ent_index) + ".json", review) != 1:
            continue
        for attr in get_attr(ent_index):
            if predict_model("attr/model_attr_" + str(attr) + ".json", review) != 1:
                continue
            polar = predict_model("pol/model_pol_" + str(attr) + ".json", review)
            if polar != 0:
                lines.append(str(Support.indexToName(attr)) + " : "
                             + Support.indexToPolar(polar))
    # join instead of quadratic += string concatenation; each entry keeps
    # the original trailing newline.
    result = "".join(line + "\n" for line in lines)
    print(result)
    return result
def predict(self, test_ex):
    """Return the predicted class label for a single test example."""
    # Clean the raw example, score it against every class, and pick
    # the class with the highest posterior probability.
    cleaned = Support.preprocessing_string(test_ex)
    posteriors = self.__calExProb(cleaned)
    best_index = np.argmax(posteriors)
    return self.classes[best_index]
def cycle_of_users_test(num):
    """Smoke-test the user endpoint for user ids 1..num.

    For each id: prints the single-user result, fetches the 'support'
    section of the endpoint payload, and prints its url/text fields.
    Returns an error string as soon as a falsy key is found in the
    payload; otherwise returns None after all ids are processed.

    NOTE(review): the GET uses only `user_endpoint`, not the loop id —
    presumably the endpoint is id-independent; verify against the API.
    """
    for user_id in range(1, num + 1):
        print(single_user(user_id))
        support_section = requests.get(f'{user_endpoint}').json()['support']
        single_user_support = Support.accessParams(support_section)
        # Only the keys are inspected (the original iterated .items() and
        # never used the values); a falsy key signals a malformed payload.
        for key in support_section:
            if not key:
                return f'Bug with {key}'
        print(
            f'\nGET: support: url: {single_user_support[0]}, text: {single_user_support[1]}'
        )
def test(self, test_set):
    """Predict a class for every example in test_set.

    Each example is cleaned, scored against all classes, and assigned the
    class with the highest posterior probability.

    Returns a numpy array of predicted class labels, in input order.
    """
    # Comprehension instead of the manual append loop (same pipeline:
    # clean -> posterior probabilities -> argmax -> class label).
    predictions = [
        self.classes[np.argmax(self.__calExProb(Support.preprocessing_string(ex)))]
        for ex in test_set
    ]
    return np.array(predictions)
def train(self):
    """Train one NaiveBayes classifier per attribute index (0-11),
    cross-validating each on the loaded comments, and store them in
    self.classifiers."""
    self.classifiers = []
    print("[Training Attribute Classifier with VLSP 2018]")
    print("---------------- Training In Progress --------------------")
    for attr_index in range(12):
        print("Training: " + Support.indexToName(attr_index))
        attr_labels = self.label[attr_index]
        classifier = NaiveBayes(np.unique(attr_labels))
        print('-------- Start Cross Validation ------------')
        classifier.cross_validation(self.comments, attr_labels)
        print('-------- End Cross Validation ------------')
        print(len(self.comments))
        print(len(attr_labels))
        self.classifiers.append(classifier)
    print('----------------- Training Completed ---------------------')
""" with open(model, encoding='utf-8') as json_file: data = json.load(json_file) classes = np.asarray(data["classes"]) cates_info = data["cates_info"] cates_info = {int(k): v for k, v in cates_info.items()} for cate_index, cate in enumerate(classes): cates_info[cate_index] = { int(k): v for k, v in cates_info[cate_index].items() } nb = NaiveBayes(classes) nb.cates_info = cates_info return nb # Test Entity for i in range(0, 6): print("Testing Entity : " + Support.indexToEntity(i)) model = get_model("ent/model_ent_" + str(i) + ".json") test(model, "ent", i) # Test Attribute for i in range(0, 12): print("Testing Attribute : " + Support.indexToName(i)) model = get_model("attr/model_attr_" + str(i) + ".json") test(model, "attr", i) # Test Polar for i in range(0, 12): print("Testing Polar : " + Support.indexToName(i)) model = get_model("pol/model_pol_" + str(i) + ".json") test(model, "pol", i)
def main(): parser = argparse.ArgumentParser( description='Determine PacBio read support for gaps in scaffolds', usage=usage()) # Main arguments parser.add_argument('scaffolds', action='store', help='The input scaffolds in Fasta format') parser.add_argument('subreads', action='store', help='The PacBio subreads in BAM format') parser.add_argument('-t', '--threads', dest='threads', type=int, \ help='Number of threads to use for multi-threaded processes, default=1', default=1) # Arguments for Setup setup_args = parser.add_argument_group('Setup') setup_args.add_argument('-n', '--min_gap', dest='min_gap', type=int, default=200, \ help='Minimum number of consecutive Ns to be considered a gap, default=200') setup_args.add_argument('-x', '--max_gap', dest='max_gap', type=int, default=1000000, \ help='Maximum number of consecutive Ns to be considered a gap, default=Inf') setup_args.add_argument('-f', '--flank_size', dest='flank_size', type=int, default=1000, \ help='Number of extracted bases flanking gaps and scaffold ends, default=1000') # Arguments for Support support_args = parser.add_argument_group('Support') support_args.add_argument('-b', '--blasr', dest='blasr', type=str, \ help='Parameters to pass to BLASR', default='') support_args.add_argument('-d', '--min_reads', dest='min_reads', type=int, \ help='The minimum number of reads required to support a gap', default=5) support_args.add_argument('-w', '--wiggle', dest='wiggle', type=int, \ help='The percent deviation allowed from predicted gap size', default=0.5) # Arguments for Assembly assembly_args = parser.add_argument_group('Assembly') assembly_args.add_argument('-m', '--minimap', dest='minimap', \ help='Parameters to pass to Minimap', default='-Sw5 -L100 -m0') assembly_args.add_argument('-a', '--miniasm', dest='miniasm', \ help='Parameters to pass to Miniasm', default='') assembly_args.add_argument('-r', '--racon', dest='racon', \ help='Parameters to pass to Racon', default='') # Parse the arguments args = 
parser.parse_args() # Initialize classes setup = Setup() support = Support() assembly = Assembly() placement = Placement() # Check for save point try: save = open('jelly2.save', 'r').read() print "Found save point:", save except IOError: write_save('setup') # Run Setup if check_save('setup'): setup.run(args) write_save('mapping') # Run Support if check_save('mapping'): support.mapping(args) write_save('sorting') if check_save('sorting'): support.sorting(args) write_save('indexing') if check_save('indexing'): support.indexing(args) write_save('support') if check_save('support'): support.find_support(args) write_save('assembly') # Run Assembly if check_save('assembly'): assembly.assemble_gaps(args) write_save('placement') # Run Placement if check_save('placement'): placement.load_data(args) placement.fill_gaps()
def __train(self, dataset, labels):
    """Fit the Naive Bayes model on (dataset, labels).

    Builds a bag-of-words per class, then computes the class priors p(c)
    and the Laplace-smoothed denominators, storing everything on self
    (bag_dicts, vocab, vocab_size, cates_info).
    """
    self.examples = dataset
    self.labels = labels
    self.bag_dicts = np.array(
        [defaultdict(lambda: 0) for _ in range(self.classes.shape[0])])

    # Only convert to numpy arrays if not initially passed as numpy arrays.
    if not isinstance(self.examples, np.ndarray):
        self.examples = np.array(self.examples)
    if not isinstance(self.labels, np.ndarray):
        self.labels = np.array(self.labels)

    # Create a bag of words for each category.
    for cate_index, cate in enumerate(self.classes):
        # All examples whose label equals this category.
        all_cate_examples = self.examples[self.labels == cate]
        cleaned_exams = [
            Support.preprocessing_string(cate_exam)
            for cate_exam in all_cate_examples
        ]
        cleaned_exams = pd.DataFrame(data=cleaned_exams)
        # Populate self.bag_dicts[cate_index] via side effect.
        np.apply_along_axis(self.createBagOfWord, 1, cleaned_exams,
                            cate_index)

    # Parameters for the prior probability of each class c - p(c).
    prob_classes = np.empty(self.classes.shape[0])
    words = []
    cate_word_counts = np.empty(self.classes.shape[0])
    for cate_index, cate in enumerate(self.classes):
        # p(c) = fraction of training examples with this label.
        prob_classes[cate_index] = np.sum(self.labels == cate) / float(
            self.labels.shape[0])
        # Total word count in this class (+1 smoothing term).
        cate_word_counts[cate_index] = np.sum(
            np.array(list(self.bag_dicts[cate_index].values()))) + 1
        # BUG FIX: accumulate words across ALL classes — the original
        # overwrote `words` each iteration, so the vocabulary (and hence
        # the smoothed denominators) only reflected the last class.
        # Also removed the unused `count = list(self.bag_dicts[cate]...)`
        # line, which indexed bag_dicts by class *label* rather than by
        # position and would crash for non-integer labels.
        words += self.bag_dicts[cate_index].keys()

    # Build the vocabulary set and record its size.
    self.vocab = np.unique(np.array(words))
    self.vocab_size = self.vocab.shape[0]

    # Smoothed denominator p(d) per class.
    denominators = np.array([
        cate_word_counts[cate_index] + self.vocab_size + 1
        for cate_index, cate in enumerate(self.classes)
    ])

    # Store each category's info as a (bag, prior, denominator) tuple.
    self.cates_info = [(self.bag_dicts[cate_index],
                        prob_classes[cate_index],
                        denominators[cate_index])
                       for cate_index, cate in enumerate(self.classes)]
    self.cates_info = np.array(self.cates_info)