Example #1
def run(review):
    review = str(review)
    result = ""
    # check every entity model (0..5) against the review
    for i in range(0, 6):
        if predict_model("ent/model_ent_" + str(i) + ".json", review) == 1:
            # for each attribute of the detected entity, run the attribute
            # model and, if it fires, the polarity model
            attrs = get_attr(i)
            for attr in attrs:
                if predict_model("attr/model_attr_" + str(attr) + ".json",
                                 review) == 1:
                    polar = predict_model(
                        "pol/model_pol_" + str(attr) + ".json", review)
                    if polar != 0:
                        result += str(Support.indexToName(
                            attr)) + " : " + Support.indexToPolar(polar) + "\n"
    print(result)
    return result
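A minimal usage sketch, assuming predict_model, get_attr, and Support come from the surrounding project and the trained model JSON files are on disk; the review text is purely illustrative:

if __name__ == "__main__":
    # Illustrative review string; any text to be classified can be passed in.
    run("The hotel room was clean but the staff was unfriendly")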
Example #2
    def predict(self, test_ex):
        """
        Return the predicted class for a given test example.
        """
        # clean the example
        cleaned_exam = Support.preprocessing_string(test_ex)
        # posterior probability of every class for this example
        post_prob = self.__calExProb(cleaned_exam)
        # pick the class with the highest posterior
        prediction = self.classes[np.argmax(post_prob)]

        return prediction
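A short usage sketch following how Example 5 drives the classifier; comments and labels stand in for a labelled training set, and the sentence passed to predict is only illustrative:

# Build a classifier over the unique labels, fit it, then classify one example.
nb = NaiveBayes(np.unique(labels))
nb.cross_validation(comments, labels)
print(nb.predict("The breakfast was excellent"))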
Example #3
def cycle_of_users_test(num):
    # exercise the single-user endpoint for ids 1..num
    for j in range(1, num + 1):
        print(single_user(j))

    # fetch the 'support' block returned alongside the user listing
    response_json = requests.get(f'{user_endpoint}').json()['support']
    singleUserSupport = Support.accessParams(response_json)
    # fail fast if any key of the support block is empty
    for i, q in response_json.items():
        if not i:
            return f'Bug with {i}'
    print(
        f'\nGET: support: url: {singleUserSupport[0]}, text: {singleUserSupport[1]}'
    )
Example #4
    def test(self, test_set):
        """
        Test the current model on test_set and return the predictions.
        """
        predictions = []

        for ex in test_set:

            # clean the example
            cleaned_exams = Support.preprocessing_string(ex)

            # get the posterior probability of every class for this example
            post_prob = self.__calExProb(cleaned_exams)

            # pick the class with the highest posterior
            predictions.append(self.classes[np.argmax(post_prob)])

        return np.array(predictions)
Example #5
    def train(self):
        """
        Train the Attribute Classifier: one NaiveBayes model per attribute.
        """
        self.classifiers = []

        print("[Training Attribute Classifier with VLSP 2018]")
        print("---------------- Training In Progress --------------------")

        for i in range(0, 12):
            print("Training: " + Support.indexToName(i))

            # one classifier per attribute, over that attribute's label set
            nb = NaiveBayes(np.unique(self.label[i]))

            print('-------- Start Cross Validation ------------')
            nb.cross_validation(self.comments, self.label[i])
            print('-------- End Cross Validation ------------')
            print(len(self.comments))
            print(len(self.label[i]))

            self.classifiers.append(nb)

        print('----------------- Training Completed ---------------------')
Example #6
    """
    with open(model, encoding='utf-8') as json_file:
        data = json.load(json_file)
        classes = np.asarray(data["classes"])
        cates_info = data["cates_info"]
        cates_info = {int(k): v for k, v in cates_info.items()}
        for cate_index, cate in enumerate(classes):
            cates_info[cate_index] = {
                int(k): v
                for k, v in cates_info[cate_index].items()
            }
        nb = NaiveBayes(classes)
        nb.cates_info = cates_info
        return nb


# Test Entity
for i in range(0, 6):
    print("Testing Entity : " + Support.indexToEntity(i))
    model = get_model("ent/model_ent_" + str(i) + ".json")
    test(model, "ent", i)
# Test Attribute
for i in range(0, 12):
    print("Testing Attribute : " + Support.indexToName(i))
    model = get_model("attr/model_attr_" + str(i) + ".json")
    test(model, "attr", i)
# Test Polar
for i in range(0, 12):
    print("Testing Polar : " + Support.indexToName(i))
    model = get_model("pol/model_pol_" + str(i) + ".json")
    test(model, "pol", i)
Example #7
def main():
    parser = argparse.ArgumentParser(
        description='Determine PacBio read support for gaps in scaffolds',
        usage=usage())
    # Main arguments
    parser.add_argument('scaffolds',
                        action='store',
                        help='The input scaffolds in Fasta format')
    parser.add_argument('subreads',
                        action='store',
                        help='The PacBio subreads in BAM format')
    parser.add_argument('-t', '--threads', dest='threads', type=int, \
        help='Number of threads to use for multi-threaded processes, default=1', default=1)
    # Arguments for Setup
    setup_args = parser.add_argument_group('Setup')
    setup_args.add_argument('-n', '--min_gap', dest='min_gap', type=int, default=200, \
        help='Minimum number of consecutive Ns to be considered a gap, default=200')
    setup_args.add_argument('-x', '--max_gap', dest='max_gap', type=int, default=1000000, \
        help='Maximum number of consecutive Ns to be considered a gap, default=1000000')
    setup_args.add_argument('-f', '--flank_size', dest='flank_size', type=int, default=1000, \
        help='Number of extracted bases flanking gaps and scaffold ends, default=1000')
    # Arguments for Support
    support_args = parser.add_argument_group('Support')
    support_args.add_argument('-b', '--blasr', dest='blasr', type=str, \
        help='Parameters to pass to BLASR', default='')
    support_args.add_argument('-d', '--min_reads', dest='min_reads', type=int, \
        help='The minimum number of reads required to support a gap', default=5)
    support_args.add_argument('-w', '--wiggle', dest='wiggle', type=float, \
        help='The percent deviation allowed from the predicted gap size, default=0.5', default=0.5)
    # Arguments for Assembly
    assembly_args = parser.add_argument_group('Assembly')
    assembly_args.add_argument('-m', '--minimap', dest='minimap', \
        help='Parameters to pass to Minimap', default='-Sw5 -L100 -m0')
    assembly_args.add_argument('-a', '--miniasm', dest='miniasm', \
        help='Parameters to pass to Miniasm', default='')
    assembly_args.add_argument('-r', '--racon', dest='racon', \
        help='Parameters to pass to Racon', default='')
    # Parse the arguments
    args = parser.parse_args()
    # Initialize classes
    setup = Setup()
    support = Support()
    assembly = Assembly()
    placement = Placement()
    # Check for save point
    try:
        save = open('jelly2.save', 'r').read()
        print "Found save point:", save
    except IOError:
        write_save('setup')
    # Run Setup
    if check_save('setup'):
        setup.run(args)
        write_save('mapping')
    # Run Support
    if check_save('mapping'):
        support.mapping(args)
        write_save('sorting')
    if check_save('sorting'):
        support.sorting(args)
        write_save('indexing')
    if check_save('indexing'):
        support.indexing(args)
        write_save('support')
    if check_save('support'):
        support.find_support(args)
        write_save('assembly')
    # Run Assembly
    if check_save('assembly'):
        assembly.assemble_gaps(args)
        write_save('placement')
    # Run Placement
    if check_save('placement'):
        placement.load_data(args)
        placement.fill_gaps()
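main() depends on write_save and check_save, which are not shown in this example; below is a minimal sketch of how they might work, under the assumption that jelly2.save simply stores the name of the next stage to run:

STAGES = ['setup', 'mapping', 'sorting', 'indexing', 'support', 'assembly', 'placement']

def write_save(stage):
    # Assumed format: the save file holds the bare name of the next stage.
    with open('jelly2.save', 'w') as handle:
        handle.write(stage)

def check_save(stage):
    # A stage runs if it is at or after the stage recorded in the save file.
    with open('jelly2.save', 'r') as handle:
        saved = handle.read().strip()
    return STAGES.index(stage) >= STAGES.index(saved)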
Example #8
    def __train(self, dataset, labels):
        # read input params
        self.examples = dataset
        self.labels = labels
        self.bag_dicts = np.array(
            [defaultdict(lambda: 0) for index in range(self.classes.shape[0])])

        # only convert to numpy arrays if initially not passed as numpy arrays
        if not isinstance(self.examples, np.ndarray):
            self.examples = np.array(self.examples)
        if not isinstance(self.labels, np.ndarray):
            self.labels = np.array(self.labels)

        # create BoW for each category
        for cate_index, cate in enumerate(self.classes):
            # get all examples of category equal cate
            all_cate_examples = self.examples[self.labels == cate]

            # clean examples
            cleaned_exams = [
                Support.preprocessing_string(cate_exam)
                for cate_exam in all_cate_examples
            ]
            cleaned_exams = pd.DataFrame(data=cleaned_exams)

            # store this bag of word of the particular category
            np.apply_along_axis(self.createBagOfWord, 1, cleaned_exams,
                                cate_index)

        # calculate the parameters for the prior probability of each class c - p(c)
        prob_classes = np.empty(self.classes.shape[0])
        words = []
        cate_word_counts = np.empty(self.classes.shape[0])

        for cate_index, cate in enumerate(self.classes):
            # get p(c)
            prob_classes[cate_index] = np.sum(self.labels == cate) / float(
                self.labels.shape[0])

            # get the total count of words in each class (+1 for smoothing)
            cate_word_counts[cate_index] = np.sum(
                np.array(list(self.bag_dicts[cate_index].values()))) + 1

            # accumulate the words of this category into the vocabulary list
            words += list(self.bag_dicts[cate_index].keys())

        # build vocabulary set and get size of the set
        self.vocab = np.unique(np.array(words))
        self.vocab_size = self.vocab.shape[0]

        # get p(d) - denominator value
        denominators = np.array([
            cate_word_counts[cate_index] + self.vocab_size + 1
            for cate_index, cate in enumerate(self.classes)
        ])

        # change all category info to tuple format
        self.cates_info = [(self.bag_dicts[cate_index],
                            prob_classes[cate_index], denominators[cate_index])
                           for cate_index, cate in enumerate(self.classes)]
        self.cates_info = np.array(self.cates_info)
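The private __calExProb method that predict and test rely on does not appear among these examples; below is a minimal sketch consistent with the (bag-of-words dict, class prior, denominator) tuples that __train stores in cates_info, using Laplace-smoothed log probabilities; the whitespace tokenization and exact smoothing are assumptions, not the original implementation:

    def __calExProb(self, cleaned_exam):
        # Sketch only: log-posterior of every class for one preprocessed example.
        post_prob = np.empty(self.classes.shape[0])
        for cate_index in range(self.classes.shape[0]):
            bag_of_words, prior, denominator = self.cates_info[cate_index]
            log_likelihood = 0.0
            for token in cleaned_exam.split():
                # Laplace-smoothed probability of the token under this class
                token_count = bag_of_words.get(token, 0) + 1
                log_likelihood += np.log(token_count / float(denominator))
            post_prob[cate_index] = np.log(prior) + log_likelihood
        return post_prob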