def run(self):
    soup = super(ForbesScraper, self).get_soup_object()
    # Get the breaking article from the Forbes investing page
    headline = soup.find_all(
        "a",
        {"class": "headlink h1--dense card__color--benjamins-green"})[0]
    headline_text = headline.text
    headline_link = headline.get('href', '')
    print("----")
    print("Breaking article: %s" % headline_text)
    print("Breaking article link: %s" % headline_link)
    txt_classifier = Classifier(headline_text)
    print(txt_classifier.sentiment())
    # Get the editors' picks on the left side
    latest_picks = {}
    for latest_picks_article in soup.find_all(
            "a", {"class": "section-pick__title"}):
        self.article_link = link = latest_picks_article.get('href', '')
        self.article_title = title = latest_picks_article.text
        latest_picks[link] = title
        print("----")
        print("Latest pick link title: %s" % title)
        print("Latest pick link: %s" % link)
        self.classify_headline(title)
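
The snippet above assumes a base scraper class that provides get_soup_object(); that helper is not part of this listing. A minimal sketch of such a base class (hypothetical, using requests and BeautifulSoup) might look like this:

# Hypothetical sketch of the base class assumed by ForbesScraper.run();
# the real get_soup_object() is not shown in this listing.
import requests
from bs4 import BeautifulSoup


class BaseScraper:
    def __init__(self, url):
        self.url = url

    def get_soup_object(self):
        # Fetch the page and parse it into a BeautifulSoup tree
        response = requests.get(self.url, timeout=10)
        response.raise_for_status()
        return BeautifulSoup(response.text, "html.parser")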
Example #2
    def classify_headline(self, headline):
        # Set self.sentiment and update the running averages
        txt_classifier = Classifier(headline)
        sentiment = txt_classifier.sentiment()
        print(sentiment)
        self.sentiment = sentiment
        self.update_avgs()
Example #3
def getIntent():
    print(request.json)
    print(request.json['sentence'])
    request_object = request.json
    sentence = request.json['sentence']
    if client is not None:
        if 'classifier' not in cache.keys():
            cache["classifier"] = Classifier()

        classifier = cache["classifier"]

        result = classifier.classifyIntent(sentence)
        classification = dict()
        print(result)
        if len(result) > 0:

            print(result)
            if result[1] < classifier.ERROR_THRESHOLD:
                get_database_context().add_not_found_sentence(sentence)

            classification['intent'] = result[0]
        else:
            classification['intent'] = ""
            get_database_context().add_not_found_sentence(sentence)
    else:
        print("NO DATABASE")

        classification = dict()
        classification['intent'] = "NO DATABASE"

    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification

    return jsonify(response_object)
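
For reference, this route expects a JSON body with a 'sentence' key plus any extra context fields; the response echoes the remaining fields and adds a 'classifications' object. A hypothetical request/response pair (field names and values are illustrative only):

# Hypothetical request body for getIntent(); 'sessionId' is made up:
#   {"sentence": "when does the cafeteria open", "sessionId": "abc123"}
# Response built above: removekey() drops 'sentence' and the intent is attached:
#   {"sessionId": "abc123", "classifications": {"intent": "timetables"}}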
Example #4
def getIntent():
    request_object = request.json
    sentence = request.json['sentence']
    if client is not None:
        if 'intents' not in cache.keys():
            cache["intents"] = Classifier("intents", client)

        classifier = cache["intents"]

        results = classifier.classify(sentence)

        classification = dict()
        if len(results) > 0:
            classification['intent'] = results[0][0]
        else:
            classification['intent'] = ""
    else:
        print("NO DATABASE")

        classification = dict()
        classification['intent'] = "NO DATABASE"

    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification

    return jsonify(response_object)
Example #5
def getEntity():
    request_object = request.json
    sentence = request.json['sentence']
    prior_intents = request.json['context']["priorIntent"]["intent"]
    if client is not None:
        classifier_name = "entities@" + prior_intents

        if classifier_name not in cache.keys():
            cache[classifier_name] = Classifier(classifier_name, client)

        classifier = cache[classifier_name]

        results = classifier.classify(sentence)

        classification = dict()
        if len(results) > 0:
            classification['entity'] = results[0][0]
        else:
            classification['entity'] = ""
    else:
        print("NO DATABASE")

        classification = dict()
        classification['entity'] = "NO DATABASE"

    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification

    return jsonify(response_object)
Example #6
def getEntity():
    request_object = request.json
    sentence = request.json['sentence']

    if client is not None:
        if 'classifier' not in cache.keys():
            cache["classifier"] = Classifier()

        classifier = cache["classifier"]
        results = classifier.classifyEntity(sentence)
        # keep only the name of the entity
        classification = dict()
        if len(results) > 0:
            classification['entity'] = results[0][0]
        else:
            classification['entity'] = ""
    else:
        print("NO DATABASE")

        classification = dict()
        classification['entity'] = "NO DATABASE"

    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification

    return jsonify(response_object)
Example #7
def classify_pages(in_path, out_path):
    classifier = Classifier()

    with open(out_path, 'wb') as f:
        for site, html in utils.read_file_multiple(in_path):
            if classifier.classify(html):
                pickle.dump((site, html), f)
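
A possible invocation of classify_pages (the pickle file names below are placeholders): it reads (site, html) pairs from the input pickle and writes only the pages the Classifier accepts to the output pickle.

# Hypothetical usage; both paths are placeholders.
classify_pages('crawled-pages.pickle', 'classified-pages.pickle')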
Example #8
    def __init__(self, db):
        self.db = db

        self.city = self.db["area"].find_one({
            "name": configuration.AREA
        })

        self.classifier = Classifier(self.db)
Example #9
def train_Engine():
    result = get_trainer().start_training()
    if result:
        cache["classifier"] = Classifier()
        cache["classifier"].load(DatabaseContext(client), get_cos_context())
        return jsonify("Success! Engine was trained"), 200
    else:
        return jsonify("Error! Engine wasn't trained.."), 404
Example #10
def main(config_):
    config = Config(config_)
    print("Model Framework: ", config.get("framework"), " Model Labels: ", config.get("labels"))
    broker = Broker(config)
    broker.listen()
    classifier = Classifier(config)
    thread = threading.Thread(target=process_request, kwargs={"broker": broker, "classifier": classifier})
    thread.start()
Example #11
def trainIntents():
    if client is not None:
        intents = Trainer("intents", client)
        intents.start_training()
        if 'intents' not in cache.keys():
            cache['intents'] = Classifier('intents', client)
        else:
            cache['intents'].load()
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
Example #12
    def __call__(self):
        # test
        # self.train = self.train.head(200)
        # self.test = self.test.head(100)

        self.clf = Classifier(output_folder=self.output_folder,
                              RS=15,
                              train=self.train,
                              test=self.test,
                              fold_splits=self.splits,
                              clf_name=self.clf_name,
                              mapping_dict=config.mapping_dict)
        self.clf()
        print('Saved to %s' % self.output_folder)
Example #13
def trainEntity():
    intent = request.json['intent']
    if client is not None:
        classifier_name = "entities@" + intent
        entities = Trainer(classifier_name, client)
        entities.start_training()
        if classifier_name not in cache.keys():
            cache[classifier_name] = Classifier(classifier_name, client)
        else:
            cache[classifier_name].load()
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
Example #14
def main():
    classifier = Classifier(model_name="random_forest")

    logger.debug(
        "top 20 feature importances: {}".format(
            get_feature_importance(classifier)
        )
    )

    test_features, test_labels = get_test_data("test")

    logger.debug(
        "classification report: {}".format(
            get_classification_report(
                test_labels["is_returning_customer"].values,
                classifier.classify(test_features))
        )
    )
Example #15
def compare_crawler():
    heuristic_file = os.path.join(consts.DATA_DIR,
                                  'using-heuristic-pages.pickle')
    bfs_file = os.path.join(consts.DATA_DIR, 'bfs-pages.pickle')
    hr_bfs = os.path.join(consts.RESULTS_DIR, 'bfs_harvest_ratio_results.csv')
    hr_heuristic = os.path.join(consts.RESULTS_DIR,
                                'heuristic_harvest_ratio_results.csv')

    if not os.path.exists(heuristic_file):
        crawl(True, heuristic_file)

    if not os.path.exists(bfs_file):
        crawl(True, bfs_file)

    classifier = Classifier()

    harvest_ratio(heuristic_file, hr_heuristic, classifier)
    harvest_ratio(bfs_file, hr_bfs, classifier)
Example #16
def testIntent():
    request_object = request.json
    sentence = request.json['sentence']
    if client is not None:
        if sentence == 'populate':
            # populate the database with base data and train all neural networks
            populate_intents(client)
            populate_entities_for_meal(client)
            populate_entities_for_timetables(client)
            populate_entities_for_navigation(client)
            cache["intents"].load()
            cache["entities@timetables"].load()
            cache["entities@meal"].load()

            classification = dict()
            classification['intent'] = "Populated"
        else:
            if 'intents' not in cache.keys():
                cache["intents"] = Classifier("intents", client)

            classifier = cache["intents"]

            results = classifier.classify(sentence)

            classification = dict()
            if len(results) > 0:
                classification['intent'] = results[0][0]
            else:
                classification['intent'] = ""
    else:
        print("NO DATABASE")

        classification = dict()
        classification['intent'] = "NO DATABASE"

    response_object = removekey(request_object, "sentence")
    response_object["classifications"] = classification

    return 'Results: %s' % classification['intent']
Example #17
def main():
    global reporting, print_classification, classifier
    args = get_args()
    # load either web or pop-up reporting based on args
    reporting_module = 'reporting.' + ('web' if args.web else 'popup')
    print("Loading " + reporting_module)
    reporting = importlib.import_module(reporting_module)

    classifier = Classifier(args.age_gender)

    # if process is killed with ctrl+c display stats
    signal.signal(signal.SIGINT, sigint_handler)

    if args.video is not None:
        cap = cv2.VideoCapture(args.video)
        frame_nr = 0
        while cap.isOpened():
            ret, frame = cap.read()
            frame = cv2.resize(frame, None, fx=0.25, fy=0.25)
            if frame_nr % 4 == 0:
                every_frame(frame, time.time())
            frame_nr += 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                raise SystemExit
        return

    if args.file is not None:
        frame = cv2.imread(args.file)
        every_frame(frame, time.time())
        if cv2.waitKey() & 0xFF == ord('q'):
            raise SystemExit
        return

    if args.print_classification:
        print_classification = True

    # on every frame from the stream run stuff
    stream_video(every_frame)
Example #18
def load_sites_feeds():
    from tech_rss.models import Site
    fix_multiprocessing()

    clf = Classifier()
    for site in Site.objects.all():
        print('Starting {}'.format(site.domain))
        news = site.get_new_news()

        if not news:
            continue

        categories = clf.predict(news)
        for category, page in zip(categories, news):
            print(CATEGORIES_SHORT[category])
            print(page['title'], '\n')

            url, title = save_post(category, page, site)

            users = site.users.filter(categories__contains=[category])
            users_id = [getattr(user, 'id') for user in users]

            send_post_to_subscribers(TelegramBot, users_id, url, title)
Example #19
def main():
    classifier = Classifier()
    classifier.build_model()
    classifier.add_smoothing()
    classifier.spam_vocabulary_probs, classifier.ham_vocabulary_probs = classifier.write_model_data(
        'model.txt', classifier.vocabulary)
    classifier.test_model('baseline-result.txt',
                          classifier.spam_vocabulary_probs,
                          classifier.ham_vocabulary_probs)
    print("------Experiment 2, Stop Words Filtering------")
    classifier.experiment2_stop_words()
    print("------Experiment 3, Word Length Filtering------")
    classifier.experiment3_length_filtering()
    print("------Experiment 4, Frequency 1 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered0',
                                               lower_cutoff_frequency=1,
                                               higher_cutoff_frequency=1)
    print("------Experiment 4, Frequency <=5 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered1',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=5)
    print("------Experiment 4, Frequency <=10 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered2',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=10)
    print("------Experiment 4, Frequency <=15 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered3',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=15)
    print("------Experiment 4, Frequency <=20 Filtering------")
    classifier.experiment4_frequency_filtering(file_name='frequencyFiltered4',
                                               lower_cutoff_frequency=0,
                                               higher_cutoff_frequency=20)
    print("------Experiment 4, Top 10 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered0',
                                                   10)
    print("------Experiment 4, Top 15 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered1',
                                                   15)
    print("------Experiment 4, Top 20 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered2',
                                                   20)
    print("------Experiment 4, Top 25 percent Filtering------")
    classifier.experiment4_most_frequent_filtering('mostFrequencyFiltered3',
                                                   25)

    experiment5_file_name = 'smoothing'

    for n in range(0, 11):
        smoothing_value = round((n * 0.1), 1)
        file_name = experiment5_file_name + str(smoothing_value)
        print("------Experiment 5, smoothing value %s------" % smoothing_value)
        classifier_5 = Classifier()
        classifier_5.build_model()
        classifier_5.add_smoothing(smoothing_value)
        classifier_5.spam_vocabulary_probs, classifier_5.ham_vocabulary_probs = classifier_5.write_model_data(
            file_name + 'model.txt',
            classifier_5.vocabulary,
            smoothing_value=smoothing_value)
        classifier_5.test_model(file_name + 'baseline-result.txt',
                                classifier_5.spam_vocabulary_probs,
                                classifier_5.ham_vocabulary_probs)
Example #20
from classifier.classifier import Classifier

classifier = Classifier()
classifier.train_model()
print(classifier.is_question('do you hold a credit card'))
Example #21
def main(*args):
    """Train the model.

    Args:
        *args: args to be parsed by the ArgumentParser

    Returns:
        None
    """
    # Instantiating with formatter_class argument will make default values print
    # in the help message.
    parser = argparse.ArgumentParser(
        description=('Train a new network on a dataset and save the model as ' +
                     'a checkpoint'),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('data_directory', type=str,
                        help=('path to the directory containing the ' +
                              'training, validation and testing sets.'))
    parser.add_argument('--save_dir', type=str, default='.',
                        help='set the directory to save checkpoints in')
    parser.add_argument('--checkpoint', type=str,
                        help='load a checkpoint to continue training')
    parser.add_argument('--arch', type=str.lower, default='alexnet',
                        choices=[k.lower()
                                 for k in Classifier.IMAGENET_MODELS.keys()],
                        help='choose the model architecture')
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='learning rate to use while training')
    parser.add_argument('--hidden_units', type=int, nargs='+',
                        default=[4096, 1000],
                        help="sizes of the classifier's hidden layers")
    parser.add_argument('--epochs', type=int, default=3,
                        help='number of epochs to go through during training')
    parser.add_argument('--no_validate', action='store_true',
                        help=("don't validate using validation set during " +
                              "training"), )
    parser.add_argument('--test_model', action='store_true',
                        help=('use test dataset to test model accuracy after ' +
                              'training'))
    parser.add_argument('--gpu', action='store_true',
                        help=('use the gpu to train the network if one is ' +
                              'available'))
    parser.add_argument('--no_active_session', action='store_true',
                        help="don't keep session alive (if on a local machine)")
    parser.add_argument('--no_save_checkpoint', action='store_true',
                        help=("don't save a checkpoint after training to " +
                              "save disk space"))
    parser.add_argument('--write_log_file', action='store_true',
                        help=('write training loss and accuracy data to a ' +
                              'log file at {save_dir}/{model_arch}.out'))
    args = parser.parse_args(args)

    keep_active = not args.no_active_session  # whether we need active_session
    validate = not args.no_validate  # whether to use validation set
    save_checkpoint = not args.no_save_checkpoint

    data_dir = args.data_directory.rstrip('/')
    try:
        num_categories = len([
            d for d in os.listdir(data_dir + '/test') if d.isnumeric()
        ])
    except FileNotFoundError:
        print(f'ERROR: {data_dir} not found.', file=sys.stderr)
        sys.exit(-1)
    except NotADirectoryError:
        print(f'ERROR: {data_dir} is not a directory.', file=sys.stderr)
        sys.exit(-1)

    if args.gpu:
        device = 'cuda'
        if not torch.cuda.is_available():
            print('ERROR: cuda is not available on this machine.',
                  'Use cpu for training instead.',
                  file=sys.stderr)
            sys.exit(-1)
    else:
        device = 'cpu'

    if args.checkpoint:
        trainer = ModelTrainer(
            data_dir,
            classifier=Classifier(checkpoint=args.checkpoint)
        )
    else:
        trainer = ModelTrainer(
            data_dir,
            model_architecture=args.arch,
            output_size=num_categories,
            hidden_layers=args.hidden_units,
            learn_rate=args.learning_rate
        )

    save_dir = args.save_dir.rstrip('/')
    try:
        os.listdir(save_dir)
    except FileNotFoundError:
        os.mkdir(save_dir)
    except NotADirectoryError:
        print(f'WARNING: {save_dir} is not a directory. ' +
              'Saving checkpoint and writing any training logs to current ' +
              'directory instead.',
              file=sys.stderr)
        save_dir = '.'

    with open(f'{save_dir}/{args.arch}.txt', 'w') \
            if args.write_log_file else dont_open() as log_file:
        try:
            with active_session() if keep_active else no_context():
                trainer.train_classifier(validate=validate,
                                         num_epochs=args.epochs,
                                         device=device,
                                         output_file=log_file,
                                         print_status=True)
        except (NewConnectionError, ConnectionError) as e:
            print('Exception raised in active_session context manager.',
                  file=sys.stderr)
            print('If running on a local machine, use',
                  '--no_active_session flag.',
                  file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(-1)

    if save_checkpoint:
        trainer.classifier.save_checkpoint(save_dir + '/checkpoint.pth')

    if args.test_model:
        try:
            with active_session() if keep_active else no_context():
                accuracy = trainer.test_accuracy(device=device,
                                                 print_status=True)
        except (NewConnectionError, ConnectionError) as e:
            print('Exception raised in active_session context manager.',
                  file=sys.stderr)
            print('If running on a local machine, use',
                  '--no_active_session flag.',
                  file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(-1)

        msg = f'Test Accuracy: {accuracy*100:.4f}%'
        print(msg)

        if args.write_log_file:
            with open(f'{save_dir}/{args.arch}.txt', 'a') as log_file:
                print(msg, file=log_file)
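
A hypothetical programmatic invocation of this training entry point (the dataset directory and option values are placeholders, not taken from the original listing):

# Hypothetical call; 'flowers' and the option values are placeholders.
main('flowers', '--arch', 'alexnet', '--epochs', '3', '--save_dir', 'checkpoints')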
Example #22
elif os.path.isfile('vcap-local.json'):
    with open('vcap-local.json') as f:
        vcap = json.load(f)
        print('Found local VCAP_SERVICES')
        creds = vcap['services']['cloudantNoSQLDB'][0]['credentials']
        user = creds['username']
        password = creds['password']
        url = 'https://' + creds['host']
        client = Cloudant(user, password, url=url, connect=True)
        client.create_database('trainer', throw_on_exists=False)
        client.create_database('synapse', throw_on_exists=False)

cache = dict()
if client is not None:
    # create Classifier cache on startup
    cache["intents"] = Classifier("intents", client)
    cache["intents"].load()
    cache["entities@timetables"] = Classifier("entities@timetables", client)
    cache["entities@timetables"].load()
    cache["entities@meal"] = Classifier("entities@meal", client)
    cache["entities@meal"].load()

# On Bluemix, get the port number from the environment variable PORT
# When running this app on the local machine, default the port to 8000
port = int(os.getenv('PORT', 8000))


def removekey(d, key):
    r = dict(d)
    del r[key]
    return r
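
removekey() returns a shallow copy of the dict without the given key, so the original request payload is left untouched. A quick sketch (the payload below is illustrative):

# Illustrative payload; 'sessionId' is a made-up field.
payload = {"sentence": "hello", "sessionId": "abc123"}
cleaned = removekey(payload, "sentence")
# cleaned == {"sessionId": "abc123"}; payload itself is unchanged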
Example #23
import telebot
from flask import Flask, request

import settings

from classifier.classifier import Classifier
from classifier.data.image_processing import image_from_file

bot = telebot.TeleBot(settings.TOKEN,)
server = Flask(__name__)

# Init image classifier
classifier = Classifier(
    base_net_path=settings.BASIC_NET_PATH,
    refferi_net_path=settings.REFFERI_NET_PATH,
    white_net_path=settings.WHITE_NET_PATH,
    blue_net_path=settings.BLUE_NET_PATH,
    device=settings.DEVICE
)


@bot.message_handler(content_types=['photo'])
def get_photo_message(message):
    """
    Predict the label of the photo in the request.
    :param message: message that contains the photo
    :return: label of the photo, string
    """
    # Download photo and save as file object
    telegram_file_id = message.photo[-1].file_id
    telegram_file = bot.get_file(telegram_file_id)
Example #24
def main(*args):
    """Predict the top K classes of an image.

    Args:
        *args: args to be parsed by the ArgumentParser

    Returns:
        None
    """
    # Instantiating with formatter_class argument will make default values print
    # in the help message.
    parser = argparse.ArgumentParser(
        description='Process an image & report results.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'image_path',
        type=str,
        help=('path to the image to process or to a dataset ' +
              'directory with images to choose randomly from ' +
              'Ex: flowers/test/1/image_06743.jpg or ' + 'flowers/test'))
    parser.add_argument('checkpoint',
                        type=str,
                        help='path to the model checkpoint to load')
    parser.add_argument('--top_k',
                        type=int,
                        default=1,
                        help='Return top K most likely classes')
    parser.add_argument('--category_names',
                        type=str,
                        help='use a mapping of categories to real names')
    parser.add_argument('--gpu',
                        action='store_true',
                        help=('if available, use gpu to process the image ' +
                              'instead of the cpu'))
    args = parser.parse_args(args)

    if os.path.isdir(args.image_path):
        print(f'{args.image_path} is a directory.',
              'Choosing a random image to process.')
        image_path = get_random_image_from_dir(args.image_path)
        print(f'Using image: {image_path}')
    else:
        image_path = args.image_path

    if not os.path.isfile(args.checkpoint):
        print(f'ERROR: {args.checkpoint} is not a file.', file=sys.stderr)
        sys.exit(-1)

    if args.category_names:
        cat_to_name = load_json(args.category_names)
    else:
        cat_to_name = None

    if args.gpu:
        device = 'cuda'
        if not torch.cuda.is_available():
            print('ERROR: cuda is not available on this machine.',
                  'Use cpu for prediction instead.',
                  file=sys.stderr)
            sys.exit(-1)
    else:
        device = 'cpu'

    classifier = Classifier(checkpoint=args.checkpoint)
    probs, classes = classifier.predict(image_path,
                                        topk=args.top_k,
                                        device=device)

    if cat_to_name is not None:
        classes = [cat_to_name[c] for c in classes]
        class_len = len(max(cat_to_name.values(), key=len))
    else:
        class_len = 10  # padding needed to space column 1 title 'Class' below

    print(f'{"Class":{class_len}}{"Probability"}')
    for prob, class_ in zip(probs, classes):
        print(f'{class_:{class_len}}{prob:4.2f}')
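
A hypothetical call to this prediction entry point, reusing the sample image path from the argparse help text (the checkpoint file name is a placeholder):

# Hypothetical call; 'checkpoint.pth' is a placeholder file name.
main('flowers/test/1/image_06743.jpg', 'checkpoint.pth', '--top_k', '5')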
Example #25
    def __init__(self,
                 dataset_root='flowers',
                 classifier=None,
                 **classifier_kwargs):
        """You can create a ModelTrainer with a Classifier object if you have a
        model checkpoint to load and want to continue training, or you can pass
        a valid set of keyword arguments to create a new Classifier object
        before training.

        Args:
            dataset_root (str): the directory where the train, valid, and test
                datasets are located.
            classifier (classifier.Classifier): the Classifier containing the
                PyTorch model to train. If no Classifier is given as an
                argument, a new Classifier can be created using the
                classifier_kwargs passed instead.
            **classifier_kwargs: if no classifier is given, these will be used
                to create a new Classifier to train. See Classifier.__init__()
                in classifier.py for valid kwargs.

        Examples:
            trainer = ModelTrainer(
                'flowers',
                classifier=Classifier(checkpoint='checkpoint.pth')
            )

            trainer = ModelTrainer(
                'flowers',
                model_architecture='alexnet',
                output_size=102,
                hidden_layers=[4096, 1000],
                learn_rate=0.005
            )
        """
        self._data_dir = dataset_root
        self._train_dir = self._data_dir + '/train'
        self._valid_dir = self._data_dir + '/valid'
        self._test_dir = self._data_dir + '/test'

        # Define transforms for training, validation and testing sets.
        # validation and test transforms are the same.
        self._data_transforms = {
            'train':
            transforms.Compose([
                transforms.RandomRotation(30),
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ]),
            'valid':
            transforms.Compose([
                transforms.Resize(255),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ]),
            'test':
            transforms.Compose([
                transforms.Resize(255),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ])
        }

        # Load the datasets with ImageFolder
        self._image_datasets = {
            'train':
            datasets.ImageFolder(self._train_dir,
                                 transform=self._data_transforms['train']),
            'valid':
            datasets.ImageFolder(self._valid_dir,
                                 transform=self._data_transforms['valid']),
            'test':
            datasets.ImageFolder(self._test_dir,
                                 transform=self._data_transforms['test'])
        }

        # Using the image datasets and the transforms, define the dataloaders
        self._dataloaders = {
            'train':
            torch.utils.data.DataLoader(self._image_datasets['train'],
                                        batch_size=64,
                                        shuffle=True),
            'valid':
            torch.utils.data.DataLoader(self._image_datasets['valid'],
                                        batch_size=64),
            'test':
            torch.utils.data.DataLoader(self._image_datasets['test'],
                                        batch_size=64)
        }

        if classifier:
            self.classifier = classifier
        else:
            self.classifier = Classifier(
                class_to_idx=self._image_datasets['train'].class_to_idx,
                **classifier_kwargs)
Example #26
def train(args):
    # Read dataset source.json
    if not os.path.exists(os.path.join(DATASETS, args.dataset_name)):
        print(f"Dataset {args.dataset_name} does not exist", file=sys.stderr)
        sys.exit(1)
    dataset_source_fname = os.path.join(DATASETS, args.dataset_name,
                                        "source.json")
    with open(dataset_source_fname, "r") as f:
        dataset_source = json.load(f)
    if "weights" not in dataset_source:
        print(
            f"Dataset {args.dataset_name} was not created using noise classes.",
            file=sys.stderr)
        print(
            "An attempt will be made to match the labels to existing classes.",
            file=sys.stderr,
            flush=True)
        dataset_source["weights"] = dict(
            (label, 1 / len(dataset_source["labels"]))
            for label in dataset_source["labels"])

    if args.update is not None:
        noise_classes_old = load_noise_classes(args, False)
        if noise_classes_old is None:
            args.update = None

    # Iterate over noise classes
    print("Initializing classifiers")
    noise_classes = NoiseClass.from_file(args.noise_classes)
    default_settings = None
    for label in dataset_source["weights"]:
        if label not in noise_classes:
            print(
                f"Label {label} of dataset {args.dataset_name} is not among the defined noise classes in {args.noise_classes}",
                file=sys.stderr,
                flush=True)
            continue

        nc = noise_classes[label]
        if nc.classifiers is None:
            # Pseudo-class
            continue

        train_all = True
        if args.update is not None and nc.id in noise_classes_old:
            nc_old = noise_classes_old[nc.id]
            train_all = False
            if json.dumps(nc.degradations) != json.dumps(nc_old.degradations):
                print(
                    f"Warning: the degradation definition of {nc.id} has changed.",
                    file=sys.stderr)
                print(
                    "If you have not already, please generate a new dataset using degradations/create_dataset.py",
                    file=sys.stderr,
                    flush=True)
                print(
                    f"All classifiers in {nc.id} will be retrained from scratch"
                )
                train_all = True

        print(label + ":")

        for classifier_ind, classifier_spec in enumerate(nc.classifiers):
            print(
                f"\t {classifier_spec['type']} (feature: {classifier_spec['feature']}",
                end="")
            if len(nc.classifiers) > 1:
                print(", weight:", classifier_spec.get('weight', 1), end="")
            if "bootstrap" in classifier_spec:
                print(", bootstrap:", classifier_spec["bootstrap"], end="")
            if classifier_spec.get("vad", None):
                print(", VAD:",
                      "unvoiced" if classifier_spec["vad"].get(
                          "inverse", False) else "voiced",
                      end="")
            print(")")

            # Setup classifier specifications
            classifier_complete_defaults(classifier_spec,
                                         args.classifier_defaults,
                                         default_settings)

            # Initialize or copy old classifier
            _type = next((m for m in available_models
                          if m.__name__ == classifier_spec["type"]), None)
            if _type is None:
                print(
                    f"Unrecognized classifier type {classifier_spec['type']}",
                    file=sys.stderr)
                if classifier_spec['type'] == 'GenHMM':
                    raise genhmm_err
            config = {
                _type.__name__:
                dict((cat, classifier_spec[cat]) for cat in setting_categories
                     if classifier_spec[cat] is not None)
            }

            if args.update is not None and nc.id in noise_classes_old and classifier_ind < len(
                    nc_old.classifiers):
                classifier_spec_old = nc_old.classifiers[classifier_ind]
                if not train_all and nc.id not in args.update \
                  and classifier_specs_equal(classifier_spec, classifier_spec_old):
                    classifier_spec["instance"] = classifier_spec_old[
                        "instance"]
                    classifier_spec["notrain"] = True
                    continue

            classifier_spec["instance"] = Classifier(
                [nc.id, nc.id + " (negative)"], _type, config, silent=False)

    # Prune noise_classes
    for nc_id in tuple(noise_classes.keys()):
        classifiers = noise_classes[nc_id].classifiers
        if classifiers is None or sum(
                1 for spec in classifiers
                if spec.get("instance") is not None) == 0:
            del noise_classes[nc_id]

    # Check if skipping training is allowed
    if args.update is not None and set(noise_classes.keys()) != set(
            noise_classes_old.keys()):
        print("The set of noise classes have changed", file=sys.stderr)
        print(
            "All classifiers for all noise classes must be retrained from scratch",
            file=sys.stderr,
            flush=True)
        for nc in noise_classes.values():
            for spec in nc.classifiers:
                if "notrain" in spec:
                    del spec["notrain"]

    # Train classifiers grouped by feature
    rng = None
    spec_inds_sorted = _sort_spec_inds(noise_classes)
    filenames, classes, labels = read_dataset(args.dataset_name, "train")
    for i, (spec, nc, feats, idxs) in enumerate(
            _iterate_classifiers(spec_inds_sorted, filenames, args.recompute)):
        print(f"Training ({i + 1}/{len(spec_inds_sorted)})")
        if args.update is not None and spec.get("notrain", False):
            print("Keeping old classifier")
            del spec["notrain"]
            continue

        label_ind, = np.where(classes == spec["instance"].noise_types[0])[0]
        labels_binary = labels[:, label_ind]
        labels_binary = np.column_stack((labels_binary, ~labels_binary))

        # Bootstrapping
        if spec.get("bootstrap", False):
            if len(nc.classifiers) == 1:
                print(
                    "Warning: Bootstrapping a single classifier - please use model averaging or the entire training set.",
                    file=sys.stderr,
                    flush=True)
            if rng is None:
                rng = np.random.default_rng()
            sample_inds = rng.choice(np.arange(len(filenames)), len(filenames))

            feats_used = [
                feats[feat_ind] for sample_ind in sample_inds
                for feat_ind in np.where(idxs == sample_ind)[0]
            ]
            idxs_used = [
                idx for idx, sample_ind in enumerate(sample_inds)
                for _ in range(np.sum(idxs == sample_ind))
            ]
            labels_used = labels_binary[sample_inds, :]

            #for li in range(len(sample_inds)):
            #	for fi in np.where(idxs_used == idxs_used[li])[0]:
            #		assert labels_used[fi] == labels_binary[sample_inds[li]]
        else:
            feats_used = feats
            idxs_used = idxs
            labels_used = labels_binary

        spec["instance"].train(feats_used, idxs_used, labels_used, args.models)

    print("Training complete")

    # Save
    fname = os.path.join(args.models, args.dataset_name + ".noiseclasses")
    if os.path.exists(fname):
        print("Overwriting", fname)
    with open(fname, "wb") as f:
        pickle.dump(noise_classes, f)
    print("Saved to", fname)
Example #27
    def __init__(self):
        self.model = Model()
        self.model.model_load()
        self.r = Reader()
        self.r.read_corpus()
        self.tagger = Classifier(self.r.train_sents, self.model)
Example #28
if __name__ == "__main__":
    from torch.utils import data
    from sklearn.model_selection import train_test_split

    from generator.generator import Generator
    from discriminator.discriminator_semi import SemiSupervisedDiscriminator
    from classifier.classifier import Classifier
    from data.data_loader import ImageDataset, ImageTransform, make_datapath_list

    z_dim = 20
    image_size_g = 64
    image_size_d = 12
    num_classes = 10
    G = Generator(image_size_g, z_dim)
    D = SemiSupervisedDiscriminator(image_size_d, num_classes)
    C = Classifier(image_size_d, num_classes)

    G.apply(weights_init)
    D.apply(weights_init)

    print("Finish initialization of the network")

    label_list = list(range(num_classes))
    img_list, label_list = make_datapath_list(label_list)
    train_img_list, test_img_list, train_label_list, test_label_list = train_test_split(
        img_list, label_list, test_size=0.2)

    mean = (0.5, )
    std = (0.5, )
    train_dataset = ImageDataset(data_list=train_img_list,
                                 transform=ImageTransform(mean, std),
Example #29
import sys
import os
from PIL import Image
from classifier.classifier import Classifier

cnn = Classifier(json_file='model.json', weights_file='model.h5')
exit_program = False
count_true = 0
count_false = 0
while not exit_program:
    type_input = input("Folder(F) or Single File(S)?: ")
    if type_input == "F" or type_input == "f":
        if not os.path.exists('animals_and_humans'):
            os.mkdir('animals_and_humans')
        if not os.path.exists('nothing'):
            os.mkdir('nothing')
        folder_name = input("Folder Name: ")
        if os.path.exists(folder_name):
            test_images = os.listdir(folder_name)
            if len(test_images) > 0:
                for image in test_images:
                    print(image)
                    if image.startswith('.'):
                        print(image + " not read")
                    else:
                        path_image = "./" + folder_name + "/" + image
                        animal, accuracy = cnn.predict_animal(path_image)
                        if (animal):
                            os.rename(path_image,
                                      "./animals_and_humans/" + image)
                            count_true += 1
Example #30
def run(opt):

    # output dir
    if os.path.exists(opt.save_dir):
        shutil.rmtree(opt.save_dir)
    os.makedirs(opt.save_dir)

    # load dataset
    dataset = Dataloader(source=opt.source, imgsz=opt.img_size).dataset

    # load object detection model, and weights
    detector = Detector(detector_type=opt.detector_type,
                        cfg_file=opt.detector_cfg_file)
    detector.run_through_once(opt.img_size)  # warm-up: run the model through once

    # load object tracking model
    tracker = Tracker(tracker_type=opt.tracker_type,
                      cfg_file=opt.tracker_cfg_file)

    # load pose detection model
    poser = Poser(poser_type=opt.poser_type, cfg_file=opt.poser_cfg_file)

    # load classifier model
    classifier = Classifier(classifier_type=opt.classifier_type,
                            cfg_file=opt.classifier_cfg_file)

    print(detector.device, detector.cfg)
    filt_with_txt = False  # first parse the status annotation .txt file and only run detection on frames listed there, which speeds things up
    if filt_with_txt:
        from classifier.data_analyse import anaylise_label
        label_ret = anaylise_label()
        label_stems = [x[0] for x in label_ret]

    for img_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        # print(type(img), type(im0s))
        # print(type(im0s), im0s.shape)
        if dataset.is_camera:
            im0s = im0s[0]
            path = f'{path[0]}/{img_idx:0<6}.jpg'
        if filt_with_txt:
            fold_stem = path.split('/')[-2]
            idx = label_stems.index(fold_stem)
            # print(fold_stem, label_stems, idx)
            img_stem = Path(path).stem
            valid_stems = [Path(x).stem for x in label_ret[idx][-1]]
            in_it = f'track_{img_stem}' in valid_stems
            # print(path, in_it, label_ret[idx][-1][0])
            if not in_it:
                continue
        # img: [3, w, h], preprocess, inference, NMS,
        det_ret = detector.detect(
            path, img,
            im0s)  # detect result: nparray, [num_obj, 6] 6: xyxy,conf,cls
        # detector.imshow(im0s, det_ret)
        # track
        tra_ret = tracker.track(
            det_ret,
            im0s)  # track result: list, [num_obj, 7], 7: xyxy, cls, tid, trace
        # print(tra_ret[:, 5])
        # tracker.imshow(im0s, tra_ret, path)
        # pose detect
        pose_ret = poser.detect_pose(tra_ret, im0s, path, return_type='zzd')
        # zzd format: np.array(object): [num_obj, 10],10: xyxy cls tid trace keypoints kp_score proposal_score
        # print(pose_ret)
        poser.imshow(im0s, pose_ret, path, resize=(1280, 720))
        # classifier
        if opt.feature_save_dir is not None:  # feature-saving mode
            classifier.build_and_save_feature(pose_ret,
                                              path,
                                              save_dir=opt.feature_save_dir)
            print(f'\rsaving features: [{img_idx + 1:>3}/{len(dataset)}] ',
                  end='')
            continue

        # status_ret = classifier.detect_status(pose_ret, path, is_camera=dataset.is_camera)
        # zzd format: np.array(object): [num_obj, 12]; 12 = the 10 fields above plus status_idx and status
        # classifier.imshow(im0s, status_ret, show_name='x', resize=(1280, 720))
        # print(status_ret)

        if img_idx == 10:
            if cv2.waitKeyEx(0) == ord('q'):
                raise StopIteration