Example #1
#! python3

import shutil, os, re

datePattern = re.compile(
    r"""^(.*?)          # all text before the date
    ((0|1)?\d)-         # one or two digits for the month
    ((0|1|2|3)?\d)-     # one or two digits for the day
    ((19|20)\d\d)       # four digits for the year
    (.*?)$              # all text after the date
    """, re.VERBOSE)

for amerFilename in os.listdir('.'):
    mo = datePattern.search(amerFilename)

    if mo is None:
        continue

    beforePart = mo.group(1)
    monthPart = mo.group(2)
    dayPart = mo.group(4)
    yearPart = mo.group(6)
    afterPart = mo.group(8)

    euroFilename = beforePart + dayPart + '-' + monthPart + '-' + yearPart + afterPart

    absWorkingDir = os.path.abspath('.')
    amerFilename = os.path.join(absWorkingDir, amerFilename)
    euroFilename = os.path.join(absWorkingDir, euroFilename)

    print(f'Renaming "{amerFilename}" to "{euroFilename}"...')
    # shutil.move(amerFilename, euroFilename)  # uncomment after a dry run to actually rename
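A quick way to sanity-check the pattern before letting the loop rename anything; the filename below is made up for illustration:

mo = datePattern.search('spam4-1-1984eggs.zip')
print(mo.group(1), mo.group(2), mo.group(4), mo.group(6), mo.group(8))
# prints: spam 4 1 1984 eggs.zip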
Example #2
import importlib
import os

# assuming stand-alone Keras; swap for tensorflow.keras as appropriate
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard

# build_optimizer, create_class_weight, plot_loss and plot_accuracy are
# project-local helpers assumed to be defined elsewhere in this module.


def train(model,
          config,
          experiment,
          training_directory=None,
          validation_directory=None,
          file_identifier=None):

    if training_directory is None:
        training_directory = config['dataset']['training_directory']

    if validation_directory is None:
        validation_directory = config['dataset']['validation_directory']

    # weights/model file names
    weights_file = f'{file_identifier}_{config["experiment"]["name"]}_weights.h5'
    model_file = f'{file_identifier}_{config["experiment"]["name"]}_model.h5'

    # get plot file names
    accuracy_plot_file = f'{file_identifier}_accuracy_plot.png'
    loss_plot_file = f'{file_identifier}_loss_plot.png'

    # plot file paths
    accuracy_plot_path = os.path.join('../tmp', accuracy_plot_file)
    loss_plot_path = os.path.join('../tmp', loss_plot_file)

    # weights/model file paths
    model_path = os.path.join('../tmp', model_file)
    weights_path = os.path.join('../tmp', weights_file)

    # set patience (number of epochs where val_acc does not improve before stopping)
    patience = config['hyper_parameters'].get('patience', 0)
    baseline = config['hyper_parameters'].get('baseline', 0)
    monitor = config['hyper_parameters'].get('monitor', 'val_acc')

    # callbacks to be called after every epoch
    callbacks = [
        ModelCheckpoint(weights_path,
                        monitor=monitor,
                        verbose=1,
                        save_best_only=True),
        TensorBoard(log_dir=os.path.join('../tmp', 'logs'), batch_size=8)
    ]

    # number of splits in dataset
    split = config['dataset']['split']

    # number of samples in dataset
    samples = config['dataset']['samples']

    # metrics to measure under training
    metrics = ['accuracy']

    # set number of training and validation samples
    training_samples = samples - (samples // split)  # training samples in dataset
    validation_samples = samples // split  # validation samples in dataset

    # set number of epochs
    epochs = config['hyper_parameters']['epochs']

    # set batch size
    batch_size = config['hyper_parameters']['batch_size']

    # set number of layers to freeze (not train)
    freeze_layers = config['hyper_parameters']['freeze_layers']

    # number of classes in dataset
    number_of_classes = config['dataset']['number_of_classes']

    # set image dimensions (change these based on the shape/structure of your images)
    image_width = config['image_processing']['image_width']
    image_height = config['image_processing']['image_height']
    image_channels = config['image_processing']['image_channels']

    # set training steps based on training samples and batch size
    training_steps = training_samples // batch_size  # training batches per epoch
    validation_steps = validation_samples // batch_size  # validation batches per epoch

    # build optimizer
    optimizer = build_optimizer(config['optimizer'])

    training_generator_file = config['image_processing']['training_data_generator']
    validation_generator_file = config['image_processing']['validation_data_generator']

    # build data generators
    training_data_generator = importlib.import_module(
        f'generators.{training_generator_file}').train_data_generator
    validation_data_generator = importlib.import_module(
        f'generators.{validation_generator_file}').validation_data_generator

    # freeze layers based on freeze_layers parameter
    for layer in model.layers[:freeze_layers]:
        layer.trainable = False
    for layer in model.layers[freeze_layers:]:
        layer.trainable = True

    # initialize training generator
    training_generator = training_data_generator.flow_from_directory(
        training_directory,
        target_size=(image_width, image_height),
        batch_size=batch_size,
        class_mode='categorical',
        follow_links=True)

    # initialize validation generator
    validation_generator = validation_data_generator.flow_from_directory(
        validation_directory,
        target_size=(image_width, image_height),
        batch_size=batch_size,
        class_mode='categorical',
        follow_links=True)

    # only set early stopping if patience is more than 0
    if patience > 0:

        # append early stopping to callbacks
        callbacks.append(
            EarlyStopping(monitor=monitor,
                          patience=patience,
                          verbose=0,
                          baseline=baseline))

    # print out the class indices for sanity
    print('train indices: ', training_generator.class_indices)
    print('validation indices: ', validation_generator.class_indices)

    # compile model
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=metrics)

    # difference in class counts
    class_dispersion = {}

    class_list = os.listdir(training_directory)
    class_list.sort()

    class_weights = None

    if config['hyper_parameters']['class_weights']:
        for class_index, class_name in enumerate(class_list):
            class_dispersion[class_index] = len(
                os.listdir(os.path.join(training_directory, class_name)))

        class_weights = create_class_weight(class_dispersion)

        for index, weight in class_weights.items():
            print(f'{class_list[index]}: ', weight)

    # train model and get training metrics
    history = model.fit_generator(training_generator,
                                  steps_per_epoch=training_steps,
                                  epochs=epochs,
                                  validation_data=validation_generator,
                                  validation_steps=validation_steps,
                                  callbacks=callbacks,
                                  workers=0,
                                  use_multiprocessing=False,
                                  class_weight=class_weights)

    # plot loss
    plot_loss(history, loss_plot_path)

    # plot accuracy
    plot_accuracy(history, accuracy_plot_path)

    # add plots to experiment
    experiment.add_artifact(accuracy_plot_path)
    experiment.add_artifact(loss_plot_path)

    # load best weights
    if os.path.isfile(weights_path):
        model.load_weights(weights_path)
        experiment.add_artifact(weights_path)

    # save keras model file
    model.save(model_path)

    # add keras model file to experiment
    experiment.add_artifact(model_path)

    # return the trained model (the history object is consumed by the plots above)
    return model
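For reference, a minimal sketch of how train() might be invoked. Every value below is hypothetical; only the keys mirror the lookups the function actually performs, and model/experiment are assumed to be a Keras model and a Sacred-style experiment object:

config = {
    'dataset': {'training_directory': 'data/train',
                'validation_directory': 'data/validation',
                'split': 5, 'samples': 10000, 'number_of_classes': 10},
    'experiment': {'name': 'baseline'},
    'hyper_parameters': {'epochs': 20, 'batch_size': 8, 'freeze_layers': 0,
                         'patience': 5, 'baseline': 0, 'monitor': 'val_acc',
                         'class_weights': False},
    'image_processing': {'image_width': 224, 'image_height': 224,
                         'image_channels': 3,
                         'training_data_generator': 'default',
                         'validation_data_generator': 'default'},
    'optimizer': {'name': 'adam'},  # whatever build_optimizer() expects
}
model = train(model, config, experiment, file_identifier='run01')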
Example #3
        self.defdict["AUTHOR"].append(self.article.author)
        self.defdict["DATELINE"].append(self.article.dateline)
        self.defdict["EXCHANGES"].append(self.article.exchanges)
        self.defdict["COMPANIES"].append(self.article.companies)
        self.defdict["ORGS"].append(self.article.orgs)
        self.defdict["MKNOTE"].append(self.article.mknote)

        #create new Article object
        self.article = Article()

    self.in_d = False


# main function (os and xml.sax are assumed to be imported at the top of the module):
directory = 'reuters21578/sgml_files'
files = os.listdir(directory)

# set up handler and parser
parser = xml.sax.make_parser()
handler = MyHandler()
parser.setContentHandler(handler)

# loop through 22 files
for f in files:
    cur_file = os.path.join(directory, f)
    #only open sgm files
    if f.endswith('.sgm') and f.startswith('reut'):
        parser.parse(cur_file)


# csv, with doc id and "LEWISSPLIT" train/test information
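A hypothetical continuation matching that comment. NEWID and LEWISSPLIT are attributes of the REUTERS tag in this corpus, but whether the handler collects them under these keys is an assumption:

import csv

# Sketch only: assumes the handler also recorded NEWID and LEWISSPLIT.
with open('lewissplit.csv', 'w', newline='') as out:
    writer = csv.writer(out)
    writer.writerow(['doc_id', 'lewissplit'])
    for doc_id, split in zip(handler.defdict['NEWID'],
                             handler.defdict['LEWISSPLIT']):
        writer.writerow([doc_id, split])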
Example #4
if a == "Y":
    while True:
        add_user_name = input("Username:"******"Password:"******"limit(MB):")
        add_user_limit *= 1048576
        adduser(add_user_name, add_user_passwd, add_user_limit)
        a = input("Do you want to go back?\n(c:Continue)[Y/n/c]:")
        if a == "Y":
            db.close()
            exit()
        elif a == "c":
            break

# Add users from subscription.
os.chdir("subscription")
a = input("Do you want to add all users from subscritpion?[Y/n]:")
if a == "Y":
    add_user_limit = input("Default user limit(MB):")
    filename = os.lsitdir()
    for i in filename:
        add_user_name = radom.randint(0, 10000)
        add_user_passwd = i
        add_user_limit *= 1048576
        adduser(add_user_name, add_user_passwd, add_user_limit)
    print("All done!")
    db.close()

a = input("Press Enter to go back.")
exit() if a == "Y" else exit()
Example #5
    if len(args) < 1:
        print "[!] Not enough Arguments, Need at least file path"
        parser.print_help()
        sys.exit()

    # Check for file or dir
    is_file = os.path.isfile(args[0])
    is_dir = os.path.isdir(args[0])

    if options.recursive:
        if not is_dir:
            print "[!] Recursive requires a directory not a file"
            sys.exit()

        # Read all the things
        for filename in os.listdir(args[0]):
            file_data = open(os.path.join(args[0], filename), 'rb').read()
            print "[+] Reading {0}".format(filename)
            config_data = run(file_data)

    else:
        if not is_file:
            print "[!] You did not provide a valid file."
            sys.exit()

        # Read in the file.
        file_data = open(args[0], 'rb').read()
        print "[+] Reading {0}".format(args[0])
        config_data = run(file_data)
        print_output(config_data, options.output)
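This fragment depends on an optparse parser built earlier in the script. A hypothetical reconstruction consistent with the attributes used above (options.recursive, options.output) might look like:

from optparse import OptionParser

parser = OptionParser(usage='usage: %prog [options] path')
parser.add_option('-r', '--recursive', action='store_true', default=False,
                  dest='recursive', help='process every file in a directory')
parser.add_option('-o', '--output', dest='output', default=None,
                  help='file that print_output() should write to')
(options, args) = parser.parse_args()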
Example #6
import os
import zipfile

local_zip = './horse-or-human.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('./horse-or-human')
zip_ref.close()

# Define Directories
train_horse_dir = os.path.join('./horse-or-human', 'horses')
train_human_dir = os.path.join('./horse-or-human', 'humans')

train_horse_names = os.listdir(train_horse_dir)
train_human_names = os.listdir(train_human_dir)

print('total training horse images: ', len(train_horse_names))
print('total training human images: ', len(train_human_names))

# Draw Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

## parameters for the graph
nrows = 4
ncols = 4

## index iterating over images
pic_index = 0

## set up matplotlib fig, and size it to fit 4*4 pics
fig = plt.gcf()
fig.set_size_inches(ncols * 4, nrows * 4)
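The figure set up above is never filled in this excerpt. A sketch of the usual continuation, drawing the first 8 horse and 8 human images into the 4*4 grid (the names next_horse_pix and next_human_pix are illustrative):

next_horse_pix = [os.path.join(train_horse_dir, fname)
                  for fname in train_horse_names[pic_index:pic_index + 8]]
next_human_pix = [os.path.join(train_human_dir, fname)
                  for fname in train_human_names[pic_index:pic_index + 8]]

for i, img_path in enumerate(next_horse_pix + next_human_pix):
    sp = plt.subplot(nrows, ncols, i + 1)
    sp.axis('Off')  # hide gridlines and ticks
    img = mpimg.imread(img_path)
    plt.imshow(img)

plt.show()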
Example #7
import os
import sqlite3


def printTables(iphoneDB):
    try:
        conn = sqlite3.connect(iphoneDB)
        c = conn.cursor()
        c.execute("SELECT tbl_name FROM sqlite_master \
            WHERE type=='table';")

        print("\n[*] Database: " + iphoneDB)

        for row in c:
            print("[-] Table: " + str(row))

        conn.close()

    except sqlite3.Error:
        # skip files that are not valid sqlite databases
        pass


dirList = os.listdir(os.getcwd())

for fileName in dirList:
    printTables(fileName)
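Every entry in the working directory gets passed to printTables(), so non-database files simply fall into the except branch. A cheap pre-filter (a sketch, not part of the original) is to check the 16-byte magic header that every SQLite 3 file starts with:

def isSQLite3(fileName):
    # SQLite 3 databases begin with the bytes 'SQLite format 3\x00'
    with open(fileName, 'rb') as f:
        return f.read(16) == b'SQLite format 3\x00'

for fileName in dirList:
    if os.path.isfile(fileName) and isSQLite3(fileName):
        printTables(fileName)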