def test_Parser_data(self):
    with open(os.path.join(os.path.dirname(sys.argv[0]), "test_parser.txt")) as fr:
        p = Parser(fr)
        with open(os.path.join(os.path.dirname(sys.argv[0]),
                               "test_parser_dataset.txt"), "w") as fw:
            p.get_data(fw)
def test_Parser_stats(self):
    with open(os.path.join(os.path.dirname(sys.argv[0]), "test_parser.txt")) as fr:
        p = Parser(fr)
        with open(os.path.join(os.path.dirname(sys.argv[0]),
                               "test_parser_result.txt"), "w") as fw:
            p.get_stats(fw)
    result = os.path.join(os.path.dirname(sys.argv[0]), "test_parser_result.txt")
    compare = os.path.join(os.path.dirname(sys.argv[0]), "test_parser_compare.txt")
    with open(result) as fresult, open(compare) as fcompare:
        for lineresult in fresult:
            linecompare = fcompare.readline()
            # The original loop read both files but never compared them;
            # assert line-by-line equality so the test can actually fail.
            self.assertEqual(lineresult, linecompare)
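# A note on running the two tests above: they are written as unittest methods,
# so — assuming they belong to a unittest.TestCase subclass in a test module —
# they can be run with something like:
#
#   python -m unittest test_parser   # module name is hypothetical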
class Splitter(object):
    def __init__(self, data_path, split_ratio):
        self._images_path = os.path.join(data_path, 'images')
        self._out_path = os.path.join(data_path, 'images-split')
        self._split_ratio = split_ratio
        self._parser = Parser(data_path)

    def create_directories(self, labels):
        # Remove any previous split before recreating the directory tree.
        shutil.rmtree(self._out_path, True)
        os.makedirs(self._out_path)
        for d in ['test', 'train']:
            for label in labels:
                os.makedirs(os.path.join(self._out_path, d, label))

    def get_train_test_image_list(self, image_list):
        # Shuffle, then cut at split_ratio: the first chunk is the training set.
        np.random.shuffle(image_list)
        train_images, test_images = np.split(
            np.array(image_list), [int(len(image_list) * self._split_ratio)])
        return train_images, test_images

    def copy_images(self, images, base_dir, label):
        for image in images:
            src = os.path.join(self._images_path, image)
            dst = os.path.join(self._out_path, base_dir, label, image)
            if os.path.isfile(src):
                print(f'Copying {src} to {dst}')
                shutil.copy(src, dst)

    def split(self):
        self._parser.parse()
        self.create_directories(self._parser.labels)
        for label in self._parser.labels:
            train_images, test_images = self.get_train_test_image_list(
                self._parser.get_label_images(label))
            self.copy_images(train_images, 'train', label)
            self.copy_images(test_images, 'test', label)

    def split_binary(self):
        self._parser.parse()
        self.create_directories(self._parser.binary_labels)
        train_images, test_images = self.get_train_test_image_list(
            self._parser.get_no_anomaly_images())
        self.copy_images(train_images, 'train', 'No-Anomaly')
        self.copy_images(test_images, 'test', 'No-Anomaly')
        train_images, test_images = self.get_train_test_image_list(
            self._parser.get_anomaly_images())
        self.copy_images(train_images, 'train', 'Anomaly')
        self.copy_images(test_images, 'test', 'Anomaly')
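# A minimal usage sketch for Splitter. The 'dataset' path and the 0.8 ratio
# are illustrative assumptions, not values taken from the original code:
#
#   splitter = Splitter('dataset', split_ratio=0.8)  # 80% train / 20% test
#   splitter.split()          # one train/ and test/ folder per label
#   splitter.split_binary()   # two folders: Anomaly / No-Anomaly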
def init(cls):
    # set font config
    cls.fontconfig = Config.font_vars["large"]
    cls.parser = Parser()
    cls.parser.parse("questions.json")
    pygame.init()
    cls.screen = pygame.display.set_mode(
        (Config.display_width, Config.display_height))
    # display cards
    cls.screen.fill(Config.niceblue)
    cls.font = pygame.font.Font(Config.font_setting[0], cls.fontconfig[0])
    pools = cls.parser.get_pools()
    cls.cardboard = Cardboard(cls.switch_menu)
    cls.menu = Menu(cls.screen, Config.font_setting[0], pools,
                    cls.switch_cardboard)
    pygame.display.flip()
    cls.main()
ENV = {'mode': args.mode, 'ip': args.ip, 'port': args.port}

""" Priv Publish server """
UPLOAD_FOLDER = os.environ['PRIV_DATA']
ALLOWED_EXTENSIONS = {'txt', 'csv'}
#DATA_HOME = os.environ['PRIV_DATA']

app = Flask(__name__, template_folder="templates",
            static_folder='templates/components')
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 8 * 1024 * 1024  # 8MB
app.secret_key = "pRiV-PuBLish"

pd_parser = Parser()

# home page
@app.route("/", methods=["GET"])
def home():
    return render_template('homepage.html')

# synthetic page
@app.route("/data_generation", methods=["GET"])
def data_generation():
    prod_ip = ENV['ip']
    return render_template('data_generation.html', prod_ip=prod_ip)
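# A minimal entrypoint sketch, assuming this module is run directly and that
# the args parsed into ENV above carry the host and port; treating
# mode == 'dev' as the debug switch is an assumption:
if __name__ == "__main__":
    app.run(host=ENV['ip'], port=int(ENV['port']),
            debug=(ENV['mode'] == 'dev'))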
class DataAnalyzer(object):
    def __init__(self, data_path):
        self._images_path = os.path.join(data_path, 'images')
        self._parser = Parser(data_path)
        self._results = MdUtils(file_name='results', title='Overview')
        self._num_images = None
        self._counts = None
        self._image_shape = None
        self._image_shape_mean = None

    def _compute_stats(self):
        self._num_images = self._parser.data.shape[1]
        self._counts = self._parser.data.loc[
            'anomaly_class'].value_counts().to_dict()

    def _plot_random_image(self):
        # randint is inclusive on both ends; subtract 1 so the index stays
        # within the 0..num_images-1 file names.
        random_image_file = f'{random.randint(0, self._num_images - 1)}.jpg'
        image = img.imread(os.path.join(self._images_path, random_image_file))
        self._image_shape = image.shape

        fig = plt.figure()
        plt.imshow(image)
        plt.xticks([])
        plt.yticks([])
        plt.title('Random Image')
        plt.tight_layout()
        plt.savefig('random_image.png')
        plt.close(fig)

        # Assign the new figure so close() releases it, not the previous one.
        fig = plt.figure()
        plt.hist(image.flatten())
        plt.xlabel('Pixel Value')
        plt.ylabel('Counts')
        plt.title('Histogram')
        plt.tight_layout()
        plt.savefig('random_image_histogram.png')
        plt.close(fig)

    def _compute_mean_shape(self):
        h, w = [], []
        for im in self._parser.image_list:
            print(f'Reading image: {im}')
            image = img.imread(os.path.join(self._images_path, im))
            h.append(image.shape[0])
            w.append(image.shape[1])
        self._image_shape_mean = (np.mean(h), np.mean(w))

    def _plot_image_each_class(self):
        fig = plt.figure(figsize=(10, 10))
        plt.title('Random Image In Each Class')
        i = 1
        for label in self._parser.labels:
            image_list = self._parser.get_label_images(label)
            random_selection = random.choice(image_list)
            image_path = self._parser.data.loc[
                'image_filepath'].tolist()[random_selection]
            # image_path[7:] drops the stored leading path prefix
            # (presumably 'images/') before joining with _images_path.
            image = img.imread(os.path.join(self._images_path, image_path[7:]))
            plt.subplot(4, 3, i)
            i += 1
            plt.subplots_adjust(hspace=1, wspace=1)
            plt.title(f'Class: {label}')
            cur_axes = plt.gca()
            cur_axes.axes.get_xaxis().set_ticks([])
            cur_axes.axes.get_yaxis().set_ticks([])
            plt.imshow(np.uint8(image))
        plt.savefig('random_image_each_class.png')
        plt.close(fig)

    def analyze(self):
        self._parser.parse()
        self._compute_stats()
        self._plot_random_image()
        self._compute_mean_shape()
        self._plot_image_each_class()

    def save_results(self):
        self._results.new_paragraph(f'Number of images: {self._num_images}')
        self._results.new_paragraph(
            f'Number of unique classes: {len(self._parser.labels)}')
        self._results.new_paragraph('Class names:')
        self._results.new_list(items=self._parser.labels)
        self._results.new_paragraph('Number of images per class:')
        self._results.new_list(
            items=[f'{k}: {v}' for k, v in self._counts.items()])
        self._results.new_paragraph(f'Image shape: {self._image_shape}')
        self._results.new_paragraph(
            f'Mean image shape: {self._image_shape_mean}')
        self._results.new_paragraph(
            self._results.new_inline_image(text='Random Image',
                                           path='random_image.png'))
        self._results.new_paragraph(
            self._results.new_inline_image(text='Histogram',
                                           path='random_image_histogram.png'))
        self._results.new_paragraph(
            self._results.new_inline_image(text='Classes',
                                           path='random_image_each_class.png'))
        self._results.create_md_file()
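# A minimal usage sketch for DataAnalyzer; the 'dataset' path is an
# illustrative assumption:
#
#   analyzer = DataAnalyzer('dataset')
#   analyzer.analyze()       # parse, compute stats, write the PNG plots
#   analyzer.save_results()  # write results.md via mdutils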
def main():
    argdict = dict(zip(sys.argv, sys.argv[1:] + ['']))
    if "-h" in argdict:
        print(help_message)
        return

    # Filenames of the data files.
    raw_filename = join_filenames("data", "tweets.csv")
    filtered_filename = join_filenames("data", "_tweets_filtered.txt")
    stat_filename = join_filenames("data", "tweets_stat.txt")
    tokenized_filename = join_filenames("data", "tweets_tokenized.txt")

    # Dimensions of the model
    session_config = configparser.ConfigParser()
    session_config.read('session.ini')
    word2vec_batch_size = 640
    embedding_size = int(session_config['dimension']['embedding_size'])
    gen_batch_size = 128
    gen_seq_length = int(session_config['dimension']['gen_seq_length'])
    gen_hidden_size = [int(x) for x in
                       session_config['dimension']['gen_hidden_size'].split(',')]

    # Hyper-parameters of the model
    learning_rate = 1E-06

    if "-i" in argdict:
        # Filter valid tweets from the data file, then tokenize them with the NLP parser.
        if os.path.isfile(tokenized_filename):
            proceed = (input("Erasing old data. OK to proceed? (Y/N)") == "Y")
        else:
            proceed = True
        if proceed:
            with open_utf8(raw_filename, "r") as raw_file_r:
                # Filter actual tweets
                preparser = Preparser(raw_file_r)
                preparser.extract(filter=True)
                with open_utf8(filtered_filename, "w") as filtered_file_w:
                    preparser.save(filtered_file_w)
            # Tokenize tweets
            with open_utf8(filtered_filename, "r") as filtered_file_r:
                parser = Parser(filtered_file_r)
                with open_utf8(stat_filename, "w") as stat_file_w:
                    parser.get_stats(stat_file_w)
                with open_utf8(tokenized_filename, "w") as tokenized_file_w:
                    parser.get_data(tokenized_file_w)

    if "-w" in argdict and int(argdict["-w"]) >= 0:
        # Start or continue word2vec optimization
        word2vec_num_step = int(argdict["-w"])
        if "-W" in argdict:
            word2vec_save_filename = join_filenames("saves", argdict["-W"])
        else:
            word2vec_save_filename = join_filenames(
                "saves", session_config['save_file']['word2vec_save'])
        word2vec_restore = os.path.isfile(word2vec_save_filename + ".meta")
        word2vec = Word2Vec(tokenized_filename, stat_filename)
        word2vec.give_code()
        word2vec.tf_init(embedding_size=embedding_size,
                         batch_size=word2vec_batch_size, seed=None)
        word2vec.tf_run(word2vec_num_step, word2vec_save_filename,
                        restore=word2vec_restore)

    if "-g" in argdict and int(argdict["-g"]) >= 0:
        # Start or continue generator training. Note: this block reuses the
        # word2vec object, so it assumes the "-w" block above ran in the same
        # invocation.
        with open_utf8(stat_filename, "r") as stat_file_r, \
                open_utf8(tokenized_filename, "r") as tokenized_file_r:
            embeddings = word2vec.Embeddings()
            if "-G" in argdict:
                gen_save_filename = join_filenames("saves", argdict["-G"])
            else:
                gen_save_filename = join_filenames(
                    "saves", session_config['save_file']['generator_save'])
            gen_restore = os.path.isfile(gen_save_filename + ".meta")
            generator = Generator(embeddings)
            generator.nn_init(gen_batch_size, gen_seq_length, gen_hidden_size,
                              learning_rate=learning_rate, seed=None,
                              use_vector=("-V" in argdict))
            generator.train_real_data(int(argdict["-g"]), tokenized_file_r,
                                      gen_save_filename, restore=gen_restore)

    if "-s" in argdict and int(argdict["-s"]) >= 0:
        # Sampling likewise depends on the generator built in the "-g" block,
        # and expects "-S" to name the output file.
        result_filename = join_filenames(argdict["-S"])
        unparser = Unparser(result_filename)
        sentences = generator.generate(gen_save_filename, int(argdict["-s"]))
        unparser.save(sentences)
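# Example invocations for main() above. The script name is hypothetical; the
# flags and their meanings follow the argdict parsing shown in the code:
#
#   python main.py -i                          # filter and tokenize raw tweets
#   python main.py -w 100000                   # run 100000 word2vec steps
#   python main.py -w 0 -g 50000               # reuse word2vec, train generator
#   python main.py -w 0 -g 0 -s 10 -S out.txt  # generate 10 sentences to out.txt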
#!/usr/bin/python
import os
import sys

# Extend the import path before importing from the package, so the
# 'data' package next to this script can be found.
sys.path.append(os.path.dirname(__file__))

from data.parser import Parser

data_filename = 'news_tagged_data.txt'

if __name__ == "__main__":
    parser = Parser(data_filename)
    X, Y = parser.parse()
    print(X)
    print(Y)