def category(self, event):
    if self.dirname is None:
        return
    start = time.time()
    self.statusBasPrint(u"系统正在疯狂运算中,请稍候...")  # "The system is busy computing, please wait..."
    Preprocessor.preprocess(self.dirname)
    self.id2path = utils.parse_path_file(VAR_NEW_PATH_FILE)
    self.statusBasPrint('C++ classifier is running...')
    # p = subprocess.call('Utils.exe', stdout=PIPE, stdin=PIPE, stderr=PIPE)
    p = subprocess.call('Utils.exe')
    self.statusBasPrint('C++ classifier has finished')
    theta = np.loadtxt(VAR_THETA_FILE_RESULT)
    # returned as a list [ [(name, probability), (name, probability), ...], ... ]
    self.result = utils.get_topic_dist(theta)
    self.packages = self.builtdata(self.result)
    for i in self.packages:
        index = self.list.InsertStringItem(sys.maxint, i[0])
        self.list.SetStringItem(index, 1, i[1])
        self.list.SetStringItem(index, 2, i[2])
        self.list.SetStringItem(index, 3, i[3])
        # max(dict.iterkeys(), key=lambda k: dict[k])
    self.statusBasPrint(u"文档分类完成!")  # "Document classification finished!"
    elapsed = time.time() - start
    self.Printtime(str(elapsed) + 's')
def test_get_feature_names(self):
    feature_names = ["f1", "f2", "f3"]
    data_set = np.array([feature_names, ["1", "2", "3"], ["", "4", "5"]])
    preprocessor = Preprocessor(data_set)
    names = preprocessor.get_feature_names()
    self.assertTrue(feature_names[0] == names[0])
    self.assertTrue(feature_names[1] == names[1])
    self.assertTrue(feature_names[2] == names[2])
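# The test above only pins down one behaviour: the first row of the data set is
# treated as the header. A minimal sketch consistent with that test (a
# hypothetical class for illustration, not the project's actual Preprocessor):
import numpy as np


class HeaderPreprocessor(object):
    def __init__(self, data_set):
        self.data_set = np.asarray(data_set)

    def get_feature_names(self):
        # First row of the data set holds the feature names.
        return list(self.data_set[0])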
def initComponents(self, crop=None):
    undistort = False
    self.pre = Preprocessor(self.rawSize, self.threshold, undistort, crop=crop)
    self.featureEx = FeatureExtraction(self.pre.cropSize)
    self.gui = GUI(self.world, self.pre.cropSize, self.threshold, self)
    self.world.setResolution(self.pre.cropSize)
def __init__(self, pitchnum, stdout, sourcefile, resetPitchSize, resetThresholds,
             displayBlur, normalizeAtStartup, noDribbling):
    self.running = True
    self.connected = False
    self.stdout = stdout

    if sourcefile is None:
        self.cap = Camera()
    else:
        filetype = 'video'
        if sourcefile.endswith(('jpg', 'png')):
            filetype = 'image'
        self.cap = VirtualCamera(sourcefile, filetype)

    calibrationPath = os.path.join('calibration', 'pitch{0}'.format(pitchnum))
    self.cap.loadCalibration(os.path.join(sys.path[0], calibrationPath))

    self.preprocessor = Preprocessor(pitchnum, resetPitchSize)
    if self.preprocessor.hasPitchSize:
        self.gui = Gui(self.preprocessor.pitch_size)
    else:
        self.gui = Gui()
    self.threshold = Threshold(pitchnum, resetThresholds, displayBlur, normalizeAtStartup)
    self.thresholdGui = ThresholdGui(self.threshold, self.gui)
    self.features = Features(self.gui, self.threshold)
    self.filter = Filter(noDribbling)

    eventHandler = self.gui.getEventHandler()
    eventHandler.addListener('q', self.quit)

    while self.running:
        try:
            if not self.stdout:
                self.connect()
            else:
                self.connected = True

            if self.preprocessor.hasPitchSize:
                self.outputPitchSize()
                self.gui.setShowMouse(False)
            else:
                eventHandler.setClickListener(self.setNextPitchCorner)

            while self.running:
                self.doStuff()
        except socket.error:
            self.connected = False
            # If the rest of the system is not up yet/gets quit,
            # just wait for it to come available.
            time.sleep(1)
            # Strange things seem to happen to X sometimes if the
            # display isn't updated for a while
            self.doStuff()

    if not self.stdout:
        self.socket.close()
def prepare_training_set(training_set_files):
    preprocessor = Preprocessor()
    ds = SupervisedDataSet(500 * 2, 4)
    for pattern_type, files in training_set_files.iteritems():
        print "[INFO]: Processing {}".format(PatternType(pattern_type).name)
        for file in files:
            entry = preprocessor.preprocess(pattern_type, file)
            sums = list(entry.sums.x)
            sums.extend(entry.sums.y)
            pattern_type_output = [0, 0, 0, 0]
            pattern_type_output[pattern_type.value - 1] = 1
            # pattern_type_output = [pattern_type.value - 1]
            ds.addSample(tuple(sums), tuple(pattern_type_output))
            print [file, pattern_type_output]
    return ds
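# The dataset built by prepare_training_set can then be fed to a trainer and
# pickled for the test script further down that loads "nn.pkl". A minimal
# sketch, assuming PyBrain's buildNetwork/BackpropTrainer and the get_set_files
# helper used elsewhere in this collection; the hidden-layer size and epoch
# count are illustrative guesses, not values from the original project.
import pickle
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer

ds = prepare_training_set(get_set_files("training_set"))
net = buildNetwork(500 * 2, 50, 4)  # 1000 inputs (x + y sums), 4 one-hot outputs
trainer = BackpropTrainer(net, ds)
trainer.trainEpochs(10)

with open("nn.pkl", "wb") as f:
    pickle.dump([net], f)  # load_nn() below expects nn[0] to be the network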
class Vision():
    rawSize = (640, 480)

    def __init__(self, world, filenames=None, simulator=None, once=False, headless=False):
        logging.info('Initialising vision')
        self.headless = headless
        self.capture = Capture(self.rawSize, filenames, once)
        self.threshold = threshold.AltRaw()
        self.threshold = threshold.PrimaryRaw()
        self.world = world
        self.simulator = simulator
        self.initComponents()
        self.times = []
        self.N = 0
        logging.debug('Vision initialised')

    def initComponents(self, crop=None):
        undistort = False
        self.pre = Preprocessor(self.rawSize, self.threshold, undistort, crop=crop)
        self.featureEx = FeatureExtraction(self.pre.cropSize)
        self.gui = GUI(self.world, self.pre.cropSize, self.threshold, self)
        self.world.setResolution(self.pre.cropSize)

    def formatTime(self, t):
        return time.strftime('%H:%M:%S', time.localtime(t)) \
            + ('%.3f' % (t - math.floor(t)))[1:]  # discard leading 0

    def processFrame(self):
        startTime = time.time()
        logging.debug("Frame %d at %s", self.N, self.formatTime(startTime))
        self.N += 1

        logging.debug("Capturing a frame")
        frame = self.capture.getFrame()
        logging.debug("Entering preprocessing")
        standard = self.pre.get_standard_form(frame)
        logging.debug("Entering feature extraction")
        ents = self.featureEx.features(standard, self.threshold)
        logging.debug("Detected entities: %s", ents)
        logging.debug("Entering World")
        self.world.update(startTime, ents)

        logging.debug("Updating GUI")
        if not self.headless:
            try:
                self.gui.updateWindow('raw', frame)
                self.gui.updateWindow('standard', standard)
                self.gui.draw(ents, startTime)
            except Exception, e:
                logging.error("GUI failed: %s", e)
                raise

        endTime = time.time()
        self.times.append(endTime - startTime)
def __init__(self, pitchnum, stdout, sourcefile, resetPitchSize, noGui, debug_window, pipe):
    self.noGui = noGui
    self.lastFrameTime = self.begin_time = time.time()
    self.processed_frames = 0

    self.running = True
    self.stdout = stdout
    self.pipe = pipe

    if sourcefile is None:
        self.camera = Camera()
    else:
        self.filetype = 'video'
        if sourcefile.endswith(('jpg', 'png')):
            self.filetype = 'image'

    self.gui = Gui(self.noGui)
    self.threshold = Threshold(pitchnum)
    self.thresholdGui = ThresholdGui(self.threshold, self.gui)
    self.preprocessor = Preprocessor(resetPitchSize)
    self.features = Features(self.gui, self.threshold)

    # if self.debug_window:
    #     self.debug_window = DebugWindow()
    # else:
    #     self.debug_window = None

    calibrationPath = os.path.join('calibration', 'pitch{0}'.format(pitchnum))
    self.camera.loadCalibration(os.path.join(sys.path[0], calibrationPath))

    eventHandler = self.gui.getEventHandler()
    eventHandler.addListener('q', self.quit)

    # Ugly stuff for smoothing coordinates - should probably move it
    self._pastSize = 5
    self._pastCoordinates = {
        'yellow': [(0, 0)] * self._pastSize,
        'blue': [(0, 0)] * self._pastSize,
        'ball': [(0, 0)] * self._pastSize
    }
    self._pastAngles = {
        'yellow': [1.0] * self._pastSize,
        'blue': [1.0] * self._pastSize
    }

    while self.running:
        if self.preprocessor.hasPitchSize:
            self.outputPitchSize()
            self.gui.setShowMouse(False)
        else:
            eventHandler.setClickListener(self.setNextPitchCorner)

        while self.running:
            self.doStuff()
def main():
    p = optparse.OptionParser()
    p.add_option('-r', action='store_true', dest="redo", default=False)
    opts, args = p.parse_args()

    output_file = ''
    if len(args) == 1:
        fileName = args[0]
    elif len(args) == 2:
        fileName = args[0]
        output_file = args[1]
    elif not args:
        sys.stderr.write("Error: please specify a file name\n")
        raise SystemExit(1)
    elif len(args) > 2:
        sys.stderr.write("Error: too many arguments\n")
        raise SystemExit(1)

    # split the sentences
    processor = Preprocessor(fileName)
    sentences = processor.getSentences()

    # create the likelihood table, prior probability table and so on
    if opts.redo or not (os.path.isfile("likelihood.pkl") and
                         os.path.isfile("prior_prob.pkl") and
                         os.path.isfile("tags.pkl") and
                         os.path.isfile("vocabulary.pkl")):
        viterbi_util.compute_table("training.pos")

    # run the Viterbi algorithm
    viterbi = Viterbi()
    output = []
    for sentence in sentences:
        tag_seq = viterbi.go(sentence)
        output.append((sentence, tag_seq))

    # write the result into a file
    viterbi_util.write_out(output, output_file)
def __init__(self, world, filename=None, simulator=None):
    logging.info('Initialising vision')
    if simulator:
        self.capture = SimCapture(simulator)
    else:
        self.capture = MPlayerCapture(self.rawSize, filename)
        # self.capture = Capture(self.rawSize, filename)

    self.threshold = threshold.PrimaryRaw()
    self.pre = Preprocessor(self.rawSize, self.threshold, simulator)
    self.featureEx = FeatureExtraction(self.pre.cropSize)
    self.interpreter = Interpreter()
    self.world = world
    self.gui = GUI(world, self.pre.cropSize, self.threshold)
    self.histogram = Histogram(self.pre.cropSize)
    self.times = []
    self.N = 0

    debug.thresholdValues(self.threshold.Tfg, self.gui)
    logging.debug('Vision initialised')
import yaml

from repository import MongoRepository
from preprocess import Preprocessor

if __name__ == '__main__':
    config = yaml.safe_load(open("config.yml"))
    data_dir = config['data_dir']
    mongo_connection = config['mongo_connection']

    mongo_repository = MongoRepository(mongo_connection)
    preprocessor = Preprocessor()

    mongo_repository.process_questions(
        source_collection=mongo_repository.questions,
        target_collection=mongo_repository.preprocessed_questions,
        processor=preprocessor)
record_suffix = 'axial_t2_only'

# Load data
abnormal_cases = list(range(70))
healthy_cases = list(range(100))
metadata = Metadata(data_path, label_path, abnormal_cases, healthy_cases, dataset_tag='')
# metadata = Metadata(data_path, label_path, abnormal_cases, healthy_cases, dataset_tag=' cropped')

print('Loading images...')
for patient in metadata.patients:
    print(f'Loading patient {patient.get_id()}')
    patient.load_image_data()

# Preprocess data
preprocessor = Preprocessor(constant_volume_size=reference_size)
metadata.patients = preprocessor.process(metadata.patients,
                                         ileum_crop=False,
                                         region_grow_crop=True,
                                         statistical_region_crop=True)

# Serialise data into TF Records
record_generator = TFRecordGenerator(record_out_path, record_suffix)
# record_generator.generate_train_test(test_proportion, metadata.patients)
record_generator.generate_cross_folds(k, metadata.patients)
print('Done')
def preprocess():
    ''' call preprocessor to process raw data '''
    preprocessor = Preprocessor()
    preprocessor.preprocess(input_file_name)
def generic_function(frame_readers, area, session_id, detector, face_extractor,
                     matcher, register_commands, sent_msg_queue):
    '''
    This is the main function
    '''
    print("Area: {}".format(area))
    print('Thread {} created'.format(session_id))

    frame_counter = 0
    tracker_manager = TrackerManager(area)
    # clear_tracking_folder()
    # if Config.Matcher.CLEAR_SESSION:
    #     clear_session_folder()
    if not os.path.exists(Config.SEND_RBMQ_DIR):
        os.mkdir(Config.SEND_RBMQ_DIR)

    preprocessor = Preprocessor()
    # matcher = KdTreeMatcher()
    # matcher._match_case = 'TCH'
    # face_extractor = components['face_ext']
    # detector = components['detector']
    # face_rec_graph = FaceGraph()
    # detector = MTCNNDetector(face_rec_graph)
    # face_cascade = components['face_cascade']
    # eye_detector = components['eye_detector']
    # mouth_detector = components['mouth_detector']
    frame_reader = frame_readers[session_id]
    register_command = register_commands[session_id]
    if Config.CALC_FPS:
        start_time = time.time()
    unavailable_counter = time.time()
    last_labels = 'empty'
    # matcher.build(Config.REG_IMAGE_FACE_DICT_FILE)

    try:
        while True:
            try:
                reg_msg_list = register_command.get(False)
            except queue.Empty:
                reg_msg_list = None

            if reg_msg_list is not None:
                print(reg_msg_list)
                update_message = '{}|register_ko|Register Fail'.format(session_id)
                person_id = reg_msg_list[0]  # .lower()
                file_url_msg = reg_msg_list[1]
                list_of_reg_trackers = TrackerManager(area)
                frame_counter = 0
                saved_frames = save_frames(file_url_msg)
                if saved_frames == []:
                    print("save frames is None")
                    update_message = '{}|register_ko|Empty Source or Invalid Format'.format(session_id)
                else:
                    print('Detecting Faces and Extracting Features ...')
                    saved_frames.reverse()
                    for frame in saved_frames:
                        list_of_reg_trackers.update_dlib_trackers(frame)
                        origin_bbs, points = detector.detect_face(frame)
                        if origin_bbs is None:
                            print('not detect face on frame')
                            break
                        for i, origin_bb in enumerate(origin_bbs):
                            if is_inner_of_range(origin_bb, frame.shape):
                                continue
                            display_face, str_padded_bbox = CropperUtils.crop_display_face(frame, origin_bb)
                            cropped_face = CropperUtils.crop_face(frame, origin_bb)

                            # Calculate embedding
                            preprocessed_image = preprocessor.process(cropped_face)
                            emb_array, _ = face_extractor.extract_features(preprocessed_image)

                            face_info = FaceInfo(origin_bb, emb_array, frame_counter,
                                                 display_face, str_padded_bbox)
                            matched_track_id = list_of_reg_trackers.track(face_info)
                            list_of_reg_trackers.update(matched_track_id, frame, face_info)
                        frame_counter += 1
                        if frame_counter > 601:
                            break

                    if list_of_reg_trackers.current_trackers != {}:
                        embs, lbls, result_status = extract_images(
                            list_of_reg_trackers.current_trackers, person_id)
                        if result_status == 'ok':
                            matcher.update(embs, lbls)
                            registered_ids = set(lbls)
                            registered_msg = ', '.join(registered_ids)
                            # send message to rb
                            update_message = '{}|register_ok|Registered {}'.format(
                                session_id, registered_msg)
                            print('REGISTER DONEEEEEEEEEEEEE\n')
                        elif result_status == 'many_faces':
                            print('REGISTER ERROR: Many faces or your head turns too fast')
                            # send message to rb
                            update_message = '{}|register_ko|Many faces in the sequence'.format(session_id)
                        elif result_status == 'not_good':
                            update_message = '{}|register_ko|Not enough faces registered'.format(session_id)
                        else:
                            print('REGISTER ERROR')
                            # send message to rb
                            update_message = '{}|register_ko|Register Error'.format(session_id)
                    else:
                        print('No tracker found')
                        update_message = '{}|register_ko|No Face Detected'.format(session_id)

                sent_msg_queue.put(('{}-status'.format(Config.DEMO_FOR), update_message))
                frame_reader.clear()

            # LIVE MODE
            frame = frame_reader.next_frame()
            if frame is None:
                if time.time() - unavailable_counter >= Config.TIME_KILL_NON_ACTIVE_PROCESS:
                    if register_commands[session_id].empty():
                        frame_readers.pop(session_id, None)
                        register_commands.pop(session_id, None)
                        return
                time.sleep(1)
                tracker_manager.find_and_process_end_track()
                # print('Waiting for new frame')
                continue

            unavailable_counter = time.time()
            print("Frame ID: %d" % frame_counter)
            fps_counter = time.time()

            tracker_manager.update_dlib_trackers(frame)

            if frame_counter % Config.Frame.FRAME_INTERVAL == 0:
                # display_frame = frame
                print(Config.Frame.FRAME_INTERVAL)
                detector.detect_face(frame)
                origin_bbs, points = detector.detect_face(frame)
                for i, origin_bb in enumerate(origin_bbs):
                    bb_size = calc_bb_percentage(origin_bb, frame.shape)
                    # print(bb_size)
                    if (is_inner_of_range(origin_bb, frame.shape) and
                            calc_bb_percentage(origin_bb, frame.shape) > Config.Track.BB_SIZE):
                        continue

                    display_face, str_padded_bbox = CropperUtils.crop_display_face(frame, origin_bb)
                    cropped_face = CropperUtils.crop_face(frame, origin_bb)
                    print('pass Crop Utils')

                    # Calculate embedding
                    preprocessed_image = preprocessor.process(cropped_face)
                    emb_array, _ = face_extractor.extract_features(preprocessed_image)
                    print('calculated embedding')

                    # TODO: refactor matching_detected_face_with_trackers
                    face_info = FaceInfo(origin_bb, emb_array, frame_counter,
                                         display_face, str_padded_bbox)
                    matched_track_id = tracker_manager.track(face_info)
                    tracker_manager.update(matched_track_id, frame, face_info)
                    tracker_manager.check_and_recognize_tracker(
                        matcher, matched_track_id, short_term_add_new=False)

                    matched_tracker = tracker_manager.current_trackers[matched_track_id]
                    if matched_tracker.face_id.startswith('TCH-{}'.format(area)):
                        matched_tracker.face_id = Config.Matcher.NEW_FACE

                    print('update trackers list')
                    if tracker_manager.current_trackers[matched_track_id].face_id == last_labels:
                        continue
                    last_labels = tracker_manager.current_trackers[matched_track_id].face_id

                    image_id = '{}_{}_{}.jpg'.format(
                        tracker_manager.current_trackers[matched_track_id].face_id,
                        time.time(), frame_counter)
                    img_dir = os.path.join(Config.SEND_RBMQ_DIR, image_id)
                    misc.imsave(img_dir, display_face)
                    face_msg = '|'.join([
                        session_id,
                        tracker_manager.current_trackers[matched_track_id].face_id,
                        'images/' + img_dir.split('/')[-1]
                    ])
                    if not Config.Matcher.NEW_FACE in face_msg:
                        # rabbit_mq.send('{}-result'.format(Config.DEMO_FOR), face_msg)
                        sent_msg_queue.put(('{}-result'.format(Config.DEMO_FOR), face_msg))
                    if matched_tracker.face_id == Config.Matcher.NEW_FACE:
                        tracker_manager.current_trackers.pop(matched_track_id, None)

                    # draw frame
                    # display_frame = draw_img(
                    #     display_frame,
                    #     origin_bb,
                    #     str(bb_size)
                    #     # track_manager.current_trackers[matched_track_id].face_id
                    # )
                # display_frame = cv2.cvtColor(display_frame, cv2.COLOR_RGB2BGR)
                # display_frame = cv2.resize(display_frame, (1280, 720))
                # cv2.imshow("FACE TRACKING SYSTEM {}".format(session_id), display_frame)
                # key = cv2.waitKey(1)
                # if key & 0xFF == ord('q'):
                #     break

            tracker_manager.find_and_process_end_track()
            frame_counter += 1
            if Config.CALC_FPS:
                print("FPS: %f" % (1 / (time.time() - fps_counter)))
    except KeyboardInterrupt:
        print('Keyboard Interrupt !!! Release All !!!')
        tracker_manager.long_term_history.check_time(matcher)
        if Config.CALC_FPS:
            print('Time elapsed: {}'.format(time.time() - start_time))
            print('Avg FPS: {}'.format((frame_counter + 1) / (time.time() - start_time)))
from models import *
from preprocess import Preprocessor
import sys
import time

saved_model = './weights/adem_model.pkl'

if __name__ == '__main__':
    time_start = time.time()
    pp = Preprocessor()
    adem = ADEM(pp, None, saved_model)

    contexts = ['</s> <first_speaker> hello . how are yours today ? </s>']
    # '</s> <first_speaker> i love starbucks coffee </s>',
    # '</s> <first_speaker> photo to see my television debut go to - some. some on- hehe! </s> <second_speaker> it really was you? i thought ppl were recognizing someone who looked like you! were the oysters worth the wait? </s>']
    true = ['</s> <second_speaker> i am fine . thanks </s>']
    # '</s> <second_speaker> i like their latte </s>',
    # "</s> <first_speaker> yeah it was me . haha i'd kinda forgotten about it it was filmed a while ago </s>"]
    model = ['</s> <second_speaker> i am fine . thanks </s>']
    # '</s> <second_speaker> I want to play golf . </s>',
    # "</s> <first_speaker> i'm not sure. i just don't know what to do with it. </s>"]

    print 'Model Loaded!'
    print adem.get_scores(contexts, true, model)
    time_end = time.time()
    print time_end - time_start
def test_transform_date(self):
    dask_data = dd.read_csv('data_duplicate.csv')
    x = Preprocessor(['feat1', 'feat2', 'feat3'], 'target', dask_data, ['0', '1'],
                     categorical_features=['feat4'])
    x.execute(duplicates_invalid=True, missing=True, scale=True, transform=True,
              encode_target=False, train=True)
    expected_output_dict = {
        'target': {0: '0', 1: '1', 2: '0', 6: '1', 7: '0', 8: '1', 9: '1', 10: '0'},
        'feat1': {0: -1.043, 1: -0.209, 2: -1.043, 6: 0.626, 7: -0.209, 8: -0.209, 9: -0.209, 10: 2.294},
        'feat2': {0: -0.954, 1: 0.867, 2: -0.954, 6: -0.347, 7: -0.954, 8: 0.26, 9: 0.0, 10: 2.081},
        'feat3': {0: -0.632, 1: 0.0, 2: 0.0, 6: 2.53, 7: -0.632, 8: 0.0, 9: -0.632, 10: -0.632},
        'feat4': {0: 'a', 1: 'Other', 2: 'b', 6: 'a', 7: 'b', 8: 'c', 9: 'c', 10: 'a'}
    }
    self.assertEqual(expected_output_dict, x.df.round(3).head(8).to_dict())
class Vision:
    def __init__(self, pitchnum, stdout, sourcefile, resetPitchSize):
        self.running = True
        self.connected = False
        self.stdout = stdout

        if sourcefile is None:
            self.cap = Camera()
        else:
            filetype = 'video'
            if sourcefile.endswith(('jpg', 'png')):
                filetype = 'image'
            self.cap = VirtualCamera(sourcefile, filetype)

        calibrationPath = os.path.join('calibration', 'pitch{0}'.format(pitchnum))
        self.cap.loadCalibration(os.path.join(sys.path[0], calibrationPath))

        self.gui = Gui()
        self.threshold = Threshold(pitchnum)
        self.thresholdGui = ThresholdGui(self.threshold, self.gui)
        self.preprocessor = Preprocessor(resetPitchSize)
        self.features = Features(self.gui, self.threshold)

        eventHandler = self.gui.getEventHandler()
        eventHandler.addListener('q', self.quit)

        while self.running:
            try:
                if not self.stdout:
                    self.connect()
                else:
                    self.connected = True

                if self.preprocessor.hasPitchSize:
                    self.outputPitchSize()
                    self.gui.setShowMouse(False)
                else:
                    eventHandler.setClickListener(self.setNextPitchCorner)

                while self.running:
                    self.doStuff()
            except socket.error:
                self.connected = False
                # If the rest of the system is not up yet/gets quit,
                # just wait for it to come available.
                time.sleep(1)
                # Strange things seem to happen to X sometimes if the
                # display isn't updated for a while
                self.doStuff()

        if not self.stdout:
            self.socket.close()

    def connect(self):
        print("Attempting to connect...")
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.connect((HOST, PORT))
        self.connected = True

    def quit(self):
        self.running = False

    def doStuff(self):
        if self.cap.getCameraMatrix is None:
            frame = self.cap.getImage()
        else:
            frame = self.cap.getImageUndistort()

        frame = self.preprocessor.preprocess(frame)
        self.gui.updateLayer('raw', frame)

        ents = self.features.extractFeatures(frame)
        self.outputEnts(ents)

        self.gui.loop()

    def setNextPitchCorner(self, where):
        self.preprocessor.setNextPitchCorner(where)

        if self.preprocessor.hasPitchSize:
            print("Pitch size: {0!r}".format(self.preprocessor.pitch_size))
            self.outputPitchSize()
            self.gui.setShowMouse(False)
            self.gui.updateLayer('corner', None)
        else:
            self.gui.drawCrosshair(where, 'corner')

    def outputPitchSize(self):
        print(self.preprocessor.pitch_size)
        self.send('{0} {1} {2} \n'.format(
            PITCH_SIZE_BIT, self.preprocessor.pitch_size[0], self.preprocessor.pitch_size[1]))

    def outputEnts(self, ents):
        # Messyyy
        if not self.connected or not self.preprocessor.hasPitchSize:
            return

        self.send("{0} ".format(ENTITY_BIT))

        for name in ['yellow', 'blue', 'ball']:
            entity = ents[name]
            x, y = entity.coordinates()

            # The rest of the system needs (0, 0) at the bottom left
            if y != -1:
                y = self.preprocessor.pitch_size[1] - y

            if name == 'ball':
                self.send('{0} {1} '.format(x, y))
            else:
                angle = 360 - (((entity.angle() * (180 / math.pi)) - 360) % 360)
                self.send('{0} {1} {2} '.format(x, y, angle))

        self.send(str(int(time.time() * 1000)) + " \n")

    def send(self, string):
        if self.stdout:
            sys.stdout.write(string)
        else:
            self.socket.send(string)
def test_remove_missing_values(self):
    dask_data = dd.read_csv('data_duplicate.csv')
    x = Preprocessor(['feat1', 'feat2', 'feat3'], 'target', dask_data, ['0', '1'],
                     categorical_features=['feat4'])
    x.execute(duplicates_invalid=True, missing=True, scale=False, transform=False,
              encode_target=False, train=True)
    expected_output_dict = {
        'target': {0: '0', 1: '1', 2: '0', 6: '1', 7: '0', 8: '1', 9: '1', 10: '0'},
        'feat1': {0: 1, 1: 2, 2: 1, 6: 3, 7: 2, 8: 2, 9: 2, 10: 5},
        'feat2': {0: 2.0, 1: 5.0, 2: 2.0, 6: 3.0, 7: 2.0, 8: 4.0, 9: 3.571, 10: 7.0},
        'feat3': {0: 3.0, 1: 3.2, 2: 3.2, 6: 4.0, 7: 3.0, 8: 3.2, 9: 3.0, 10: 3.0},
        'feat4': {0: 'a', 1: 'Other', 2: 'b', 6: 'a', 7: 'b', 8: 'c', 9: 'c', 10: 'a'}
    }
    self.assertEqual(expected_output_dict, x.df.round(3).head(8).to_dict())
                         args.epochs)
savefile_path = os.path.join(args.save, savefile)
logfile_path = os.path.join(args.save, logfile)

torch.manual_seed(1)
if torch.cuda.is_available():
    torch.cuda.manual_seed(1)

### Load preprocessed data
pprint(logfile_path, '=' * 89)
pprint(logfile_path, 'preprocessing data...')
pprint(logfile_path, '=' * 89)
glove_path = "./data/glove.840B.300d.txt"
preprocessor = Preprocessor(datapath)
train_data = Corpus(os.path.join(datapath, 'train.dat'))
valid_data = Corpus(os.path.join(datapath, 'valid.dat'))
test_data = Corpus(os.path.join(datapath, 'test.dat'))

### Build model
pprint(logfile_path, '=' * 89)
pprint(logfile_path, 'building model...')
pprint(logfile_path, '=' * 89)
n = len(preprocessor.vocab)
if args.model == 'LSTM':
    model = LSTMModel(n, args.e_dim, args.h_dim, args.dropout, False)
elif args.model == 'bi-LSTM':
    model = LSTMModel(n, args.e_dim, args.h_dim, args.dropout, True)
def main(args):
    model = get_model(args)
    if args.task == 'validate':
        X_Train = load_csv(args.train_X)
        T_Train = load_csv(args.train_T).flatten()
        X_Train_phi, phi = preprocess(args, X_Train, T_Train)

        logging.info('Training')
        model.validate(X_Train_phi, T_Train, params=get_param_validate(args))
    elif args.task == 'train':
        X_Train = load_csv(args.train_X)
        T_Train = load_csv(args.train_T).flatten()
        X_Train_phi, phi = preprocess(args, X_Train, T_Train)

        inds = range(len(X_Train))
        np.random.shuffle(inds)
        X_Train_phi = X_Train_phi[inds]
        T_Train = T_Train[inds]

        logging.info('Training')
        model.train(X_Train_phi, T_Train, param=get_param(args))
        train_acc = model.eval(X_Train_phi, T_Train)
        logging.info('Training Accuracy = %f' % train_acc)

        if args.test_X != None and args.test_T != None:
            X_Test = load_csv(args.test_X)
            T_Test = load_csv(args.test_T).flatten()
            X_Test_phi = phi.transform(X_Test)
            test_acc = model.eval(X_Test_phi, T_Test)
            logging.info('Testing Accuracy = %f' % test_acc)
            print(test_acc)

        if args.save != None:
            model.save('%s' % args.save)
            logging.info('Model saved at %s' % args.save)
            phi.save('%s' % args.save + '_phi')
            logging.info('Model preprocessor saved at %s' % args.save + '_phi')
    elif args.task == 'plot':
        model.load(args.load)
        logging.info('Model loaded from %s' % args.load)

        logging.info('Plotting')
        l_tree = model.model.estimators_
        plot_decision_tree(l_tree, args.dot)
    elif args.task == 'dt_eval':
        phi = Preprocessor()
        phi.load(args.load + '_phi')
        X_Test = load_csv(args.test_X)
        T_Test = load_csv(args.test_T).flatten()
        X_Test_phi = phi.transform(X_Test)

        model.load(args.load)
        logging.info('Model loaded from %s' % args.load)

        logging.info('Decision Tree Evaluating')
        l_tree = model.model.estimators_
        for tree in l_tree:
            test_acc = tree.score(X_Test_phi, T_Test)
            print('%f' % test_acc)
def get_data_generator(args, model_args, mappings, schema):
    from cocoa.core.scenario_db import ScenarioDB
    from cocoa.core.dataset import read_dataset, EvalExample
    from cocoa.core.util import read_json
    from core.scenario import Scenario
    from core.price_tracker import PriceTracker
    from core.slot_detector import SlotDetector
    from retriever import Retriever
    from preprocess import DataGenerator, LMDataGenerator, EvalDataGenerator, Preprocessor
    import os.path

    # TODO: move this to dataset
    if args.eval:
        dataset = []
        for path in args.eval_examples_paths:
            dataset.extend([EvalExample.from_dict(schema, e) for e in read_json(path)])
    else:
        dataset = read_dataset(args, Scenario)

    lexicon = PriceTracker(model_args.price_tracker_model)
    slot_detector = SlotDetector(slot_scores_path=model_args.slot_scores)

    # Model config tells data generator which batcher to use
    model_config = {}
    if args.retrieve or model_args.model in ('ir', 'selector'):
        model_config['retrieve'] = True
    if args.predict_price:
        model_config['price'] = True

    # For retrieval-based models only: whether to add the ground truth response to the candidates
    if model_args.model in ('selector', 'ir'):
        if 'loss' in args.eval_modes and 'generation' in args.eval_modes:
            print '"loss" requires ground truth response to be added to the candidate set. Please evaluate "loss" and "generation" separately.'
            raise ValueError
        if (not args.test) or args.eval_modes == ['loss']:
            add_ground_truth = True
        else:
            add_ground_truth = False
        print 'Ground truth response {} be added to the candidate set.'.format(
            'will' if add_ground_truth else 'will not')
    else:
        add_ground_truth = False

    # TODO: hacky
    if args.model == 'lm':
        DataGenerator = LMDataGenerator

    if args.retrieve or args.model in ('selector', 'ir'):
        retriever = Retriever(args.index,
                              context_size=args.retriever_context_len,
                              num_candidates=args.num_candidates)
    else:
        retriever = None

    preprocessor = Preprocessor(schema, lexicon,
                                model_args.entity_encoding_form,
                                model_args.entity_decoding_form,
                                model_args.entity_target_form,
                                slot_filling=model_args.slot_filling,
                                slot_detector=slot_detector)

    trie_path = os.path.join(model_args.mappings, 'trie.pkl')

    if args.eval:
        data_generator = EvalDataGenerator(dataset, preprocessor, mappings, model_args.num_context)
    else:
        if args.test:
            model_args.dropout = 0
            train, dev, test = None, None, dataset.test_examples
        else:
            train, dev, test = dataset.train_examples, dataset.test_examples, None

        data_generator = DataGenerator(train, dev, test, preprocessor, schema, mappings,
                                       retriever=retriever,
                                       cache=args.cache,
                                       ignore_cache=args.ignore_cache,
                                       candidates_path=args.candidates_path,
                                       num_context=model_args.num_context,
                                       trie_path=trie_path,
                                       batch_size=args.batch_size,
                                       model_config=model_config,
                                       add_ground_truth=add_ground_truth)

    return data_generator
#!/usr/bin/env python
#-*- encoding:utf-8 -*-
import sys, os

from preprocess import Preprocessor
from features import FeatureSelector
from bayes import BayesClassifier

if __name__ == '__main__':
    train_file = sys.argv[1]
    test_file = sys.argv[2]

    pr = Preprocessor()
    pr.build_vocabulary_and_categories(train_file)

    fs = FeatureSelector(train_file, ck=500)
    fs.select_features()

    bc = BayesClassifier(train_file, test_file, model='bernoulli')
    bc.train()
    bc.test()
import os
import unittest

from preprocess import Preprocessor

path = os.getcwd() + "/glove_twitter/glove_twitter_200d_clean.txt"
preprocessor = Preprocessor(path=path, max_length_dictionary=None)


class tweet_test(unittest.TestCase):
    def setUp(self):
        self.text = "@BTS_twt: We met @torikelly @iambeckyg @ciara https://t.co/j7jXeTHc4A"
        return

    def test_clean(self):
        expected_result = " we met"
        result = preprocessor.clean_text(self.text)
        self.assertEqual(result, expected_result)

    def test_tokenizer(self):
        expected_result = ['met']
        result_1 = preprocessor.tokenize_text(preprocessor.clean_text(self.text))
        self.assertEqual(result_1, expected_result)

    def test_replace(self):
        expected_result = [517]
        result_2 = preprocessor.replace_token_with_index(
            preprocessor.tokenize_text(preprocessor.clean_text(self.text)),
            preprocessor.embeddingMap
        )
        self.assertEqual(result_2, expected_result)

    def test_padsequence(self):
class QueryProcessor:
    """
    Class which contains methods to process the query and return the results
    """

    def __init__(self):
        self.prep = Preprocessor()
        self.genesis_ic = wn.ic(genesis, False, 0.0)

    def get_docs(self, query):
        """
        Retrieve the mongodb objects of the query word that contain the inverted
        index list along with the tf of that word. idf is also calculated and stored.

        Args:
            query (list): The preprocessed search query as a list of words.

        Returns:
            dict: key is the query word and the value is an object with the
                word's idf and the inverted index list.
        """
        data = {}
        tot_docs = Doc.objects().count()
        for word in query:
            ind = Index.objects(key=word).first()
            if not ind:
                continue
            data[word] = {
                "idf": math.log(tot_docs / len(ind.documents), 10),  # calculate idf of the query word
                "docs": ind.documents,  # Documents which contain the word
            }
        return data

    def jc_sim(self, sent, ref_words):
        """Calculate the similarity score between the query and a sentence of the document

        Args:
            sent (str): Sentence from the document
            ref_words: Preprocessed query

        Returns:
            int: Similarity score between the sentence and the query
        """
        sim = 0
        words = self.prep.preprocess(sent)
        if len(words) < 5:
            return 0
        for w in words:
            maxi = 0
            for w1 in wn.synsets(w):
                for t in ref_words:
                    for w2 in wn.synsets(t):
                        if (w1._pos in ("n", "v", "a", "r")
                                and w2._pos in ("n", "v", "a", "r")
                                and w1._pos == w2._pos):
                            # calculate Jiang-Conrath similarity between two words
                            n = w1.jcn_similarity(w2, self.genesis_ic)
                            if w1 == w2 or n > 1:
                                maxi += 10
                            else:
                                maxi = max(maxi, n)
            sim += maxi
        return sim / max(len(ref_words), len(words))

    def fetch_top_n(self, query, n=5):
        """
        Fetch the best n documents out of all based on the tf-idf score.

        Args:
            query (str): Pre-processed query
            n (int): The number of relevant documents to be fetched

        Returns:
            list: The best n documents based on tf-idf score.
        """
        all_docs = self.get_docs(query)
        ranks = defaultdict(int)
        for word, data in all_docs.items():
            for d in data["docs"]:
                ranks[d.doc] += d.tf * data["idf"]
        ranks = sorted(ranks.items(), key=lambda kv: -kv[1])
        return list(ranks)[:n]

    def process_query(self, query):
        """
        Computes and retrieves the result of the query

        Args:
            query (str): The search query given by the user.

        Returns:
            list: It contains the document paths and the best 5 sentences for
                the corresponding document.
        """
        query = self.prep.preprocess(query)
        ranks = self.fetch_top_n(query)
        ans = []
        for r in ranks:
            file_path = Path(r[0].file_path)
            # print(file_path.name, file_path.parent.parent.parent)
            new_path = file_path.with_suffix(".json")
            new_path = get_real_path(new_path)
            with open(new_path, "r") as f:
                data = set(json.load(f)["sentences"])
            sen = tuple((self.jc_sim(s, query), s) for s in data)
            best = tuple(sorted(sen, key=lambda x: -x[0]))[:5]  # Slice top five sentences
            ans.append((file_path, best))
        return ans
def __init__(self):
    self.prep = Preprocessor()
    self.genesis_ic = wn.ic(genesis, False, 0.0)
def test_remove_missing_indices(self):
    arr = [1, 2, 3]
    p = Preprocessor(np.array(arr))
    self.assertTrue(p.remove_missing_indices(arr) == -1)
class Vision():
    # rawSize = (768, 576)
    rawSize = (640, 480)
    # Whether to 'crash' when something non-critical like the GUI fails
    debug = True

    def __init__(self, world, filename=None, simulator=None, once=False, headless=False):
        logging.info('Initialising vision')
        if simulator:
            self.capture = SimCapture(simulator)
        else:
            self.capture = Capture(self.rawSize, filename, once)

        self.headless = headless
        self.threshold = threshold.AltRaw()
        self.pre = Preprocessor(self.rawSize, self.threshold, simulator)
        self.featureEx = FeatureExtraction(self.pre.cropSize)
        self.interpreter = Interpreter()
        self.world = world
        self.gui = GUI(world, self.pre.cropSize, self.threshold)
        self.histogram = Histogram(self.pre.cropSize)
        self.times = []
        self.N = 0

        # debug.thresholdValues(self.threshold.Tblue, self.gui)
        logging.debug('Vision initialised')

    def formatTime(self, t):
        return time.strftime('%H:%M:%S', time.localtime(t)) \
            + ('%.3f' % (t - math.floor(t)))[1:]  # discard leading 0

    def processFrame(self):
        startTime = time.time()
        logging.debug("Frame %d at %s", self.N, self.formatTime(startTime))
        self.N += 1

        logging.debug("Capturing a frame")
        frame = self.capture.getFrame()
        logging.debug("Entering preprocessing")
        standard = self.pre.get_standard_form(frame)
        bgsub_vals, bgsub_mask = self.pre.bgsub(standard)

        logging.debug("Entering feature extraction")
        hist_props_bgsub = self.histogram.calcHistogram(standard)
        hist_props_abs = self.histogram.calcHistogram(bgsub_vals)
        self.threshold.updateBGSubThresholds(hist_props_bgsub)
        # self.threshold.updateAbsThresholds(hist_props_abs)

        ents = self.featureEx.features(bgsub_vals, self.threshold)
        logging.debug("Detected entities: %s", ents)
        logging.debug("Entering interpreter")
        self.interpreter.interpret(ents)
        logging.debug("Entering World")
        self.world.update(startTime, ents)

        logging.debug("Updating GUI")
        if not self.headless:
            try:
                bgsub = self.pre.remove_background(standard)
                self.gui.updateWindow('raw', frame)
                self.gui.updateWindow('mask', bgsub_mask)
                self.gui.updateWindow('foreground', bgsub_vals)
                self.gui.updateWindow('bgsub', bgsub)
                self.gui.updateWindow('standard', standard)

                canny = cv.CreateImage(self.pre.cropSize, 8, 1)
                # adaptive = cv.CreateImage(self.pre.cropSize, 32, 3)
                # tmp = cv.CreateImage(self.pre.cropSize, 8, 3)
                # cv.Convert(standard, adaptive)
                cv.CvtColor(bgsub, canny, cv.CV_BGR2GRAY)
                cv.Threshold(canny, canny, 150, 255, cv.CV_THRESH_OTSU)
                # cv.Threshold(canny, canny, 100, 255, cv.CV_ADAPTIVE_THRESH_GAUSSIAN_C)
                # cv.Sobel(adaptive, adaptive, 1, 1, 1)
                # cv.Convert(adaptive, tmp)
                # cv.ConvertScale(tmp, tmp, 10)
                # cv.CvtColor(tmp, canny, cv.CV_BGR2GRAY)
                # cv.Threshold(canny, canny, 50, 255, cv.CV_THRESH_BINARY)
                # cv.Canny(canny, canny, 100, 180, 3)
                cv.CvtColor(canny, bgsub, cv.CV_GRAY2BGR)
                new = self.featureEx.detectCircles(bgsub)
                self.gui.updateWindow('adaptive', canny)
                self.gui.updateWindow('new', new)

                self.gui.draw(ents, startTime)
            except Exception, e:
                logging.error("GUI failed: %s", e)
                if self.debug:
                    raise

        endTime = time.time()
        self.times.append(endTime - startTime)
import pickle

from preprocess import Preprocessor
from prepare_training_set import get_set_files
from model import PatternType


def load_nn():
    fileObj = open("nn.pkl", "r")
    nn = pickle.load(fileObj)
    fileObj.close()
    return nn


if __name__ == "__main__":
    preprocessor = Preprocessor()
    nn = load_nn()
    test_set_files = get_set_files("test_set")
    training_set_files = get_set_files("training_set")

    for pattern_type, files in test_set_files.iteritems():
        print "[INFO]: Processing {}".format(PatternType(pattern_type).name)
        for file in files:
            entry = preprocessor.preprocess(pattern_type, file)
            sums = list(entry.sums.x)
            sums.extend(entry.sums.y)
            result_nn = nn[0].activate(sums)
            max_result = max(result_nn)
            result = [1 if i == max_result else 0 for i in result_nn]
            print [file, result]
#!/usr/bin/env python
# coding: utf-8
""" Script used to analyze final cyclotron measurements"""

__author__ = 'Andreas Gsponer'
__license__ = 'MIT'

import numpy as np

from analyzeImage import analyze_image
from surface import CubicFitRotated, CubicFit
from preprocess import Preprocessor
import plots
import smoothing

P = Preprocessor(min_threshold=45, max_threshold=230, offset=150)

# corner points
# path = "../measurements/cyclotron/main_measurements/pre/"
# P.import_video(path + "/before_vacuum.mkv")

# Beam in drift space focused by the BTL
# path = "../measurements/cyclotron/main_measurements/cyclotron/focused_by_btl/2/"
# P.import_video(path + "/0_to_660_compressed.mkv")

# Beam focused by the BTL being refocused by the MBL
# path = "../measurements/cyclotron/main_measurements/cyclotron/mbl_second/2"
# P.import_video(path + "/0_to_660_compressed.mkv")

# Flat beam being focused by the MBL magnets
path = "../measurements/cyclotron/main_measurements/cyclotron/flat_beam/2/660_to_0"
#
# .. Note::
#    If you run this notebook you can train, interrupt the kernel,
#    evaluate, and continue training later. Comment out the lines where the
#    encoder and decoder are initialized and run ``trainEpochs`` again.
#

print('-' * 30, 'Starting', '-' * 30)
vocab_file = '../vocab/vocab'
tokenizer_file = '../tokenizer/src_tokenizer'
vocab = Vocab(vocab_file, 100000)
tokenizer = Tokenizer(vocab)
with open(tokenizer_file, mode='wb') as file:
    pickle.dump(tokenizer, file)

max_sequence_len = 100
p = Preprocessor(1, 'data/sentences.txt', tokenizer, max_sequence_len)
data = p.get_data()[:5000]
print('-' * 30, 'Loaded data', '-' * 30)

hidden_size = 256
encoder1 = EncoderRNN(vocab.NumIds(), hidden_size)
decoder1 = DecoderRNN(hidden_size, vocab.NumIds(), 1)

if use_cuda:
    encoder1 = encoder1.cuda()
    decoder1 = decoder1.cuda()

trainEpochs(encoder1, decoder1, 5000, p, print_every=100)

######################################################################
#
def train_model(args):
    logging.basicConfig(format='[%(asctime)s] %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)
    logging.info('Train model')
    logging.info('Loading data...')

    # Split data into training set and test set
    xs, ys, n = load_data(args.X, args.Y, shuffle=True)
    n_train = int(args.frac * n)

    # Data preprocessing
    preprocessor = Preprocessor()
    rng = abs(args.max - args.min)
    xs_n = preprocessor.normalize(xs, rng)
    xs_n_filtered = xs_n
    if args.craft:
        xs_n_filtered = filter_data(xs_n_filtered)

    # Feature extraction
    logging.info('Computing means and sigmas (%s)...' % args.pre)
    means, sigmas = get_means_sigmas(args, xs_n_filtered)
    if args.craft:
        means, sigmas = crafted_gaussian_feature(means, sigmas)

    def phi(x):
        pre = Preprocessor()
        return pre.gaussian(pre.normalize(x, rng), means, sigmas)

    logging.info('Preprocessing... (d = %d; craft-feature %d)' % (means.shape[0], args.craft))
    phi_xs = phi(xs)
    phi_xs_train, ys_train = phi_xs[:n_train], ys[:n_train]
    phi_xs_test, ys_test = phi_xs[n_train:], ys[n_train:]
    phi_dim = len(phi_xs_train[0])

    model = get_model(args, (phi_dim, ))
    logging.info('Using model %s (plot = %s)' % (args.model, args.plot))

    def f(x):
        return np.round(np.clip(model.test(sess, x), args.min, args.max))

    with tf.Session() as sess:
        logging.info('Training... (optimizer = %s)' % args.optimizer)
        if args.K <= 1:
            train_loss = train(args, sess, model, phi_xs_train, ys_train)
            logging.info('Training loss = %f' % train_loss)
            if n_train < n:
                test_loss = model.eval(sess, phi_xs_test, ys_test)
                logging.info('Testing loss = %f' % test_loss)
            if args.output is not None:
                logging.info('Save model at %s' % args.output)
                model.save_to_file(sess, args.output)
                np.save(args.output + '-mean', means)
                np.save(args.output + '-sigma', sigmas)
            if args.plot is not None:
                logging.info('Plotting... (output = %s)' % args.fig)
                if args.plot == '3d':
                    plot_3d(f, phi, args.min, args.max, args.min, args.max, 0, 1081, args.fig)
                elif args.plot == '2d':
                    plot_2d_map(f, phi, args.min, args.max, args.min, args.max)
        else:
            validation_loss = train_cross_validation(args, sess, model, phi_xs_train, ys_train)
            log_filename = args.log
            with open(log_filename, 'w') as log_file:
                log_file.write('%s\t%s\n' % (log_filename, validation_loss))
def __init__(self, w2v_path=r".\data\vi.vec"):
    self.clearner = Preprocessor()
    self.vectorizer = Vectorizer(w2v_path)
def phi(x):
    pre = Preprocessor()
    return pre.gaussian(pre.normalize(x, rng), means, sigmas)
        ndc1_als.append(n1)
        ndc10_als.append(n10)
        return np.mean(ndc1_als), np.mean(ndc10_als)

    def __get_matrix(self):
        self.train_df['userId'] = self.train_df['userId'].astype('category')
        self.train_df['movieId'] = self.train_df['movieId'].astype('category')
        ratings_matrix = sp.coo_matrix(
            (self.train_df['rating'].astype(np.float32),
             (self.train_df['movieId'].cat.codes.copy(),
              self.train_df['userId'].cat.codes.copy())))
        ratings_matrix = ratings_matrix.tocsr()
        return ratings_matrix


prep = Preprocessor('ratings.csv')
prep.process(0.4)
mdl = ALS_helper()
mdl.train()
a, b = mdl.validate()
logger.info('ndcg@1 = {}, ndcg@10 = {}'.format(a, b))
class Vision:
    def __init__(self, pitchnum, stdout, sourcefile, resetPitchSize, noGui, debug_window, pipe):
        self.noGui = noGui
        self.lastFrameTime = self.begin_time = time.time()
        self.processed_frames = 0

        self.running = True
        self.stdout = stdout
        self.pipe = pipe

        if sourcefile is None:
            self.camera = Camera()
        else:
            self.filetype = 'video'
            if sourcefile.endswith(('jpg', 'png')):
                self.filetype = 'image'

        self.gui = Gui(self.noGui)
        self.threshold = Threshold(pitchnum)
        self.thresholdGui = ThresholdGui(self.threshold, self.gui)
        self.preprocessor = Preprocessor(resetPitchSize)
        self.features = Features(self.gui, self.threshold)

        # if self.debug_window:
        #     self.debug_window = DebugWindow()
        # else:
        #     self.debug_window = None

        calibrationPath = os.path.join('calibration', 'pitch{0}'.format(pitchnum))
        self.camera.loadCalibration(os.path.join(sys.path[0], calibrationPath))

        eventHandler = self.gui.getEventHandler()
        eventHandler.addListener('q', self.quit)

        # Ugly stuff for smoothing coordinates - should probably move it
        self._pastSize = 5
        self._pastCoordinates = {
            'yellow': [(0, 0)] * self._pastSize,
            'blue': [(0, 0)] * self._pastSize,
            'ball': [(0, 0)] * self._pastSize
        }
        self._pastAngles = {
            'yellow': [1.0] * self._pastSize,
            'blue': [1.0] * self._pastSize
        }

        while self.running:
            if self.preprocessor.hasPitchSize:
                self.outputPitchSize()
                self.gui.setShowMouse(False)
            else:
                eventHandler.setClickListener(self.setNextPitchCorner)

            while self.running:
                self.doStuff()

    def quit(self):
        self.running = False
        self.pipe.send('q')

    def print_fps(self):
        thisFrameTime = time.time()
        time_diff = thisFrameTime - self.lastFrameTime
        fps = 1.0 / time_diff
        self.processed_frames = self.processed_frames + 1
        avg_fps = self.processed_frames * 1.0 / (thisFrameTime - self.begin_time)
        self.lastFrameTime = thisFrameTime
        if self.stdout:
            print("Instantaneous fps = %f Average fps = %f" % (fps, avg_fps))

    def doStuff(self):
        frame = self.camera.getImageUndistort()

        # Uncomment to see changes in barrel distortion matrix
        # calibrationPath = os.path.join('calibration', 'pitch{0}'.format(0))
        # self.camera.loadCalibration(os.path.join(sys.path[0], calibrationPath))

        frame = self.preprocessor.preprocess(frame)
        self.gui.updateLayer('raw', frame)

        ents = self.features.extractFeatures(frame)
        self.outputEnts(ents)
        self.print_fps()

        self.gui.loop()

    def setNextPitchCorner(self, where):
        self.preprocessor.setNextPitchCorner(where)

        if self.preprocessor.hasPitchSize:
            self.outputPitchSize()
            self.gui.setShowMouse(False)
            self.gui.updateLayer('corner', None)
        else:
            self.gui.drawCrosshair(where, 'corner')

    def outputPitchSize(self):
        if self.stdout:
            print("Pitch size:\t %i\t %i\n" % tuple(self.preprocessor.pitch_size))
        # if self.debug_window:
        #     self.debug_window.insert_text("Pitch size:\t %i\t %i\n" % tuple(self.preprocessor.pitch_size))
        self.pipe.send(InitSignal(self.preprocessor.pitch_size[0],
                                  self.preprocessor.pitch_size[1]))

    def addCoordinates(self, entity, coordinates):
        self._pastCoordinates[entity].pop(0)
        self._pastCoordinates[entity].append(coordinates)
        # (x, y) = coordinates
        # if the frame is bad(-1) then add the most recent coordinate instead
        # if (x != -1):
        #     self._pastCoordinates[entity].append(coordinates)
        # else:
        #     self._pastCoordinates[entity].append(self._pastCoordinates[entity][-1])

    def smoothCoordinates(self, entity):
        x = sum(map(lambda (x, _): x, self._pastCoordinates[entity])) / self._pastSize
        y = sum(map(lambda (_, y): y, self._pastCoordinates[entity])) / self._pastSize
        return (x, y)

    def addAngle(self, entity, angle):
        self._pastAngles[entity].pop(0)
        self._pastAngles[entity].append(angle)
        # if the frame is bad(-1) then add the most recent angle instead
        # good angle is always in (0,2pi), bad angle is -1, careful with real number
        # if (angle > -0.5):
        #     self._pastAngles[entity].append(angle)
        # else:
        #     self._pastAngles[entity].append(self._pastAngles[entity][-1])

    def smoothAngle(self, entity):
        # angle is periodic (of 2pi) and a bit tricky to smooth
        temp = sorted(self._pastAngles[entity])
        # if max_angle > min_angle > pi, those angles are crossing 0
        # we must add a period to the small ones
        if (temp[-1] - temp[0] > math.pi):
            temp = map(lambda angle: angle + 2*math.pi if angle < math.pi else angle, temp)
        return sum(temp) / self._pastSize

    # add/subtract a period (2pi) so angle is always in (0,2pi)
    # assume they are off by at most a period
    def standardize_angle(self, angle):
        if (angle > 2*math.pi):
            return angle - 2*math.pi
        if (angle < 0):
            return angle + 2*math.pi
        return angle

    def outputEnts(self, ents):
        # Messyyy
        if not self.preprocessor.hasPitchSize:
            return

        msg_data = []

        for name in ['yellow', 'blue', 'ball']:
            entity = ents[name]
            coordinates = entity.coordinates()

            # This is currently not needed
            # if the frame is not recognized, skip a maximum of self.max_skip times
            # if (coordinates[0] != -1):
            #     self.addCoordinates(name, coordinates)
            #     self.skip = 0
            # else:
            #     if (self.skip < self.max_skip):
            #         self.skip = self.skip + 1
            #     else:
            #         self.addCoordinates(name, coordinates)

            self.addCoordinates(name, coordinates)
            x, y = self.smoothCoordinates(name)

            # TODO: The system needs (0, 0) at top left!
            if y != -1:
                y = self.preprocessor.pitch_size[1] - y

            if name == 'ball':
                # self.send('{0} {1} '.format(x, y))
                msg_data += [int(x), int(y)]
                # print(self._pastCoordinates[name])
                # print(coordinates)
            else:
                # angle is currently clockwise, this makes it anti-clockwise
                angle = self.standardize_angle(2*math.pi - entity.angle())
                self.addAngle(name, angle)
                angle = self.standardize_angle(self.smoothAngle(name))
                msg_data += [int(x), int(y), angle]

        msg_data.append(int(time.time() * 1000))
        data = FrameData(*msg_data)

        if self.stdout:
            print("Yellow:\t %i\t %i\t Angle:\t %s\nBlue:\t %i\t %i\t Angle:\t %s\nBall:\t %i\t %i\t\nTime:\t %i\n" % tuple(msg_data))
        # if debug_window:
        #     debug_window.insert_text("Yellow:\t %i\t %i\t Angle:\t %s\nBlue:\t %i\t %i\t Angle:\t %s\nBall:\t %i\t %i\t\nTime:\t %i\n" % tuple(msg_data))

        self.pipe.send(data)
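# The wrap-around handling in smoothAngle above can be checked in isolation.
# A small standalone sketch (hypothetical helper, not part of the original
# class) that mirrors the same trick:
import math


def smooth_angles(angles):
    # As in smoothAngle: if the spread exceeds pi, the samples straddle 0,
    # so lift the small ones by a full period before averaging.
    temp = sorted(angles)
    if temp[-1] - temp[0] > math.pi:
        temp = [a + 2 * math.pi if a < math.pi else a for a in temp]
    mean = sum(temp) / len(temp)
    # Fold the mean back into (0, 2*pi), as standardize_angle does.
    return mean - 2 * math.pi if mean > 2 * math.pi else mean


# Angles just either side of 0 average to roughly 0 rather than roughly pi.
print(smooth_angles([6.2, 0.1, 0.05, 6.15, 0.2]))  # ~0.03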
# coding=utf-8
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

import jieba
from preprocess import Preprocessor
from embedding import *
import pickle

# Use case examples:
wordlist, embeds, len_words, embed_dim = get_word2vec()
write2word2vec(wordlist)

# Use an out-of-the-box dictionary
# (sample Chinese sentence: "How many strokes does the character '年' have? Stroke order code: 311212" plus assorted punctuation)
sent = u'“年”字有多少笔? 笔顺编号:311212,,??!!.。>》\、'
sentence = [word for word in jieba.cut(Preprocessor().replace_line(sent))]

p = Preprocessor()
p.load_dictionary(dict_name='../data/dbqa.word2vec.wordlist.txt')
print len(p.word_to_index)
print '/'.join(sentence)
indices = p.word_list_to_index_list(sentence)
print indices
print '/'.join(p.index_list_to_word_list(indices))

# You may also want to fit the dictionary from the corpus
# p.reset()
p.fit_on_corpus(insert_new_word_into_dict=False)
# p.save_dictionary()
print 'Vocab size:', p.vocab_size

# questions: list of sentences, where a sentence is a list of word indices
    full_path_output = os.path.join(OUTPUT_DIR, filename)
    frame.save(full_path_output)
    return OK


# Main program
if len(sys.argv) > 1:
    pitchnum = int(sys.argv[1])
else:
    pitchnum = 0

threshold = Threshold(pitchnum)
gui = Gui(1)
features = Features(gui, threshold)
preprocessor = Preprocessor(False)

INPUT_DIR = './input_images'
OUTPUT_DIR = './output_images'
error_list = []

calibrationPath = os.path.join('calibration', 'pitch{0}'.format(pitchnum))
camera = Camera()
camera.loadCalibration(os.path.join(sys.path[0], calibrationPath))

# Statistics
recog = {'ball': 0, 'blue': 0, 'yellow': 0}
n_files = 0

# Process all images in INPUT_DIR
for filename in os.listdir(INPUT_DIR):
np.set_printoptions(threshold=np.inf)

# create a DB interactor
interactor = DBInteractor("season_batting")
# gets the dataframe
df = interactor.get_current_data_frame()
# print(df)
# df = df.drop(['yearID','stint','stint','teamID','lgId','HBP', 'playerID'], axis=1)
arr_with_ids = interactor.df_to_numpy_matrix()
cols = ['playerID', 'yearID']
df = interactor.drop_useless_stuff(cols)
# converts it to a numpy matrix
arr = interactor.df_to_numpy_matrix()
arr = arr.astype(float)
# print arr

# don't forget to disconnect
interactor.disconnect()

# create a preprocessor to preprocess the data
# this doesn't do anything very useful right now
p = Preprocessor(arr, df)
arr = p.preprocess(arr)
sample = arr
print arr.shape[1]

fit_samples_gmm(sample, 1)
fit_samples_kmeans(sample, sample.shape[1], 10)

rand_indices = np.random.choice(np.arange(0, len(sample)), replace=False, size=len(sample))
rand_samples = sample[rand_indices]
af = fit_affinity_propagation(samples=rand_samples)