Exemplos de Preprocessor em Python, exemplos de preprocess.Preprocessor em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: TagYourText.py Projeto: achieverForever/TagYourText

    def category(self, event):
        """Classify the documents under ``self.dirname`` and list the results.

        Runs the preprocessing step, launches ``Utils.exe`` (reported to the
        user as the C++ classifier), loads the theta result file and adds one
        row to the list control per entry returned by ``self.builtdata``.
        Does nothing when no directory has been chosen.  ``event`` is unused.
        """
        if self.dirname is None:
            return

        began = time.time()
        self.statusBasPrint(u"系统正在疯狂运算中，请稍候...")

        Preprocessor.preprocess(self.dirname)
        self.id2path = utils.parse_path_file(VAR_NEW_PATH_FILE)

        self.statusBasPrint('C++ classifier is running...')
        subprocess.call('Utils.exe')
        self.statusBasPrint('C++ classifier has finished')

        # get_topic_dist returns a list of
        # [(name, probability), (name, probability), ...] lists.
        self.result = utils.get_topic_dist(np.loadtxt(VAR_THETA_FILE_RESULT))
        self.packages = self.builtdata(self.result)
        for row in self.packages:
            position = self.list.InsertStringItem(sys.maxint, row[0])
            for column in (1, 2, 3):
                self.list.SetStringItem(position, column, row[column])

        self.statusBasPrint(u"文档分类完成！")
        self.Printtime(str(time.time() - began) + 's')

Exemplo n.º 2

0

Exibir arquivo

Arquivo: unit_tests.py Projeto: rohan-varma/awesome-athletes

 def test_get_feature_names(self):
     """get_feature_names() must report the first row of the data set.

     The first row of ``data_set`` holds the column names; every name the
     preprocessor reports has to equal the one at the same position.
     """
     feature_names = ["f1", "f2", "f3"]
     data_set = np.array([feature_names, ["1", "2", "3"], ["", "4", "5"]])
     preprocessor = Preprocessor(data_set)
     names = preprocessor.get_feature_names()
     # assertEqual (instead of assertTrue(a == b)) shows both values when
     # the comparison fails, so a mismatch can be diagnosed from the output.
     for position, expected in enumerate(feature_names):
         self.assertEqual(expected, names[position])

Exemplo n.º 3

0

Exibir arquivo

Arquivo: vision.py Projeto: eliask/SDP2011-Robotniks

 def initComponents(self, crop=None):
     """Create the preprocessor, feature extractor and GUI.

     ``crop`` is forwarded to the Preprocessor.  The crop size it exposes is
     then given to the feature extractor, the GUI and the world.
     """
     # Third positional argument is the undistort flag: always off here.
     self.pre = Preprocessor(self.rawSize, self.threshold, False, crop=crop)
     self.featureEx = FeatureExtraction(self.pre.cropSize)
     self.gui = GUI(self.world, self.pre.cropSize, self.threshold, self)
     self.world.setResolution(self.pre.cropSize)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: vision.py Projeto: MarkPortnell/the_mad_hatter

    def __init__(self, pitchnum, stdout, sourcefile, resetPitchSize, resetThresholds, displayBlur, normalizeAtStartup, noDribbling):
        """Build the vision components and run the processing loop.

        The constructor does not return until ``self.running`` becomes
        false: once everything is set up it keeps calling ``self.doStuff()``.

        pitchnum -- selects the ``calibration/pitch<N>`` directory and is
            passed to Preprocessor and Threshold.
        stdout -- when falsy, ``self.connect()`` is called before processing
            and ``self.socket`` is closed at the end.
        sourcefile -- ``None`` to use a Camera; otherwise given to
            VirtualCamera as an image (name ends in ``jpg``/``png``) or as a
            video.
        resetPitchSize -- passed to Preprocessor.
        resetThresholds, displayBlur, normalizeAtStartup -- passed to
            Threshold.
        noDribbling -- passed to Filter.
        """

        self.running = True
        self.connected = False

        self.stdout = stdout

        # Pick the frame source: live camera, or a file replayed as image/video.
        if sourcefile is None:
            self.cap = Camera()
        else:
            filetype = 'video'
            if sourcefile.endswith(('jpg', 'png')):
                filetype = 'image'

            self.cap = VirtualCamera(sourcefile, filetype)

        # Calibration is looked up relative to sys.path[0].
        calibrationPath = os.path.join('calibration', 'pitch{0}'.format(pitchnum))
        self.cap.loadCalibration(os.path.join(sys.path[0], calibrationPath))

        self.preprocessor = Preprocessor(pitchnum, resetPitchSize)
        # The GUI only receives a pitch size when the preprocessor has one.
        if self.preprocessor.hasPitchSize:
            self.gui = Gui(self.preprocessor.pitch_size)
        else:
            self.gui = Gui()
        self.threshold = Threshold(pitchnum, resetThresholds, displayBlur, normalizeAtStartup)
        self.thresholdGui = ThresholdGui(self.threshold, self.gui)
        self.features = Features(self.gui, self.threshold)
        self.filter = Filter(noDribbling)

        # The 'q' key is bound to self.quit.
        eventHandler = self.gui.getEventHandler()
        eventHandler.addListener('q', self.quit)

        # Outer loop: re-entered after every socket.error, which repeats the
        # connect step.  Inner loop: processes until self.running is cleared.
        while self.running:
            try:
                if not self.stdout:
                    self.connect()
                else:
                    self.connected = True

                if self.preprocessor.hasPitchSize:
                    self.outputPitchSize()
                    self.gui.setShowMouse(False)
                else:
                    # No pitch size yet: clicks go to setNextPitchCorner.
                    eventHandler.setClickListener(self.setNextPitchCorner)

                while self.running:
                    self.doStuff()

            except socket.error:
                self.connected = False
                # If the rest of the system is not up yet/gets quit,
                # just wait for it to come available.
                time.sleep(1)

                # Strange things seem to happen to X sometimes if the
                # display isn't updated for a while
                # NOTE(review): this call is outside the try body, so a
                # socket.error raised here propagates out of __init__.
                self.doStuff()

        # NOTE(review): presumably self.socket is created by connect(); if
        # connect() never succeeded this may raise AttributeError - confirm.
        if not self.stdout:
            self.socket.close()

Exemplo n.º 5

0

Exibir arquivo

Arquivo: prepare_training_set.py Projeto: YangLeoZhao/Tailor

def prepare_training_set(training_set_files):
    """Build a supervised data set from preprocessed pattern files.

    Args:
        training_set_files: mapping from a pattern type to the files
            recorded for that pattern.

    Returns:
        The SupervisedDataSet with one sample per file.  The input of a
        sample is the x sums followed by the y sums of the preprocessed
        entry; the target is a length-4 vector of zeros with a 1 at index
        ``pattern_type.value - 1``.
    """
    preprocessor = Preprocessor()
    ds = SupervisedDataSet(500 * 2, 4)
    # .items() and single-argument print(...) behave the same on Python 2
    # and Python 3, unlike .iteritems() and the print statement.
    for pattern_type, files in training_set_files.items():
        print("[INFO]: Processing {}".format(PatternType(pattern_type).name))
        for path in files:  # not named `file`, which shadows a builtin
            entry = preprocessor.preprocess(pattern_type, path)
            sums = list(entry.sums.x)
            sums.extend(entry.sums.y)
            pattern_type_output = [0, 0, 0, 0]
            pattern_type_output[pattern_type.value - 1] = 1
            ds.addSample(tuple(sums), tuple(pattern_type_output))
            print([path, pattern_type_output])

    return ds

Exemplo n.º 6

0

Exibir arquivo

Arquivo: vision.py Projeto: eliask/SDP2011-Robotniks

class Vision():
    """Per-frame vision pipeline: capture, preprocess, extract, update.

    Each processFrame() call grabs a frame, converts it to the
    preprocessor's standard form, extracts entities from it, passes them to
    the world and, unless running headless, refreshes the GUI.
    """

    # Size handed to Capture and Preprocessor.
    rawSize = (640, 480)

    def __init__(self, world, filenames=None, simulator=None,
                 once=False, headless=False):
        """Store the collaborators and build the processing components.

        world -- object receiving the detected entities and the resolution.
        filenames, once -- forwarded to Capture.
        simulator -- stored on the instance; not used by the visible methods.
        headless -- when true, processFrame() skips all GUI updates.
        """
        logging.info('Initialising vision')
        self.headless = headless
        self.capture = Capture(self.rawSize, filenames, once)
        # NOTE(review): the AltRaw instance is replaced on the next line and
        # never used; the call is kept in case its constructor has side
        # effects - confirm and delete.
        self.threshold = threshold.AltRaw()
        self.threshold = threshold.PrimaryRaw()
        self.world = world
        self.simulator = simulator

        self.initComponents()
        self.times = []  # duration of every processed frame, in seconds
        self.N = 0       # number of frames processed so far
        logging.debug('Vision initialised')

    def initComponents(self, crop=None):
        """Create the preprocessor, feature extractor and GUI.

        ``crop`` is forwarded to the Preprocessor; the crop size it exposes
        is given to the feature extractor, the GUI and the world.
        """
        undistort = False
        self.pre = Preprocessor(self.rawSize, self.threshold,
                                undistort, crop=crop)
        self.featureEx = FeatureExtraction(self.pre.cropSize)
        self.gui = GUI(self.world, self.pre.cropSize, self.threshold, self)
        self.world.setResolution(self.pre.cropSize)

    def formatTime(self, t):
        """Return timestamp ``t`` as local ``HH:MM:SS.mmm``."""
        return time.strftime('%H:%M:%S', time.localtime(t)) \
            + ('%.3f' % (t - math.floor(t)))[1:]  # discard leading 0

    def processFrame(self):
        """Run one frame through the pipeline and record how long it took.

        An exception raised while updating the GUI is logged and re-raised.
        """
        startTime = time.time()
        logging.debug("Frame %d at %s", self.N,
                      self.formatTime(startTime))
        self.N += 1

        logging.debug("Capturing a frame")
        frame = self.capture.getFrame()
        logging.debug("Entering preprocessing")
        standard = self.pre.get_standard_form(frame)
        logging.debug("Entering feature extraction")

        ents = self.featureEx.features(standard, self.threshold)
        # The message needs a placeholder: logging formats it as msg % args,
        # so passing an argument without one is a formatting error.
        logging.debug("Detected entities: %s", ents)
        logging.debug("Entering World")
        self.world.update(startTime, ents)

        logging.debug("Updating GUI")
        if not self.headless:
            try:
                self.gui.updateWindow('raw', frame)
                self.gui.updateWindow('standard', standard)
                self.gui.draw(ents, startTime)
            except Exception as e:
                logging.error("GUI failed: %s", e)
                raise

        endTime = time.time()
        self.times.append(endTime - startTime)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: vision.py Projeto: SSabev/SDPCode

    def __init__(self, pitchnum, stdout, sourcefile, resetPitchSize, noGui, debug_window,  pipe):
        """Build the vision components and run the processing loop.

        The constructor does not return until ``self.running`` becomes
        false: once everything is set up it keeps calling ``self.doStuff()``.

        pitchnum -- selects the ``calibration/pitch<N>`` directory and is
            passed to Threshold.
        stdout, pipe -- stored on the instance.
        sourcefile -- ``None`` to use a Camera; otherwise only
            ``self.filetype`` is derived from its suffix.
        resetPitchSize -- passed to Preprocessor.
        noGui -- stored and passed to Gui.
        debug_window -- not used by the active code (see commented block).
        """

        self.noGui = noGui
        # Both timestamps start at construction time.
        self.lastFrameTime = self.begin_time = time.time()
        self.processed_frames = 0

        self.running = True
        self.stdout = stdout

        self.pipe = pipe
        

        
        # NOTE(review): self.camera is only assigned when sourcefile is None,
        # but loadCalibration below is called on it unconditionally, so a
        # non-None sourcefile appears to end in AttributeError - confirm.
        if sourcefile is None:
            self.camera = Camera()
        else:
            self.filetype = 'video'
            if sourcefile.endswith(('jpg', 'png')):
                self.filetype = 'image'

        self.gui = Gui(self.noGui)
        self.threshold = Threshold(pitchnum)
        self.thresholdGui = ThresholdGui(self.threshold, self.gui)
        self.preprocessor = Preprocessor(resetPitchSize)
        self.features = Features(self.gui, self.threshold)
        # if self.debug_window:
        #     self.debug_window = DebugWindow()
        # else:
        #     self.debug_window = None

        # Calibration is looked up relative to sys.path[0].
        calibrationPath = os.path.join('calibration', 'pitch{0}'.format(pitchnum))
        self.camera.loadCalibration(os.path.join(sys.path[0], calibrationPath))

        # The 'q' key is bound to self.quit.
        eventHandler = self.gui.getEventHandler()
        eventHandler.addListener('q', self.quit)

        # Ugly stuff for smoothing coordinates - should probably move it
        # History buffers: _pastSize entries per tracked object, initialised
        # to (0, 0) for coordinates and 1.0 for angles.
        self._pastSize = 5
        self._pastCoordinates = {
                            'yellow': [(0, 0)] * self._pastSize,
                            'blue': [(0, 0)] * self._pastSize,
                            'ball': [(0, 0)] * self._pastSize
                            }
        self._pastAngles = {
                            'yellow': [1.0] * self._pastSize,
                            'blue': [1.0] * self._pastSize
                           }

        # The inner loop only exits once self.running is false, which also
        # ends the outer loop; the pitch-size branch therefore runs once.
        while self.running:
            if self.preprocessor.hasPitchSize:
                self.outputPitchSize()
                self.gui.setShowMouse(False)
            else:
                # No pitch size yet: clicks go to setNextPitchCorner.
                eventHandler.setClickListener(self.setNextPitchCorner)

            while self.running:
                self.doStuff()

Exemplo n.º 8

0

Exibir arquivo

Arquivo: driver.py Projeto: rabbit-run/POS_tagger

def main():
    """Tag the sentences of the input file and write out the result.

    Command line: ``[-r] INPUT [OUTPUT]``.  The probability tables are
    rebuilt from ``training.pos`` when ``-r`` is given or when any of the
    four pickle files is missing.  Exits with status 1 when no input file or
    more than two positional arguments are supplied.
    """
    parser = optparse.OptionParser()
    parser.add_option('-r', action='store_true', dest="redo", default=False)
    opts, args = parser.parse_args()

    # Reject unusable argument counts up front.
    if not args:
        sys.stderr.write("Error: please specify a file name\n")
        raise SystemExit(1)
    if len(args) > 2:
        sys.stderr.write("Error: too much argument\n")
        raise SystemExit(1)

    fileName = args[0]
    output_file = args[1] if len(args) == 2 else ''

    # Split the input into sentences.
    sentences = Preprocessor(fileName).getSentences()

    # Create the likelihood table, prior probability table and so on.
    table_files = ("likelihood.pkl", "prior_prob.pkl",
                   "tags.pkl", "vocabulary.pkl")
    if opts.redo or not all(os.path.isfile(name) for name in table_files):
        viterbi_util.compute_table("training.pos")

    # Run the Viterbi algorithm on every sentence.
    viterbi = Viterbi()
    output = [(sentence, viterbi.go(sentence)) for sentence in sentences]

    # Write the result into a file.
    viterbi_util.write_out(output, output_file)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: vision3.py Projeto: eliask/SDP2011-Robotniks

    def __init__(self, world, filename=None, simulator=None):
        """Wire up capture, preprocessing, feature extraction and the GUI.

        Frames come from ``simulator`` when one is given, otherwise from an
        MPlayerCapture of ``filename``.
        """
        logging.info('Initialising vision')
        self.capture = (SimCapture(simulator) if simulator
                        else MPlayerCapture(self.rawSize, filename))

        self.threshold = threshold.PrimaryRaw()
        self.pre = Preprocessor(self.rawSize, self.threshold, simulator)
        self.featureEx = FeatureExtraction(self.pre.cropSize)
        self.interpreter = Interpreter()
        self.world = world
        self.gui = GUI(world, self.pre.cropSize, self.threshold)
        self.histogram = Histogram(self.pre.cropSize)

        # Per-frame bookkeeping starts empty.
        self.times, self.N = [], 0

        debug.thresholdValues(self.threshold.Tfg, self.gui)

        logging.debug('Vision initialised')

Exemplo n.º 10

0

Exibir arquivo

import yaml

from repository import MongoRepository
from preprocess import Preprocessor

if __name__ == '__main__':
    # Preprocess every stored question: read the connection settings from
    # config.yml and let the repository run the Preprocessor over the
    # question collection, writing into the preprocessed collection.

    # Use a context manager so the config file is closed as soon as it has
    # been parsed instead of leaking the handle from a bare open().
    with open("config.yml") as config_file:
        config = yaml.safe_load(config_file)
    data_dir = config['data_dir']
    mongo_connection = config['mongo_connection']
    mongo_repository = MongoRepository(mongo_connection)
    preprocessor = Preprocessor()
    mongo_repository.process_questions(
        source_collection=mongo_repository.questions,
        target_collection=mongo_repository.preprocessed_questions,
        processor=preprocessor)

Exemplo n.º 11

0

Exibir arquivo

# Suffix handed to TFRecordGenerator below.
record_suffix = 'axial_t2_only'

# Load data
# Case numbers given to Metadata: 0-69 for abnormal, 0-99 for healthy.
abnormal_cases = list(range(70))
healthy_cases = list(range(100))
metadata = Metadata(data_path,
                    label_path,
                    abnormal_cases,
                    healthy_cases,
                    dataset_tag='')
# metadata = Metadata(data_path, label_path, abnormal_cases, healthy_cases, dataset_tag=' cropped')

# Image data is loaded per patient, one patient at a time.
print('Loading images...')
for patient in metadata.patients:
    print(f'Loading patient {patient.get_id()}')
    patient.load_image_data()

# Preprocess data
# The processed patients replace the originals on the metadata object.
preprocessor = Preprocessor(constant_volume_size=reference_size)
metadata.patients = preprocessor.process(metadata.patients,
                                         ileum_crop=False,
                                         region_grow_crop=True,
                                         statistical_region_crop=True)

# Serialise data into TF Records
record_generator = TFRecordGenerator(record_out_path, record_suffix)
# record_generator.generate_train_test(test_proportion, metadata.patients)
# Cross-validation folds are generated (k folds) rather than a single
# train/test split.
record_generator.generate_cross_folds(k, metadata.patients)

print('Done')

Exemplo n.º 12

0

Exibir arquivo

Arquivo: h1b_counting.py Projeto: konnomiya/h1b-counter

def preprocess():
    '''
    Run a fresh Preprocessor over the raw data in ``input_file_name``.
    '''
    Preprocessor().preprocess(input_file_name)

Exemplo n.º 13

0

Exibir arquivo

def generic_function(frame_readers, area, session_id, detector, face_extractor,
                     matcher, register_commands, sent_msg_queue):
    '''
    Serve one session: handle registration requests and track live frames.

    Every loop iteration first polls ``register_commands[session_id]``
    without blocking.  A pending request ``(person_id, file_url)`` is
    handled by detecting and tracking faces in the frames saved from the
    url, updating ``matcher`` on success and putting a
    ``register_ok``/``register_ko`` status message on ``sent_msg_queue``.

    Afterwards the next live frame is read from
    ``frame_readers[session_id]``.  On every
    ``Config.Frame.FRAME_INTERVAL``-th frame faces are detected, embedded,
    tracked and recognised, and a result message is queued whenever the
    recognised label differs from the previously reported one.

    Returns once no frame has arrived for
    ``Config.TIME_KILL_NON_ACTIVE_PROCESS`` seconds and no registration
    command is pending, after removing the session from ``frame_readers``
    and ``register_commands``.  A KeyboardInterrupt also ends the loop.
    '''
    print("Area: {}".format(area))
    print('Thread {} created'.format(session_id))
    frame_counter = 0
    tracker_manager = TrackerManager(area)

    if not os.path.exists(Config.SEND_RBMQ_DIR):
        os.mkdir(Config.SEND_RBMQ_DIR)

    preprocessor = Preprocessor()

    frame_reader = frame_readers[session_id]
    register_command = register_commands[session_id]
    if Config.CALC_FPS:
        start_time = time.time()
    # Time of the last successfully read frame (despite the name).
    unavailable_counter = time.time()
    last_labels = 'empty'
    try:
        while True:
            try:
                reg_msg_list = register_command.get(False)
            except queue.Empty:
                reg_msg_list = None
            if reg_msg_list is not None:
                # REGISTRATION MODE
                print(reg_msg_list)
                # Default answer; replaced by a specific one below.
                update_message = '{}|register_ko|Register Fail'.format(
                    session_id)
                person_id = reg_msg_list[0]
                file_url_msg = reg_msg_list[1]
                # Registration uses its own tracker set, separate from the
                # live one.
                list_of_reg_trackers = TrackerManager(area)
                # NOTE(review): this also resets the counter used by the
                # live loop below - confirm that is intended.
                frame_counter = 0
                saved_frames = save_frames(file_url_msg)
                if saved_frames == []:
                    print("save frames is None")
                    update_message = '{}|register_ko|Empty Source or Invalid Format'.format(
                        session_id)
                else:
                    print('Detecting Faces and Extracting Features ...')
                    saved_frames.reverse()
                    for frame in saved_frames:
                        list_of_reg_trackers.update_dlib_trackers(frame)
                        origin_bbs, points = detector.detect_face(frame)
                        if origin_bbs is None:
                            print('not detect face on frame')
                            break
                        for origin_bb in origin_bbs:
                            if is_inner_of_range(origin_bb, frame.shape):
                                continue
                            display_face, str_padded_bbox = CropperUtils.crop_display_face(
                                frame, origin_bb)
                            cropped_face = CropperUtils.crop_face(
                                frame, origin_bb)
                            # Calculate embedding
                            preprocessed_image = preprocessor.process(
                                cropped_face)
                            emb_array, _ = face_extractor.extract_features(
                                preprocessed_image)

                            face_info = FaceInfo(origin_bb, emb_array,
                                                 frame_counter, display_face,
                                                 str_padded_bbox)
                            matched_track_id = list_of_reg_trackers.track(
                                face_info)
                            list_of_reg_trackers.update(
                                matched_track_id, frame, face_info)

                        frame_counter += 1
                        # Cap the number of frames used for one registration.
                        if frame_counter > 601:
                            break
                    if list_of_reg_trackers.current_trackers != {}:
                        embs, lbls, result_status = extract_images(
                            list_of_reg_trackers.current_trackers, person_id)
                        if result_status == 'ok':
                            matcher.update(embs, lbls)
                            registered_ids = set(lbls)
                            registered_msg = ', '.join(registered_ids)
                            # send message to rb
                            update_message = '{}|register_ok|Registered {}'.format(
                                session_id, registered_msg)
                            print('REGISTER DONEEEEEEEEEEEEE\n')
                        elif result_status == 'many_faces':
                            print(
                                'REGISTER ERROR: Many faces or your head turns too fast'
                            )
                            # send message to rb
                            update_message = '{}|register_ko|Many faces in the sequence'.format(
                                session_id)
                        elif result_status == 'not_good':
                            update_message = '{}|register_ko|Not enough faces registerd'.format(
                                session_id)
                        else:
                            print('REGISTER ERROR')
                            # send message to rb
                            update_message = '{}|register_ko|Register Error'.format(
                                session_id)
                    else:
                        print('No tracker found')
                        update_message = '{}|register_ko|No Face Detected'.format(
                            session_id)

                sent_msg_queue.put(
                    ('{}-status'.format(Config.DEMO_FOR), update_message))

                frame_reader.clear()

            # LIVE MODE
            frame = frame_reader.next_frame()
            if frame is None:
                if time.time(
                ) - unavailable_counter >= Config.TIME_KILL_NON_ACTIVE_PROCESS:
                    # Idle for too long and nothing left to register:
                    # unregister the session and stop.
                    if register_commands[session_id].empty():
                        frame_readers.pop(session_id, None)
                        register_commands.pop(session_id, None)
                        return
                time.sleep(1)
                tracker_manager.find_and_process_end_track()
                continue

            unavailable_counter = time.time()

            print("Frame ID: %d" % frame_counter)
            fps_counter = time.time()

            tracker_manager.update_dlib_trackers(frame)
            if frame_counter % Config.Frame.FRAME_INTERVAL == 0:
                print(Config.Frame.FRAME_INTERVAL)
                # Run detection exactly once per processed frame.
                origin_bbs, points = detector.detect_face(frame)
                # The detector can report "no result" as None (the
                # registration path checks for it); treat that as no faces
                # instead of failing on iteration.
                if origin_bbs is None:
                    origin_bbs = []
                for origin_bb in origin_bbs:
                    if (is_inner_of_range(origin_bb, frame.shape)
                            and calc_bb_percentage(origin_bb, frame.shape) >
                            Config.Track.BB_SIZE):
                        continue

                    display_face, str_padded_bbox = CropperUtils.crop_display_face(
                        frame, origin_bb)
                    cropped_face = CropperUtils.crop_face(frame, origin_bb)
                    print('pass Crop Utils')
                    # Calculate embedding
                    preprocessed_image = preprocessor.process(cropped_face)
                    emb_array, _ = face_extractor.extract_features(
                        preprocessed_image)
                    print('calculated embedding')
                    # TODO: refractor matching_detected_face_with_trackers
                    face_info = FaceInfo(origin_bb, emb_array, frame_counter,
                                         display_face, str_padded_bbox)
                    matched_track_id = tracker_manager.track(face_info)
                    tracker_manager.update(matched_track_id, frame, face_info)
                    tracker_manager.check_and_recognize_tracker(
                        matcher, matched_track_id, short_term_add_new=False)
                    matched_tracker = tracker_manager.current_trackers[
                        matched_track_id]
                    # Ids starting with 'TCH-<area>' are reported as new faces.
                    if matched_tracker.face_id.startswith(
                            'TCH-{}'.format(area)):
                        matched_tracker.face_id = Config.Matcher.NEW_FACE

                    print('update trackers list')
                    # Report a label only when it differs from the last one.
                    if matched_tracker.face_id == last_labels:
                        continue
                    last_labels = matched_tracker.face_id
                    image_id = '{}_{}_{}.jpg'.format(
                        matched_tracker.face_id, time.time(), frame_counter)
                    img_dir = os.path.join(Config.SEND_RBMQ_DIR, image_id)
                    misc.imsave(img_dir, display_face)
                    face_msg = '|'.join([
                        session_id, matched_tracker.face_id,
                        'images/' + img_dir.split('/')[-1]
                    ])
                    if Config.Matcher.NEW_FACE not in face_msg:
                        sent_msg_queue.put(
                            ('{}-result'.format(Config.DEMO_FOR), face_msg))

                    # Trackers of new faces are dropped right after use.
                    if matched_tracker.face_id == Config.Matcher.NEW_FACE:
                        tracker_manager.current_trackers.pop(
                            matched_track_id, None)

            tracker_manager.find_and_process_end_track()
            frame_counter += 1
            if Config.CALC_FPS:
                print("FPS: %f" % (1 / (time.time() - fps_counter)))
    except KeyboardInterrupt:
        print('Keyboard Interrupt !!! Release All !!!')
        tracker_manager.long_term_history.check_time(matcher)
        if Config.CALC_FPS:
            print('Time elapsed: {}'.format(time.time() - start_time))
            print('Avg FPS: {}'.format(
                (frame_counter + 1) / (time.time() - start_time)))

Exemplo n.º 14

0

Exibir arquivo

from models import *
from preprocess import Preprocessor
import sys
import time

# Path of the pickled ADEM weights loaded by the demo below.
saved_model = './weights/adem_model.pkl'
if __name__ == '__main__':
    # Demo: score one model response against one reference response for a
    # single context, then print the scores and the elapsed wall time.
    time_start = time.time()
    pp = Preprocessor()
    adem = ADEM(pp, None, saved_model)

    contexts = ['</s> <first_speaker> hello . how are yours today ? </s>']
    #'</s> <first_speaker> i love starbucks coffee </s>',
    #'</s> <first_speaker> photo to see my television debut go to - some. some on- hehe! </s> <second_speaker> it really was you? i thought ppl were recognizing someone who looked like you! were the oysters worth the wait? </s>']
    true = ['</s> <second_speaker> i am fine . thanks </s>']
    #'</s> <second_speaker> i like their latte </s>',
    #"</s> <first_speaker> yeah it was me . haha i'd kinda forgotten about it it was filmed a while ago </s>"]
    model = ['</s> <second_speaker> i am fine . thanks </s>']
    #'</s> <second_speaker> I want to play golf . </s>',
    #"</s> <first_speaker> i'm not sure. i just don't know what to do with it. </s>"]

    # Single-argument print(...) gives the same output as the Python 2
    # print statement and is also valid Python 3.
    print('Model Loaded!')
    print(adem.get_scores(contexts, true, model))

    time_end = time.time()
    print(time_end - time_start)

Exemplo n.º 15

0

Exibir arquivo

    def test_transform_date(self):
        """Run every preprocessing step on data_duplicate.csv and compare.

        The first eight rows of the resulting frame, rounded to three
        decimals, must match the expected values column by column.
        """
        source = dd.read_csv('data_duplicate.csv')
        pre = Preprocessor(['feat1', 'feat2', 'feat3'],
                           'target',
                           source, ['0', '1'],
                           categorical_features=['feat4'])
        pre.execute(duplicates_invalid=True,
                    missing=True,
                    scale=True,
                    transform=True,
                    encode_target=False,
                    train=True)

        # Index labels of the eight expected rows; every column below lists
        # its values in this order.
        row_labels = (0, 1, 2, 6, 7, 8, 9, 10)
        column_values = {
            'target': ('0', '1', '0', '1', '0', '1', '1', '0'),
            'feat1': (-1.043, -0.209, -1.043, 0.626,
                      -0.209, -0.209, -0.209, 2.294),
            'feat2': (-0.954, 0.867, -0.954, -0.347,
                      -0.954, 0.26, 0.0, 2.081),
            'feat3': (-0.632, 0.0, 0.0, 2.53,
                      -0.632, 0.0, -0.632, -0.632),
            'feat4': ('a', 'Other', 'b', 'a', 'b', 'c', 'c', 'a'),
        }
        expected_output_dict = {
            column: dict(zip(row_labels, values))
            for column, values in column_values.items()
        }

        self.assertEqual(expected_output_dict,
                         pre.df.round(3).head(8).to_dict())

Exemplo n.º 16

0

Exibir arquivo

Arquivo: vision.py Projeto: GeorgiTsatsev/Year-3

class Vision:
    """Vision front end: grabs frames, preprocesses them, extracts entities
    and reports them either on stdout or over a TCP socket.

    NOTE: the main loop runs inside ``__init__``; constructing the object
    does not return until ``quit`` clears ``self.running``.
    """

    def __init__(self, pitchnum, stdout, sourcefile, resetPitchSize):
        """Build the capture/GUI/processing objects and run the main loop.

        Args:
            pitchnum: Pitch number; selects the ``calibration/pitch<N>``
                path and is passed to ``Threshold``.
            stdout: When truthy, output goes to ``sys.stdout`` and no
                socket connection is attempted.
            sourcefile: ``None`` to use ``Camera()``; otherwise a path
                wrapped in ``VirtualCamera`` (names ending in 'jpg' or
                'png' are opened as 'image', everything else as 'video').
            resetPitchSize: Passed straight to ``Preprocessor``.
        """
        self.running = True
        self.connected = False

        self.stdout = stdout

        if sourcefile is None:
            self.cap = Camera()
        else:
            filetype = 'video'
            if sourcefile.endswith(('jpg', 'png')):
                filetype = 'image'

            self.cap = VirtualCamera(sourcefile, filetype)

        calibrationPath = os.path.join('calibration', 'pitch{0}'.format(pitchnum))
        self.cap.loadCalibration(os.path.join(sys.path[0], calibrationPath))

        self.gui = Gui()
        self.threshold = Threshold(pitchnum)
        self.thresholdGui = ThresholdGui(self.threshold, self.gui)
        self.preprocessor = Preprocessor(resetPitchSize)
        self.features = Features(self.gui, self.threshold)

        eventHandler = self.gui.getEventHandler()
        eventHandler.addListener('q', self.quit)

        # Outer loop: (re)establish the connection; inner loop: process
        # frames until quit() is called or the socket fails.
        while self.running:
            try:
                if not self.stdout:
                    self.connect()
                else:
                    self.connected = True

                if self.preprocessor.hasPitchSize:
                    self.outputPitchSize()
                    self.gui.setShowMouse(False)
                else:
                    # Pitch corners still have to be clicked by the user.
                    eventHandler.setClickListener(self.setNextPitchCorner)

                while self.running:
                    self.doStuff()

            except socket.error:
                self.connected = False
                # If the rest of the system is not up yet/gets quit,
                # just wait for it to come available.
                time.sleep(1)

                # Strange things seem to happen to X sometimes if the
                # display isn't updated for a while
                self.doStuff()

        if not self.stdout:
            self.socket.close()

    def connect(self):
        """Open a TCP connection to ``(HOST, PORT)`` and mark it connected.

        Raises:
            socket.error: Propagated from ``connect``; handled by the main
                loop in ``__init__``.
        """
        print("Attempting to connect...")
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.connect( (HOST, PORT) )
        self.connected = True

    def quit(self):
        """Ask the main loop to stop (registered as the 'q' key listener)."""
        self.running = False

    def doStuff(self):
        """Process one frame: capture, preprocess, extract, output, redraw."""
        # NOTE(review): this compares the attribute itself with None; if
        # getCameraMatrix is a method (as it appears to be in SimpleCV) the
        # test is always False and the undistorted path is always taken.
        # Left unchanged because the intended check cannot be confirmed here.
        if self.cap.getCameraMatrix is None:
            frame = self.cap.getImage()
        else:
            frame = self.cap.getImageUndistort()

        frame = self.preprocessor.preprocess(frame)

        self.gui.updateLayer('raw', frame)

        ents = self.features.extractFeatures(frame)
        self.outputEnts(ents)

        self.gui.loop()

    def setNextPitchCorner(self, where):
        """Click handler: forward the clicked point to the preprocessor.

        Once the preprocessor reports a pitch size, it is printed and sent
        and the corner overlay is cleared; otherwise a crosshair is drawn
        at ``where``.
        """
        self.preprocessor.setNextPitchCorner(where)

        if self.preprocessor.hasPitchSize:
            print("Pitch size: {0!r}".format(self.preprocessor.pitch_size))
            self.outputPitchSize()
            self.gui.setShowMouse(False)
            self.gui.updateLayer('corner', None)
        else:
            self.gui.drawCrosshair(where, 'corner')

    def outputPitchSize(self):
        """Print the pitch size and send it as a PITCH_SIZE_BIT message."""
        print(self.preprocessor.pitch_size)
        self.send('{0} {1} {2} \n'.format(
                PITCH_SIZE_BIT, self.preprocessor.pitch_size[0], self.preprocessor.pitch_size[1]))

    def outputEnts(self, ents):
        """Send one ENTITY_BIT line with yellow, blue and ball data.

        Nothing is sent while disconnected or before the pitch size is
        known. The line ends with a millisecond timestamp.

        Args:
            ents: Mapping with 'yellow', 'blue' and 'ball' entries whose
                values provide ``coordinates()`` (and ``angle()`` for the
                two robots).
        """
        if not self.connected or not self.preprocessor.hasPitchSize:
            return

        self.send("{0} ".format(ENTITY_BIT))

        for name in ['yellow', 'blue', 'ball']:
            entity = ents[name]
            x, y = entity.coordinates()

            # The rest of the system needs (0, 0) at the bottom left
            # (y == -1 is passed through unflipped).
            if y != -1:
                y = self.preprocessor.pitch_size[1] - y

            if name == 'ball':
                self.send('{0} {1} '.format(x, y))
            else:
                # Radians -> degrees, then mirrored into the 0..360 range.
                angle = 360 - (((entity.angle() * (180/math.pi)) - 360) % 360)
                self.send('{0} {1} {2} '.format(x, y, angle))

        self.send(str(int(time.time() * 1000)) + " \n")

    def send(self, string):
        """Write ``string`` to stdout or to the connected socket.

        ``sendall`` is used rather than ``send`` because ``send`` may
        transmit only part of the buffer, which would silently truncate a
        protocol line.
        """
        if self.stdout:
            sys.stdout.write(string)
        else:
            self.socket.sendall(string)

Exemplo n.º 17

0

Exibir arquivo

    def test_remove_missing_values(self):
        """Run ``execute`` with duplicates_invalid and missing enabled
        (scale and transform off) and compare the first eight rounded rows
        of ``x.df`` with the expected per-column values."""
        source_frame = dd.read_csv('data_duplicate.csv')
        numeric_features = ['feat1', 'feat2', 'feat3']
        pipeline = Preprocessor(numeric_features,
                                'target',
                                source_frame, ['0', '1'],
                                categorical_features=['feat4'])
        pipeline.execute(duplicates_invalid=True,
                         missing=True,
                         scale=False,
                         transform=False,
                         encode_target=False,
                         train=True)

        # Index labels present in the expected output (3, 4 and 5 are absent).
        row_labels = (0, 1, 2, 6, 7, 8, 9, 10)
        # One tuple of values per column, aligned with row_labels.
        expected_columns = (
            ('target', ('0', '1', '0', '1', '0', '1', '1', '0')),
            ('feat1', (1, 2, 1, 3, 2, 2, 2, 5)),
            ('feat2', (2.0, 5.0, 2.0, 3.0, 2.0, 4.0, 3.571, 7.0)),
            ('feat3', (3.0, 3.2, 3.2, 4.0, 3.0, 3.2, 3.0, 3.0)),
            ('feat4', ('a', 'Other', 'b', 'a', 'b', 'c', 'c', 'a')),
        )
        expected_output_dict = {
            column: dict(zip(row_labels, values))
            for column, values in expected_columns
        }

        actual_output_dict = pipeline.df.round(3).head(8).to_dict()
        self.assertEqual(expected_output_dict, actual_output_dict)

Exemplo n.º 18

0

Exibir arquivo

Arquivo: main.py Projeto: chae9214/aes

        args.epochs)
    savefile_path = os.path.join(args.save, savefile)
    logfile_path = os.path.join(args.save, logfile)

    torch.manual_seed(1)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(1)

    ### Load preprocessed data
    pprint(logfile_path, '=' * 89)
    pprint(logfile_path, 'preprocessing data...')
    pprint(logfile_path, '=' * 89)

    glove_path = "./data/glove.840B.300d.txt"

    preprocessor = Preprocessor(datapath)
    train_data = Corpus(os.path.join(datapath, 'train.dat'))
    valid_data = Corpus(os.path.join(datapath, 'valid.dat'))
    test_data = Corpus(os.path.join(datapath, 'test.dat'))

    ### Build model
    pprint(logfile_path, '=' * 89)
    pprint(logfile_path, 'building model...')
    pprint(logfile_path, '=' * 89)

    n = len(preprocessor.vocab)

    if args.model == 'LSTM':
        model = LSTMModel(n, args.e_dim, args.h_dim, args.dropout, False)
    elif args.model == 'bi-LSTM':
        model = LSTMModel(n, args.e_dim, args.h_dim, args.dropout, True)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: main.py Projeto: lin826/simple-random-forest

def main(args):
    """Run the task selected by ``args.task``.

    Supported tasks:
        'validate': preprocess the training data and call ``model.validate``.
        'train':    preprocess, shuffle, train, report training accuracy,
                    optionally evaluate on a test set and save the model
                    plus its preprocessor (``<save>_phi``).
        'plot':     load a model and plot its estimators.
        'dt_eval':  load model and preprocessor and print the score of each
                    individual estimator on the test set.

    Any other task value does nothing beyond building the model.

    Args:
        args: Parsed command-line namespace; the attributes read depend on
            the task (train_X, train_T, test_X, test_T, save, load, dot).
    """
    model = get_model(args)
    if args.task == 'validate':
        X_Train = load_csv(args.train_X)
        T_Train = load_csv(args.train_T).flatten()
        X_Train_phi, phi = preprocess(args, X_Train, T_Train)

        logging.info('Training')
        model.validate(X_Train_phi, T_Train, params=get_param_validate(args))
    elif args.task == 'train':
        X_Train = load_csv(args.train_X)
        T_Train = load_csv(args.train_T).flatten()
        X_Train_phi, phi = preprocess(args, X_Train, T_Train)
        # Shuffle samples and targets with the same random index order.
        # permutation() returns a fresh index array; shuffling a range
        # object in place raises TypeError on Python 3.
        inds = np.random.permutation(len(X_Train))
        X_Train_phi = X_Train_phi[inds]
        T_Train = T_Train[inds]

        logging.info('Training')
        model.train(X_Train_phi, T_Train, param=get_param(args))

        train_acc = model.eval(X_Train_phi, T_Train)
        logging.info('Training Accuracy = %f' % train_acc)

        # Test evaluation only happens when both test files were given.
        if args.test_X is not None and args.test_T is not None:
            X_Test = load_csv(args.test_X)
            T_Test = load_csv(args.test_T).flatten()
            # Reuse the preprocessor fitted on the training data.
            X_Test_phi = phi.transform(X_Test)
            test_acc = model.eval(X_Test_phi, T_Test)
            logging.info('Testing Accuracy = %f' % test_acc)

            print(test_acc)

        if args.save is not None:
            model.save('%s' % args.save)
            logging.info('Model saved at %s' % args.save)

            # The preprocessor is stored next to the model with a _phi suffix.
            phi.save('%s' % args.save + '_phi')
            logging.info('Model preprocessor saved at %s' % args.save + '_phi')

    elif args.task == 'plot':
        model.load(args.load)
        logging.info('Model loaded from %s' % args.load)
        logging.info('Plotting')
        l_tree = model.model.estimators_
        plot_decision_tree(l_tree, args.dot)

    elif args.task == 'dt_eval':
        phi = Preprocessor()
        phi.load(args.load + '_phi')

        X_Test = load_csv(args.test_X)
        T_Test = load_csv(args.test_T).flatten()
        X_Test_phi = phi.transform(X_Test)

        model.load(args.load)
        logging.info('Model loaded from %s' % args.load)
        logging.info('Decision Tree Evaluating')
        l_tree = model.model.estimators_
        # One score per estimator, printed one per line.
        for tree in l_tree:
            test_acc = tree.score(X_Test_phi, T_Test)
            print('%f' % test_acc)

Exemplo n.º 20

0

Exibir arquivo

def get_data_generator(args, model_args, mappings, schema):
    """Build the data generator for training, testing or evaluation.

    Args:
        args: Run-time options (eval, test, retrieve, predict_price,
            eval_modes, model, index, cache, batch_size, ...).
        model_args: Options stored with the model (model, price_tracker_model,
            slot_scores, entity_*_form, slot_filling, mappings, num_context,
            dropout).
        mappings: Passed through to the generator.
        schema: Passed to ``EvalExample.from_dict``, ``Preprocessor`` and
            ``DataGenerator``.

    Returns:
        An ``EvalDataGenerator`` when ``args.eval`` is set, otherwise a
        ``DataGenerator`` (``LMDataGenerator`` when ``args.model == 'lm'``).

    Raises:
        ValueError: For retrieval models when both "loss" and "generation"
            are requested in ``args.eval_modes``.
    """
    from cocoa.core.scenario_db import ScenarioDB
    from cocoa.core.dataset import read_dataset, EvalExample
    from cocoa.core.util import read_json

    from core.scenario import Scenario
    from core.price_tracker import PriceTracker
    from core.slot_detector import SlotDetector
    from retriever import Retriever
    from preprocess import DataGenerator, LMDataGenerator, EvalDataGenerator, Preprocessor
    import os.path

    # TODO: move this to dataset
    if args.eval:
        dataset = []
        for path in args.eval_examples_paths:
            dataset.extend(
                [EvalExample.from_dict(schema, e) for e in read_json(path)])
    else:
        dataset = read_dataset(args, Scenario)
    lexicon = PriceTracker(model_args.price_tracker_model)
    slot_detector = SlotDetector(slot_scores_path=model_args.slot_scores)

    # Model config tells data generator which batcher to use
    model_config = {}
    if args.retrieve or model_args.model in ('ir', 'selector'):
        model_config['retrieve'] = True
    if args.predict_price:
        model_config['price'] = True

    # For retrieval-based models only: whether to add ground truth response in the candidates
    if model_args.model in ('selector', 'ir'):
        if 'loss' in args.eval_modes and 'generation' in args.eval_modes:
            message = '"loss" requires ground truth reponse to be added to the candidate set. Please evaluate "loss" and "generation" separately.'
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(message)
            raise ValueError(message)
        add_ground_truth = (not args.test) or args.eval_modes == ['loss']
        print('Ground truth response {} be added to the candidate set.'.format(
            'will' if add_ground_truth else 'will not'))
    else:
        add_ground_truth = False

    # TODO: hacky
    if args.model == 'lm':
        DataGenerator = LMDataGenerator

    # NOTE(review): this reads args.model while the checks above read
    # model_args.model - confirm the two are always equal.
    if args.retrieve or args.model in ('selector', 'ir'):
        retriever = Retriever(args.index,
                              context_size=args.retriever_context_len,
                              num_candidates=args.num_candidates)
    else:
        retriever = None

    preprocessor = Preprocessor(schema,
                                lexicon,
                                model_args.entity_encoding_form,
                                model_args.entity_decoding_form,
                                model_args.entity_target_form,
                                slot_filling=model_args.slot_filling,
                                slot_detector=slot_detector)

    trie_path = os.path.join(model_args.mappings, 'trie.pkl')

    if args.eval:
        data_generator = EvalDataGenerator(dataset, preprocessor, mappings,
                                           model_args.num_context)
    else:
        if args.test:
            # Dropout is disabled for test runs.
            model_args.dropout = 0
            train, dev, test = None, None, dataset.test_examples
        else:
            # Outside test mode the test examples are used as the dev split.
            train, dev, test = dataset.train_examples, dataset.test_examples, None
        data_generator = DataGenerator(train,
                                       dev,
                                       test,
                                       preprocessor,
                                       schema,
                                       mappings,
                                       retriever=retriever,
                                       cache=args.cache,
                                       ignore_cache=args.ignore_cache,
                                       candidates_path=args.candidates_path,
                                       num_context=model_args.num_context,
                                       trie_path=trie_path,
                                       batch_size=args.batch_size,
                                       model_config=model_config,
                                       add_ground_truth=add_ground_truth)

    return data_generator

Exemplo n.º 21

0

Exibir arquivo

Arquivo: run.py Projeto: ivyfox/MachineLearningPractice

#!/usr/bin/env python
#-*- encoding:utf-8 -*-

import sys, os

from preprocess import Preprocessor
from features import FeatureSelector
from bayes import BayesClassifier

if __name__ == '__main__':
    # Command line: <train_file> <test_file>
    train_file, test_file = sys.argv[1], sys.argv[2]

    # Step 1: Preprocessor.build_vocabulary_and_categories on the training file.
    vocabulary_builder = Preprocessor()
    vocabulary_builder.build_vocabulary_and_categories(train_file)

    # Step 2: feature selection on the training file (ck = 500).
    selector = FeatureSelector(train_file, ck=500)
    selector.select_features()

    # Step 3: train and test the classifier with the 'bernoulli' model.
    classifier = BayesClassifier(train_file, test_file, model='bernoulli')
    classifier.train()
    classifier.test()

Exemplo n.º 22

0

Exibir arquivo

import os
import unittest
from preprocess import Preprocessor

# Embedding file location, built from the current working directory
# (presumably GloVe Twitter 200-d vectors, judging by the file name).
path = os.getcwd() + "/glove_twitter/glove_twitter_200d_clean.txt"
# Single module-level Preprocessor shared by every test case below.
preprocessor = Preprocessor(path=path, max_length_dictionary=None)


class tweet_test(unittest.TestCase):
    """Exercises clean_text, tokenize_text and replace_token_with_index of
    the module-level ``preprocessor`` on one sample tweet."""

    def setUp(self):
        # Sample tweet containing mentions and a URL.
        self.text = "@BTS_twt: We met @torikelly @iambeckyg @ciara https://t.co/j7jXeTHc4A"

    def test_clean(self):
        """clean_text output for the sample tweet."""
        cleaned = preprocessor.clean_text(self.text)
        self.assertEqual(cleaned, " we met")

    def test_tokenizer(self):
        """tokenize_text applied to the cleaned tweet."""
        cleaned = preprocessor.clean_text(self.text)
        tokens = preprocessor.tokenize_text(cleaned)
        self.assertEqual(tokens, ['met'])

    def test_replace(self):
        """Tokens of the cleaned tweet mapped through embeddingMap."""
        cleaned = preprocessor.clean_text(self.text)
        tokens = preprocessor.tokenize_text(cleaned)
        indices = preprocessor.replace_token_with_index(
            tokens, preprocessor.embeddingMap)
        self.assertEqual(indices, [517])

    def test_padsequence(self):

Exemplo n.º 23

0

Exibir arquivo

class QueryProcessor:
    """
        Class which contains methods to process the query and return the results
    """
    def __init__(self):
        self.prep = Preprocessor()
        self.genesis_ic = wn.ic(genesis, False, 0.0)

    def get_docs(self, query):
        """ Retrieve the mongodb objects of the query words, each holding the inverted index list along with the tf of that word.
            idf is also calculated and stored.

            Words without an index entry, or whose entry has no documents,
            are left out of the result.

            Args:
                query (list): The preprocessed search query as a list of words.

            Returns:
                dict: key is the query word and the value is a dict with the word's idf and the inverted index list.

        """
        data = {}
        tot_docs = Doc.objects().count()
        for word in query:
            ind = Index.objects(key=word).first()
            # An empty posting list would make the idf division below fail
            # with ZeroDivisionError, so treat it like an unknown word.
            if not ind or not ind.documents:
                continue
            data[word] = {
                "idf": math.log(tot_docs / len(ind.documents),
                                10),  # calculate idf of the query word
                "docs": ind.documents,  # Documents which contain word
            }
        return data

    def jc_sim(self, sent, ref_words):
        """Calculate Similarity score between the query and a sentence of the document
        Args:
            sent(str) : Sentence from the document
            ref_words (list) : Preprocessed Query
        Returns:
            number : Similarity score between the sentence and the query
                     (0 when the preprocessed sentence has fewer than 5 words)
        """
        sim = 0
        words = self.prep.preprocess(sent)
        if len(words) < 5:
            return 0
        comparable_pos = ("n", "v", "a", "r")
        # The query synsets do not depend on the sentence word, so look
        # them up once instead of once per (word, synset) pair. Order is
        # kept: query words first, then their synsets.
        ref_synsets = [
            w2 for t in ref_words for w2 in wn.synsets(t)
            if w2._pos in comparable_pos
        ]
        for w in words:
            maxi = 0
            for w1 in wn.synsets(w):
                if w1._pos not in comparable_pos:
                    continue
                for w2 in ref_synsets:
                    # Only synsets of the same part of speech are compared.
                    if w1._pos != w2._pos:
                        continue
                    n = w1.jcn_similarity(
                        w2, self.genesis_ic
                    )  # calculate Jiang Conrath Similarity between two words
                    if w1 == w2 or n > 1:
                        maxi += 10
                    else:
                        maxi = max(maxi, n)
            sim += maxi
        return sim / max(len(ref_words), len(words))

    def fetch_top_n(self, query, n=5):
        """ Fetch the best n documents out of all based on the tf-idf score.

            Args:
                query (list): Pre-processed query
                n (int) : The number of relevant documents to be fetched

            Returns:
                list : ``(document, score)`` pairs of the best n documents, highest tf-idf score first.

        """
        all_docs = self.get_docs(query)
        ranks = defaultdict(int)
        for data in all_docs.values():
            for d in data["docs"]:
                ranks[d.doc] += d.tf * data["idf"]
        # sorted() is stable, so equal scores keep their insertion order.
        return sorted(ranks.items(), key=lambda kv: -kv[1])[:n]

    def process_query(self, query):
        """  Computes and retrieves the result of the query

             Args:
                query (str): The search query given by the user.

             Returns:
                list : It contains, per document, a tuple of the document path and the best 5 ``(score, sentence)`` pairs for that document.

        """
        query = self.prep.preprocess(query)
        ranks = self.fetch_top_n(query)
        ans = []
        for r in ranks:
            file_path = Path(r[0].file_path)
            # The sentences of a document live in a sibling .json file.
            new_path = file_path.with_suffix(".json")
            new_path = get_real_path(new_path)
            with open(new_path, "r") as f:
                data = set(json.load(f)["sentences"])  # drop duplicate sentences
                sen = tuple((self.jc_sim(s, query), s) for s in data)
                best = tuple(sorted(
                    sen, key=lambda x: -x[0]))[:5]  # Slice top five sentences
                ans.append((file_path, best))
        return ans

Exemplo n.º 24

0

Exibir arquivo

 def __init__(self):
     """Create the preprocessor and the information-content data."""
     # Preprocessor instance stored for later use by this object.
     self.prep = Preprocessor()
     # Result of wn.ic for the `genesis` corpus.
     # NOTE(review): the positional False / 0.0 look like NLTK's
     # weight_senses_equally and smoothing arguments - confirm.
     self.genesis_ic = wn.ic(genesis, False, 0.0)

Exemplo n.º 25

0

Exibir arquivo

Arquivo: unit_tests.py Projeto: rohan-varma/awesome-athletes

 def test_remove_missing_indices(self):
     """remove_missing_indices is expected to yield -1 for this plain list."""
     values = [1, 2, 3]
     preprocessor = Preprocessor(np.array(values))
     outcome = preprocessor.remove_missing_indices(values)
     self.assertTrue(outcome == -1)

Exemplo n.º 26

0

Exibir arquivo

Arquivo: vision.py Projeto: eliask/SDP2011-Robotniks

class Vision():
    """Per-frame vision pipeline: capture, preprocessing, feature
    extraction, interpretation, world update and optional GUI output."""

    #rawSize = (768,576)
    rawSize = (640, 480)

    # Whether to 'crash' when something non-critical like the GUI fails
    debug = True

    def __init__(self, world, filename=None, simulator=None, once=False, headless=False):
        """Wire up the pipeline stages.

        Args:
            world: Object updated with the detected entities each frame;
                also passed to the GUI.
            filename: Passed to ``Capture`` when no simulator is used.
            simulator: When truthy, frames come from ``SimCapture``; it is
                also passed to ``Preprocessor``.
            once: Passed to ``Capture``.
            headless: When true, ``processFrame`` skips all GUI updates.
        """
        logging.info('Initialising vision')
        if simulator:
            self.capture = SimCapture(simulator)
        else:
            self.capture = Capture(self.rawSize, filename, once)

        self.headless = headless

        self.threshold = threshold.AltRaw()
        self.pre = Preprocessor(self.rawSize, self.threshold, simulator)
        self.featureEx = FeatureExtraction(self.pre.cropSize)
        self.interpreter = Interpreter()
        self.world = world
        self.gui = GUI(world, self.pre.cropSize, self.threshold)
        self.histogram = Histogram(self.pre.cropSize)

        # Per-frame processing durations (seconds) and frame counter.
        self.times=[]
        self.N=0

        #debug.thresholdValues(self.threshold.Tblue, self.gui)

        logging.debug('Vision initialised')

    def formatTime(self, t):
        """Format the timestamp ``t`` (seconds since the epoch) as local
        ``HH:MM:SS.mmm``."""
        return time.strftime('%H:%M:%S', time.localtime(t)) \
            + ( '%.3f' % (t - math.floor(t)) )[1:] #discard leading 0

    def processFrame(self):
        """Capture and process a single frame and record how long it took.

        GUI failures are logged; they are re-raised only while
        ``self.debug`` is true.
        """
        startTime = time.time()
        logging.debug("Frame %d at %s", self.N,
                      self.formatTime(startTime) )
        self.N += 1

        logging.debug("Capturing a frame")
        frame = self.capture.getFrame()
        logging.debug("Entering preprocessing")
        standard = self.pre.get_standard_form(frame)
        bgsub_vals, bgsub_mask = self.pre.bgsub(standard)
        logging.debug("Entering feature extraction")

        hist_props_bgsub = self.histogram.calcHistogram(standard)
        # Only consumed by the disabled updateAbsThresholds call below; the
        # call is kept in case calcHistogram has side effects.
        hist_props_abs = self.histogram.calcHistogram(bgsub_vals)
        self.threshold.updateBGSubThresholds(hist_props_bgsub)
        #self.threshold.updateAbsThresholds(hist_props_abs)

        ents = self.featureEx.features(bgsub_vals, self.threshold)
        # The argument needs a %s placeholder; without one the logging
        # module reports a string-formatting error instead of the message.
        logging.debug("Detected entities: %s", ents)
        logging.debug("Entering interpreter")
        self.interpreter.interpret(ents)
        logging.debug("Entering World")
        self.world.update(startTime, ents)

        logging.debug("Updating GUI")
        if not self.headless:
            try:
                bgsub = self.pre.remove_background(standard)
                self.gui.updateWindow('raw', frame)
                self.gui.updateWindow('mask', bgsub_mask)
                self.gui.updateWindow('foreground', bgsub_vals)
                self.gui.updateWindow('bgsub', bgsub)
                self.gui.updateWindow('standard', standard)
                canny = cv.CreateImage(self.pre.cropSize, 8,1)
                # adaptive = cv.CreateImage(self.pre.cropSize, 32,3)
                # tmp = cv.CreateImage(self.pre.cropSize, 8,3)
                # cv.Convert(standard, adaptive)
                cv.CvtColor(bgsub, canny, cv.CV_BGR2GRAY)
                cv.Threshold(canny, canny, 150, 255, cv.CV_THRESH_OTSU)
                # cv.Threshold(canny, canny, 100, 255, cv.CV_ADAPTIVE_THRESH_GAUSSIAN_C)
                # cv.Sobel(adaptive, adaptive, 1,1,1)
                # cv.Convert(adaptive, tmp)
                # cv.ConvertScale(tmp, tmp, 10)
                # cv.CvtColor(tmp, canny, cv.CV_BGR2GRAY)
                # cv.Threshold(canny,canny, 50, 255, cv.CV_THRESH_BINARY)
                #cv.Canny(canny,canny, 100, 180,3)
                # Note: bgsub is overwritten with the thresholded image here.
                cv.CvtColor(canny, bgsub, cv.CV_GRAY2BGR)
                new = self.featureEx.detectCircles(bgsub)

                self.gui.updateWindow('adaptive', canny)
                self.gui.updateWindow('new', new)
                self.gui.draw(ents, startTime)
            except Exception as e:
                logging.error("GUI failed: %s", e)
                if self.debug:
                    raise

        endTime = time.time()
        self.times.append( (endTime - startTime) )

Exemplo n.º 27

0

Exibir arquivo

Arquivo: detect_pattern.py Projeto: YangLeoZhao/Tailor

import pickle
from preprocess import Preprocessor
from prepare_training_set import get_set_files
from model import PatternType


def load_nn():
    """Load and return the object pickled in ``nn.pkl`` (current directory).

    The file is opened in binary mode, which pickle requires, and is
    closed even when unpickling fails.

    NOTE: ``pickle.load`` can execute arbitrary code from the file; only
    use this with a trusted ``nn.pkl``.
    """
    with open("nn.pkl", "rb") as fileObj:
        return pickle.load(fileObj)


if __name__ == "__main__":
    # Classify every file of the test set with the pickled network and
    # print a one-hot result per file.
    preprocessor = Preprocessor()
    nn = load_nn()

    test_set_files = get_set_files("test_set")
    # Not used below; the call is kept unchanged from the original script.
    training_set_files = get_set_files("training_set")

    # .items() and single-argument print(...) work on Python 2 and 3 alike.
    for pattern_type, files in test_set_files.items():
        print("[INFO]: Processing {}".format(PatternType(pattern_type).name))
        for file_path in files:
            entry = preprocessor.preprocess(pattern_type, file_path)
            # Network input: the x sums followed by the y sums.
            sums = list(entry.sums.x)
            sums.extend(entry.sums.y)
            result_nn = nn[0].activate(sums)
            # 1 for every output equal to the maximum, 0 elsewhere.
            max_result = max(result_nn)
            result = [1 if i == max_result else 0 for i in result_nn]
            print([file_path, result])

Exemplo n.º 28

0

Exibir arquivo

#!/usr/bin/env python
# coding: utf-8
""" Script used to analyze final cyclotron measurements"""
__author__ = 'Andreas Gsponer'
__license__ = 'MIT'

import numpy as np

from analyzeImage import analyze_image
from surface import CubicFitRotated, CubicFit
from preprocess import Preprocessor

import plots
import smoothing

# Shared Preprocessor for this analysis run.
# NOTE(review): the meaning/units of min_threshold, max_threshold and offset
# are defined in preprocess.Preprocessor - confirm there.
P = Preprocessor(min_threshold=45, max_threshold=230, offset=150)

# The blocks below select the measurement to analyse; the alternatives are
# kept commented out and only the last `path` assignment is active.

# corner points
#path = "../measurements/cyclotron/main_measurements/pre/"
#P.import_video(path + "/before_vacuum.mkv")

# Beam in drift space focused by the BTL
#path = "../measurements/cyclotron/main_measurements/cyclotron/focused_by_btl/2/"
#P.import_video(path + "/0_to_660_compressed.mkv")

# Beam focused by the BTL being refocused by the MBL
#path = "../measurements/cyclotron/main_measurements/cyclotron/mbl_second/2"
#P.import_video(path + "/0_to_660_compressed.mkv")

# Flat beam being focused by the MBL magnets
path = "../measurements/cyclotron/main_measurements/cyclotron/flat_beam/2/660_to_0"

Exemplo n.º 29

0

Exibir arquivo

#
# .. Note::
#    If you run this notebook you can train, interrupt the kernel,
#    evaluate, and continue training later. Comment out the lines where the
#    encoder and decoder are initialized and run ``trainEpochs`` again.
#

print('-' * 30, 'Starting', '-' * 30)
vocab_file = '../vocab/vocab'
tokenizer_file = '../tokenizer/src_tokenizer'
# NOTE(review): 100000 is presumably the maximum vocabulary size - confirm
# against Vocab.
vocab = Vocab(vocab_file, 100000)
tokenizer = Tokenizer(vocab)
# Persist the tokenizer with pickle so it can be loaded again later.
with open(tokenizer_file, mode='wb') as file:
    pickle.dump(tokenizer, file)
max_sequence_len = 100
p = Preprocessor(1, 'data/sentences.txt', tokenizer, max_sequence_len)
# Only the first 5000 items returned by get_data() are kept.
data = p.get_data()[:5000]
print('-' * 30, 'Loaded data', '-' * 30)

hidden_size = 256
# Encoder and decoder are both sized by the vocabulary (vocab.NumIds()).
encoder1 = EncoderRNN(vocab.NumIds(), hidden_size)
decoder1 = DecoderRNN(hidden_size, vocab.NumIds(), 1)

# Move both networks to the GPU when use_cuda is set.
if use_cuda:
    encoder1 = encoder1.cuda()
    decoder1 = decoder1.cuda()

# Training is driven through the Preprocessor `p`; `data` is not passed here.
trainEpochs(encoder1, decoder1, 5000, p, print_every=100)

######################################################################
#

Exemplo n.º 30

0

Exibir arquivo

def train_model(args):
    """Train a model on Gaussian features of the normalised inputs.

    The data is loaded shuffled and split by ``args.frac``. With
    ``args.K <= 1`` a single training run is done (optionally followed by
    test evaluation, saving and plotting); otherwise cross-validation is
    run and its loss is written to ``args.log``.
    """
    logging.basicConfig(level=logging.INFO,
                        format='[%(asctime)s] %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    logging.info('Train model')
    logging.info('Loading data...')

    # Shuffled samples; the first n_train of them form the training split.
    xs, ys, n = load_data(args.X, args.Y, shuffle=True)
    n_train = int(args.frac * n)

    # Inputs are normalised by the width of the [min, max] interval.
    rng = abs(args.max - args.min)
    normalized_xs = Preprocessor().normalize(xs, rng)
    fit_inputs = filter_data(normalized_xs) if args.craft else normalized_xs

    # Gaussian feature parameters, optionally replaced by crafted ones.
    logging.info('Computing means and sigmas (%s)...' % args.pre)
    means, sigmas = get_means_sigmas(args, fit_inputs)
    if args.craft:
        means, sigmas = crafted_gaussian_feature(means, sigmas)

    def phi(x):
        # Feature map: normalise, then apply the Gaussian features.
        featurizer = Preprocessor()
        scaled = featurizer.normalize(x, rng)
        return featurizer.gaussian(scaled, means, sigmas)

    logging.info('Preprocessing... (d = %d; craft-feature %d)' %
                 (means.shape[0], args.craft))
    phi_xs = phi(xs)
    phi_xs_train, phi_xs_test = phi_xs[:n_train], phi_xs[n_train:]
    ys_train, ys_test = ys[:n_train], ys[n_train:]

    model = get_model(args, (len(phi_xs_train[0]), ))
    logging.info('Using model %s (plot = %s)' % (args.model, args.plot))

    def f(x):
        # Prediction clipped to [min, max] and rounded; `sess` is bound by
        # the session block below before f is ever called.
        prediction = model.test(sess, x)
        return np.round(np.clip(prediction, args.min, args.max))

    with tf.Session() as sess:
        logging.info('Training... (optimizer = %s)' % args.optimizer)
        if args.K <= 1:
            train_loss = train(args, sess, model, phi_xs_train, ys_train)
            logging.info('Training loss = %f' % train_loss)

            # A test split only exists when frac left some samples over.
            if n_train < n:
                test_loss = model.eval(sess, phi_xs_test, ys_test)
                logging.info('Testing loss = %f' % test_loss)

            if args.output is not None:
                logging.info('Save model at %s' % args.output)
                model.save_to_file(sess, args.output)
                # Feature parameters are stored next to the model.
                np.save(args.output + '-mean', means)
                np.save(args.output + '-sigma', sigmas)

            if args.plot is not None:
                logging.info('Plotting... (output = %s)' % args.fig)
                if args.plot == '3d':
                    plot_3d(f, phi, args.min, args.max, args.min, args.max, 0,
                            1081, args.fig)
                elif args.plot == '2d':
                    plot_2d_map(f, phi, args.min, args.max, args.min, args.max)

        else:
            validation_loss = train_cross_validation(args, sess, model,
                                                     phi_xs_train, ys_train)
            log_filename = args.log
            with open(log_filename, 'w') as log_file:
                log_file.write('%s\t%s\n' % (log_filename, validation_loss))

Exemplo n.º 31

0

Exibir arquivo

Arquivo: summary.py Projeto: trongtuyen99/viet_summarizer

 def __init__(self, w2v_path=r".\data\vi.vec"):
     # Builds the two helper objects used by this instance.
     # w2v_path is handed to Vectorizer; the default is a Windows-style
     # relative path.
     # NOTE(review): the attribute is spelled "clearner" (presumably
     # "cleaner"); it is left as is because other code may rely on the name.
     self.clearner = Preprocessor()
     self.vectorizer = Vectorizer(w2v_path)

Exemplo n.º 32

0

Exibir arquivo

 def phi(x):
     """Normalise ``x`` by ``rng`` and apply the Gaussian features defined
     by the enclosing scope's ``means`` and ``sigmas``."""
     featurizer = Preprocessor()
     scaled = featurizer.normalize(x, rng)
     return featurizer.gaussian(scaled, means, sigmas)

Exemplo n.º 33

0

Exibir arquivo

Arquivo: als.py Projeto: fotol1/movie_recommendation

            ndc1_als.append(n1)
            ndc10_als.append(n10)
        
        return np.mean(ndc1_als),np.mean(ndc10_als)
        
    def __get_matrix(self):
        """Return the ratings as a CSR matrix with movies as rows and users
        as columns.

        Side effect: the 'userId' and 'movieId' columns of
        ``self.train_df`` are converted to the pandas 'category' dtype;
        their category codes are used as matrix indices.
        """
        frame = self.train_df
        for id_column in ('userId', 'movieId'):
            frame[id_column] = frame[id_column].astype('category')

        values = frame['rating'].astype(np.float32)
        movie_codes = frame['movieId'].cat.codes.copy()
        user_codes = frame['userId'].cat.codes.copy()

        # Build in COO form, then convert to CSR.
        coordinates = sp.coo_matrix((values, (movie_codes, user_codes)))
        return coordinates.tocsr()
    
# Driver: preprocess the ratings file, train the ALS helper and log the
# validation metrics.
prep = Preprocessor('ratings.csv')
# NOTE(review): 0.4 is presumably a split fraction - confirm against
# Preprocessor.process.
prep.process(0.4)
mdl = ALS_helper()
mdl.train()
# validate() returns two values, logged below as ndcg@1 and ndcg@10.
a,b = mdl.validate()
logger.info('ndcg@1 = {}, ndcg@10 = {}'.format(a,b))

Exemplo n.º 34

0

Exibir arquivo

Arquivo: vision.py Projeto: SSabev/SDPCode

class Vision:
    """Frame-processing loop: grab a frame, extract entities, send them on.

    Each frame is undistorted, preprocessed, shown in the GUI and passed
    to the feature extractor. The smoothed positions and angles of the
    'yellow', 'blue' and 'ball' entities are then sent through ``pipe``.
    """

    def __init__(self, pitchnum, stdout, sourcefile, resetPitchSize, noGui, debug_window,  pipe):
        """Wire up the vision components and run the frame loop.

        The constructor does not return until ``self.running`` becomes
        False (see ``quit``), because the processing loop runs here.

        Args:
            pitchnum: pitch index; passed to ``Threshold`` and used to
                build the ``calibration/pitch<N>`` path.
            stdout: when truthy, fps, pitch size and entity data are printed.
            sourcefile: ``None`` to create a ``Camera``; otherwise only
                ``self.filetype`` ('video' or 'image') is derived from it.
            resetPitchSize: forwarded to ``Preprocessor``.
            noGui: forwarded to ``Gui``.
            debug_window: unused while the debug-window code is disabled.
            pipe: object whose ``send`` method receives the init signal,
                the per-frame data and the final ``'q'``.
        """
        self.noGui = noGui
        self.lastFrameTime = self.begin_time = time.time()
        self.processed_frames = 0

        self.running = True
        self.stdout = stdout

        self.pipe = pipe

        if sourcefile is None:
            self.camera = Camera()
        else:
            # NOTE(review): no camera is created on this branch, so the
            # loadCalibration call below raises AttributeError whenever a
            # source file is given. The file-backed camera API is not
            # visible here, so this is flagged rather than guessed at.
            self.filetype = 'video'
            if sourcefile.endswith(('jpg', 'png')):
                self.filetype = 'image'

        self.gui = Gui(self.noGui)
        self.threshold = Threshold(pitchnum)
        self.thresholdGui = ThresholdGui(self.threshold, self.gui)
        self.preprocessor = Preprocessor(resetPitchSize)
        self.features = Features(self.gui, self.threshold)
        # if self.debug_window:
        #     self.debug_window = DebugWindow()
        # else:
        #     self.debug_window = None

        calibrationPath = os.path.join('calibration', 'pitch{0}'.format(pitchnum))
        self.camera.loadCalibration(os.path.join(sys.path[0], calibrationPath))

        eventHandler = self.gui.getEventHandler()
        eventHandler.addListener('q', self.quit)

        # Ugly stuff for smoothing coordinates - should probably move it
        # Fixed-length histories: the oldest sample is dropped whenever a
        # new one is appended (see addCoordinates / addAngle).
        self._pastSize = 5
        self._pastCoordinates = {
            'yellow': [(0, 0)] * self._pastSize,
            'blue': [(0, 0)] * self._pastSize,
            'ball': [(0, 0)] * self._pastSize,
        }
        self._pastAngles = {
            'yellow': [1.0] * self._pastSize,
            'blue': [1.0] * self._pastSize,
        }

        # The inner loop only exits once self.running is False, so the
        # outer loop body executes at most once.
        while self.running:
            if self.preprocessor.hasPitchSize:
                self.outputPitchSize()
                self.gui.setShowMouse(False)
            else:
                eventHandler.setClickListener(self.setNextPitchCorner)

            while self.running:
                self.doStuff()

    def quit(self):
        """Stop the frame loop and send 'q' through the pipe."""
        self.running = False
        self.pipe.send('q')

    def print_fps(self):
        """Update the frame counters and, if ``stdout`` is set, print fps.

        A zero-length interval (two calls within the clock resolution) is
        reported as 0.0 fps instead of raising ZeroDivisionError.
        """
        thisFrameTime = time.time()
        time_diff = thisFrameTime - self.lastFrameTime
        fps = 1.0 / time_diff if time_diff > 0 else 0.0
        self.processed_frames = self.processed_frames + 1
        elapsed = thisFrameTime - self.begin_time
        avg_fps = self.processed_frames * 1.0 / elapsed if elapsed > 0 else 0.0
        self.lastFrameTime = thisFrameTime

        if self.stdout:
            print("Instantaneous fps = %f Average fps = %f" % (fps, avg_fps))

    def doStuff(self):
        """Process one frame: capture, preprocess, display, extract, output."""
        frame = self.camera.getImageUndistort()

        # Uncomment to see changes in barrell distortion matrix
        # calibrationPath = os.path.join('calibration', 'pitch{0}'.format(0))
        # self.camera.loadCalibration(os.path.join(sys.path[0], calibrationPath))

        frame = self.preprocessor.preprocess(frame)

        self.gui.updateLayer('raw', frame)

        ents = self.features.extractFeatures(frame)
        self.outputEnts(ents)

        self.print_fps()

        self.gui.loop()

    def setNextPitchCorner(self, where):
        """Click handler: register a pitch corner at ``where``.

        Once the preprocessor reports a pitch size, the size is output and
        the mouse/corner overlays are cleared; until then a crosshair is
        drawn at the clicked position.
        """
        self.preprocessor.setNextPitchCorner(where)

        if self.preprocessor.hasPitchSize:
            self.outputPitchSize()
            self.gui.setShowMouse(False)
            self.gui.updateLayer('corner', None)
        else:
            self.gui.drawCrosshair(where, 'corner')

    def outputPitchSize(self):
        """Print the pitch size (if ``stdout``) and send it as an InitSignal."""
        if self.stdout:
            print ("Pitch size:\t %i\t %i\n" % tuple(self.preprocessor.pitch_size))
        # if self.debug_window:
        #     self.debug_window.insert_text("Pitch size:\t %i\t %i\n" % tuple(self.preprocessor.pitch_size))
        self.pipe.send(InitSignal(self.preprocessor.pitch_size[0], self.preprocessor.pitch_size[1]))

    def addCoordinates(self, entity, coordinates):
        """Push ``coordinates`` onto ``entity``'s history, dropping the oldest."""
        self._pastCoordinates[entity].pop(0)
        self._pastCoordinates[entity].append(coordinates)

        #(x, y) = coordinates;

        # if the frame is bad(-1) then add the most recent coordinate instead
        #if (x != -1):
        #    self._pastCoordinates[entity].append(coordinates)
        #else:
        #    self._pastCoordinates[entity].append(self._pastCoordinates[entity][-1])

    def smoothCoordinates(self, entity):
        """Return the per-axis mean ``(x, y)`` of ``entity``'s coordinate history.

        The sums are divided by ``self._pastSize`` with ``/``, as before.
        """
        history = self._pastCoordinates[entity]
        x = sum(px for px, _ in history) / self._pastSize
        y = sum(py for _, py in history) / self._pastSize
        return (x, y)

    def addAngle(self, entity, angle):
        """Push ``angle`` onto ``entity``'s angle history, dropping the oldest."""
        self._pastAngles[entity].pop(0)
        self._pastAngles[entity].append(angle)
        # if the frame is bad(-1) then add the most recent angle instead
        # good angle is always in (0,2pi), bad angle is -1, careful with real number
        #if (angle > -0.5):
        #    self._pastAngles[entity].append(angle)
        #else:
        #    self._pastAngles[entity].append(self._pastAngles[entity][-1])

    def smoothAngle(self, entity):
        """Return the mean of ``entity``'s angle history, handling wrap-around.

        The result may exceed 2*pi; callers pass it through
        ``standardize_angle``.
        """
        # angle is periodic (of 2pi) and a bit tricky to smooth
        temp = sorted(self._pastAngles[entity])

        # if max_angle - min_angle > pi, those angles are crossing 0
        # we must add a period to the small ones
        if temp[-1] - temp[0] > math.pi:
            temp = [angle + 2*math.pi if angle < math.pi else angle
                    for angle in temp]

        return sum(temp) / self._pastSize

    # add/substract period (2pi) so angle is always in (0,2pi)
    # assume they are off by at most a period
    def standardize_angle(self, angle):
        """Shift ``angle`` by one period (2*pi) if it is above 2*pi or below 0."""
        if (angle > 2*math.pi):
            return angle - 2*math.pi
        if (angle < 0):
            return angle + 2*math.pi
        return angle

    def outputEnts(self, ents):
        """Smooth the detected entities and send them as one ``FrameData``.

        Does nothing until the preprocessor has a pitch size. The message
        holds x, y, angle for 'yellow' and 'blue', x, y for 'ball', then a
        millisecond timestamp, in that order.

        Args:
            ents: mapping with 'yellow', 'blue' and 'ball' entries, each
                providing ``coordinates()`` and, for the two robots,
                ``angle()``.
        """
        # Messyyy
        if not self.preprocessor.hasPitchSize:
            return

        msg_data = []

        for name in ['yellow', 'blue', 'ball']:
            entity = ents[name]
            coordinates = entity.coordinates()

            # This is currently not needed
            # if the frame is not recognized, skip a maximum of self.max_skip times
            #if (coordinates[0] != -1):
            #    self.addCoordinates(name, coordinates)
            #    self.skip = 0
            #else:
            #    if (self.skip < self.max_skip):
            #        self.skip = self.skip + 1;
            #    else:
            #        self.addCoordinates(name, coordinates)

            self.addCoordinates(name, coordinates)
            x, y = self.smoothCoordinates(name)

            # TODO: The system needs (0, 0) at top left!
            if y != -1:
                y = self.preprocessor.pitch_size[1] - y

            if name == 'ball':
                # self.send('{0} {1} '.format(x, y))
                msg_data += [int(x), int(y)]
                #print (self._pastCoordinates[name])
                #print(coordinates)
            else:
                # angle is currently clockwise, this makes it anti-clockwise
                angle = self.standardize_angle(2*math.pi - entity.angle())

                self.addAngle(name, angle)
                angle = self.standardize_angle(self.smoothAngle(name))

                msg_data += [int(x), int(y), angle]

        msg_data.append(int(time.time() * 1000))
        data = FrameData(*msg_data)

        if self.stdout:
            print ("Yellow:\t %i\t %i\t Angle:\t %s\nBlue:\t %i\t %i\t Angle:\t %s\nBall:\t %i\t %i\t\nTime:\t %i\n" % tuple(msg_data))
        # if debug_window:
        #     debug_window.insert_text("Yellow:\t %i\t %i\t Angle:\t %s\nBlue:\t %i\t %i\t Angle:\t %s\nBall:\t %i\t %i\t\nTime:\t %i\n" % tuple(msg_data))

        self.pipe.send(data)

Exemplo n.º 35

0

Exibir arquivo

# coding=utf-8
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import jieba
from preprocess import Preprocessor
from embedding import *
import pickle

# Use case examples:
# Every print below is written as a call with a single argument, which
# prints the same text on Python 2 and is valid syntax on Python 3.
wordlist, embeds, len_words, embed_dim = get_word2vec()
write2word2vec(wordlist)

# Use an out-of-the-box dictionary
sent = u'“年”字有多少笔？	笔顺编号:311212,，?？!！.。>》\、'
# Clean the raw line first, then segment it into words with jieba.
sentence = list(jieba.cut(Preprocessor().replace_line(sent)))
p = Preprocessor()
p.load_dictionary(dict_name='../data/dbqa.word2vec.wordlist.txt')
print(len(p.word_to_index))
print('/'.join(sentence))
# Round trip: words -> indices -> words.
indices = p.word_list_to_index_list(sentence)
print(indices)
print('/'.join(p.index_list_to_word_list(indices)))

# You may also want to fit the dictionary from corpus
#p.reset()
p.fit_on_corpus(insert_new_word_into_dict=False)
#p.save_dictionary()
print('Vocab size: {0}'.format(p.vocab_size))

# questions: list of sentences, where a sentence is a list comprising of word indices

Exemplo n.º 36

0

Exibir arquivo

Arquivo: test.py Projeto: SSabev/SDPCode

        full_path_output = os.path.join (OUTPUT_DIR, filename)
        frame.save(full_path_output)
    
    return OK

# Main program

# The pitch number is the first command-line argument; it defaults to 0.
pitchnum = int(sys.argv[1]) if len(sys.argv) > 1 else 0

# Vision components, built in dependency order.
threshold = Threshold(pitchnum)
gui = Gui(1)
features = Features(gui, threshold)
preprocessor = Preprocessor(False)

INPUT_DIR = './input_images'
OUTPUT_DIR = './output_images'
error_list = []


# Load the calibration for the selected pitch, resolved against sys.path[0].
calibrationPath = os.path.join('calibration', 'pitch{0}'.format(pitchnum))
camera = Camera()
camera.loadCalibration(os.path.join(sys.path[0], calibrationPath))

# Statistics
recog = dict.fromkeys(('ball', 'blue', 'yellow'), 0)
n_files = 0

# Process all images in INPUT_DIR
for filename in os.listdir(INPUT_DIR):

Exemplo n.º 37

0

Exibir arquivo

Arquivo: scipy-kmeans.py Projeto: rohan-varma/awesome-athletes

    np.set_printoptions(threshold=np.inf)
    #create a DB interactor
    interactor = DBInteractor("season_batting")
    #gets the dataframe
    df = interactor.get_current_data_frame()
    #print(df)
    #df = df.drop(['yearID','stint','stint','teamID','lgId','HBP', 'playerID'], axis=1)
    arr_with_ids = interactor.df_to_numpy_matrix()
    cols = ['playerID', 'yearID']
    df = interactor.drop_useless_stuff(cols)
    #converts it to a numpy matrix
    arr = interactor.df_to_numpy_matrix()
    arr = arr.astype(float)
    #print arr
    #print arr
    #don't forget to disconnect
    interactor.disconnect()
    #create a preprocessor to preprocess the data
    #this doesn't do anything very useful right now

    p = Preprocessor(arr, df)
    arr = p.preprocess(arr)
    sample = arr
    print arr.shape[1]
    fit_samples_gmm(sample,1)
    fit_samples_kmeans(sample, sample.shape[1], 10)
    rand_indices = np.random.choice(np.arange(0,len(sample)), replace=False, size=len(sample))
    rand_samples = sample[rand_indices]
    af = fit_affinity_propagation(samples=rand_samples)