Example #1
def hy_prediction(dataio, patient_info, patient_array, params):
    ### Task Setting 
    task = params['task'][1]
    split_method = 'ratio'
    ratio = 0.8 # provide the ratio
    krun = 10 # run 10 times then average the result
    ### Initialization
    acc = np.zeros(krun, dtype='float32') # evaluation metric
    n_feature = dataio.feature.feature_len
    param_w = np.zeros([n_feature, krun], dtype='float32') # weights parameter
    ### H&Y Reading
    feature = dataio.feature
    patient_info = feature.get_hy_stage(patient_info, patient_array)
    print ('-----')
#    split = Splitter(task, patient_array, ratio, split_method, patient_info)
    split = Splitter(task, patient_array, ratio, split_method)
    for k in range(krun):
        ### Data Splitting
        train_data, test_data = split.get_splitter(k)
        ### Model Training 
        hy_pred = HYPredictor(k, patient_info, train_data, params['result_path'])
        model, y_pred = hy_pred.train_model()
        param_w[:,k], _ = hy_pred.get_param()
        ### Evaluating
        hy_eval = Evaluator(model, test_data, patient_info, task, hy_pred)
        acc[k] = hy_eval.compute_accuracy()
    print ('-----')
    print ('Accuracy of the %s task: %f' %(task, np.sum(acc)/krun)) 
    ### Displaying Feature (selected by prediction model)
    feature = dataio.feature
    feature.get_pred_feature(param_w, krun, 'yh')  
Example #2
    def __init__(self, dset_name, net_names, hard_labels, device, exp=None):
        if exp is None:
            exp = 0
            while osp.exists(osp.join(cfg.DATA_DIR, 'exp_' + str(exp))):
                exp += 1

        self.exp_dir = osp.join(cfg.DATA_DIR, 'exp_' + str(exp))
        self.num_exp = exp

        dset_dir = osp.join(self.exp_dir, dset_name)
        self.splitting_dir = osp.join(dset_dir, cfg.SPLITTING_DIR)
        self.feat_dir = osp.join(dset_dir, cfg.FEATURE_DIR)
        self.label_dir = osp.join(dset_dir, cfg.LABEL_DIR,
                                  'hard' if hard_labels else 'soft')
        self.net_dir = osp.join(dset_dir, cfg.NET_DIR,
                                'hard' if hard_labels else 'soft')
        self.res_dir = osp.join(dset_dir, cfg.RESULT_DIR,
                                'hard' if hard_labels else 'soft')

        self.dset = cfg.DSETS[dset_name]

        self.splitter = Splitter(self.dset, self.splitting_dir)
        self.extractor = Extractor(self.dset, self.splitting_dir,
                                   self.feat_dir, net_names, device)
        self.augmenter = Augmenter(self.dset, self.splitting_dir,
                                   self.feat_dir, self.label_dir, net_names,
                                   hard_labels)
        self.trainer = Trainer(self.dset, self.label_dir, self.net_dir,
                               self.res_dir, net_names, hard_labels, device)
Example #3
def main():
    # Set working directory to project folder
    os.chdir('../.')
    print(os.getcwd())

    # Create Logger File to track all changes
    logger = Logger(os)

    # Create a list of words for parser to ignore
    stop_words = []  #['PSY', 'STAT']

    ninja = Splitter(stop_words)

    ans = input('Do you want to manually input lines? ')

    # Create Messenger Object to ask prompts
    if 'y' in ans or 'Y' in ans:
        messenger = Messenger()

        line = ''
        exit = False

        while not exit:
            line = messenger.collect_input()

            if line == 'quit' or line == 'exit':
                exit = True
            else:
                # Output would be collected here
                print(ninja.split_line(line))
    #else:

    print('Exiting code')
Example #4
	def add_splitter(self, model_name, receptacle_id):
		splitter_id = self.global_ids.get_next_splitter_id()
		rids = []
		receptacle_count = SPLITTER_MODELS[model_name]
		for r_id in range(receptacle_count):
			rids.append(self.global_ids.get_next_receptacle_id())

		if True == DEBUG:
			print('Support.add_splitter() receptacle_count:{0}, rids:{1}'.format(receptacle_count, rids))

		sp = Splitter(model_name, splitter_id, rids)

		receptacle = self.get_receptacle_by_id(receptacle_id)
		receptacle.connect_load('SPLITTER', sp)

		self.full_receptacle_ids.append(receptacle_id)
		self.empty_receptacle_ids.remove(receptacle_id)

		r = Results()
		r.set_object_id(splitter_id)
		r.set_next_receptacle_id_from_list(rids)

		self.splitter_ids.append(splitter_id)
		del(r_id)
		for r_id in rids:
			r1 = sp.get_receptacle_by_id(r_id)
			if None == r1:
				print('Support.add_splitter sp.get_receptacle_by_id returned None for r1. r_id:{0}, rids:{1}'.format(r_id, rids))
			self.empty_receptacle_ids.append(r_id)

		if True == DEBUG:
			print('Support.add_splitter(). Final empty_receptacle_ids:{0}'.format(self.empty_receptacle_ids))
			print('Support.add_splitter(). Final full_receptacle_ids:{0}'.format(self.full_receptacle_ids))

		return r
Example #5
def moca_prediction(dataio, patient_info, patient_array, params):
    ### Task Setting 
    task = params['task'][2]
    split_method = 'ratio'
    ratio = 0.8 # provide the ratio
    krun = 5 # run 5 times then average the result 
    ### Initialization
    rmse = np.zeros(krun, dtype='float32') # evaluation metric
    n_feature = dataio.feature.feature_len
    param_w = np.zeros([n_feature, krun], dtype='float32')
    ### MoCA Reading
    feature = dataio.feature
    patient_info = feature.get_moca_score(patient_info, patient_array)
    print ('-----')
    split = Splitter(task, patient_array, ratio, split_method)
    for k in range(krun):
        ### Data Splitting
        train_data, test_data = split.get_splitter(k)
        ### Model Training 
        moca_pred = MoCAPredictor(k, patient_info, train_data, params['result_path'])
        model, y_pred = moca_pred.train_model()
        param_w[:,k] = moca_pred.get_param()
        ### Evaluating
        pd_eval = Evaluator(model, test_data, patient_info, task, moca_pred)
        rmse[k] = pd_eval.compute_accuracy()
    print ('-----')
    print ('RMSE of the %s task: %f' %(task, np.sum(rmse)/krun))
    ### Displaying Feature (selected by prediction model)
    feature = dataio.feature
    feature.get_pred_feature(param_w, krun, 'moca')
Example #6
    def __call__(self, _, *, audio_paths=[]):
        batch_size = len(audio_paths)
        if batch_size == 0:
            return []

        dataset = Splitter(
            audio_paths,
            annotations=args["--annotations"],
            labels=args["--labels"],
            overlap=args["--overlap"],
            duration=args["--duration"],
            output_directory=args["--output_directory"],
        )

        dataloader = torch.utils.data.DataLoader(
            dataset,
            # batch_size=batch_size,
            batch_size=1,
            shuffle=False,
            num_workers=args["--cores_per_node"],
            collate_fn=dataset.collate_fn,
        )

        start = timer()
        outputs = []
        for idx, data in enumerate(dataloader):
            for out in data:
                outputs.append(out)
        end = timer()
        print("DEBUG: end - start", end - start)

        return outputs
Example #7
def pd_prediction(dataio, patient_info, patient_array, params):
       
    ### Task Setting 
    task = params['task'][0] # disease prediction
    split_method = 'cross-validation'
    kfold = 5 # 5-fold validation
    ### Initialization
    auc = np.zeros(kfold, dtype='float32') # evaluation metrics
    ap = np.zeros(kfold, dtype='float32')
    n_feature = dataio.feature.feature_len
    param_w = np.zeros([n_feature, kfold], dtype='float32')
    print ('-----')
    split = Splitter(task, patient_array, kfold, split_method)
    for k in range(kfold): # each fold, k is the index of test set
        ### Data Splitting
        train_data, test_data = split.get_splitter(k)
        ### Model Training 
        pd_pred = PDPredictor(k, patient_info, train_data, params['result_path'])
        model, y_pred = pd_pred.train_model()
        param_w[:,k], _ = pd_pred.get_param()
        ### Evaluating
        pd_eval = Evaluator(model, test_data, patient_info, task, pd_pred)
        auc[k], ap[k] = pd_eval.compute_accuracy()
    print ('-----')
    print ('AUC of the %s task: %f' %(task, np.sum(auc)/kfold))
    print ('Average Precision of the %s task: %f' %(task, np.sum(ap)/kfold))    
    ### Displaying Feature (selected by prediction model)
    feature = dataio.feature
    feature.get_pred_feature(param_w, kfold, 'pd')  
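Examples #1, #5 and #7 all drive the same Splitter interface: the object is constructed with a task name, the patient array, a ratio or fold count, and a split method, and get_splitter(k) then returns one (train_data, test_data) pair per run. The real class is not shown on this page; the following is only a minimal sketch of a splitter with that interface, assuming patient_array is a 1-D array of patient identifiers.

import numpy as np

class SimpleSplitter:
    """Hypothetical stand-in for the Splitter used in the prediction examples.

    Supports the two split methods seen above: 'ratio' (a random train/test
    split, re-shuffled for each run index k) and 'cross-validation' (the k-th
    fold held out as the test set).
    """

    def __init__(self, task, patient_array, ratio_or_kfold, split_method):
        self.task = task
        self.patients = np.asarray(patient_array)
        self.param = ratio_or_kfold
        self.method = split_method

    def get_splitter(self, k):
        n = len(self.patients)
        if self.method == 'ratio':
            rng = np.random.RandomState(k)  # reproducible per run index
            idx = rng.permutation(n)
            cut = int(n * self.param)       # e.g. 0.8 keeps 80% for training
            return self.patients[idx[:cut]], self.patients[idx[cut:]]
        if self.method == 'cross-validation':
            folds = np.array_split(np.arange(n), int(self.param))
            train_idx = np.concatenate([f for i, f in enumerate(folds) if i != k])
            return self.patients[train_idx], self.patients[folds[k]]
        raise ValueError('unknown split_method: %s' % self.method)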
Example #8
 def test_splitter_does_the_map(self):
     areas = read_kml_areas("areas.kml")
     city_spots = [
         {
             "name": "skytower",
             "type": "shopping mall",
             "lat": 17.019690,
             "lng": 51.094880
         },
         {
             "name": "Biedronka close to skytower",
             "type": "small shop",
             "lat": 17.018921,
             "lng": 51.097994
         },
         {
             "name": "Panorama Racławicka",
             "type": "historical building",
             "lat": 17.044462,
             "lng": 51.110171
         },
         {
             "name": "Galeria Dominikańska",
             "type": "shopping mall",
             "lat": 17.040685,
             "lng": 51.108244
         },
     ]
     splitter = Splitter()
     assigned = splitter.split_all_points(city_spots, areas)
     self.assertTrue(len(assigned) == 2)
     self.assertIn('Rynek 1', assigned)
     self.assertIn('Gajowice 1', assigned)
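The test above only pins down the interface: split_all_points takes a list of point dicts and the areas parsed from areas.kml, and returns a mapping from area name to the spots that fall inside that area. As a rough illustration of that idea (not the tested implementation), a point-in-polygon assignment could look like the sketch below, assuming the areas are available as shapely polygons keyed by name.

from shapely.geometry import Point, Polygon

def assign_points_to_areas(points, areas):
    """Group point dicts by the named polygon that contains them.

    points: list of dicts with 'lat'/'lng' keys, as in the test above.
    areas:  dict mapping area name -> shapely Polygon (a hypothetical format;
            the coordinate order must match whatever read_kml_areas produces).
    """
    assigned = {}
    for spot in points:
        location = Point(spot['lat'], spot['lng'])
        for name, polygon in areas.items():
            if polygon.contains(location):
                assigned.setdefault(name, []).append(spot)
                break  # each spot is assigned to at most one area
    return assigned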
Example #9
def run_splitter(batch):
    dataset = Splitter(
        batch,
        annotations=args["--annotations"],
        labels=args["--labels"],
        overlap=args["--overlap"],
        duration=args["--duration"],
        output_directory=args["--output_directory"],
    )

    dataloader = torch.utils.data.DataLoader(
        dataset,
        # batch_size=batch_size,
        batch_size=1,
        shuffle=False,
        num_workers=args["--cores_per_node"],
        collate_fn=dataset.collate_fn,
    )

    start = timer()
    outputs = []
    for data in dataloader:
        for out in data:
            outputs.append(out)
    end = timer()

    print("DEBUG: end - start", end - start)

    return outputs
Example #10
    def split_patents(self, temp_dir, filename):
        self.logger.info("splitting files")
        xmls = get_files(temp_dir, ".xml")
        splitter = Splitter()

        for file in xmls:
            splitter.split_file(file, join(self.working_directory, self.patentDir, filename))
Example #11
 def visitTextFile(self, textfile):
     splitter = Splitter(textfile.filePath, len(self.workers))
     file_split_result = splitter.split()
     self.operations[textfile.id] = FilePartition(textfile.id,
                                                  len(self.workers),
                                                  file_split_result,
                                                  textfile.filePath)
     self._set_collect_count(textfile)
Example #12
 def __init__(self, modelId, runNo, filter_dics, filename, _type, splitter_type):
     super(Tracker, self).__init__()
     self.modelId = modelId
     self.runNo = runNo
     self.run_dir = str(modelId) + '_' + str(runNo)
     self.filename = filename
     self.filters = self.prepare_filters(filter_dics)
     self.type = _type # lazy or eager
     self.splitter = Splitter(splitter_type)
     self.track_data = {}
Example #13
 def __init__(self):
     self.splitter = Splitter()
     self.postagger = POSTagger()
     self.dicttagger = DictionaryTagger([
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml'
     ])
Example #14
    def assemble2(self):
        """
        Builder method: build a Chain of linked Components
        :return:
        """
        log.info('Assembling Chain: %s...' % self.chain_str)

        # Create linked list of input/filter/output (ETL Component) objects
        chain_str = self.chain_str
        split_comps = []
        while chain_str:
            chain_str = chain_str.strip()

            # Check and handle Splitter construct
            # e.g. input_xml_file |(transformer_xslt|output_file) (output_std) (transformer_xslt|output_std)
            if chain_str.startswith('('):
                etl_section_name, chain_str = chain_str.split(')', 1)
                etl_section_name = etl_section_name.strip('(')

                # Check for subchain (split at Filter level)
                if '|' in etl_section_name:
                    # Have subchain: use Chain to assemble
                    sub_chain = Chain(etl_section_name, self.config_dict)
                    sub_chain.assemble2()
                    child_comp = sub_chain.first_comp
                else:
                    # Single component (Output) to split
                    child_comp = factory.create_obj(self.config_dict, etl_section_name.strip())

                # Assemble Components (can be subchains) for Splitter later
                split_comps.append(child_comp)
                if '(' in chain_str:
                    # Still components (subchains) to assemble for Splitter
                    continue

            if len(split_comps) > 0:
                # Next component is Splitter with children
                etl_comp = Splitter(self.config_dict, split_comps)
                split_comps = []
            else:

                # "Normal" case: regular Components piped in Chain
                if '|' in chain_str:
                    # More than one component in remaining Chain
                    etl_section_name, chain_str = chain_str.split('|', 1)
                else:
                    # Last element, we're done!
                    etl_section_name = chain_str
                    chain_str = None

                # Create the ETL component by name and properties
                etl_comp = factory.create_obj(self.config_dict, etl_section_name.strip())

            # Add component to end of Chain
            self.add(etl_comp)
Example #15
 def __init__(self, dataset: ds.Dataset):
     self.loader = Splitter(batch_size=32).get_all(dataset)
     self.dataset_name = str(dataset)
     self.features = []
     self.labels = []
     self.pos_class = dataset.pos_class
     print(f'Encoding: {self.dataset_name}')
     if not os.path.exists('data'):
         os.makedirs('data')
     if not os.path.exists('data/encoded/'):
         os.makedirs('data/encoded/')
     self.root_dir = 'data/encoded/'
Example #16
def process_text(text):
        splitter = Splitter()
        postagger = POSTagger()

        # Split the sentences to words
        splitted_sentences = splitter.split(text)

        # Do Parts of Speech Tagging on the words
        pos_tagged_sentences = postagger.pos_tag(splitted_sentences)

        dict_tagged_sentences = dicttagger.tag(pos_tagged_sentences)
        return sum_score(dict_tagged_sentences)
Example #17
    def split_by_silence(self):
        '''
        Uses the Splitter class to split the audio by silence
        to get the timestamps and respective filenames of the
        split segments.

        Segments is a list of tuples (filename, (start_time, end_time)).
        '''
        if (self.audio_extracted):
            splitter = Splitter(self.audio_file)
        else:
            raise Exception(
                "ERROR: File has not been extracted from video yet.")

        splitter.run()
        self.segments = splitter.get_segments()
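Since get_segments() returns a list of (filename, (start_time, end_time)) tuples, downstream code can unpack it directly. A minimal sketch of consuming that structure (the helper name is hypothetical, and times are assumed to be in seconds):

def total_segment_duration(segments):
    """Sum the durations of all silence-split segments.

    segments: list of (filename, (start_time, end_time)) tuples, as described
    in the docstring above.
    """
    return sum(end - start for _, (start, end) in segments)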
Example #18
def attempt_dd_improvement(main_tour, best_length, other_tour, other_length):
    segments = Splitter(main_tour, other_tour).get_segments()
    positive_kmoves, negative_kmoves = segments_to_beneficial_kmoves(xy, segments, main_tour)
    # Now that we have independent kmoves, it can be very expensive to try all possible combinations.
    # So for efficiency's sake we try high-yield (supposedly) simple combinations:
    # 1. Try all beneficial kmoves at once. Quit if succeeds (cannot get better).
    # 2. Try each beneficial kmove sequentially, starting from highest-gain.
    # 3. Exclude negative k-moves sequentially, starting from highest-loss.
    naive_gain = best_length - other_length
    if positive_kmoves:
        max_positive_gain = sum([x[0] for x in positive_kmoves])
        if max_positive_gain < naive_gain:
            print('MAX POSITIVE GAIN < NAIVE GAIN')
            main_tour = other_tour
            best_length = other_length
            return main_tour, best_length
        all_positive = combine_segment_array([x[1] for x in positive_kmoves])
        test_tour = perform_kmove(main_tour, all_positive)
        if len(test_tour) == len(main_tour):
            main_tour = test_tour
            print('Trying all {} positive kmoves together worked; gain: {} (naive gain: {})'.format(len(positive_kmoves), max_positive_gain, naive_gain))
            assert(max_positive_gain >= naive_gain)
            assert(max_positive_gain > 0)
            return main_tour, best_length - max_positive_gain
        # There may be cases where naive gain is more than decomposed gains:
        # decomposed gains currently only return moves that can be independently performed.
        # Infeasible moves that are improvements but can only be combined with other moves to become feasible
        # (a potentially computationally expensive search) will be excluded from the decomposed moves.
        dd_gain = 0 # gain due to decomposed kmoves.
        for k in positive_kmoves:
            print('    trying {}-opt move with gain {}'.format(len(k[1]['adds']), k[0]))
            test_tour = perform_kmove(main_tour, k[1])
            if len(test_tour) == len(main_tour):
                main_tour = test_tour
                best_length -= k[0]
                dd_gain += k[0]
        if naive_gain > dd_gain:
            print('naive_gain ({}) greater than dd_gain ({})'.format(naive_gain, dd_gain))
            main_tour = other_tour
            best_length = other_length
        if dd_gain > 0 and dd_gain > naive_gain:
            print('    dd gain {} greater than naive gain {}'.format(dd_gain, naive_gain))
    elif naive_gain > 0:
        print('NAIVE GAIN > 0 WITH NO DD POSITIVE GAIN')
        main_tour = other_tour
        best_length = other_length
    return main_tour, best_length
Example #19
def processQuestion(gloveModel,
                    question,
                    minLen=1,
                    maxLen=3,
                    useAPI=False,
                    useSynonyms=False):
    tagger = POSTagger()
    pos = tagger.parse(question)
    # create splitter and generalizer
    splitter = Splitter()
    if question[-1] == '?' or question[-1] == '.':
        question = question[:-1]
    gen_question = splitter.generalize(question, pos)
    labels = []
    resultsExists = False
    if not useAPI:
        parts = list(splitter.split(gen_question, min=minLen, max=maxLen))
    else:
        resultsExists = True
        apiResult, _ = api.getBinaryRelations(question)
        parts = [
            rel.predicate for rel in apiResult
            if len(rel.predicate_positions_) > 1
        ]
        for part in parts:
            if len(part.split()) > 1:
                labels.append(part.split()[0] +
                              ''.join(''.join([w[0].upper(), w[1:].lower()])
                                      for w in part.split()[1:]))
        if useSynonyms:
            predicates = [max(part.split(), key=len) for part in parts]
            if predicates is not None and len(predicates) > 0:
                for predicate in predicates:
                    for part in list(parts):
                        if predicate in part:
                            for syn in gloveModel.gloveModel.most_similar(
                                    predicate.lower()):
                                parts.append(part.replace(predicate, syn[0]))
        if len(parts) == 0:
            resultsExists = False
            parts = list(splitter.split(gen_question, min=minLen, max=maxLen))
    # create embedder part
    vectors = []
    for part in parts:
        vectors.append(gloveModel.getVector(part))
    return vectors, parts, pos, gen_question, labels, resultsExists
Example #20
 def __init__(self):
     self.sentences = []
     self.abbreviation = {}
     self.load_data()
     self.load_abbrv()
     self.normalizer = Normalizer()
     self.splitter = Splitter()
     self.corrector = Filter()
     self.lemmatizer = WordNetLemmatizer()
     self.missing_apostrophe_vocab = [
         'isnt', 'arent', 'wasnt', 'werent', 'wont', 'dont', 'didnt',
         'doesnt', 'couldnt', 'shouldnt', 'hasnt', 'havent', 'hadnt'
     ]
     self.tokenizer_mistake_vocab = [
         'isn', 'aren', 'wasn', 'weren', 'won', 'don', 'didn', 'doesn',
         'couldn', 'shouldn', 'hasn', 'haven', 'hadn'
     ]
     self._norm = joblib.load('model.crfsuite')
Example #21
def perturbed_hill_climb(xy, tour):
    tries = 0
    success = 0
    best_length = tour_util.length(xy, tour)
    while True:
        new_tour, naive_new_length = two_opt.optimize(xy, tour_util.double_bridge(tour)) # double bridge
        #test_tour = tour[:]
        #random.shuffle(test_tour)
        #new_tour, naive_new_length = two_opt.optimize(xy, test_tour) # random restart
        segments = Splitter(tour, new_tour).get_segments()
        kmoves = segments_to_beneficial_kmoves(xy, segments, tour)
        max_gain = 0
        if kmoves:
            max_gain = sum([k[0] for k in kmoves])
        naive_gain = best_length - naive_new_length
        # There may be cases where naive gain is more than decomposed gains:
        # decomposed gains currently only return moves that can be independently performed.
        # Infeasible moves that are improvements but can only be combined with other moves to become feasible
        # (a potentially computationally expensive search) will be excluded from the decomposed moves.
        dd_gain = 0 # gain due to decomposed kmoves.
        if kmoves:
            for k in kmoves:
                print('    trying {}-opt move with gain {}'.format(len(k[1]['adds']), k[0]))
                test_tour = perform_kmove(tour, k[1])
                if len(test_tour) == len(tour):
                    tour = test_tour
                    best_length -= k[0]
                    dd_gain += k[0]
        if naive_gain > dd_gain:
            print('naive_gain ({}) greater than dd_gain ({})'.format(naive_gain, dd_gain))
            tour = new_tour
            best_length = naive_new_length
        if naive_gain > 0 or dd_gain > 0:
            success += 1
        if dd_gain > 0 and dd_gain > naive_gain:
            print('    dd gain {} greater than naive gain {}'.format(dd_gain, naive_gain))
        tries += 1
        current_length = basic.tour_length(xy, tour)
        assert(best_length == current_length)
        if current_length <= TARGET_LENGTH:
            break
        print('current best: {} (iteration {}), improvement rate: {}'.format(best_length, tries, success / tries))
Example #22
    def assemble(self):
        """
        Builder method: build a Chain of linked Components
        :return:
        """
        log.info('Assembling Chain: %s...' % self.chain_str)

        # Create linked list of input/filter/output (ETL Component) objects
        chain_str_arr = self.chain_str.split('|')

        for etl_section_name in chain_str_arr:

            # Check for splitting outputs construct using '+'
            # TODO: may also construct combining Inputs or split to multiple sub-Chains
            # for now only Outputs supported for splitting
            if '+' in etl_section_name:
                section_names = etl_section_name.split('+')

                log.info('Splitting to: %s' % etl_section_name)
                child_comps = []
                for section_name in section_names:

                    if '(' in section_name and ')' in section_name:
                        section_name = section_name.replace(',', '|')
                        section_name = section_name.strip('(')
                        section_name = section_name.strip(')')

                    # Create the child ETL component by name and properties
                    child_comp = factory.create_obj(self.config_dict, section_name.strip())
                    child_comps.append(child_comp)
                etl_comp = Splitter(self.config_dict, child_comps)
            else:

                # Create the ETL component by name and properties
                etl_comp = factory.create_obj(self.config_dict, etl_section_name.strip())

            # Add component to end of Chain
            self.add(etl_comp)
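The '+' construct handled above turns one section of the chain string into a Splitter that fans out to several child components, while '|' still separates the stages of the chain. A small self-contained sketch of that parsing rule, using a made-up chain string rather than a real config:

# Hypothetical chain string: one input stage, then a split to two outputs.
chain_str = 'input_xml_file | output_file + output_std'

for etl_section_name in (part.strip() for part in chain_str.split('|')):
    if '+' in etl_section_name:
        # This section would become a Splitter with one child per '+' part.
        child_names = [name.strip() for name in etl_section_name.split('+')]
        print('Splitter children:', child_names)
    else:
        print('Regular component:', etl_section_name)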
Example #23
  def __init__(self, memoryFile):
    self.nCycles = 0 # Used to hold number of clock cycles spent executing instructions
    
    self.dataMemory = DataMemory(memoryFile)
    self.instructionMemory = InstructionMemory(memoryFile)
    self.registerFile = RegisterFile()
    self.alu = ALU()
    self.mainControl = MainControl()
    self.splitter = Splitter()
    self.signExtender = SignExtender()
    self.andGate = AndGate()
    self.breaker = Breaker()

    self.constant4 = Constant(4)
    # self.randomControl = RandomControl()
    self.pcMux1 = Mux()
    self.pcMux2 = Mux()
    self.regMux = Mux()
    self.aluMux = Mux()
    self.resultMux = Mux()
    self.luiMux = Mux()

    self.adder = Add()
    self.branchAdder = Add()

    self.jumpAddress = JMPAddress()
    self.shiftBranch = LeftShiftTwo()
    self.shiftJump = LeftShiftTwo()

    self.pc = PC(hex(0xbfc00000))  # hard coded "boot" address
    
    self.elements = [self.constant4, self.adder, self.instructionMemory, self.breaker, self.splitter,
                     self.shiftJump, self.mainControl, self.regMux, self.signExtender, self.luiMux, self.registerFile,
                     self.jumpAddress, self.shiftBranch, self.branchAdder, self.aluMux, self.alu, self.dataMemory,
                     self.andGate, self.pcMux1, self.pcMux2, self.resultMux, self.registerFile, self.pc]
    
    self._connectCPUElements()
Example #24
import os

import pandas as pd

from splitter import Splitter


def merge_all():
    for i, fname in enumerate(os.listdir('output')):
        if i == 0:
            df = pd.read_csv('output/' + fname)
        else:
            df = pd.merge(df, pd.read_csv('output/' + fname),
                          how='outer', on='datetime')
    df.to_csv('health_care.csv')


if __name__ == "__main__":

    print("Convert apple health care xml to csv.")

    s = Splitter()

    s.get_body_mass()
    s.get_burned_energy()
    s.get_heart_rate()
    s.get_stand_time()
    s.get_step_count()
    s.get_walking_distance()

    print("Merge all csv.")
    merge_all()

    print("Done.")
Example #25
        return item
    """

    def select_item(self, splitter, user):
        max_item = self.actions[0]
        max_val = np.random.beta(
            max_item.successes + self.alpha,
            max_item.count - max_item.successes + self.beta)
        for item in self.actions[1:]:
            if not (user in splitter.train_set.keys() and item.item in splitter.train_set[user])\
                    or self.follow_back(splitter, item.item, user):
                val = np.random.beta(item.successes + self.alpha,
                                     item.count - item.successes + self.beta)
                if val > max_val:
                    max_item = item
                    max_val = val
        self.removed = 1
        return max_item


if __name__ == "__main__":
    from splitter import Splitter
    from plot import plot_results_graph
    import matplotlib.pyplot as plt

    spl = Splitter("../data/movieLens_binary_mini.dat", separator=" ")
    bandit = UCBBandit(spl, "mini", param=0, criteria="mean")
    print(len(bandit.actions))
    plot_results_graph("mini", "eps")
    plt.show()
Example #26
    embeddings = emb.get_embeddings(data['title'])

    clustering = Clustering(data, config['Clustering']['directory'],
                            config['Clustering']['cluster_picture_name'],
                            config['Clustering']['result_data_file_name'],
                            config['Clustering']['center_replics_file_name'],
                            config['Clustering']['part_to_plot'],
                            config['Clustering']['bgm_config'])

    df = clustering.get_clusters_and_final_data(embeddings)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    splitter = Splitter(df, config['Splitter']['path_to_save_data'],
                        config['Splitter']['min_freq'],
                        config['Splitter']['test_size'],
                        config['Splitter']['batch_size'], device)

    train_iterator, test_iterator, train_data, test_data, SRC, TRG = splitter.get_iterators_and_fields(
    )

    input_dim = len(SRC.vocab)
    output_dim = len(TRG.vocab)
    trg_pad_idx = TRG.vocab.stoi[TRG.pad_token]

    enc = Encoder(input_dim, config['model']['EMB_DIM'],
                  config['model']['HID_DIM'], config['model']['ENC_LAYERS'],
                  config['model']['ENC_KERNEL_SIZE'],
                  config['model']['ENC_DROPOUT'], device)

    dec = Decoder(output_dim, config['model']['EMB_DIM'],
Example #27
# -*- coding: utf-8 -*-
"""
Created on Sat May 25 10:06:24 2019

@author: Gerardo Cervantes
"""

from splitter import Splitter
#For testing split keys

split_keys = ['{PGUP}', '{BKSP}', '{F4}']

if __name__ == "__main__":
    splitter = Splitter()
    splitter.split('Livesplit', '{pgup}', 0)

Example #28
def segment_and_pred(source_path, print_path, img_type):
    print('Start process pic:' + source_path)
    splitter = Splitter()
    if 'school' in img_type:
        segment_flag = school_flag
        attr = school_attr
    else:
        segment_flag = degree_flag
        attr = degree_attr
    result = {}
    image_color = cv2.imread(source_path)
    image = cv2.cvtColor(image_color, cv2.COLOR_BGR2GRAY)

    ret, adaptive_threshold = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    splitter.show_img('adaptive_threshold', adaptive_threshold)

    ret, at = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY_INV)
    splitter.show_img('at', at)

    first_column_img = adaptive_threshold[0:image_color.shape[1], 120:350]

    second_column_img = adaptive_threshold[0:image_color.shape[1], 400:659]

    first_column_img_w = first_column_img.shape[0]
    second_column_img_w = second_column_img.shape[0]
    # compute the index ranges of content that wraps onto additional lines
    horizontal_sum = np.sum(at, axis=1)
    peek_ranges = splitter.extract_peek_ranges_from_array(horizontal_sum)
    last_pr = None
    # content wraps onto a new line
    line_feed_index_dict = {}
    # content is empty
    line_empty_index_dict = {}
    j = 0
    k = 0
    for i, pr in enumerate(peek_ranges):
        if last_pr is not None and pr[0] - last_pr[1] < 15:
            line_feed_index_dict[i] = j
            j += 1
        elif last_pr is not None and pr[0] - last_pr[1] >= 42:
            line_empty_index_dict[i] = k
            k += 1
        last_pr = pr

    # number of rows whose content wraps onto extra lines
    line_feed_count = 0

    # number of rows whose content is empty
    line_empty_count = 0
    i = 0
    while i < (len(segment_flag) + len(line_feed_index_dict)):
        if i in line_feed_index_dict:
            line_feed_count += 1
        elif i in line_empty_index_dict:
            line_empty_count += 1
            result[attr[i - line_feed_count][0]] = ''
            result[attr[i - line_feed_count][1]] = ''
            i += 1
            continue
        tmp1 = first_column_img[
            peek_ranges[i -
                        line_empty_count][0]:peek_ranges[i -
                                                         line_empty_count][1],
            0:first_column_img_w - 1]
        splitter.show_img('first image', tmp1)
        kv0_path = print_path + str(i) + '/'
        if not os.path.exists(kv0_path):
            os.makedirs(kv0_path)
        cv2.imwrite(kv0_path + 'kv0.png', tmp1)

        # cv2.waitKey(0)
        if segment_flag[i - line_feed_count][0] == 1:
            min_width = 12
        else:
            min_width = 3
        kv0_print_path = print_path + str(i) + '/0/'
        if os.path.exists(kv0_print_path):
            shutil.rmtree(kv0_print_path)
        if not os.path.exists(kv0_print_path):
            os.makedirs(kv0_print_path)
        splitter.process_by_path(kv0_path + 'kv0.png',
                                 kv0_print_path,
                                 minimun_range=min_width)
        attr_name = attr[i - line_feed_count][0]
        if attr_name != '':
            pred_result, pred_val_list = chinese_ocr.pred(kv0_print_path)
            if resegment(pred_val_list):
                print('re segment:' + kv0_print_path)
                shutil.rmtree(kv0_print_path)
                os.makedirs(kv0_print_path)
                splitter.process_by_path(kv0_path + 'kv0.png',
                                         kv0_print_path,
                                         minimun_range=min_width,
                                         pred_val_list=pred_val_list)
                pred_result, pred_val_list = chinese_ocr.pred(kv0_print_path)
            if attr_name in result:
                # content wrapped onto a new line
                result[attr_name] = result[attr_name] + pred_result
            else:
                result[attr_name] = pred_result
        tmp2 = second_column_img[
            peek_ranges[i -
                        line_empty_count][0]:peek_ranges[i -
                                                         line_empty_count][1],
            0:second_column_img_w - 1]
        splitter.show_img('second image', tmp2)
        kv1_path = print_path + str(i) + '/'
        cv2.imwrite(kv1_path + 'kv1.png', tmp2)
        if segment_flag[i - line_feed_count][1] == 1:
            min_width = 12
        else:
            min_width = 3
        kv1_print_path = print_path + str(i) + '/1/'
        if os.path.exists(kv1_print_path):
            shutil.rmtree(kv1_print_path)
        if not os.path.exists(kv1_print_path):
            os.makedirs(kv1_print_path)
        splitter.process_by_path(kv1_path + 'kv1.png',
                                 kv1_print_path,
                                 minimun_range=min_width)
        attr_name = attr[i - line_feed_count][1]
        if attr_name != '':
            pred_result, pred_val_list = chinese_ocr.pred(kv1_print_path)
            if resegment(pred_val_list):
                print('re segment:' + kv1_print_path)
                shutil.rmtree(kv1_print_path)
                os.makedirs(kv1_print_path)
                splitter.process_by_path(kv1_path + 'kv1.png',
                                         kv1_print_path,
                                         minimun_range=min_width,
                                         pred_val_list=pred_val_list)
                pred_result, pred_val_list = chinese_ocr.pred(kv1_print_path)
            if attr_name in result:
                # content wrapped onto a new line
                result[attr_name] = result[attr_name] + pred_result
            else:
                result[attr_name] = pred_result
        i += 1
    return result
Example #29
                          parameters['language'],
                          parameters['path_to_fmridata'],
                          input_path,
                          logger=logs,
                          **kwargs)
    logs.validate()

    logs.info("Retrieve arguments for each model...")
    kwargs_splitter = get_splitter_information(parameters)
    kwargs_compression = get_compression_information(parameters)
    kwargs_transformation = get_data_transformation_information(parameters)
    kwargs_estimator_model = get_estimator_model_information(parameters)
    logs.validate()

    logs.info("Instanciations of the classes...")
    splitter = Splitter(**kwargs_splitter)
    compressor = Compressor(**kwargs_compression)
    transformer = Transformer(**kwargs_transformation)
    estimator_model = EstimatorModel(**kwargs_estimator_model)
    logs.validate()

    logs.info("Defining Pipeline flow...")
    ## Pipeline
    splitter_cv_external = Task([splitter.split], name='splitter_cv_external')
    compressor_external = Task([compressor.compress],
                               input_dependencies=[splitter_cv_external],
                               name='compressor_external',
                               flatten_inputs=[True])
    transform_data_external = Task(
        [transformer.make_regressor, transformer.scale],
        input_dependencies=[splitter_cv_external, compressor_external],
Example #30
from main import Main;
from data_set import Data_Set;
from dummy_master import Dummy_Master;
from regressor import Regressor;
from metrics import Metrics;
from back_elimination import Back_Eliminations;
from set_reader import Set_Reader;
from splitter import Splitter;
from plot import Plot;
from process_data import Pre_Process_Data;

# import visual-python  # not a valid Python module name; unused below

m = Main('init');
r = Regressor();
sp = Splitter();
mt = Metrics();
m.print();
be = Back_Eliminations();
pd = Pre_Process_Data();

sr = Set_Reader();
sr.read_files();
# sr.print_files_shapes();
train = sr.get_train();
test = sr.get_test(); 

ploter = Plot();
ploter.cut_survived(train, test);
# ploter.plot_set_survived(sr.get_train(), "Sex", "Survived");
# ploter.plot_set_survived(sr.get_train(), "Pclass" ,"Survived");