Exemplos de split em Python, exemplos de splitter.split em Python

Exemplo n.º 1

0

Exibir arquivo

def plot(sdss_path, dflens_path, out_path):
    """Save stacked histograms of the SDSS and 2dFLenS target values.

    Both datasets are loaded through ``splitter``; the two histograms share
    the same 100 bin edges, spanning from the smallest target value found in
    either dataset up to a fixed upper bound of 1. The figure is written to
    *out_path*.
    """

    def load_targets(path):
        # Request every row in the first partition (and none in the second),
        # then keep only the second member of that partition.
        loaded = splitter.load(path)
        everything, _ = splitter.split(loaded, loaded[0].shape[0], 0)
        _, targets = everything
        return targets

    sdss_y = load_targets(sdss_path)
    dflens_y = load_targets(dflens_path)

    fig, (top, bottom) = plt.subplots(2, 1)

    lower = min(np.min(sdss_y), np.min(dflens_y))
    upper = 1
    bin_edges = np.linspace(lower, upper, 100)

    panels = ((top, sdss_y, 'SDSS Count'), (bottom, dflens_y, '2dFLenS Count'))
    for axis, values, label in panels:
        axis.hist(values, bins=bin_edges)
        axis.set_xlim((lower, upper))
        axis.set_ylabel(label)

    # Only the lower panel carries x ticks and the axis label.
    top.set_xticks([])
    bottom.set_xlabel(r'$z_\mathrm{spec}$')

    plt.subplots_adjust(hspace=0)
    plt.savefig(out_path)

Exemplo n.º 2

0

Exibir arquivo

def split_and_send(update, context):
    """Split the stored input image and send every produced piece to the chat.

    Calls ``splitter.split`` with the ``n`` and ``axis`` values kept in
    ``context.chat_data``, sends each file found in ``output/`` as a photo to
    the chat the update came from, deletes each file after sending, removes
    ``input/input_img.jpg`` and finally resets the stored parameters.
    """
    splitter.split(n=context.chat_data['n'], split_axis=context.chat_data['axis'])
    for filename in os.listdir("output"):
        piece_path = f'output/{filename}'
        # Close the handle before deleting: the original left it open, which
        # leaks descriptors and makes os.remove fail on Windows.
        with open(piece_path, 'rb') as photo:
            context.bot.send_photo(chat_id=update.effective_chat.id, photo=photo)
        os.remove(piece_path)
    os.remove('input/input_img.jpg')

    # Reset parameters stored
    context.chat_data['n'] = False
    context.chat_data['axis'] = False
    context.chat_data['img_received'] = False

Exemplo n.º 3

0

Exibir arquivo

Arquivo: test_compound_word_splitter.py Projeto: jaki2012/compound-word-splitter

def test_split_incorrectly_capitalized():
    """Check ``split`` on a mis-capitalised word and on a unicode compound."""
    capitalized = split('StartEreignis', 'de_de')
    assert capitalized == '', '%s != ""' % capitalized

    # should split unicode strings
    parts = split(u'Resultatveröffentlichung', 'de_de')
    assert parts[0] == u'Resultat', '%s != "Resultat"' % (parts[0])
    assert parts[1] == u'Veröffentlichung', (
        '%s != "Veröffentlichung"' % (parts[1]))

Exemplo n.º 4

0

Exibir arquivo

Arquivo: searchengine.py Projeto: kwmt/programming-collective-intelligence

 def separatewords(self, text):
     """Return the lowercased, non-empty pieces of *text*.

     The text is cut with the ``splitter`` object from the enclosing scope.
     If splitting or lowercasing fails, the offending text is printed and an
     empty list is returned (best-effort behaviour).
     """
     try:
         return [s.lower() for s in splitter.split(text) if s != '']
     except Exception:
         # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
         # SystemExit are no longer swallowed.
         print(f"separatewords{text}")
         return []

Exemplo n.º 5

0

Exibir arquivo

Arquivo: scanner.py Projeto: sandhawke/ldreg

 def show_by_ns(self):
     """Print a tally of term-use keys per namespace.

     Every key of ``self.term_uses`` is a ``(term, use)`` pair; the term is
     split into ``(ns, local)`` and the namespace is counted via ``incr`` only
     when both parts are truthy.
     """
     ns_count = {}
     for (term, use) in (self.term_uses.keys()):
         (ns, local) = splitter.split(term)
         # Skip terms that do not split into both a namespace and a local part.
         if ns and local:
             incr(ns_count, ns)
     # Python 2 print statement.
     print ns_count

Exemplo n.º 6

0

Exibir arquivo

Arquivo: l8_process_scene.py Projeto: auremoser/landsat_ingestor

def process(source, scene_root, verbose=False, clean=False, list_file=None,
            overwrite=False):
    """Pull one scene, split it, build derived products and push the result.

    If the scene already exists at the destination and *overwrite* is false,
    the result of ``collect_missing_entry`` is returned instead. Otherwise the
    scene is pulled, split into a local directory, annotated, thumbnailed,
    indexed and pushed. With *clean* the local tarfile and directory are
    removed afterwards; with *list_file* a scene line is appended to it.

    Returns the ``scene_dict`` that was passed through the pipeline steps.
    """

    if pusher.check_existance(scene_root):
        print 'Scene %s already exists on destination bucket.' % scene_root
        if not overwrite:
            return collect_missing_entry(scene_root, verbose, clean, list_file)

    if verbose:
        print 'Processing scene: %s' % scene_root
        
    # Shared dict handed to the puller, scene_info and pusher below;
    # presumably each step records metadata in it -- verify in those modules.
    scene_dict = {}
    
    local_tarfile = puller.pull(source, scene_root, scene_dict,
                                verbose=verbose)

    local_dir = splitter.split(scene_root, local_tarfile, verbose=verbose)

    scene_info.add_mtl_info(scene_dict, scene_root, local_dir)
    
    thumbnailer.thumbnail(scene_root, local_dir, verbose=verbose)
    scene_index_maker.make_index(scene_root, local_dir, verbose=verbose)
    pusher.push(scene_root, local_dir, scene_dict, verbose=verbose, overwrite=overwrite)

    # Optional removal of the local working files.
    if clean:
        os.unlink(local_tarfile)
        shutil.rmtree(local_dir)

    if list_file:
        scene_info.append_scene_line(list_file, scene_dict)

    return scene_dict

Exemplo n.º 7

0

Exibir arquivo

Arquivo: scanner.py Projeto: sandhawke/ldreg

    def db_finish(self):
        """Mark this scan complete and store its term uses and trackers.

        Updates the ``scan`` row for ``self.id`` (triple count, completion
        time, status 1), inserts one ``term_use`` row per ``(term, use)`` key
        of ``self.term_uses``, inserts one ``trackers`` row per primary and
        per backup tracker, then calls ``obsolete_old_scans``.
        """
        finished_at = time.time()
        self.db.update('scan', where="id = " + str(self.id),
                       triples=self.c,
                       time_complete=finished_at,
                       status=1)

        for key in self.term_uses.keys():
            term, use = key
            ns, local = splitter.split(term)
            nsid = irimap.to_id(self.db, ns)
            self.db.insert('term_use',
                           local=local,
                           namespace_id=nsid,
                           scan_id=self.id,
                           type=use,
                           count=self.term_uses[(term, use)])

        # Primary trackers are written first, then the backups.
        tracker_groups = ((self.primary_trackers, True),
                          (self.backup_trackers, False))
        for trackers, primary in tracker_groups:
            for tracker in trackers:
                self.db.insert('trackers',
                               scan_id=self.id,
                               tracker_id=irimap.to_id(self.db, tracker),
                               is_primary=primary)

        obsolete_old_scans(self.db, self.data_source_iri, self.id)

Exemplo n.º 8

0

Exibir arquivo

Arquivo: tracker.py Projeto: sandhawke/ldreg

def list_(term, timecode=(-1), db=None):   # , limit, offset):
    """Yield ``"<type> <text>"`` strings for the uses of *term* at *timecode*.

    This is a generator: nothing below runs (including the
    ``GarbageTimecode`` check) until it is iterated.

    *term* is split into ``(ns, local)``; the namespace is mapped to an id
    with ``irimap.to_id``. A *timecode* of -1 means "use
    ``latest_timecode(db)``"; any other value smaller than
    ``min_timecode(db)`` raises ``GarbageTimecode``. When *db* is None a new
    ``dbconn.Connection()`` is opened.

    Original author notes:

    see http://dev.mysql.com/doc/refman/5.0/en/select.html

    for us, timecodes ARE scanids.   A new scanid == a new change.

    are we going to need to SORT when we do limit/offset?   pagerank?

    """
    if db is None:
        db = dbconn.Connection()

    (ns, local) = splitter.split(term)
    ns_id = irimap.to_id(db, ns)

    if timecode == -1:
        timecode = latest_timecode(db)
        print "list using latest timecode:", timecode
    else:
        if timecode < min_timecode(db):
            raise GarbageTimecode()

    # The $name placeholders are filled from this function's locals
    # (timecode, ns_id, local) through vars=locals().
    for r in db.query('select text, type from term_use, scan, iri where scan_id <= $timecode and obsoleted_by > $timecode and namespace_id=$ns_id and scan.id=scan_id and status=1 and local=$local and iri.id=source_id', vars=locals()):
        yield unicode(r.type)+" "+unicode(r.text)

Exemplo n.º 9

0

Exibir arquivo

def prove_terminates(filename):
    """Split *filename* into a temporary C file and run kalashnikov.py on it.

    ``splitter.split`` writes into a non-deleted temporary ``.c`` file and
    reports the variable id map and whether the loop is nested. Nested loops
    use the ``nested.c`` skeleton with three programs, twice as many
    arguments and an extra primed copy of every variable name; otherwise the
    ``ranking.c`` skeleton with two programs is used. Any command-line
    arguments from ``sys.argv[2:]`` are appended to the command.
    """
    splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c',
                                            delete=False)
    id_map, has_nested, nondet = splitter.split(filename, splitfile)
    nids = len(id_map)
    names = [id_map[k] for k in xrange(nids)]
    varnames = ' '.join(names)

    splitfile.close()

    if has_nested:
        skeleton = "../../tests/termination/nested.c"
        nprogs, nargs = 3, nids * 2
        varnames += ' ' + ' '.join("%s\\'" % name for name in names)
    else:
        skeleton = "../../tests/termination/ranking.c"
        nprogs, nargs = 2, nids

    template = ("kalashnikov.py %s %s -P%d  "
                "--seed=1337 -a%d --varnames %s "
                "--synth-strategy=genetic -c1 --fastverif=True "
                "-newsize=5 -replaceprob=0.15 -mutprob=0.1 "
                "-tourneysize=5 -popsize=3000 -w4 %s")
    extra_args = ' '.join(sys.argv[2:])
    cmd = template % (splitfile.name, skeleton, nprogs, nargs, varnames,
                      extra_args)
    os.system(cmd)

Exemplo n.º 10

0

Exibir arquivo

def rewrite_token(t):
    """Rewrite one query token as a boolean operator or a matrix expression.

    * ``AND`` / ``OR`` / ``NOT`` and parentheses map to ``&``, ``|``,
      ``1 -``, ``(`` and ``)``.
    * A token wrapped in double quotes is treated as a phrase: the quotes are
      removed, the phrase is split with ``splitter`` and re-joined in
      lowercase. Known phrases become a lookup into ``sparse_td_matrix``;
      unknown ones are recorded in ``unknownword_list`` and become an
      all-zero row.
    * Any other token is looked up in ``terms`` in lowercase, with the same
      unknown-word fallback.

    Returns the expression as a string, or ``None`` for a token that has a
    double quote on only one side (fragment of a phrase that was tokenised
    apart).
    """
    operators = {"AND": "&", "OR": "|", "NOT": "1 -", "(": "(", ")": ")"}
    zero_row = 'np.matrix(np.zeros(len(documents), dtype=int))'
    lookup_row = 'sparse_td_matrix[t2i["{:s}"]].todense()'

    if t in operators:
        return operators[t]

    # startswith/endswith instead of t[0]/t[-1] so an empty token no longer
    # raises IndexError; it is reported as an unknown word below.
    if t.startswith("\"") and t.endswith("\""):
        phrase = t.replace("\"", "")
        try:
            # The loop variable used to be named ``t`` too, which clobbered
            # the phrase on Python 2 (list comprehensions leak their variable).
            t_ngram = ' '.join(
                [str(word.lower()) for word in splitter.split(phrase)])
            if t_ngram in terms:
                return lookup_row.format(t_ngram)
            unknownword_list.append(phrase if t_ngram == "" else t_ngram)
            return zero_row
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C is not swallowed.
            unknownword_list.append(phrase)
            return zero_row

    if t.lower() in terms:
        return lookup_row.format(t.lower())

    if t.startswith("\"") or t.endswith("\""):
        # Half-quoted fragment: deliberately yields no expression.
        return None

    unknownword_list.append(t)
    return zero_row

Exemplo n.º 11

0

Exibir arquivo

def load_data(path='data/us_trial', max_example=None):
    """Load and normalise tweets and their labels.

    Reads ``{path}.text`` and ``{path}.labels`` line by line in lockstep and
    stops at the end of the shorter file or after *max_example* examples.

    Each tweet is stripped, lowercased, has a trailing ellipsis character
    removed, has ``#hashtags`` expanded through ``splitter.split``, has
    characters repeated three or more times collapsed to one, has ``@`` and
    ``#`` removed, and is re-tokenised with ``TweetTokenizer``.

    Returns ``(tweets, emojis)`` where ``emojis`` are the labels as ints.
    """
    num_examples = 0
    tweets, emojis = [], []
    # Built once: deletes '@' and '#' in a single translate() pass.
    strip_marks = {ord(c): None for c in '@#'}

    # ``with`` closes both files even if a line fails to parse; the original
    # never closed them.
    with open(path + '.text', 'r') as f_x, open(path + '.labels', 'r') as f_y:
        while True:
            tweet, emoji = f_x.readline(), f_y.readline()
            if not tweet or not emoji:
                break

            # TODO: extra preprocessing step for each tweet e.g. take care of slang
            tweet = tweet.strip().lower()  # delete whitespaces and lowercase
            # and more... -> more (... is a special unicode char).
            # endswith() also copes with a blank line, where tweet[-1] raised.
            if tweet.endswith(u'\u2026'):
                tweet = tweet[:-1]
            tweet = re.sub(
                r"#\S+",
                lambda match: ' '.join(splitter.split(match.group()[1:])),
                tweet)  # artfactory -> art factory
            # The flags must be passed by keyword: the 4th positional
            # argument of re.sub is ``count``, so the original silently
            # limited the substitution to 24 replacements and applied no flags.
            tweet = re.sub(r"\b(\S*?)(.)\2{2,}\b", r"\1\2", tweet,
                           flags=re.MULTILINE | re.DOTALL)  # no wayyyyy -> no way
            tweet = tweet.translate(strip_marks)  # @ user -> user, #omg -> omg
            words = TweetTokenizer().tokenize(tweet)
            tweet = ' '.join(words)

            tweets.append(tweet)
            emojis.append(int(emoji))  # convert '7' -> 7

            num_examples += 1
            if (max_example is not None) and (num_examples >= max_example):
                break
    print("%d examples loaded" % num_examples)
    return tweets, emojis

Exemplo n.º 12

0

Exibir arquivo

def grab_profiles(player_profile: Profile, stage: int, num_stages: int,
                  output_file_name: str, tempdir: str) -> int:
    """Prepare the profiles for *stage* and return how many there are.

    Stage 1 splits *output_file_name* into the stage directory. Later stages
    first validate the previous stage's result files, then select the best
    profiles from them according to ``settings.default_grabbing_method``
    (``"target_error"`` or ``"top_n"``).

    Raises:
        RuntimeError: the previous stage's result files failed validation.
        ValueError: ``settings.default_grabbing_method`` is not recognised.
    """
    stage_dir = get_subdir(stage, tempdir)
    os.makedirs(stage_dir, exist_ok=True)
    if stage == 1:
        num_generated_profiles = splitter.split(output_file_name, stage_dir,
                                                settings.splitting_size,
                                                player_profile.player_class)
    else:
        subdir_previous_stage = get_subdir(stage - 1, tempdir)
        try:
            checkResultFiles(subdir_previous_stage)
        except Exception as e:
            msg = "Error while checking result files in {}: {}\nPlease restart AutoSimc at a previous stage.". \
                format(subdir_previous_stage, e)
            raise RuntimeError(msg) from e
        if settings.default_grabbing_method == "target_error":
            filter_by = "target_error"
            filter_criterium = None
        elif settings.default_grabbing_method == "top_n":
            filter_by = "count"
            # Negative index: counts back from the end of default_top_n
            # (the last stage reads index -1).
            filter_criterium = settings.default_top_n[stage - num_stages - 1]
        else:
            # Previously fell through and failed later with an opaque
            # UnboundLocalError on ``filter_by``.
            raise ValueError("Unknown default_grabbing_method: {!r}".format(
                settings.default_grabbing_method))
        is_last_stage = (stage == num_stages)
        num_generated_profiles = splitter.grab_best(
            filter_by, filter_criterium, subdir_previous_stage, stage_dir,
            output_file_name, not is_last_stage)
    if num_generated_profiles:
        logging.info(
            "Found {} profile(s) to simulate.".format(num_generated_profiles))
    return num_generated_profiles

Exemplo n.º 13

0

Exibir arquivo

Arquivo: nonterm.py Projeto: blexim/synth

def prove_terminates(filename):
  """Split *filename* into a temporary C file and run kalashnikov.py on it.

  The split output goes to a non-deleted temporary ``.c`` file, which is
  combined with the ``unranking.c`` skeleton. The number of variable ids is
  passed as both ``-a`` and ``--evars``; the ``nondet`` value reported by the
  splitter and any ``sys.argv[2:]`` arguments are appended.
  """
  splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
  id_map, has_nested, nondet = splitter.split(filename, splitfile)
  nids = len(id_map)
  varnames = ' '.join(id_map[k] for k in xrange(nids))

  splitfile.close()

  template = ("kalashnikov.py %s ../../tests/termination/unranking.c "
              "-P2 --synth-strategy=genetic -c1 --fastverif=True "
              "-newsize=5 -replaceprob=0.15 -mutprob=0.1 "
              "-tourneysize=5 -popsize=3000 -w4 "
              "-a%d --evars %d --varnames %s --resnames I --seed=1337 "
              "--nondet=%d %s")
  extra_args = ' '.join(sys.argv[2:])
  cmd = template % (splitfile.name, nids, nids, varnames, nondet, extra_args)
  os.system(cmd)

Exemplo n.º 14

0

Exibir arquivo

Arquivo: inspect.py Projeto: melug/cyrillic-mongolian

def follow_up_char(w):
    """Rule 9: determine the follow-up character for suffixes or vowels.

    Single vowels map as follows:
        а, у        ->  а
        э, ү, и     ->  э
        о           ->  о
        ө           ->  ө
    The following letter pairs override the follow-up character:
        уу, үү, юу, юү, яу, ёу, еү, иу
    The following may appear in the middle of a word without changing the
    follow-up character (listed for reference only; not used by the code):
        и, ий, ы, эй

    The lowercased word is cut with ``split``. A chunk containing one of the
    overriding pairs always sets the result (from the pair's last letter);
    single vowels are consulted only while no result has been found yet.
    Returns ``None`` when nothing matched.
    """
    vowel_class = {
        u'а': u'а',
        u'у': u'а',
        u'э': u'э',
        u'ү': u'э',
        u'и': u'э',
        u'о': u'о',
        u'ө': u'ө',
    }
    overriding = [u'уу', u'үү', u'юу', u'юү', u'яу', u'ёу', u'еү', u'иу']

    result = None
    for chunk in split(w.lower()):
        matched = [pair for pair in overriding if pair in chunk]
        if matched:
            # The last matching pair wins, as in a plain scan of the list.
            result = vowel_class[matched[-1][-1]]
        elif result is None:
            for vowel in vowel_class:
                if vowel in chunk:
                    result = vowel_class[vowel]
    return result

Exemplo n.º 15

0

Exibir arquivo

Arquivo: docclass.py Projeto: cametan001/document_filtering

def getwords(doc):
    """Return the unique words of *doc* as a ``{word: 1}`` dict.

    Words are the lowercased runs between non-word characters, kept only
    when they are 3 to 19 characters long.
    """
    # ``\W+`` rather than ``\W*``: a pattern that can match the empty string
    # makes re.split cut between every character on Python 3.7+, so no word
    # longer than one character would ever be produced.
    splitter = re.compile(r'\W+')
    # Split the document on non-word characters.
    words = [s.lower() for s in splitter.split(doc) if 2 < len(s) < 20]
    # Return only the unique words.
    return {w: 1 for w in words}

Exemplo n.º 16

0

Exibir arquivo

Arquivo: main.py Projeto: mikeonly/WebstersBot

 def reply(text=None):
     """Send *text* to the chat in pieces, or a notice when there is no text.

     The text is cut by ``splitter.split(text, MAX)`` and every piece is
     POSTed to the ``sendMessage`` endpoint. A truthy result from
     ``sender.send`` is read and logged as an error; otherwise the text sent
     so far is logged. ``chat_id``, ``sender``, ``URL``, ``TOKEN`` and ``MAX``
     come from the enclosing scope.
     """
     if not text:
         sender.send(
             'POST',
             URL + TOKEN + '/' + 'sendMessage',
             {
                 'text': 'Bot tries to send you an empty message.',
                 'chat_id': chat_id
             })
         return

     resp = ''
     for part in splitter.split(text, MAX):
         resp += part
         endpoint = URL + TOKEN + '/' + 'sendMessage'
         payload = {'text': part.encode('utf-8'), 'chat_id': chat_id}
         check = sender.send('POST', endpoint, payload)
         if check:
             logging.error(check.read())
         else:
             logging.info('Sent response: \n\n%s' % resp)

Exemplo n.º 17

0

Exibir arquivo

def grab_profiles_for_stage(player_profile, stage, outputfile, stages):
    """Prepare the profiles for *stage* and return how many there are.

    Stage 1 splits *outputfile* into the stage directory. Later stages first
    validate the previous stage's result files, then select the best
    profiles from them according to ``settings.default_grabbing_method``
    (``'target_error'`` or ``'top_n'``).

    Raises:
        RuntimeError: the previous stage's result files failed validation.
        ValueError: ``settings.default_grabbing_method`` is not recognised.
    """
    subdir_previous_stage = get_subdir(stage - 1)
    if stage == 1:
        num_generated_profiles = splitter.split(outputfile, get_subdir(stage),
                                                settings.splitting_size,
                                                player_profile.wow_class)
    else:
        try:
            check_results_file(subdir_previous_stage)
        except Exception as ex:
            msg = f'Error while checking result files in {subdir_previous_stage}: {ex}. Please restart AutoSimc at a previous stage.'
            raise RuntimeError(msg) from ex
        if settings.default_grabbing_method == 'target_error':
            filter_by = 'target_error'
            filter_criterium = None
        elif settings.default_grabbing_method == 'top_n':
            filter_by = 'count'
            # Negative index: counts back from the end of default_top_n
            # (the last stage reads index -1).
            filter_criterium = settings.default_top_n[stage - stages - 1]
        else:
            # Previously fell through and failed later with an opaque
            # UnboundLocalError on ``filter_by``.
            raise ValueError(
                f'Unknown default_grabbing_method: {settings.default_grabbing_method!r}')
        is_last_stage = (stage == stages)
        num_generated_profiles = splitter.grab_best(filter_by,
                                                    filter_criterium,
                                                    subdir_previous_stage,
                                                    get_subdir(stage),
                                                    outputfile,
                                                    not is_last_stage)
    if num_generated_profiles:
        logger.info(f'Found {num_generated_profiles} profile(s) to simulate.')
    return num_generated_profiles

Exemplo n.º 18

0

Exibir arquivo

Arquivo: test_compound_word_splitter.py Projeto: jaki2012/compound-word-splitter

def test_split_german_three_compounds():
    """Check that a three-part German compound is split into its parts."""
    split_result = split('Effektivitätsberechnungsformular', 'de_de')
    expectations = (
        (0, 'Effektivität', '%s != "Effektivität"'),
        (1, 'Berechnung', '%s != "Berechnung"'),
        (2, 'Formular', '%s != "Formular"'),
    )
    for index, expected, template in expectations:
        assert split_result[index] == expected, template % (
            split_result[index])

Exemplo n.º 19

0

Exibir arquivo

Arquivo: test_appx_gp.py Projeto: kgmacau/mclass-sky

def main(path_in):
    """Compare exact and approximate GP predictive uncertainties and plot them.

    Loads and splits the data from *path_in*, obtains the exact GP standard
    deviations for the test set (from the ``gp_preds.txt`` cache when usable,
    otherwise by fitting and predicting), fits the approximate GP on the
    approximated kernel features, fits a straight line between the two sets
    of standard deviations and shows a scatter plot coloured by ``test_y``.
    """
    print('Loading data...')
    data = splitter.load(path_in)
    (train_X, train_y), (test_X,
                         test_y) = splitter.split(data, TRAINING_NUM,
                                                  TESTING_NUM)

    # Reuse cached exact-GP standard deviations when the file exists and has
    # one value per test sample; otherwise recompute and rewrite the cache.
    # NOTE(review): the shape check is an ``assert``, which is stripped under
    # ``python -O`` -- a stale cache of the wrong size would then be accepted.
    try:
        gp_sigmas = np.loadtxt('gp_preds.txt')
        assert gp_sigmas.shape == (TESTING_NUM, )
    except (FileNotFoundError, AssertionError):
        print('Fitting GP...')
        kernel = sklearn.gaussian_process.kernels.RBF(
            length_scale=LENGTH_SCALE)
        gp = sklearn.gaussian_process.GaussianProcessRegressor(
            kernel=kernel, alpha=ALPHA, copy_X_train=False)
        gp.fit(train_X, train_y)

        print('Predicting GP...')
        # Only the standard deviations are kept; the means are discarded.
        _, gp_sigmas = gp.predict(test_X, return_std=True)

        np.savetxt('gp_preds.txt', gp_sigmas)

    print('Approximating kernel...')
    appx_train_X, appx_test_X = approximate_kernel(train_X, test_X)

    print('Fitting approximate GP...')
    agp = appx_gp.AppxGaussianProcessRegressor(alpha=ALPHA)
    agp.fit(appx_train_X, train_y)

    print('Predicting approximate GP...')
    _, agp_sigmas = agp.predict(appx_test_X, return_std=True)

    print('Finding best fit...')
    # Degree-1 polynomial fit: best_fit holds (slope, intercept).
    best_fit = np.polyfit(gp_sigmas, agp_sigmas, 1)
    # Bounding box of the scatter data, passed with the fitted line to
    # interval_in_box_from_line to get the two endpoints to draw.
    best_fit_box = (min(gp_sigmas), max(gp_sigmas), min(agp_sigmas),
                    max(agp_sigmas))
    best_fit_endpoints = interval_in_box_from_line(best_fit_box, best_fit)
    best_fit_xs, best_fit_ys = zip(*best_fit_endpoints)

    print('Plotting...')
    f = plt.figure()
    ax = f.add_subplot(111)
    # Points are coloured by the test targets (labelled 'Redshift' below).
    sc = plt.scatter(gp_sigmas, agp_sigmas, s=.2, c=list(test_y))
    plt.plot(best_fit_xs, best_fit_ys, color='red', label='Linear fit')
    plt.title(r'$\gamma = {:.4},$ #components$= {}$'.format(GAMMA, COMPONENTS))
    plt.xlabel('GP uncertainty')
    plt.ylabel('Approximate GP uncertainty')
    # Equation of the fitted line, positioned in axes coordinates.
    plt.text(.975,
             .1,
             '$y = {:.4}x {:+.4}$'.format(*best_fit),
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
    colorbar = plt.colorbar(sc)
    colorbar.set_label('Redshift')
    plt.legend(loc='lower right')
    plt.show()

Exemplo n.º 20

0

Exibir arquivo

def cronista_transcribe(audio_source_path: str,
                        destination_folder: str,
                        block_of_transcription: int,
                        lang: str,
                        on_transcription_progress=None,
                        to_file: bool = True):
    """Split an audio source into segments and transcribe them.

    The first three arguments and *to_file* are forwarded to ``split``; its
    result is handed to ``transcribe`` together with *lang* and the optional
    *on_transcription_progress* callback. Nothing is returned.
    """
    segments = split(
        audio_source_path,
        destination_folder,
        block_of_transcription,
        to_file,
    )
    transcribe(segments, lang, on_transcription_progress)

Exemplo n.º 21

0

Exibir arquivo

 def split_compound(self, sentance):
     """Split compound words found in the utterance.

     The utterance is converted to ``str``, cut on runs of non-word
     characters, re-joined with single spaces and passed to
     ``splitter.split``, whose result is returned. Splitting compounds makes
     it easier to confirm that all words are found in the search.
     """
     normalised = " ".join(re.split(r'\W+', str(sentance)))
     return splitter.split(normalised)

Exemplo n.º 22

0

Exibir arquivo

Arquivo: register.py Projeto: sandhawke/ldreg

def list_(term):
    """Ask the trackers of the term's namespace to list *term*.

    The namespace part of *term* selects the trackers via
    ``determine_trackers``. They are tried in order and the first successful
    ``tracker_call`` result is returned. A ``TrackerFailure`` moves on to the
    next tracker; if every tracker fails (or there are none) the function
    falls off the end and returns ``None``.
    """
    print "Looking for trackers for term"
    (ns, local) = splitter.split(term)
    ns_trackers = determine_trackers(ns)
    for t in ns_trackers:
        try:
            print "Asking "+t
            # First tracker that answers wins.
            return tracker_call(t, "list", term=term)
        except TrackerFailure:
            print t+": failed, moving on..."

Exemplo n.º 23

0

Exibir arquivo

Arquivo: naivebayes.py Projeto: kurozumi/NaiveBayes

    def predict_(self, sentence):
        """Return the category with the highest score for *sentence*.

        Computes arg max log(P(category | sentence)) over the keys of
        ``self.word_count`` using ``self.calc_score``. On ties the first
        category encountered wins; ``None`` is returned when no category
        scores above negative infinity.
        """
        tokens = splitter.split(sentence)
        winner, top_score = None, float('-inf')

        for label in self.word_count.keys():
            score = self.calc_score(tokens, label)
            if score > top_score:
                winner, top_score = label, score
        return winner

Exemplo n.º 24

0

Exibir arquivo

Arquivo: naivebayes.py Projeto: IshitaTakeshi/NaiveBayes

    def predict_(self, sentence):
        """Return the category with the highest score for *sentence*.

        Computes arg max log(P(category | sentence)) over the keys of
        ``self.word_count`` using ``self.calc_score``. On ties the first
        category encountered wins; ``None`` is returned when no category
        scores above negative infinity.
        """
        tokens = splitter.split(sentence)
        winner, top_score = None, float('-inf')

        for label in self.word_count.keys():
            score = self.calc_score(tokens, label)
            if score > top_score:
                winner, top_score = label, score
        return winner

Exemplo n.º 25

0

Exibir arquivo

Arquivo: data_cleaning.py Projeto: MrwanBaghdad/emoji-predicto

def process_hashtags(original_text, mode=NORMAL_MODE):
    """Replace every ``#hashtag`` in the text with its space-separated words.

    In ``AGGRESSIVE_MODE`` the hashtag body is segmented with
    ``splitter.split``; in any other mode with ``segment``. Every ``#`` is
    replaced by a space.

    Each hashtag is rewritten exactly where it occurs. The previous
    implementation ran ``str.replace`` with the hashtag body over the whole
    text, which also rewrote unrelated occurrences of that substring and
    whose result depended on set iteration order when one hashtag was a
    substring of another.
    """
    segmenter = splitter.split if mode == AGGRESSIVE_MODE else segment

    def expand(match):
        # The leading space stands in for the removed '#'.
        return ' ' + ' '.join(segmenter(match.group(1)))

    cleaned_text = re.sub(r"#(\w+)", expand, original_text)
    # Any '#' not followed by a word character is blanked as before.
    return cleaned_text.replace('#', ' ')

Exemplo n.º 26

0

Exibir arquivo

def main():
    """Split the power-supply dataset, run ``testEnsKmm`` and print beta.

    The first 3-tuple member returned by ``split`` is used as the training
    set and the third as the test set; the kernel width is computed from the
    training set.
    """
    # NOTE(review): ``path`` is not used below -- the same literal is passed
    # to split() directly.
    path = '../dataset/powersupply.arff'
    # Earlier loading variants, kept by the author for reference:
    #data, meta = arff.loadarff(path)
    #data, label, maxFeature = getArffData(path,1000)
    #train, trainBeta, test = generateTrain(data, 100)
    train, train_beta, test = split('../dataset/powersupply.arff', 500)
    # Feature count is taken from the length of the first training row.
    maxFeature = len(train[0])
    # NOTE(review): sampleSize is only referenced by the commented-out print.
    sampleSize = len(train)
    #print "sampleSize:" , sampleSize
    gammab = computeKernelWidth(np.array(train))
    result = testEnsKmm(train,test,gammab,10,maxFeature)
    # The first element of the result is reported as beta.
    beta = result[0]
    print "beta",beta

Exemplo n.º 27

0

Exibir arquivo

Arquivo: __init__.py Projeto: YupengGao/scalable_transfer_learning

def main():
    """Split the power-supply dataset, run ``testEnsKmm`` and print beta.

    The first 3-tuple member returned by ``split`` is used as the training
    set and the third as the test set; the kernel width is computed from the
    training set.
    """
    # NOTE(review): ``path`` is not used below -- the same literal is passed
    # to split() directly.
    path = '../dataset/powersupply.arff'
    # Earlier loading variants, kept by the author for reference:
    #data, meta = arff.loadarff(path)
    #data, label, maxFeature = getArffData(path,1000)
    #train, trainBeta, test = generateTrain(data, 100)
    train, train_beta, test = split('../dataset/powersupply.arff', 500)
    # Feature count is taken from the length of the first training row.
    maxFeature = len(train[0])
    # NOTE(review): sampleSize is only referenced by the commented-out print.
    sampleSize = len(train)
    #print "sampleSize:" , sampleSize
    gammab = computeKernelWidth(np.array(train))
    result = testEnsKmm(train,test,gammab,10,maxFeature)
    # The first element of the result is reported as beta.
    beta = result[0]
    print "beta",beta

Exemplo n.º 28

0

Exibir arquivo

def findBest(tensor, threshold):
    """Greedily factor *tensor* by repeatedly taking the best pairwise split.

    Every index pair ``(i, j)`` with ``i < j`` is tried with ``split``. A
    candidate is accepted when it has fewer singular values than the best so
    far and its two factors together are smaller than the tensor. If no
    candidate qualifies, a one-element list holding a copy of the tensor is
    returned. Otherwise the result is the first factor followed by the
    second one, which is itself factored recursively while it has more than
    three axes.
    """
    t = np.copy(tensor)
    lowest_rank = 1e100
    factors = None
    for i in range(len(tensor.shape) - 1):
        for j in range(i + 1, len(t.shape)):
            u, s, v, vs = split(t, (i, j), threshold)
            smaller = u.size + vs.size < t.size
            if len(s) < lowest_rank and smaller:
                lowest_rank = len(s)
                factors = [u, vs]

    if factors is None:
        return [t]
    if len(factors[-1].shape) > 3:
        return factors[:-1] + findBest(factors[-1], threshold)
    return factors

Exemplo n.º 29

0

Exibir arquivo

    def read_text(self, stream):
        """Register every word of *stream* together with its line number.

        Lines are numbered from 1. Each line is lowercased and cut with
        ``splitter.split`` on spaces, common punctuation and whitespace
        control characters; every resulting word is passed to
        ``self.add_word`` with the line number.
        """
        for line_number, line in enumerate(stream.readlines(), 1):
            pieces = splitter.split(
                line.lower(),
                [' ', ',', '.', ':', ';', '\n', '\r', '\t'])
            for piece in pieces:
                self.add_word(piece, line_number)

Exemplo n.º 30

0

Exibir arquivo

Arquivo: handle.py Projeto: ashupednekar/piplineUtils

def handlespace(word):
    """Pad the audited fragment of *word* with spaces and re-split the result.

    ``audit(word)`` returns a fragment and a flag. When the flag is set, the
    first occurrence of the fragment inside *word* is surrounded by single
    spaces; otherwise the word is used unchanged. The prepared text is split
    with ``splitter.split(..., 'en_gb')`` and the stripped pieces are joined
    with single spaces.
    """
    s, flag = audit(word)
    if flag:
        index = word.find(s)
        end = index + len(s)
        topass = word[:index] + ' ' + word[index:end] + ' ' + word[end:]
    else:
        topass = word
    print('penultimate...', topass)
    # Split the prepared text. The original passed the leftover debug literal
    # 'fixed  deposit ' here, so the result never depended on *word*.
    return ' '.join([x.strip() for x in splitter.split(topass, 'en_gb')])

Exemplo n.º 31

0

Exibir arquivo

def strip_hashtags(text):
    """Replace every ``#hashtag`` in *text* with plain words.

    The text is first passed through ``preprocess_clean``. A hashtag whose
    body is accepted by either dictionary (``d`` or ``dus``) is replaced by
    that body; any other hashtag is lowercased, split with
    ``splitter.split(..., 'en_US')`` and replaced by the pieces, each
    followed by a space.

    All hashtags are rewritten in a single pass. The previous implementation
    ran ``re.sub(tag, ...)`` per tag, so a short tag such as ``#art`` also
    rewrote the start of ``#artfactory`` and the longer tag was never split.
    """
    text = preprocess_clean(text, False, True)

    def expand(match):
        cleantag = match.group(0)[1:]
        if d.check(cleantag) or dus.check(cleantag):
            return cleantag
        return ''.join(
            word + " " for word in splitter.split(cleantag.lower(), 'en_US'))

    # Raw string: '\w' in a normal literal is an invalid escape sequence.
    return re.sub(r'#[\w\-]+', expand, text)

Exemplo n.º 32

0

Exibir arquivo

def process(source,
            scene_root,
            verbose=False,
            clean=False,
            list_file=None,
            overwrite=False):

    if pusher.check_existance(scene_root):
        print 'Scene %s already exists on destination bucket.' % scene_root
        if not overwrite:
            return collect_missing_entry(scene_root, verbose, clean, list_file)

    if verbose:
        print 'Processing scene: %s' % scene_root

    scene_dict = {}

    local_tarfile = puller.pull(source,
                                scene_root,
                                scene_dict,
                                verbose=verbose)

    try:
        local_dir = splitter.split(scene_root, local_tarfile, verbose=verbose)
    except:
        if source == 's3queue':
            # Remove problematic scenes from the queue directory
            puller_s3queue.clean_queued_tarfile(scene_root)
            return

    scene_info.add_mtl_info(scene_dict, scene_root, local_dir)

    thumbnailer.thumbnail(scene_root, local_dir, verbose=verbose)
    scene_index_maker.make_index(scene_root, local_dir, verbose=verbose)
    pusher.push(scene_root,
                local_dir,
                scene_dict,
                verbose=verbose,
                overwrite=overwrite)

    if clean:
        os.unlink(local_tarfile)
        shutil.rmtree(local_dir)

    if list_file:
        scene_info.append_scene_line(list_file, scene_dict)

    return scene_dict

Exemplo n.º 33

0

Exibir arquivo

Arquivo: uncertainty_curve.py Projeto: kgmacau/mclass-sky

def main(path_in, path_out):
    """Run ``perform_gp`` on growing training subsets and dump the results.

    The data from *path_in* is split into training and test partitions. For
    every size from ``STEP`` to ``MAX_GP`` in steps of ``STEP``,
    ``perform_gp`` is called on the first ``size`` training samples and the
    test inputs. The sizes (``gp_x``) and results (``gp_y``) are written to
    *path_out* as JSON.
    """
    dataset = splitter.load(path_in)
    train, test = splitter.split(dataset, TRAINING_SAMPLES_NUM,
                                 TESTING_SAMPLES_NUM)
    train_X, train_y = train
    test_X, test_y = test

    sizes = list(range(STEP, MAX_GP + 1, STEP))
    results = []
    for run, size in enumerate(sizes, 1):
        print('Starting GP', run)
        results.append(perform_gp(train_X[:size], train_y[:size], test_X))

    payload = {
        'gp_x': sizes,
        'gp_y': results,
    }
    with open(path_out, 'w') as f:
        json.dump(payload, f)

Exemplo n.º 34

0

Exibir arquivo

Arquivo: text_image_convnet.py Projeto: ADITYA727/HeLp_for_ML

def get_hashtag_tokenize(hash_tag):
    """Tokenise a hashtag into words.

    The input with ``#`` removed and whitespace collapsed is returned as-is
    when it is empty. Otherwise the *original* input is split with
    ``splitter.split`` and joined with spaces; if that text is longer than
    one character it is parsed with ``parse_sentence`` against the word list
    from ``initialize_words``, else the cleaned input is returned lowercased.
    """
    cleaned = ' '.join(hash_tag.replace('#', '').split())
    if not len(cleaned) > 0:
        return cleaned

    # Note: the splitter receives the untouched input, '#' included.
    spaced = ' '.join(splitter.split(hash_tag))
    if len(spaced) > 1:
        wordlist = initialize_words()
        return parse_sentence(spaced, wordlist)
    return cleaned.lower()

Exemplo n.º 35

0

Exibir arquivo

def prove_terminates(filename):
    """Split *filename* into a temporary C file and run kalashnikov.py on it.

    The split output goes to a non-deleted temporary ``.c`` file, which is
    combined with the ``unranking.c`` skeleton. The number of variable ids is
    passed as both ``-a`` and ``--evars``; the ``nondet`` value reported by
    the splitter and any ``sys.argv[2:]`` arguments are appended.
    """
    splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c',
                                            delete=False)
    id_map, has_nested, nondet = splitter.split(filename, splitfile)
    nids = len(id_map)
    varnames = ' '.join(id_map[k] for k in xrange(nids))

    splitfile.close()

    template = ("kalashnikov.py %s ../../tests/termination/unranking.c "
                "-P2 --synth-strategy=genetic -c1 --fastverif=True "
                "-newsize=5 -replaceprob=0.15 -mutprob=0.1 "
                "-tourneysize=5 -popsize=3000 -w4 "
                "-a%d --evars %d --varnames %s --resnames I --seed=1337 "
                "--nondet=%d %s")
    extra_args = ' '.join(sys.argv[2:])
    cmd = template % (splitfile.name, nids, nids, varnames, nondet,
                      extra_args)
    os.system(cmd)

Exemplo n.º 36

0

Exibir arquivo

Arquivo: thesisplots.py Projeto: kgmacau/mclass-sky

def get_colours(path):
    """Load the data at *path* and replace its five columns with colours.

    The five feature columns are read as ``u, g, r, i, z`` and overwritten in
    place with ``r``, ``u - g``, ``u - r``, ``r - i`` and ``i - z``.
    Returns the ``(X, y)`` pair with the rewritten feature matrix.
    """
    loaded = splitter.load(path)
    everything, _ = splitter.split(loaded, loaded[0].shape[0], 0)
    X, y = everything

    u, g, r, i, z = X.T
    # All differences are computed before any column is overwritten; ``r`` is
    # written to column 0 before column 2 (its source) is replaced.
    new_columns = (r, u - g, u - r, r - i, i - z)
    for column, values in enumerate(new_columns):
        X[:, column] = values
    return X, y

Exemplo n.º 37

0

Exibir arquivo

Arquivo: __main__.py Projeto: Azarattum/AutoHandwriter

def main():
    """Render text onto the page images, or validate the image dataset.

    Usage: ``py . <pdf>`` or ``py . validate``. In PDF mode the text is
    extracted, blank lines are collapsed, and the text is passed through
    ``randomize`` and ``split`` before being written 19 lines per page. In
    validate mode every image is processed, with the literal argument used as
    the text source. Each output image is written to ``./out/`` under the
    source image's base name.
    """
    if (len(argv) < 2):
        print("Usage: py . <pdf>/validate")
        exit()
    source = argv[1]
    path = "./data/images/"
    validate = len(argv) >= 2 and argv[1] == "validate"

    images = [path + x for x in listdir(path)]
    # The first page image is handed to split() together with the text.
    img = cv.imread(images[0])

    if not validate:
        print("Processing the pdf (this might take a while)...")
        source = extract_text(source)
        source = re.sub("\n+", "\n", source).strip()
        source = randomize(source)
        source = split(img, source)
    else:
        print("Validating the dataset...")
    lines = source.split("\n")
    # Drop lines of three characters or fewer.
    lines = list(filter(lambda line: len(str(line)) > 3, lines))

    # Lines per page
    lpp = 19
    total = ceil(len(lines) / lpp) if not validate else len(images)
    print("Pages will be generated: {}".format(total))

    for i, image in enumerate(images):
        img = cv.imread(image)

        text = "\n".join(lines[i * lpp:i * lpp + lpp])
        # Outside validate mode, stop once the text has run out.
        if (not text and not validate):
            break
        img, points = detect(img, validate)

        img, _ = handwrite(img, text, points)
        img = apply_effects(img)
        # np.intp is the type np.int0 aliased; np.int0 itself was removed in
        # NumPy 2.0, so the alias can no longer be used.
        img = np.intp(img * 255)

        cv.imwrite("./out/{}".format(basename(image)), img)
        print("Generated: {}/{}    ".format(i + 1, total), end="\r")

    print("\nDone!")

Exemplo n.º 38

0

Exibir arquivo

Arquivo: brute.py Projeto: adamjermyn/TensorDecomp

def findBest(tensor, threshold):
	"""Try every tree from constructTrees and keep the smallest factor set.

	For each tree a copy of *tensor* is split repeatedly along the index
	groups given by treeToSplit. The ``u`` factor of every split is
	collected, plus ``vs`` whenever the remaining array is 3-dimensional.
	The tree whose collected arrays have the smallest total ``size`` wins.

	Returns ``[total_size, tree, arrays]`` for the best tree, or the initial
	``[1e100, None]`` if no tree beats that bound.
	"""
	best = [1e100, None]
	trees = constructTrees(len(tensor.shape))
	print(len(trees))

	for count, tree in enumerate(trees):
		# Progress report every 50 trees.
		if count % 50 == 0:
			print(count, len(trees))
		remainder = np.copy(tensor)
		pieces = []
		for indices in treeToSplit(tree):
			u, _, remainder, vs = split(remainder, indices, threshold)
			pieces.append(u)
			if len(remainder.shape) == 3:
				pieces.append(vs)
		total_size = sum([piece.size for piece in pieces])
		if total_size < best[0]:
			best = [total_size, tree, pieces]

	return best

Exemplo n.º 39

0

Exibir arquivo

def find_all(mstring):
    """Return a dash-separated rendering of *mstring*, or None.

    The joined characters of *mstring* must match an optional leading ``1``,
    an optional three-digit group and a trailing run of ``A-Z``/``0-9``.
    The trailing run is handed to ``splitter.split``; if that yields a
    non-empty result, the leading groups and the split parts are joined
    with ``-``, printed and returned. In every other case the result is
    None.
    """
    # Group 3 captures the alphanumeric substring of the phone number.
    match = re.match(r'^(1)?(\d{3})?([A-Z0-9]*)$', "".join(mstring))
    if not match:
        return None

    tail = match.group(3)
    if not tail:
        # No alphanumeric substring to split.
        return None

    sublist = splitter.split(tail)
    if not sublist:
        # No word combination was found.
        return None

    # Replace the raw tail (last group) with its split parts, then drop
    # any None or empty entries left by unmatched optional groups.
    parts = list(match.groups())[:-1] + list(sublist)
    parts = [part for part in parts if part not in [None, '']]

    dashed = "-".join(parts)
    print(dashed)
    return dashed

Exemplo n.º 40

0

Exibir arquivo

Arquivo: __init__.py Projeto: YupengGao/scalable_transfer_learning

def maintest():
    path = '../dataset/powersupply.arff'
    #data, meta = arff.loadarff(path)
    #data, label, maxFeature = getArffData(path,1000)
    train, train_beta, test = split('../dataset/powersupply.arff', 300)
    #train, trainBeta, test = generateTrain(data, 300)
    train_data = np.array(train)
    test_data = np.array(test)
    maxFeature = train_data.shape[1]
    #maxFeature = len(train[0])
    print "maxFeature:" , maxFeature
    gammab = computeKernelWidth(train_data)
    print "gammab", gammab
    
    #print 'Converting train sparse to array ...'
    #Xtrain = convertSparseToList(train, maxFeature)
    #print 'Converting test sparse to array ...'
    #Xtest = convertSparseToList(test, maxFeature)
    beta,runtime = cenKmm(train_data, test_data, gammab, maxFeature)
    print beta

Exemplo n.º 41

0

Exibir arquivo

def maintest():
    path = '../dataset/powersupply.arff'
    #data, meta = arff.loadarff(path)
    #data, label, maxFeature = getArffData(path,1000)
    train, train_beta, test = split('../dataset/powersupply.arff', 300)
    #train, trainBeta, test = generateTrain(data, 300)
    train_data = np.array(train)
    test_data = np.array(test)
    maxFeature = train_data.shape[1]
    #maxFeature = len(train[0])
    print "maxFeature:" , maxFeature
    gammab = computeKernelWidth(train_data)
    print "gammab", gammab
    
    #print 'Converting train sparse to array ...'
    #Xtrain = convertSparseToList(train, maxFeature)
    #print 'Converting test sparse to array ...'
    #Xtest = convertSparseToList(test, maxFeature)
    beta,runtime = cenKmm(train_data, test_data, gammab, maxFeature)
    print beta

Exemplo n.º 42

0

Exibir arquivo

Arquivo: OCR.py Projeto: sloanesturz/cs231a-proj

def main():
	start = time.time()

	classifier = Classifier()
	classifier.train()

	print "Training finished at %ds\n" % (time.time() - start)

	i = 1
	for fn in os.listdir('./data/matthew/'):
		with codecs.open('./data/matthew/out/%d.txt' % i, 'w+', encoding='utf-8') as fw:
			i += 1
			print './data/matthew/%s' % fn
			chars = split('./data/matthew/%s' % fn)
			for _, line in chars.iteritems():
				for char in line:
					char = 255 - char
					charout = classifier.classify(char)
					fw.write(unicode(charout[0]))
				fw.write('\n')
		print "Finished Matthew %d at %ds\n" % (i, (time.time() - start))

Exemplo n.º 43

0

Exibir arquivo

def prove_terminates(filename):
    """Split *filename* and run kalashnikov.py on the result.

    ``splitter.split`` is handed an open temporary ``.c`` file, which is
    created with ``delete=False`` so it stays on disk after being closed.
    The variable names passed to the tool are the ``id_map`` entries
    followed by one ``0`` per entry. Anything in ``sys.argv[2:]`` is
    appended to the command line, which is run with ``os.system``.
    """
    split_out = tempfile.NamedTemporaryFile(mode='w',
                                            suffix='.c',
                                            delete=False)
    id_map, _has_nested, _nondet = splitter.split(filename, split_out)
    nids = len(id_map)

    names = ' '.join(id_map[k] for k in xrange(nids))
    zeros = ' '.join(["0"] * nids)
    varnames = names + ' ' + zeros

    split_out.close()

    # Adjacent literals concatenate into a single format string.
    template = (
        "kalashnikov.py "
        "%s ../../tests/termination/combined.c "
        "-P3 "
        "--seed=1337 "
        "--synth-strategy=genetic -a%d --evars %d --varnames %s --resnames I "
        "--fastverif=True -c=1 -keepfrac=15 -mutprob=0.25 -newfrac=2 -popsize=500 "
        "-recombprob=0.05 -tourneysize=10 -w=3 "
        "%s"
    )
    extra_args = ' '.join(sys.argv[2:])
    cmd = template % (split_out.name, nids * 2, nids, varnames, extra_args)

    os.system(cmd)

Exemplo n.º 44

0

Exibir arquivo

Arquivo: termination.py Projeto: blexim/synth

def prove_terminates(filename):
  """Split *filename* and run kalashnikov.py with a matching skeleton.

  ``splitter.split`` is handed an open temporary ``.c`` file (kept on disk
  via ``delete=False``). When it reports nested loops, the ``nested.c``
  skeleton is used with 3 programs and twice as many arguments, and a
  primed copy of every variable name is appended. Otherwise ``ranking.c``
  is used with 2 programs. Extra command-line arguments from
  ``sys.argv[2:]`` are passed through, and the command is run with
  ``os.system``.
  """
  split_out = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
  id_map, has_nested, _nondet = splitter.split(filename, split_out)
  nids = len(id_map)
  names = [id_map[k] for k in xrange(nids)]
  varnames = ' '.join(names)

  split_out.close()

  if has_nested:
    skeleton, nprogs, nargs = "../../tests/termination/nested.c", 3, nids*2
    # Append each name again with an escaped trailing quote.
    varnames += ' ' + ' '.join("%s\\'" % name for name in names)
  else:
    skeleton, nprogs, nargs = "../../tests/termination/ranking.c", 2, nids

  # Adjacent literals concatenate into a single format string.
  template = ("kalashnikov.py "
              "%s %s "
              "-P%d  "
              "--seed=1337 -a%d --varnames %s "
              "--synth-strategy=genetic "
              "--fastverif=True "
              "-c1 "
              "-newsize=5 "
              "-replaceprob=0.15 "
              "-mutprob=0.1 "
              "-tourneysize=5 "
              "-popsize=3000 "
              "-w4 "
              "%s")
  extra_args = ' '.join(sys.argv[2:])
  cmd = template % (split_out.name, skeleton, nprogs, nargs, varnames,
                    extra_args)
  os.system(cmd)

Exemplo n.º 45

0

Exibir arquivo

Arquivo: complexity.py Projeto: blexim/synth

def complexity(filename):
  """Split *filename* and run kalashnikov.py with the complexity skeleton.

  ``splitter.split`` is handed an open temporary ``.c`` file (kept on disk
  via ``delete=False``). The variable names are the ``id_map`` entries
  followed by ``bound``, so the argument count is ``nids + 1``. The
  ``nondet`` value returned by the splitter and any extra arguments from
  ``sys.argv[2:]`` are passed on, and the command is run with
  ``os.system``.
  """
  split_out = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
  id_map, _has_nested, nondet = splitter.split(filename, split_out)
  nids = len(id_map)
  names = [id_map[k] for k in xrange(nids)]
  varnames = ' '.join(names) + ' bound'

  split_out.close()

  # Adjacent literals concatenate into a single format string.
  template = ("kalashnikov.py "
              "%s %s/tests/loops/complexity.c "
              "-P2 "
              "--seed=1337 "
              #"--strategy=evolve "
              "-a%d --evars 0 --varnames %s --nondet=%d -w=3 "
              "%s")
  extra_args = ' '.join(sys.argv[2:])
  cmd = template % (split_out.name, kalashnikov_dir, nids + 1, varnames,
                    nondet, extra_args)

  os.system(cmd)

Exemplo n.º 46

0

Exibir arquivo

Arquivo: headshot.py Projeto: blexim/synth

def prove_terminates(filename):
  """Split *filename* and run kalashnikov.py with the combined skeleton.

  ``splitter.split`` is handed an open temporary ``.c`` file (kept on disk
  via ``delete=False``). The variable names are the ``id_map`` entries
  followed by one ``0`` per entry. Extra arguments from ``sys.argv[2:]``
  are appended and the command is run with ``os.system``.
  """
  split_out = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
  id_map, _has_nested, _nondet = splitter.split(filename, split_out)
  nids = len(id_map)

  names = ' '.join(id_map[k] for k in xrange(nids))
  zeros = ' '.join(["0"] * nids)
  varnames = names + ' ' + zeros

  split_out.close()

  # Adjacent literals concatenate into a single format string.
  template = ("kalashnikov.py "
              "%s ../../tests/termination/combined.c "
              "-P3 "
              "--seed=1337 "
              "--synth-strategy=genetic -a%d --evars %d --varnames %s --resnames I "
              "--fastverif=True -c=1 -keepfrac=15 -mutprob=0.25 -newfrac=2 -popsize=500 "
              "-recombprob=0.05 -tourneysize=10 -w=3 "
              "%s")
  extra_args = ' '.join(sys.argv[2:])
  cmd = template % (split_out.name, nids*2, nids, varnames, extra_args)

  os.system(cmd)

Exemplo n.º 47

0

Exibir arquivo

Arquivo: testsplitter.py Projeto: astraltear/Python3

 def testtypeconvert(self):
     # With a list of types as second argument, each field is expected
     # back converted by the type at the same position.
     expected = ['GOOD',100,490.50]
     converted = splitter.split("GOOD 100 490.50", [str, int, float])
     self.assertEqual(converted, expected)

Exemplo n.º 48

0

Exibir arquivo

Arquivo: naivebayes.py Projeto: IshitaTakeshi/NaiveBayes

 def fit_(self, sentence, category):
     """Count every word of *sentence* under *category*.

     The sentence is tokenized with ``splitter.split`` and ``count_word``
     is called once per resulting word.
     """
     for token in splitter.split(sentence):
         self.count_word(token, category)

Exemplo n.º 49

0

Exibir arquivo

Arquivo: testsplitter.py Projeto: astraltear/Python3

 def testsimplestring(self):
     # With no extra arguments, the fields are expected back as strings.
     expected = ['GOOD','100','490.50']
     fields = splitter.split("GOOD 100 490.50")
     self.assertEqual(fields, expected)

Exemplo n.º 50

0

Exibir arquivo

Arquivo: preprocessing.py Projeto: bernardgut/MLTools

        amax = np.amax(A)
        amin = np.amin(A)
    
    #normalize
    J = amin*np.ones(A.shape)
    N = (A-J)/(amax-amin)
    return N, amax, amin
    
#load data
(A,L)=loader.loadTrainingSet_35()
print 'dataset 3-5 : Train : success'
print A.shape
#normalize
N, tmax, tmin = normalize(A)
#split data
(T, T_l, V, V_l)=splitter.split(N, L)
print '3-5 : Training.training : ', T.shape, ' l : ',T_l.shape, ' ; Training.validation : ',V.shape, ' l : ', ' max :',tmax,' min', tmin 
#save to disk
np.save('../mnist/n_MNIST_Training35',T)
np.save('../mnist/n_MNIST_Validation35',V)
np.save('../mnist/n_MNIST_Training_labels35',T_l)
np.save('../mnist/n_MNIST_Validation_labels35',V_l)

#load test data
(A,L)=loader.loadTestSet_35()
print 'dataset 3-5 : Test : success'
print A.shape
#normalize with the same min,max as training set
N,_,__ = normalize(A, tmax, tmin)
#split data
print '3-5 :Test set : ', N.shape, ' l : ',L.shape, ' max :',tmax,' min', tmin

Exemplo n.º 51

0

Exibir arquivo

def remove_old_tiles():
  """Delete every regular file directly inside the ``tiles`` directory.

  The path is relative to the current working directory. Entries that are
  not regular files (for example subdirectories) are left untouched. A
  file that cannot be removed is reported on stdout and skipped, so one
  failure does not abort the cleanup.
  """
  path = 'tiles'
  for name in os.listdir(path):
    target = os.path.join(path, name)
    if os.path.isfile(target):
      try:
        os.remove(target)
      except OSError:
        # Only filesystem errors are expected here; a bare except would
        # also swallow KeyboardInterrupt and SystemExit.
        print('Could not delete', name, 'in', path)

# Splitting stage: split unless --no_split was given and a previous split
# result is actually available.
if not args.no_split:
  remove_old_tiles()
  os.chdir(WORK_DIR)
  print()
  printinfo("now splitting the mapdata...")
  splitter.split()

elif not os.path.exists(WORK_DIR + "tiles/" + buildmap + "_split.ready"):
  # --no_split was requested but the ready marker is missing, so the
  # option is ignored and the split runs anyway.
  print()
  printwarning("can't find tiles/" + buildmap + "_split.ready")
  print("--no_split/-ns makes no sense, ignoring it")
  splitter.split()

"""
--stop_after splitter

"""

if args.stop_after == "splitter":
  print()

Exemplo n.º 52

0

Exibir arquivo

Arquivo: docclass.py Projeto: cametan001/document_filtering

def getwords(doc):
    """Return the unique words of *doc* as a dict mapping each word to 1.

    Tokens come from ``splitter.split``. Only tokens longer than 2 and
    shorter than 20 characters are kept, and they are lowercased.
    """
    unique = {}
    for token in splitter.split(doc):
        if 2 < len(token) < 20:
            unique[token.lower()] = 1
    # Return the set of unique words.
    return unique

Exemplo n.º 53

0

Exibir arquivo

Arquivo: testsplitter.py Projeto: Bee3key/Python_labs

	def testsimplestring(self):
		# With no extra arguments, the fields are expected back as strings.
		expected = ['GOOG','100','490.50']
		fields = splitter.split('GOOG 100 490.50')
		self.assertEqual(fields, expected)

Exemplo n.º 54

0

Exibir arquivo

def splitargs(args):
    """Tokenize the command line *args*.

    The string is split with ``split`` and the resulting tokens are passed
    through ``closequote`` before being returned.
    """
    tokens = split(args)
    return closequote(tokens)

Exemplo n.º 55

0

Exibir arquivo

Arquivo: testsplitter.py Projeto: Bee3key/Python_labs

	def testtypeconvert(self):
		# With a list of types as second argument, each field is expected
		# back converted by the type at the same position.
		expected = ['GOOG', 100, 490.5]
		converted = splitter.split('GOOG 100 490.50', [str, int, float])
		self.assertEqual(converted, expected)

Exemplo n.º 56

0

Exibir arquivo

Arquivo: testsplitter.py Projeto: Bee3key/Python_labs

	def testdelimiter(self):
		# A comma-separated line split with delimiter=',' is expected to
		# give the same string fields.
		expected = ['GOOG','100','490.50']
		fields = splitter.split('GOOG,100,490.50', delimiter=',')
		self.assertEqual(fields, expected)

Exemplo n.º 57

0

Exibir arquivo

Arquivo: testsplitter.py Projeto: astraltear/Python3

 def testdelimeter(self):
     # The keyword is spelled "delimeter" here because that is the name
     # this call passes to splitter.split; it must match that signature.
     expected = ['GOOD','100','490.50']
     fields = splitter.split("GOOD,100,490.50", delimeter=",")
     self.assertEqual(fields, expected)

Exemplo n.º 58

0

Exibir arquivo

Arquivo: testsplitter.py Projeto: vatslav/python

	def testTypeConvert(self):
		# With a list of types as second argument, each field is expected
		# back converted by the type at the same position.
		expected = ['Goog',100,490.50]
		converted = splitter.split('Goog 100 490.50', [str, int, float])
		self.assertEqual(converted, expected)

Exemplo n.º 59

0

Exibir arquivo

Arquivo: testsplitter.py Projeto: vatslav/python

	def testSimpleString(self):
		# With no extra arguments, the fields are expected back as strings.
		expected = ['Goog','100','490.50']
		fields = splitter.split('Goog 100 490.50')
		self.assertEqual(fields, expected)