def plot(sdss_path, dflens_path, out_path):
    sdss_data = splitter.load(sdss_path)
    sdss_data, _ = splitter.split(sdss_data, sdss_data[0].shape[0], 0)
    _, sdss_y = sdss_data
    dflens_data = splitter.load(dflens_path)
    dflens_data, _ = splitter.split(dflens_data, dflens_data[0].shape[0], 0)
    _, dflens_y = dflens_data

    fig, (plot1, plot2) = plt.subplots(2, 1)
    data_min = min(np.min(sdss_y), np.min(dflens_y))
    data_max = 1
    space = np.linspace(data_min, data_max, 100)

    plot1.hist(sdss_y, bins=space)
    plot1.set_xlim((data_min, data_max))
    plot1.set_ylabel('SDSS Count')
    plot1.set_xticks([])

    plot2.hist(dflens_y, bins=space)
    plot2.set_xlim((data_min, data_max))
    plot2.set_ylabel('2dFLenS Count')
    plot2.set_xlabel(r'$z_\mathrm{spec}$')

    plt.subplots_adjust(hspace=0)
    plt.savefig(out_path)
def split_and_send(update, context):
    splitter.split(n=context.chat_data['n'], split_axis=context.chat_data['axis'])
    for filename in os.listdir("output"):
        context.bot.send_photo(chat_id=update.effective_chat.id,
                               photo=open(f'output/{filename}', 'rb'))
        os.remove(f'output/{filename}')
    os.remove('input/input_img.jpg')
    # Reset stored parameters
    context.chat_data['n'] = False
    context.chat_data['axis'] = False
    context.chat_data['img_received'] = False
def test_split_incorrectly_capitalized():
    split_result = split('StartEreignis', 'de_de')
    assert split_result == '', '%s != ""' % split_result

    # should split unicode strings
    split_result = split(u'Resultatveröffentlichung', 'de_de')
    assert split_result[0] == u'Resultat', '%s != "Resultat"' % split_result[0]
    assert split_result[1] == u'Veröffentlichung', '%s != "Veröffentlichung"' % split_result[1]
def separatewords(self, text):
    # print(f"{text}")
    try:
        return [s.lower() for s in splitter.split(text) if s != '']
    except Exception:
        print(f"separatewords{text}")
        return []
def show_by_ns(self):
    ns_count = {}
    for (term, use) in self.term_uses.keys():
        (ns, local) = splitter.split(term)
        if ns and local:
            incr(ns_count, ns)
    print ns_count
def process(source, scene_root, verbose=False, clean=False, list_file=None,
            overwrite=False):
    if pusher.check_existance(scene_root):
        print 'Scene %s already exists on destination bucket.' % scene_root
        if not overwrite:
            return collect_missing_entry(scene_root, verbose, clean, list_file)

    if verbose:
        print 'Processing scene: %s' % scene_root

    scene_dict = {}
    local_tarfile = puller.pull(source, scene_root, scene_dict, verbose=verbose)
    local_dir = splitter.split(scene_root, local_tarfile, verbose=verbose)
    scene_info.add_mtl_info(scene_dict, scene_root, local_dir)
    thumbnailer.thumbnail(scene_root, local_dir, verbose=verbose)
    scene_index_maker.make_index(scene_root, local_dir, verbose=verbose)
    pusher.push(scene_root, local_dir, scene_dict, verbose=verbose,
                overwrite=overwrite)

    if clean:
        os.unlink(local_tarfile)
        shutil.rmtree(local_dir)

    if list_file:
        scene_info.append_scene_line(list_file, scene_dict)

    return scene_dict
def db_finish(self):
    now = time.time()
    self.db.update('scan', where="id = " + str(self.id),
                   triples=self.c, time_complete=now, status=1)
    for (term, use) in self.term_uses.keys():
        (ns, local) = splitter.split(term)
        nsid = irimap.to_id(self.db, ns)
        self.db.insert('term_use',
                       local=local,
                       namespace_id=nsid,
                       scan_id=self.id,
                       type=use,
                       count=self.term_uses[(term, use)])
    for t in self.primary_trackers:
        self.db.insert('trackers',
                       scan_id=self.id,
                       tracker_id=irimap.to_id(self.db, t),
                       is_primary=True)
    for t in self.backup_trackers:
        self.db.insert('trackers',
                       scan_id=self.id,
                       tracker_id=irimap.to_id(self.db, t),
                       is_primary=False)
    obsolete_old_scans(self.db, self.data_source_iri, self.id)
def list_(term, timecode=(-1), db=None):  # , limit, offset):
    """
    see http://dev.mysql.com/doc/refman/5.0/en/select.html

    for us, timecodes ARE scanids. A new scanid == a new change.

    are we going to need to SORT when we do limit/offset? pagerank?
    """
    if db is None:
        db = dbconn.Connection()
    (ns, local) = splitter.split(term)
    ns_id = irimap.to_id(db, ns)
    if timecode == -1:
        timecode = latest_timecode(db)
        print "list using latest timecode:", timecode
    else:
        if timecode < min_timecode(db):
            raise GarbageTimecode()
    for r in db.query('select text, type from term_use, scan, iri where scan_id <= $timecode and obsoleted_by > $timecode and namespace_id=$ns_id and scan.id=scan_id and status=1 and local=$local and iri.id=source_id', vars=locals()):
        yield unicode(r.type) + " " + unicode(r.text)
def prove_terminates(filename):
    splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
    (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
    nids = len(id_map)
    varnames = ' '.join(id_map[k] for k in xrange(nids))
    splitfile.close()

    if has_nested:
        skeleton = "../../tests/termination/nested.c"
        nprogs = 3
        nargs = nids * 2
        varnames += ' ' + ' '.join("%s\\'" % id_map[k] for k in xrange(nids))
    else:
        skeleton = "../../tests/termination/ranking.c"
        nprogs = 2
        nargs = nids

    cmd = (("kalashnikov.py " +
            "%s %s " +
            "-P%d " +
            "--seed=1337 -a%d --varnames %s " +
            "--synth-strategy=genetic " +
            "-c1 " +
            "--fastverif=True " +
            "-newsize=5 " +
            "-replaceprob=0.15 " +
            "-mutprob=0.1 " +
            "-tourneysize=5 " +
            "-popsize=3000 " +
            "-w4 " +
            "%s") %
           (splitfile.name, skeleton, nprogs, nargs, varnames,
            ' '.join(sys.argv[2:])))
    os.system(cmd)
def rewrite_token(t):
    """Rewrites every single token either as a boolean operator or in a
    format suitable for further processing."""
    d = {"AND": "&", "OR": "|", "NOT": "1 -", "(": "(", ")": ")"}
    if t in d:
        return d.get(t)
    if t[0] == "\"" and t[-1] == "\"":
        t = t.replace("\"", "")
        try:
            t_ngram = ' '.join([str(t.lower()) for t in splitter.split(t)])
            if t_ngram in terms:
                return 'sparse_td_matrix[t2i["{:s}"]].todense()'.format(t_ngram)
            else:
                if t_ngram == "":
                    unknownword_list.append(t)
                else:
                    unknownword_list.append(t_ngram)
                return 'np.matrix(np.zeros(len(documents), dtype=int))'
        except Exception:
            unknownword_list.append(t)
            return 'np.matrix(np.zeros(len(documents), dtype=int))'
    elif t.lower() in terms:
        return 'sparse_td_matrix[t2i["{:s}"]].todense()'.format(t.lower())
    else:
        if t[0] == "\"" or t[-1] == "\"":
            pass
        else:
            unknownword_list.append(t)
        return 'np.matrix(np.zeros(len(documents), dtype=int))'
def load_data(path='data/us_trial', max_example=None):
    """Load data from '{path}.{text, labels}'"""
    num_examples = 0
    tweets, emojis = [], []
    f_x, f_y = open(path + '.text', 'r'), open(path + '.labels', 'r')
    while True:
        tweet, emoji = f_x.readline(), f_y.readline()
        if not tweet or not emoji:
            break
        # TODO: extra preprocessing step for each tweet e.g. take care of slang
        tweet = tweet.strip().lower()  # delete whitespaces and lowercase
        # and more... -> more (... is a special unicode char)
        tweet = tweet[:-1] if tweet[-1] == u'\u2026' else tweet
        # artfactory -> art factory
        tweet = re.sub(r"#\S+",
                       lambda match: ' '.join(splitter.split(match.group()[1:])),
                       tweet)
        # no wayyyyy -> no way
        tweet = re.sub(r"\b(\S*?)(.)\2{2,}\b", r"\1\2", tweet,
                       (re.MULTILINE | re.DOTALL))
        # @ user -> user, #omg -> omg
        tweet = tweet.translate({ord(c): None for c in '@#'})
        words = TweetTokenizer().tokenize(tweet)
        tweet = ' '.join(words)
        tweets.append(tweet)
        emojis.append(int(emoji))  # convert '7' -> 7
        num_examples += 1
        if (max_example is not None) and (num_examples >= max_example):
            break
    print("%d examples loaded" % num_examples)
    return tweets, emojis
def grab_profiles(player_profile: Profile, stage: int, num_stages: int,
                  output_file_name: str, tempdir: str) -> int:
    """Parse output/result files from the previous stage and get the number of profiles to simulate."""
    stage_dir = get_subdir(stage, tempdir)
    os.makedirs(stage_dir, exist_ok=True)
    if stage == 1:
        num_generated_profiles = splitter.split(output_file_name, stage_dir,
                                                settings.splitting_size,
                                                player_profile.player_class)
    else:
        subdir_previous_stage = get_subdir(stage - 1, tempdir)
        try:
            checkResultFiles(subdir_previous_stage)
        except Exception as e:
            msg = ("Error while checking result files in {}: {}\n"
                   "Please restart AutoSimc at a previous stage.").format(subdir_previous_stage, e)
            raise RuntimeError(msg) from e
        if settings.default_grabbing_method == "target_error":
            filter_by = "target_error"
            filter_criterium = None
        elif settings.default_grabbing_method == "top_n":
            filter_by = "count"
            filter_criterium = settings.default_top_n[stage - num_stages - 1]
        is_last_stage = (stage == num_stages)
        num_generated_profiles = splitter.grab_best(filter_by, filter_criterium,
                                                    subdir_previous_stage, stage_dir,
                                                    output_file_name, not is_last_stage)
    if num_generated_profiles:
        logging.info("Found {} profile(s) to simulate.".format(num_generated_profiles))
    return num_generated_profiles
def prove_terminates(filename):
    splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
    (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
    nids = len(id_map)
    varnames = ' '.join(id_map[k] for k in xrange(nids))
    splitfile.close()

    os.system(("kalashnikov.py " +
               "%s ../../tests/termination/unranking.c " +
               "-P2 " +
               "--synth-strategy=genetic " +
               "-c1 " +
               "--fastverif=True " +
               "-newsize=5 " +
               "-replaceprob=0.15 " +
               "-mutprob=0.1 " +
               "-tourneysize=5 " +
               "-popsize=3000 " +
               "-w4 " +
               "-a%d --evars %d --varnames %s --resnames I --seed=1337 " +
               "--nondet=%d " +
               "%s") %
              (splitfile.name, nids, nids, varnames, nondet,
               ' '.join(sys.argv[2:])))
def follow_up_char(w):
    '''
    Rule 9: defines the follow-up character of suffixes or vowels.
        а, у -> а
        э, ү, и -> э
        о -> о
        ө -> ө
    Exception: the following letter combinations change the follow-up character:
        уу, үү, юу, юү, яу, ёу, еү, иу
    The following letters may appear in the middle of a word but do not change
    the follow-up character:
        и, ий, ы, эй
    '''
    dd = {
        u'а': u'а',
        u'у': u'а',
        u'э': u'э',
        u'ү': u'э',
        u'и': u'э',
        u'о': u'о',
        u'ө': u'ө',
    }
    ee = [u'уу', u'үү', u'юу', u'юү', u'яу', u'ёу', u'еү', u'иу']
    ii = [u'и', u'ий', u'ы', u'эй']
    fu = None
    for u in split(w.lower()):
        found = False
        for e in ee:
            if e in u:
                fu = dd[e[-1]]
                found = True
        if not found and fu is None:
            for k in dd.keys():
                if k in u:
                    fu = dd[k]
    return fu
def getwords(doc):
    # Split words on non-alphabetic characters
    splitter = re.compile('\\W*')
    words = [s.lower() for s in splitter.split(doc)
             if len(s) > 2 and len(s) < 20]
    # Return a set containing only the unique words
    return dict([(w, 1) for w in words])
def reply(text=None):
    if text:
        resp = ''
        for part in splitter.split(text, MAX):
            resp += part
            check = sender.send(
                'POST', URL + TOKEN + '/' + 'sendMessage',
                {
                    'text': part.encode('utf-8'),
                    'chat_id': chat_id
                }
            )
            if check:
                logging.error(check.read())
            else:
                logging.info('Sent response: \n\n%s' % resp)
    else:
        sender.send(
            'POST', URL + TOKEN + '/' + 'sendMessage',
            {
                'text': 'Bot tries to send you an empty message.',
                'chat_id': chat_id
            }
        )
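# reply() above relies on a splitter.split(text, MAX) helper that breaks a long message into
# chunks of at most MAX characters before sending each one. The sketch below is a minimal,
# assumed implementation using plain character slicing; the real helper may instead break on
# whitespace so words are not cut in half.
def split(text, size):
    """Yield successive chunks of `text`, each at most `size` characters long."""
    for start in range(0, len(text), size):
        yield text[start:start + size]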
def grab_profiles_for_stage(player_profile, stage, outputfile, stages):
    """Parse output/result files from the previous stage and get the number of profiles to simulate."""
    subdir_previous_stage = get_subdir(stage - 1)
    if stage == 1:
        num_generated_profiles = splitter.split(outputfile, get_subdir(stage),
                                                settings.splitting_size,
                                                player_profile.wow_class)
    else:
        try:
            check_results_file(subdir_previous_stage)
        except Exception as ex:
            msg = f'Error while checking result files in {subdir_previous_stage}: {ex}. Please restart AutoSimc at a previous stage.'
            raise RuntimeError(msg) from ex
        if settings.default_grabbing_method == 'target_error':
            filter_by = 'target_error'
            filter_criterium = None
        elif settings.default_grabbing_method == 'top_n':
            filter_by = 'count'
            filter_criterium = settings.default_top_n[stage - stages - 1]
        is_last_stage = (stage == stages)
        num_generated_profiles = splitter.grab_best(filter_by, filter_criterium,
                                                    subdir_previous_stage,
                                                    get_subdir(stage), outputfile,
                                                    not is_last_stage)
    if num_generated_profiles:
        logger.info(f'Found {num_generated_profiles} profile(s) to simulate.')
    return num_generated_profiles
def test_split_german_three_compounds():
    split_result = split('Effektivitätsberechnungsformular', 'de_de')
    assert split_result[0] == 'Effektivität', '%s != "Effektivität"' % split_result[0]
    assert split_result[1] == 'Berechnung', '%s != "Berechnung"' % split_result[1]
    assert split_result[2] == 'Formular', '%s != "Formular"' % split_result[2]
def main(path_in):
    print('Loading data...')
    data = splitter.load(path_in)
    (train_X, train_y), (test_X, test_y) = splitter.split(data, TRAINING_NUM, TESTING_NUM)

    try:
        gp_sigmas = np.loadtxt('gp_preds.txt')
        assert gp_sigmas.shape == (TESTING_NUM,)
    except (FileNotFoundError, AssertionError):
        print('Fitting GP...')
        kernel = sklearn.gaussian_process.kernels.RBF(length_scale=LENGTH_SCALE)
        gp = sklearn.gaussian_process.GaussianProcessRegressor(
            kernel=kernel, alpha=ALPHA, copy_X_train=False)
        gp.fit(train_X, train_y)
        print('Predicting GP...')
        _, gp_sigmas = gp.predict(test_X, return_std=True)
        np.savetxt('gp_preds.txt', gp_sigmas)

    print('Approximating kernel...')
    appx_train_X, appx_test_X = approximate_kernel(train_X, test_X)
    print('Fitting approximate GP...')
    agp = appx_gp.AppxGaussianProcessRegressor(alpha=ALPHA)
    agp.fit(appx_train_X, train_y)
    print('Predicting approximate GP...')
    _, agp_sigmas = agp.predict(appx_test_X, return_std=True)

    print('Finding best fit...')
    best_fit = np.polyfit(gp_sigmas, agp_sigmas, 1)
    best_fit_box = (min(gp_sigmas), max(gp_sigmas), min(agp_sigmas), max(agp_sigmas))
    best_fit_endpoints = interval_in_box_from_line(best_fit_box, best_fit)
    best_fit_xs, best_fit_ys = zip(*best_fit_endpoints)

    print('Plotting...')
    f = plt.figure()
    ax = f.add_subplot(111)
    sc = plt.scatter(gp_sigmas, agp_sigmas, s=.2, c=list(test_y))
    plt.plot(best_fit_xs, best_fit_ys, color='red', label='Linear fit')
    plt.title(r'$\gamma = {:.4},$ #components$= {}$'.format(GAMMA, COMPONENTS))
    plt.xlabel('GP uncertainty')
    plt.ylabel('Approximate GP uncertainty')
    plt.text(.975, .1, '$y = {:.4}x {:+.4}$'.format(*best_fit),
             horizontalalignment='right', verticalalignment='bottom',
             transform=ax.transAxes)
    colorbar = plt.colorbar(sc)
    colorbar.set_label('Redshift')
    plt.legend(loc='lower right')
    plt.show()
def cronista_transcribe(audio_source_path: str, destination_folder: str,
                        block_of_transcription: int, lang: str,
                        on_transcription_progress=None, to_file: bool = True):
    audio_segments = split(audio_source_path, destination_folder,
                           block_of_transcription, to_file)
    transcribe(audio_segments, lang, on_transcription_progress)
def split_compound(self, sentance):
    """
    Used to split compound words that are found in the utterance.
    This will make it easier to confirm that all words are found in the search.
    """
    search_words = re.split(r'\W+', str(sentance))
    separator = " "
    words_list = splitter.split(separator.join(search_words))
    return words_list
def list_(term):
    print "Looking for trackers for term"
    (ns, local) = splitter.split(term)
    ns_trackers = determine_trackers(ns)
    for t in ns_trackers:
        try:
            print "Asking " + t
            return tracker_call(t, "list", term=term)
        except TrackerFailure:
            print t + ": failed, moving on..."
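# Several snippets above (show_by_ns, db_finish, list_) expect splitter.split(term) to return a
# (namespace, local) pair for an IRI. The sketch below assumes the common RDF convention of
# splitting at the last '#' or '/'; the actual splitter module may apply stricter IRI rules.
def split(term):
    """Split an IRI into (namespace, local_name) at the last '#' or '/'."""
    for sep in ('#', '/'):
        idx = term.rfind(sep)
        if idx != -1 and idx + 1 < len(term):
            # e.g. 'http://xmlns.com/foaf/0.1/Person' -> ('http://xmlns.com/foaf/0.1/', 'Person')
            return term[:idx + 1], term[idx + 1:]
    return None, term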
def predict_(self, sentence):
    # arg max over categories of log(P(category | sentence))
    best_suggested_category = None
    max_probability = -float('inf')
    words = splitter.split(sentence)
    for category in self.word_count.keys():
        probability = self.calc_score(words, category)
        if probability > max_probability:
            max_probability = probability
            best_suggested_category = category
    return best_suggested_category
def process_hashtags(original_text, mode=NORMAL_MODE):
    cleaned_text = original_text
    hashtags = set(re.findall(r"#(\w+)", original_text))
    cleaned_text = cleaned_text.replace('#', ' ')
    for hashtag in hashtags:
        if mode == AGGRESSIVE_MODE:
            cleaned_text = cleaned_text.replace(hashtag,
                                                ' '.join(splitter.split(hashtag)))
        else:
            cleaned_text = cleaned_text.replace(hashtag,
                                                ' '.join(segment(hashtag)))
    return cleaned_text
def main():
    path = '../dataset/powersupply.arff'
    #data, meta = arff.loadarff(path)
    #data, label, maxFeature = getArffData(path, 1000)
    #train, trainBeta, test = generateTrain(data, 100)
    train, train_beta, test = split('../dataset/powersupply.arff', 500)
    maxFeature = len(train[0])
    sampleSize = len(train)
    #print "sampleSize:", sampleSize
    gammab = computeKernelWidth(np.array(train))
    result = testEnsKmm(train, test, gammab, 10, maxFeature)
    beta = result[0]
    print "beta", beta
def findBest(tensor, threshold):
    t = np.copy(tensor)
    best = [1e100, None]
    for i in range(len(tensor.shape) - 1):
        for j in range(i + 1, len(t.shape)):
            u, s, v, vs = split(t, (i, j), threshold)
            if len(s) < best[0] and u.size + vs.size < t.size:
                best = [len(s), u, vs]
    best = best[1:]
    if best[-1] is None:
        return [t]
    elif len(best[-1].shape) > 3:
        best = best[:-1] + findBest(best[-1], threshold)
    return best
def read_text(self, stream):
    lines = stream.readlines()
    i = 1
    for line in lines:
        lower_line = line.lower()
        words = splitter.split(lower_line,
                               [' ', ',', '.', ':', ';', '\n', '\r', '\t'])
        for word in words:
            self.add_word(word, i)
        i += 1
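# read_text above passes an explicit list of delimiters to splitter.split. The sketch below is
# an assumed helper with that signature: it splits on any of the given delimiters and drops the
# empty tokens produced by consecutive delimiters; the real module's behaviour may differ.
import re

def split(text, delimiters):
    """Split `text` on any of the strings in `delimiters`, discarding empty tokens."""
    pattern = '|'.join(re.escape(d) for d in delimiters)
    return [token for token in re.split(pattern, text) if token]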
def handlespace(word):
    s, flag = audit(word)
    if flag:
        # print('1')
        index = word.find(s)
        # print(index)
        topass = word[:index] + ' ' + word[index:index + len(s)] + ' ' + word[index + len(s):]
    else:
        # print('2')
        topass = word
    print('penultimate...', topass)
    return ' '.join([x.strip() for x in splitter.split('fixed deposit ', 'en_gb')])
def strip_hashtags(text):
    text = preprocess_clean(text, False, True)
    hashtags = re.findall(r'#[\w\-]+', text)
    for tag in hashtags:
        cleantag = tag[1:]
        if d.check(cleantag) or dus.check(cleantag):
            text = re.sub(tag, cleantag, text)
        else:
            hashtagSplit = ""
            for word in splitter.split(cleantag.lower(), 'en_US'):
                hashtagSplit = hashtagSplit + word + " "
            text = re.sub(tag, hashtagSplit, text)
    # print(text)
    return text
def process(source, scene_root, verbose=False, clean=False, list_file=None,
            overwrite=False):
    if pusher.check_existance(scene_root):
        print 'Scene %s already exists on destination bucket.' % scene_root
        if not overwrite:
            return collect_missing_entry(scene_root, verbose, clean, list_file)

    if verbose:
        print 'Processing scene: %s' % scene_root

    scene_dict = {}
    local_tarfile = puller.pull(source, scene_root, scene_dict, verbose=verbose)

    try:
        local_dir = splitter.split(scene_root, local_tarfile, verbose=verbose)
    except:
        if source == 's3queue':
            # Remove problematic scenes from the queue directory
            puller_s3queue.clean_queued_tarfile(scene_root)
        return

    scene_info.add_mtl_info(scene_dict, scene_root, local_dir)
    thumbnailer.thumbnail(scene_root, local_dir, verbose=verbose)
    scene_index_maker.make_index(scene_root, local_dir, verbose=verbose)
    pusher.push(scene_root, local_dir, scene_dict, verbose=verbose,
                overwrite=overwrite)

    if clean:
        os.unlink(local_tarfile)
        shutil.rmtree(local_dir)

    if list_file:
        scene_info.append_scene_line(list_file, scene_dict)

    return scene_dict
def main(path_in, path_out):
    data = splitter.load(path_in)
    (train_X, train_y), (test_X, test_y) = splitter.split(
        data, TRAINING_SAMPLES_NUM, TESTING_SAMPLES_NUM)
    gp_x = list(range(STEP, MAX_GP + 1, STEP))
    gp_y = []
    for i, n in enumerate(gp_x):
        print('Starting GP', i + 1)
        gp_y.append(perform_gp(train_X[:n], train_y[:n], test_X))
    with open(path_out, 'w') as f:
        json.dump({
            'gp_x': gp_x,
            'gp_y': gp_y,
        }, f)
def get_hashtag_tokenize(hash_tag):
    hashtag = hash_tag
    hash_tag = hash_tag.replace('#', '')
    hash_tag = ' '.join(hash_tag.split())
    if len(hash_tag) > 0:
        hashtag = splitter.split(hashtag)
        hashtag = ' '.join(word for word in hashtag)
        if len(hashtag) > 1:
            wordlist = initialize_words()
            return parse_sentence(hashtag, wordlist)
        else:
            return hash_tag.lower()
        # wordlist = initialize_words()
        # return parse_sentence(hashtag, wordlist)
    else:
        return hash_tag
def get_colours(path):
    data = splitter.load(path)
    data, _ = splitter.split(data, data[0].shape[0], 0)
    X, y = data
    u, g, r, i, z = X.T
    u_g = u - g
    u_r = u - r
    r_i = r - i
    i_z = i - z
    X[:, 0] = r
    X[:, 1] = u_g
    X[:, 2] = u_r
    X[:, 3] = r_i
    X[:, 4] = i_z
    data = X, y
    return data
def main():
    if len(argv) < 2:
        print("Usage: py . <pdf>/validate")
        exit()

    source = argv[1]
    path = "./data/images/"
    validate = len(argv) >= 2 and argv[1] == "validate"
    images = [path + x for x in listdir(path)]
    img = cv.imread(images[0])

    if not validate:
        print("Processing the pdf (this might take a while)...")
        source = extract_text(source)
        source = re.sub("\n+", "\n", source).strip()
        source = randomize(source)
        source = split(img, source)
    else:
        print("Validating the dataset...")

    lines = source.split("\n")
    lines = list(filter(lambda line: len(str(line)) > 3, lines))

    # Lines per page
    lpp = 19
    total = ceil(len(lines) / lpp) if not validate else len(images)
    print("Pages will be generated: {}".format(total))

    for i, image in enumerate(images):
        img = cv.imread(image)
        text = "\n".join(lines[i * lpp:i * lpp + lpp])
        if not text and not validate:
            break
        img, points = detect(img, validate)
        img, _ = handwrite(img, text, points)
        img = apply_effects(img)
        img = np.int0(img * 255)
        cv.imwrite("./out/{}".format(basename(image)), img)
        print("Generated: {}/{} ".format(i + 1, total), end="\r")

    print("\nDone!")
def findBest(tensor, threshold):
    best = [1e100, None]
    trees = constructTrees(len(tensor.shape))
    print(len(trees))
    for i, t in enumerate(trees):
        if i % 50 == 0:
            print(i, len(trees))
        v = np.copy(tensor)
        s = treeToSplit(t)
        arrs = []
        for indices in s:
            u, s, v, vs = split(v, indices, threshold)
            arrs.append(u)
        if len(v.shape) == 3:
            arrs.append(vs)
        if sum([a.size for a in arrs]) < best[0]:
            best = [sum([a.size for a in arrs]), t, arrs]
    return best
def find_all(mstring):
    # group 3 captures the alphanumeric substring of the phone number
    m = re.match(r'^(1)?(\d{3})?([A-Z0-9]*)$', "".join(mstring))
    if m:
        # if an alphanumeric substring exists
        if m.group(3):
            sublist = splitter.split(m.group(3))
            # if an English word combination exists
            if sublist:
                mlist = list(m.groups())
                # concatenate the separated English words
                mlist = mlist[:-1] + list(sublist)
                # remove any None or empty elements from the regex groups
                mlist = [x for x in mlist if x not in [None, '']]
                # add dashes between elements
                newmlist = ['-'] * (len(mlist) * 2 - 1)
                newmlist[0::2] = mlist
                print("".join(newmlist))
                return "".join(newmlist)
def maintest():
    path = '../dataset/powersupply.arff'
    #data, meta = arff.loadarff(path)
    #data, label, maxFeature = getArffData(path, 1000)
    train, train_beta, test = split('../dataset/powersupply.arff', 300)
    #train, trainBeta, test = generateTrain(data, 300)
    train_data = np.array(train)
    test_data = np.array(test)
    maxFeature = train_data.shape[1]
    #maxFeature = len(train[0])
    print "maxFeature:", maxFeature
    gammab = computeKernelWidth(train_data)
    print "gammab", gammab
    #print 'Converting train sparse to array ...'
    #Xtrain = convertSparseToList(train, maxFeature)
    #print 'Converting test sparse to array ...'
    #Xtest = convertSparseToList(test, maxFeature)
    beta, runtime = cenKmm(train_data, test_data, gammab, maxFeature)
    print beta
def main():
    start = time.time()
    classifier = Classifier()
    classifier.train()
    print "Training finished at %ds\n" % (time.time() - start)
    i = 1
    for fn in os.listdir('./data/matthew/'):
        with codecs.open('./data/matthew/out/%d.txt' % i, 'w+', encoding='utf-8') as fw:
            i += 1
            print './data/matthew/%s' % fn
            chars = split('./data/matthew/%s' % fn)
            for _, line in chars.iteritems():
                for char in line:
                    char = 255 - char
                    charout = classifier.classify(char)
                    fw.write(unicode(charout[0]))
                fw.write('\n')
        print "Finished Matthew %d at %ds\n" % (i, (time.time() - start))
def prove_terminates(filename):
    splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
    (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
    nids = len(id_map)
    varnames = (' '.join(id_map[k] for k in xrange(nids)) +
                ' ' +
                ' '.join("0" for i in xrange(nids)))
    splitfile.close()

    cmd = (("kalashnikov.py " +
            "%s ../../tests/termination/combined.c " +
            "-P3 " +
            "--seed=1337 " +
            "--synth-strategy=genetic -a%d --evars %d --varnames %s --resnames I " +
            "--fastverif=True -c=1 -keepfrac=15 -mutprob=0.25 -newfrac=2 -popsize=500 " +
            "-recombprob=0.05 -tourneysize=10 -w=3 " +
            "%s") %
           (splitfile.name, nids * 2, nids, varnames, ' '.join(sys.argv[2:])))
    os.system(cmd)
def prove_terminates(filename):
    splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
    (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
    nids = len(id_map)
    varnames = ' '.join(id_map[k] for k in xrange(nids))
    splitfile.close()

    if has_nested:
        skeleton = "../../tests/termination/nested.c"
        nprogs = 3
        nargs = nids * 2
        varnames += ' ' + ' '.join("%s\\'" % id_map[k] for k in xrange(nids))
    else:
        skeleton = "../../tests/termination/ranking.c"
        nprogs = 2
        nargs = nids

    cmd = (("kalashnikov.py " +
            "%s %s " +
            "-P%d " +
            "--seed=1337 -a%d --varnames %s " +
            "--synth-strategy=genetic " +
            "--fastverif=True " +
            "-c1 " +
            "-newsize=5 " +
            "-replaceprob=0.15 " +
            "-mutprob=0.1 " +
            "-tourneysize=5 " +
            "-popsize=3000 " +
            "-w4 " +
            "%s") %
           (splitfile.name, skeleton, nprogs, nargs, varnames,
            ' '.join(sys.argv[2:])))
    os.system(cmd)
def complexity(filename):
    splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
    (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
    nids = len(id_map)
    varnames = ' '.join(id_map[k] for k in xrange(nids)) + ' bound'
    splitfile.close()

    cmd = (("kalashnikov.py " +
            "%s %s/tests/loops/complexity.c " +
            "-P2 " +
            "--seed=1337 " +
            #"--strategy=evolve " +
            "-a%d --evars 0 --varnames %s --nondet=%d -w=3 " +
            "%s") %
           (splitfile.name, kalashnikov_dir, nids + 1, varnames, nondet,
            ' '.join(sys.argv[2:])))
    os.system(cmd)
def testtypeconvert(self):
    r = splitter.split("GOOD 100 490.50", [str, int, float])
    self.assertEqual(r, ['GOOD', 100, 490.50])
def fit_(self, sentence, category):
    words = splitter.split(sentence)
    for word in words:
        self.count_word(word, category)
def testsimplestring(self):
    r = splitter.split("GOOD 100 490.50")
    self.assertEqual(r, ['GOOD', '100', '490.50'])
    amax = np.amax(A)
    amin = np.amin(A)
    # normalize
    J = amin * np.ones(A.shape)
    N = (A - J) / (amax - amin)
    return N, amax, amin

# load data
(A, L) = loader.loadTrainingSet_35()
print 'dataset 3-5 : Train : success'
print A.shape

# normalize
N, tmax, tmin = normalize(A)

# split data
(T, T_l, V, V_l) = splitter.split(N, L)
print '3-5 : Training.training : ', T.shape, ' l : ', T_l.shape, ' ; Training.validation : ', V.shape, ' l : ', ' max :', tmax, ' min', tmin

# save to disk
np.save('../mnist/n_MNIST_Training35', T)
np.save('../mnist/n_MNIST_Validation35', V)
np.save('../mnist/n_MNIST_Training_labels35', T_l)
np.save('../mnist/n_MNIST_Validation_labels35', V_l)

# load test data
(A, L) = loader.loadTestSet_35()
print 'dataset 3-5 : Test : success'
print A.shape

# normalize with the same min/max as the training set
N, _, __ = normalize(A, tmax, tmin)

# split data
print '3-5 : Test set : ', N.shape, ' l : ', L.shape, ' max :', tmax, ' min', tmin
def remove_old_tiles():
    path = 'tiles'
    for file in os.listdir(path):
        if os.path.isfile(os.path.join(path, file)):
            try:
                os.remove(os.path.join(path, file))
            except:
                print('Could not delete', file, 'in', path)

if args.no_split:
    if not os.path.exists(WORK_DIR + "tiles/" + buildmap + "_split.ready"):
        print()
        printwarning("can't find tiles/" + buildmap + "_split.ready")
        print("--no_split/-ns makes no sense, ignoring it")
        splitter.split()
else:
    remove_old_tiles()
    os.chdir(WORK_DIR)
    print()
    printinfo("now splitting the mapdata...")
    splitter.split()

"""
--stop_after splitter
"""

if args.stop_after == "splitter":
    print()
def getwords(doc):
    words = [s.lower() for s in splitter.split(doc)
             if len(s) > 2 and len(s) < 20]
    # Return a set of unique words
    return dict([(w, 1) for w in words])
def testsimplestring(self):
    r = splitter.split('GOOG 100 490.50')
    self.assertEqual(r, ['GOOG', '100', '490.50'])
def splitargs(args):
    """Split the command line into tokens."""
    return closequote(split(args))
def testtypeconvert(self):
    r = splitter.split('GOOG 100 490.50', [str, int, float])
    self.assertEqual(r, ['GOOG', 100, 490.5])
def testdelimiter(self):
    r = splitter.split('GOOG,100,490.50', delimiter=',')
    self.assertEqual(r, ['GOOG', '100', '490.50'])
def testdelimeter(self):
    r = splitter.split("GOOD,100,490.50", delimeter=",")
    self.assertEqual(r, ['GOOD', '100', '490.50'])
def testTypeConvert(self):
    r = splitter.split('Goog 100 490.50', [str, int, float])
    self.assertEqual(r, ['Goog', 100, 490.50])
def testSimpleString(self):
    r = splitter.split('Goog 100 490.50')
    self.assertEqual(r, ['Goog', '100', '490.50'])
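# The unit tests above (testsimplestring, testtypeconvert, testdelimiter, ...) pin down a small
# splitter.split(line, types=None, delimiter=None) API: whitespace splitting by default, an
# optional delimiter, and optional per-field type conversion. The sketch below is a minimal
# implementation consistent with those tests, offered as an assumption about the module under
# test rather than its actual source.
def split(line, types=None, delimiter=None):
    """Split `line` on `delimiter` (whitespace by default) and optionally
    convert each field with the corresponding callable in `types`."""
    fields = line.split(delimiter)
    if types:
        fields = [ty(val) for ty, val in zip(types, fields)]
    return fields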