Example #1
def mainloop():
    while True:
        print('')
        print(f'{len(working_set)} games in working set')
        print('(1) Add PGN to working set')
        print('(2) Clear working set')
        print('(3) Pre-process')
        print('(4) Analysis 1')
        print('(0) Exit')

        i = input()

        if i == '1':
            addpgnloop()
        if i == '2':
            working_set.clear()
        if i == '3':
            try:
                preprocess.run(working_set)
            except KeyboardInterrupt:
                pass
        if i == '4':
            print('(1) Normal output')
            print('(2) CSV output')
            j = input()
            try:
                report_name = input('Report name: ')
                if j == '1':
                    analyze.a1(working_set, report_name)
                if j == '2':
                    analyze.a1csv(working_set, report_name)
            except KeyboardInterrupt:
                pass
        if i == '0':
            return
Example #2
def run():
    file_type, paras_pos, paras_neg = preprocess.run()
    pos_data_POS = assign_POS_tags(paras_pos)
    neg_data_POS = assign_POS_tags(paras_neg)

    pos_feature_POS = assign_feature_POS(pos_data_POS)
    neg_feature_POS = assign_feature_POS(neg_data_POS)

    # Dump the POS-tagged data and the corresponding feature vectors;
    # file_type == 1 selects the train directory, otherwise test.
    subdir = "train" if file_type == 1 else "test"
    dumps = {
        "pos_POS_TAGGED.p": pos_data_POS,
        "neg_POS_TAGGED.p": neg_data_POS,
        "pos_feature_POS.p": pos_feature_POS,
        "neg_feature_POS.p": neg_feature_POS,
    }
    for name, obj in dumps.items():
        with open("pickledumps/" + subdir + "/" + name, "wb") as f:
            pickle.dump(obj, f)

    return paras_pos, paras_neg
Example #3
def run_single(size):
    if not sys.warnoptions:
        warnings.simplefilter("ignore")

    # fix seed for testing purposes
    random.seed(10)
    np.random.seed(10)

    # run the preprocessing to get the subset of data
    X, y = preprocess.run("review_polarity.tar.gz", 0.05, size, binary=False)
    y = y.to_numpy(dtype=int)

    # shuffle data
    X, y = shuffle(X, y)

    # number of CV folds
    k = 10

    # test svm (perform nested k fold cross validation)
    best_C, best_err, fold_err, total_err, err_dict_svm = validation.kfold(k, X, y, LinearSVC, {"C": [.1, 1, 10]})
    best_C2, best_err2, fold_err2, total_err2, err_dict_knn = validation.kfold(k, X, y, KNeighborsClassifier,
                                                                               {"n_neighbors": [5, 10, 15]})

    # return the svm for error dictionary
    return total_err, total_err2, err_dict_svm, err_dict_knn
Example #4
def run(apkPath, outputDir, libResDir):
  # Remove libraries.
  if not os.path.exists(apkPath):
    print("[LOG]:No APK found! " + apkPath)
    return False
  tmpDir = 'tmp'
  if not os.path.exists(tmpDir):
    os.mkdir(tmpDir)
  res = preprocess.run(apkPath, tmpDir, libResDir)
  if res == False:
    print("[LOG]: Cannot preprocess the APK! "+apkPath)
    return False
 
  appname = os.path.basename(apkPath)
  # Generate app data object. 
  featureFn = tmpDir +'/'+appname +'.feature'
  resFn = tmpDir+'/'+appname+'.res'
  gen_class_feature.run(resFn, featureFn)
  if not os.path.exists(featureFn):
    print("[LOG]: Cannot generate feature of the APK! "+apkPath)
    return False

  # Generate app hash. 
  hashFn = outputDir+'/'+appname+'.hash'
  gen_hash.run(featureFn, hashFn)
  if not os.path.exists(hashFn):
    print("[LOG]: Cannot generate hash of the APK! "+apkPath)
    return False

  os.remove(featureFn)
  os.remove(resFn)
  return True
Example #5
def pre_unk(dat):
    data = preprocess.run(dat)
    with open(dat + '.pre', 'w') as f:
        for i in data:
            f.write(str(i) + '\n')

    data = unknown.run(dat + '.pre')

    with open(dat + '.pre.unk', 'w') as f:
        for i in data:
            f.write(str(i) + '\n')

    return data
Example #6
def run():
    file_type, paras_pos, paras_neg = preprocess.run()
    scores = create_sentiment_dict('lexicon/AFINN-111.txt')
    # scores = create_sentiment_dict('lexicon/wordwithStrength.txt')
    pos_para_score, pos_word_position_sentence, pos_word_position_para = calculate_scores(
        paras_pos, scores)  # noqa
    neg_para_score, neg_word_position_sentence, neg_word_position_para = calculate_scores(
        paras_neg, scores)  # noqa

    pos_mode = calculate_mode(scores, pos_word_position_sentence)
    neg_mode = calculate_mode(scores, neg_word_position_sentence)
    pos_position_score = assign_position_score(scores, pos_mode,
                                               pos_word_position_sentence)
    neg_position_score = assign_position_score(scores, neg_mode,
                                               neg_word_position_sentence)
    # print pos_para_score, '\n-------------\n', neg_para_score
    return file_type, paras_pos, pos_position_score, pos_para_score, paras_neg, neg_position_score, neg_para_score
Example #7
def ten_fold_cv(method):
    df = preprocess.run(train=True)
    kf = KFold(n_splits=10)
    df = df[[x for x in df if x != 'id']]
    features = [col for col in df.columns if col != 'revenue']
    X, y = split_covariates_from_target(df, 'revenue')
    X = X[features].to_numpy(dtype=object)
    y = y.to_numpy(dtype=float)
    kf.get_n_splits(X)
    errors = []
    for train_index, test_index in kf.split(X):
        # print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        beta_hat = apply_custom_reg(X_train, y_train, method)
        y_test_pred = np.dot(X_test, beta_hat.T)
        errors.append(root_mean_squared_log_error(y_test, y_test_pred))
    return sum(errors)/10
Example #8
def run(settings_file, reads, outdir, temp_loc=None, extend=False):

	if os.path.exists(outdir):
		if (check_output and not 
				query_yes_no("Output path \"{}\" already exists, overwrite?"
					.format(outdir))):
			print "Cannot continue as output already exists"
			sys.exit(1)
		
		shutil.rmtree(outdir)

	os.mkdir(outdir)

	tempdir = tempfile.mkdtemp(prefix='waistcoat', dir=temp_loc)

	#Read and validate settings for waistcoat
	if verbose:
		print "Reading settings from \'{}\'".format(settings_file)
	my_settings = settings.loadf(settings_file)

	statistics.setUp(my_settings.barcodes.keys())

	#run the preprocessing pipeline
	if verbose:
		print "\n========== Preprocessing =========="
	remove_input = False
	if reads.endswith('.gz'):
		if verbose:
			print "Inflating..."
		gzfile = gzip.GzipFile(reads, 'r')
		(out, reads) = tempfile.mkstemp(dir=tempdir, prefix='input.', 
											suffix='.inflated')
		out = os.fdopen(out, 'w')
		out.writelines(gzfile)
		gzfile.close()
		out.close()
		remove_input = True

	files = preprocess.run(reads, my_settings, tempdir, remove_input)

	#discard those which map to discard
	if verbose:
		print "\n========== Discard =========="
	for i,(index, dcs) in enumerate(my_settings.discard):
		new_files = {}
		count = {}
		if verbose:
			print "Removing reads which map to \'{}\' ({}/{})...".format(index,
					i+1, len(my_settings.discard))
		for sample,f in files.iteritems():
			if verbose:
				print "\tScanning \'{}\'".format(sample)
			(new_files[sample], count[sample]) = (
					tophat.discard_mapped(f, index, tophat_settings = dcs))
		files = new_files
		statistics.addValues('discard_' + os.path.basename(index), count)

	#map to genome
	(target, target_settings) = my_settings.target
	if verbose:
		print "\n========== Map to {} ==========".format(os.path.basename(target))
	th = tophat.tophat_from_settings(target_settings)
	for i,(sample,f) in enumerate(files.iteritems()):
		th.output_dir = os.path.join(outdir, sample)
		os.mkdir(th.output_dir)
		if verbose: print "Mapping {} ({}/{})...".format(sample, i+1, len(files))
		th.run(f, index_base = target)
		os.remove(f)
		
	if verbose: print "\n========== Postprocess =========="
	count = {}
	for i,(sample,f) in enumerate(files.iteritems()):
		if verbose: print "{} ({}/{})...".format(sample, i+1, len(files))
		
		out = os.path.join(outdir, '{}.bam'.format(sample))
		count[sample] = postprocess.run(outdir, sample,	"{}.fa".format(target),
				extend=extend)
		statistics.collectFinalStats(sample, out)
		
	statistics.addValues('final_seqs', count)

	statistics.write(os.path.join(outdir, 'statistics'))
	
	shutil.rmtree(tempdir)

	if verbose:
		print "\n__________ Pipeline Statistics __________"
		print statistics.prettyString()
Example #9
File: conf.py Project: cokelaer/msdas
import pkg_resources
version = pkg_resources.require("msdas")[0].version
release = version
author = ",\\\\".join([
    "Marti Bernardo Fauri",
    "Thomas Cokelaer",
    "Claudia Hernandez-Armenta"])

title = "MS-DAS"
copyright = author + ", 2013"
project = "MS-DAS"


try:
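    # Run the project's preprocess step, if present, before the docs build;
    # failures are deliberately swallowed so Sphinx can still run.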
    import preprocess
    preprocess.run()
except:
    pass


# common sphinx extensions 

# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.

extensions = [
Example #10
    input_data = []
    for line in sys.stdin:
        input_data.append(line.strip().split(","))

    input_df = pd.DataFrame(data=input_data, columns=["SMILES"])
    data = input_df.replace("", None)
else:

    input_data = []
    for line in sys.stdin:
        input_data.append(line.strip().split(","))

    input_df = pd.DataFrame(data=input_data, columns=["SMILES"])
    data = input_df.replace("", None)  # .drop(columns="log P (octanol-water)")
# print(input_data, data.shape)
data = run("", data, debug).astype(float)
# print(os.getcwd())
# print(data.shape)
filename = "config/training.yaml" if debug else "config.yaml"
with open(filename, "r+") as f:
    cfg = yaml.load(f, Loader=yaml.SafeLoader)

pred = np.zeros(data.shape[0])
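# Average the per-fold models' predictions (simple ensembling across folds).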
for fold in range(1, cfg["base"]["n_folds"] + 1):
    path = f"../models/{fold}.pkl" if debug else f"{fold}.pkl"
    estimator = joblib.load(path)

    pred += estimator.predict(data) / cfg["base"]["n_folds"]

for val in pred:
    val = float(val)
Example #11
import pandas as pd
import preprocess
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np


def mod_bag_of_words(df, min_df, binary):
    cv = CountVectorizer(min_df=min_df, binary=binary)
    X = cv.fit_transform(df['Content'].tolist()).toarray()

    return X, cv


if __name__ == "__main__":

    sample_sizes = [200, 400, 600, 800, 1000]
    for size in sample_sizes:
        X, y = preprocess.run("review_polarity.tar.gz",
                              0.05,
                              size,
                              binary=False)
        df = pd.DataFrame(X)
        y = np.array(y)
        df['label'] = y
        df.to_csv(f'./preprocessed/data_size_{size}.csv')
        print(df)
Example #12
    print("python main.py -r : run rebuilding all the files")
    print("python main.py -t : retrain model")


if len(sys.argv) > 2:
    showUsage()
    exit()

rebuild = False
retrain = False

if len(sys.argv) == 2:
    if sys.argv[1] == '-r':
        print("Rebuilding and training...")
        rebuild = True
    elif sys.argv[1] == '-t':
        print("Retraining model...")
        retrain = True
    else:
        print("Wrong parameter")
        showUsage()
        exit()

retrain = retrain or rebuild

preprocess.run(rebuild, gram_size, split_size)
dc = docClassifier(rebuild)
dc.trainAndValidate(retrain)

dc.predict()
Example #13
import configure
import preprocess
import entity_extraction
import relation_extractor
import attribute_extractor
import postprocess


# retrieve the directory path and file paths
resource_path = configure.RESOURCE_PATH
data_file = configure.DATA_FILE
quickUMLS_file = configure.QUICKUMLS_FILE

# Press the green button in the gutter to run the script.
if __name__ == '__main__':

    # configure the spacy
    nlp = configure.spacy_config()

    # preprocess
    snippets = preprocess.run(data_file, nlp)

    # extract treatment entities
    entity_extraction.run(snippets, nlp)

    # extract entity relationships
    print('-' * 25 + 'extracting relationship' + '-' * 25)
    relation_extractor.run(snippets)

    # attribute extraction and association
    attribute_extractor.run(snippets, nlp)

    # postprocess
    snippets = postprocess.run(snippets)

    # save, outputs
Example #14
import argparse
import numpy as np
import math
import pandas as pd
from model import *
import pickle
import preprocess
import utilities

# Parsing script arguments
parser = argparse.ArgumentParser(description='Process input')
parser.add_argument('tsv_path', type=str, help='tsv file path')
args = parser.parse_args()

# Reading input TSV
# data = pd.read_csv('test.tsv', sep="\t")
data = preprocess.run(args.tsv_path, train=False)

models = ['linear_model', 'ridge_model', 'custom_model']
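# Each pickled model stores a (beta_hat, b_hat) pair; predictions below are np.dot(X, beta_hat.T) + b_hat.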
for m in models:
    infile = open(f'saved_models/{m}.pkl', 'rb')
    # result = pickle.load(infile)
    model = pickle.load(infile)
    beta_hat, b_hat = model[0], model[1]

    prediction_df = pd.DataFrame(columns=['id', 'revenue'])
    prediction_df['id'] = data['id']
    data_1 = data[[x for x in data if x != 'id']]
    X, _ = split_covariates_from_target(data_1, 'revenue')
    print("running prediction...")
    # prediction_df['revenue'] = model.predict(X)
    y_test_pred = np.dot(X, beta_hat.T) + b_hat
Example #15
import tensorflow as tf
import numpy as np
import preprocess
'''
	ccscorer
	
	A single layer perceptron for assessing credit card scores using 
    tensorflow. https://www.tensorflow.org
	
	version: 0.1
	authors: Aurelien Hontabat
	license: MIT
'''

# our data (next time, better to use TensorFlow's preprocessing tools)
data, results = preprocess.run()
input_dim = 20
output_dim = 2

# stay flexible when building the graph (tensorflow)
sess = tf.InteractiveSession()

# input layer
x = tf.placeholder(tf.float32, shape=[None, input_dim])

# output layer
y_ = tf.placeholder(tf.float32, shape=[None, output_dim])

# weights for all connections
W = tf.Variable(tf.zeros([input_dim, output_dim]))
Example #16
    y_train_pred = np.dot(X_train, beta_hat)
    y_train_pred[y_train_pred < 0] = 0
    # pred_diff = y_train_pred - y_train
    train_score = root_mean_squared_log_error(y_train,y_train_pred)
    print("custom regression score on the train set is:")
    print(train_score)
    with open("saved_models/custom_model.pkl", "wb") as f:
        pickle.dump(beta_hat, f)
    print("custom model saved in saved_models/custom_model.pkl\n")
    return beta_hat


def train_model(X_train,y_train):
    print("\t----MODEL----")
    run_models(X_train,y_train)
    # ten_fold_cv(method)


if __name__ == '__main__':
    df = preprocess.run(train=True)
    df = df[[x for x in df if x != 'id']]
    features = [col for col in df.columns if col != 'revenue']
    X, y = split_covariates_from_target(df, 'revenue')
    X = X[features].to_numpy(dtype=object)
    y = y.to_numpy(dtype=float)
    train_model(X,y)

    # --------------------- Cross-Validation --------------------- #
    # for method in ['BFGS','Nelder-Mead','Powell','CG','Newton-CG']:
    #     result = ten_fold_cv(method)
    #     print(f"{method} res = {result}")
Example #17
import tensorflow as tf
import numpy as np
import preprocess

'''
	ccscorer
	
	A single layer perceptron for assessing credit card scores using 
    tensorflow. https://www.tensorflow.org
	
	version: 0.1
	authors: Aurelien Hontabat
	license: MIT
'''

# our data (next time, better to use TensorFlow's preprocessing tools)
data, results = preprocess.run()
input_dim = 20
output_dim = 2

# stay flexible when building the graph (tensorflow)
sess = tf.InteractiveSession()

# input layer
x = tf.placeholder(tf.float32, shape=[None, input_dim])

# output layer
y_ = tf.placeholder(tf.float32, shape=[None, output_dim])

# weights for all connections
W = tf.Variable(tf.zeros([input_dim,output_dim]))
Example #18
		# Data processing
		if _ppRun:
			print( "You required data pre processing. Enter the following parameters : \n" )
			in_re_level = int( raw_input( "Regex process level (0-3) : " ) )
			in_sw_drop = raw_input( "Do you want to keep stop words ? (Y/N) : " ) == "N"
			in_stem = raw_input( "Do you want to apply Porter Stemming ? (Y/N) : ") == "Y"
			
			import preprocess
			
			ppfilename = "ppTrainData"
			
			if in_asW2V:
				ppfilename += "_sentences"
				ul_train = pd.read_csv( dataPath_+"unlabeledTrainData.tsv", header=0, delimiter="\t", quoting=3 )
				
				ul_ppTrain, _empt_ = preprocess.run( ul_train, verbose=_verb, re_level=in_re_level, sw_drop=in_sw_drop, stem=in_stem, asW2V=in_asW2V )
			
				### Pickling of pre-processed data ###
				if _pickleData:
					pkl.dump( ul_ppTrain, open( "pickles/ul_"+str(ppfilename)+".pkl","wb" ) )
					if _verb: 
						print("Pickled pre-processed unlabeled data into 'ul_"+str(ppfilename)+".pkl' file." + \
							"(Size : " + str( os.path.getsize("pickles/ul_"+str(ppfilename)+".pkl") / 1000.00 ) + " Kilobytes. \n\n")
			
			ppTrain, ppTrainW = preprocess.run( train, verbose=_verb, re_level=in_re_level, sw_drop=in_sw_drop, stem=in_stem, asW2V=in_asW2V )
			
			### Pickling of pre-processed data ###
			if _pickleData:
				pkl.dump( ppTrain, open( "pickles/"+str(ppfilename)+".pkl","wb" ) )
				pkl.dump( ppTrainW, open( "pickles/"+str(ppfilename)+"W.pkl","wb" ) )
				if _verb: 
Example #19
import argparse
import logging
import os
import preprocess
import initialmatch

from datetime import datetime

if __name__ == "__main__" : 

    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--filename', type=str, default="dinoSR_par.txt")
    parser.add_argument('-d', '--dirname', type=str, default="C:/Users/emilt/multiview-reconstruction/src/data/sample04/dinoSparseRing2/")
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    if args.verbose : 
        LOG_FILENAME = datetime.now().strftime('logs/log%H%M%S%d%m%Y.log')
        logging.basicConfig(level=logging.DEBUG, filename=LOG_FILENAME, filemode='w')
    else : 
        logging.basicConfig(level=logging.INFO)
    
    os.chdir(args.dirname)

    images = preprocess.run(args.filename)
    patches = initialmatch.run(images)
Example #20
import sys
import argparse

from configs import CONFIG
from lib import errors as e

if __name__ == "__main__":
    print("\nTranscribe all the music...\n")

    num_of_args = len(sys.argv)
    if num_of_args != 9:
        e.print_usage()
        sys.exit()

    arg_parser = argparse.ArgumentParser(description='Get run specs.')
    arg_parser.add_argument('-m', dest='mode', required=True)
    arg_parser.add_argument('-model', dest='model', required=True)
    arg_parser.add_argument('-c', dest='dataset_config', required=True)
    arg_parser.add_argument('-t', dest='transform_type', required=True)
    args = arg_parser.parse_args()

    dataset_id = args.dataset_config + "_" + args.transform_type
    experiment_id = dataset_id + "_" + args.model
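    # The dataset id encodes the preprocessing choices (config + transform);
    # the experiment id additionally encodes the model.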
    if args.mode == 'preprocess' and e.is_valid_args(CONFIG, args):
        pre.run(CONFIG, args, dataset_id)
    elif args.mode == 'train' and e.is_valid_args(CONFIG, args):
        train.run(CONFIG, args, dataset_id, experiment_id)
    elif args.mode == 'evaluate' and e.is_valid_args(CONFIG, args):
        eval.run(CONFIG, args, dataset_id, experiment_id)
    else:
        e.print_usage()
    sys.exit()

Example #21
def run( model, modelID, verb=False, re_level=0, sw_drop=True, stem=False, max_f=5000, vect=None, mode=False, wordModel=False, scale=False, dScaler=None ):
	''' git description
+ __run__( model, modelID, verb=False, re_level=0, sw_drop=True, stem=False, max_f=5000, vect=None, mode=False, wordModel=False, scale=False, dScaler=None ) :
    + _does_ : 
        + Retrieves test data
        + Pre-processes it
        + Extract feature vectors according to "mode"
        + Predicts the test labels with "model"
        + Save the output as a Kaggle submission
    + _returns_ : Predicted output (as _DataFrame_)
    + _called by_ : `python main.py -s`
    + _calls_ : __pandas.read_csv__, __pandas.DataFrame__, __preprocess.run__, __preprocess.fullPPtoW__, __sklmodels.getBoWf__, __w2v.loopFV__
    + _arguments_ :
        
| type | name | description |
| --- | --- | --- |
| _classifier_ (from __sklearn__) | model | Trained model for prediction |
| _string_ | modelID | Describes model and feature extraction mode for output |
| _boolean_ | verb | Controls console outputs |
| _int_ | re_level | Level of Regex treatment (0-3) |
| _boolean_ | sw_drop | Should drop stop words |
| _boolean_ | stem | Should apply Porter Stemming |
| _int_ | max_f | Number of maximum features for the Bag of Words |
| _CountVectorizer_ (from __sklearn__) | vect | Saved vectorizer to transform test data |
| _string_ | mode | Feature extraction mode (None for BoW, "avg" or "cluster") |
| _W2VModel_ (from __gensim__) | wordModel | Trained word vector representation model |
| _boolean_ | scale | Apply data scaling |
| _StandardScaler_ (from __sklearn__) | dScaler | Fitted data scaler |
	'''
	
	# Test data retrieval
	test = pd.read_csv(dataPath_+"testData.tsv", header=0, delimiter="\t", quoting=3 )

	if verb: print ("\nTest dataset shape : " + str( test.shape ) )
	
	# Correct following if else statement with preprocess.run ability to give multiple values
	if not mode:
		import preprocess
		ppTest, _empt_ = preprocess.run( test, verbose=verb, re_level=re_level, sw_drop=sw_drop, stem=stem )
		
		import sklmodels
		testFeatures, max_f, vect = sklmodels.getBoWf( ppTest, verbose=verb, vect=vect, m_f=max_f, default=True)
		
	else:
		import preprocess
		import w2v
		print( "Creating "+str(mode)+"-style feature vecs for test reviews" )
		
		clean_test_reviews = []
		for review in test["review"]:
			clean_test_reviews += [ preprocess.fullPPtoW( review, re_level=re_level, \
						sw_drop=sw_drop, stem=stem, join_res=False ) ]
		
		testFeatures = w2v.loopFV( clean_test_reviews, wordModel, mode )
	
	if verb: print( "Example test feature (before scaling) : \n" + str( testFeatures[0] ) + "\n" )
	
	if scale:
		testFeatures = dScaler.transform( testFeatures )
		if verb: print( "Example test feature (after scaling) : \n" + str( testFeatures[0] ) + "\n" )
	
	result = model.predict(testFeatures)

	output = pd.DataFrame( data={"id":test["id"], "sentiment":result} )
	output.to_csv( outPath_ + "submission" + modelID + ".csv", index=False, quoting=3 )
	
	if verb: print( "Submission file saved as 'submission" + modelID + ".csv.")
	
	return output
Example #22
# -*- encoding:utf-8 -*-
import pandas as pd
import numpy as np
import warnings
import codecs
import copy
from tqdm import tqdm
import textCNN_money
import textCNN_laws
import preprocess

if __name__ == '__main__':
	textCNN_laws.log('preprocess...')
	preprocess.run()
	textCNN_laws.log('get laws result...')
	textCNN_laws.run()
	textCNN_laws.log('get money result...')
	textCNN_money.run()
Example #23
    'sc53.sc52.personal.sc52', 'sc.56.sc57.personal.sc56',
    'sc.56.sc57.personal.sc57', 'sc58.sc61.personal.sc61',
    'sc59.sc60.personal.sc59', 'sc59.sc60.personal.sc60',
    'sc62.sc63.personal.sc63', 'sc66.sc67.personal.sc66',
    'sc66.sc67.personal.sc67', 'sc74.sc75.personal.sc74',
    'sc74.sc75.personal.sc75', 'sc76.sc77.personal.sc76',
    'sc76.sc77.personal.sc77', 'sc82.sc83.personal.sc83',
    'sc84.sc65.personal.sc65'
]

number_of_clips = 0
accuracy_svm = 0
accuracy_dummy = 0
for test_clip in clips:
    # Extract x,y-test/train vectors from data and plot dist. Linear.
    x_train, x_test, y_train, y_test = preprocess.run(votes, test_clip)

    # Classify using SVM with different kernels
    y_pred_linear = classify.SVM_linear(x_train, x_test, y_train)

    y_pred_rbf = classify.SVM_rbf(x_train, x_test, y_train)

    # Combine classifiers
    y_final = classify.combine(y_pred_linear, y_pred_rbf)

    # Make very engaged -> engaged.
    y_final = ignore_very(y_final)
    y_test = ignore_very(y_test)

    # Evaluate model
    accuracy_svm += classify.evaluate(y_final, y_test, "SVM",
Example #24
from sklearn.metrics import mean_squared_error

# RNN main parameters
BATCH_SIZE = 30
TIME_STEPS = 720
INPUT_SIZE = len(pick_feature)
OUTPUT_SIZE = 120
CELL_SIZE = 1200
LR = 0.006





# split the data into train and test sets
attr_name_list, meta, data = preprocess.run()
attr_name_list, data = dataset.feature.add_weekday(attr_name_list, meta, data)
attr_name_list, data = dataset.feature.add_hour(attr_name_list, meta, data)
(train_X, train_Y), (test_X, test_Y) = \
    dataset.gen_co_model_data(data, TIME_STEPS, OUTPUT_SIZE, pick_feature)


# RNN model definition block
model = Sequential()
model.add(LSTM(
    input_shape=(TIME_STEPS, INPUT_SIZE),
    output_dim=CELL_SIZE,
))
model.add(Dense(OUTPUT_SIZE))
model.compile(optimizer=RMSprop(LR), loss='mse')
Example #25
 os.chdir(args.dirname)
 # Config settings
 file = open(args.config, 'r')
 line = file.readline()
 words = line.split()
 alpha1 = float(words[0])
 alpha2 = float(words[1])
 beta = int(words[2])
 gamma = int(words[3])
 n = int(words[4])
 rho = float(words[5])
 sigma = int(words[6])
 omega = int(words[7])
 file.close()
 # Preprocessing
 images = preprocess.run(args.filename1, args.filename2, beta, args.display)
 # Initial Matching
 if not args.load:
     patches = initialmatch.run(images, alpha1, alpha2, omega, sigma, gamma,
                                beta, args.filename3, args.display)
 else:
     patches = utils.loadPatches(images, args.filename3)
 logging.info("------------------------------------------------")
 logging.info("Writing PLY...")
 utils.writePly(patches, args.outname)
 # Iteration n=3 of expansion and filtering
 iter = 1
 for i in range(n):
     print("==========================================================")
     print(
         f"                        EXPANSION {iter}                        "