# Example 1
def get_notes(note_width=.25):
	"""Parse every MIDI file in ``directory`` and pickle the cleaned notes.

	Walks ``directory`` for ``*.mid`` files, partitions each parsed score by
	instrument, and collects the Piano / Acoustic Bass / Saxophone parts per
	song. The collected parts are passed to ``clean`` and only songs that end
	up with all three instruments (``len(s) == 3``) are kept.

	Args:
		note_width: time quantization (in quarter-note units) forwarded to
			``clean``.

	Returns:
		Tuple ``(enumerated_notes, output)`` where ``output`` is the list of
		3-instrument embedded songs. Both are also pickled under ``pickle/``.
	"""
	# NOTE: an earlier version short-circuited here by loading
	# 'pickle/' + directory + '_notes' when it already existed; re-enable a
	# cache check here if re-parsing becomes too slow.
	piano = []
	bass = []
	sax = []
	song_lens = []
	songs = {}
	num_songs = 0
	for file in glob.glob(directory + "/*.mid"):
		try:
			midi = converter.parse(file)
		except Exception as e:
			# Report the reason so unparseable files can be triaged.
			print('Could not parse file: {} ({})'.format(file, e))
			continue

		print("Parsing %s" % file)
		num_songs += 1
		songs[file] = {}
		instruments = instrument.partitionByInstrument(midi)
		song_lens.append(midi.highestTime)
		for i in instruments:
			name = i.getInstrument().instrumentName
			if name == 'Piano':
				songs[file]['piano'] = i
				piano.append(i)
			elif name == 'Acoustic Bass':
				songs[file]['bass'] = i
				bass.append(i)
			elif name == 'Saxophone':
				songs[file]['sax'] = i
				sax.append(i)

	enumerated_notes, embedded = clean(songs, song_lens, note_width)

	# Keep only songs for which all three instrument parts were found.
	output = [s for s in embedded if len(s) == 3]

	# Use context managers so the pickle files are flushed and closed
	# (the original passed bare open() handles that were never closed).
	with open('pickle/' + directory + '_encodings', 'wb') as f:
		pickle.dump(enumerated_notes, f)
	with open('pickle/' + directory + '_embedded', 'wb') as f:
		pickle.dump(output, f)

	return enumerated_notes, output
# Example 2
def load(grid_num):
    """Read the train/test CSVs and return cleaned splits for one grid.

    Both files are indexed by their unnamed first column and tagged with a
    ``dataframeName`` attribute before being handed to ``clean_data.clean``.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    frames = {}
    for csv_name in ('train.csv', 'test.csv'):
        frame = pd.read_csv(csv_name,
                            delimiter=',',
                            parse_dates=True,
                            index_col="Unnamed: 0")
        frame.dataframeName = csv_name
        frames[csv_name] = frame

    return clean_data.clean(frames['train.csv'], frames['test.csv'], grid_num)
# Example 3
def predict(test, xgb_model):
    """Clean raw test records and score them with the fitted XGBoost model.

    Args:
        test: raw records (JSON form) accepted by ``clean``.
        xgb_model: a fitted classifier exposing ``predict_proba``.

    Returns:
        Tuple ``(X_xgb, xgb_preds)`` — the cleaned feature matrix and the
        positive-class probabilities for each row.
    """
    # A parallel text-vectorizer model ('vec_model.pkl') was once averaged
    # into these predictions; only the XGBoost path remains in use.
    X_xgb = clean(test, isTrain=False, isjson=True)
    xgb_preds = xgb_model.predict_proba(X_xgb)[:, 1]
    return X_xgb, xgb_preds
# Example 4
    # Load a potentially pretrained model from disk.
    # NOTE(review): if datapath("model_small") returns a path *string* it is
    # always truthy, so the load branch is taken even when no saved model
    # exists on disk — an os.path.isfile() check was presumably intended.
    # TODO confirm against the datapath helper's contract.
    if datapath("model_small"):
        lda_model = ldamodel.load(datapath("model_small"))

    else:
    # train model
        # NOTE(review): `ldamodel` is used as a module above (`ldamodel.load`)
        # but called like a class here; this likely should be
        # `ldamodel.LdaModel(corpus, ...)` — verify before relying on the
        # retraining path.
        lda_model = ldamodel(corpus, num_topics=10, id2word=dictionary)
        # Print the 10 topics' top-5 terms for a quick sanity check.
        pprint.pprint(lda_model.top_topics(corpus, topn=5))

        # Save model to disk.
        temp_file = datapath("model_small")
        lda_model.save(temp_file)

    return lda_model, corpus, dictionary


def visualize_pyldavis(lda_model, corpus, dictionary):
    """Render an interactive pyLDAvis view of the trained topic model.

    Writes the visualization to 'vis_topic_model_02.html' and then opens
    it in a local browser session.
    """
    vis_data = pyLDAvis.gensim.prepare(lda_model, corpus, dictionary)
    pyLDAvis.save_html(vis_data, 'vis_topic_model_02.html')
    pyLDAvis.show(vis_data)


if __name__ == '__main__':
    # Tokenised corpus: one list of strings per document.
    docs = clean('voted-kaggle-dataset.csv')

    # Train (or load) the LDA model, then visualize it in the browser.
    model, bow_corpus, id2word = train_model(docs)
    visualize_pyldavis(model, bow_corpus, id2word)
# Example 5
# CraigsRecommendation
# created by Mikaela Hoffman-Stapleton and Arda Aysu

from flask import Flask, request
import pandas as pd
import re
from datetime import datetime
from clean_data import clean
from filter_data import filter
from cluster_data import cluster
from gmaps_fns import *
from webpage import searchpage, recpage
# import json # for demo

# Show full cell contents when rendering listing text columns.
# NOTE: -1 has been deprecated since pandas 1.0 and is rejected by pandas 2.x;
# None is the supported "no truncation" value.
pd.set_option('display.max_colwidth', None)

# Load and clean the scraped Craigslist data once at startup, then assign a
# stable integer index used later to join Google Maps results per listing.
listings = clean('craigslist.csv')
listings = listings.assign(index=range(len(listings)))

# for demo data:
# gmaps = pd.read_csv('gmaps.csv')
# listings = pd.merge(listings, gmaps, how='outer', on='index')
# with open('places_demo.json') as data_file:
#     places = json.load(data_file)
# with open('distances_demo.json') as data_file:
#     distances = json.load(data_file)

app = Flask(__name__)


@app.route('/')
def search_page():