import glob
import os
import pickle

from music21 import converter, instrument, note, chord


def get_notes():
    """ Get all the notes and chords from the midi files in the ./midi_songs directory """
    notes = []

    # Skip score parsing if it has already been performed and cached.
    if os.path.exists('data/notes'):
        with open('data/notes', 'rb') as filepath:
            return pickle.load(filepath)

    for file in glob.glob("midi_songs/*.mid"):
        midi = converter.parse(file)

        print("Parsing %s" % file)

        try:
            # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse()
        except Exception:
            # file has notes in a flat structure
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))

    with open('data/notes', 'wb') as filepath:
        pickle.dump(notes, filepath)

    return notes
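# Usage sketch (not part of the original): one typical next step is mapping
# the cached note/chord strings to integers before building training
# sequences; pitchnames and note_to_int here are illustrative names.
notes = get_notes()
pitchnames = sorted(set(notes))
note_to_int = {pitch: number for number, pitch in enumerate(pitchnames)}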
def genops(pickle):
    """Generate (opcode, arg, pos) triples for all opcodes in the pickle.

    Python 2 version of pickletools.genops; `code2op`, which maps opcode
    characters to OpcodeInfo objects, is defined elsewhere in the module.
    """
    import cStringIO as StringIO

    if isinstance(pickle, str):
        pickle = StringIO.StringIO(pickle)

    if hasattr(pickle, 'tell'):
        getpos = pickle.tell
    else:
        getpos = lambda: None

    while True:
        pos = getpos()
        code = pickle.read(1)
        opcode = code2op.get(code)
        if opcode is None:
            if code == '':
                raise ValueError('pickle exhausted before seeing STOP')
            else:
                raise ValueError('at position %s, opcode %r unknown'
                                 % (pos is None and '<unknown>' or pos, code))
        if opcode.arg is None:
            arg = None
        else:
            arg = opcode.arg.reader(pickle)
        yield (opcode, arg, pos)
        if code == '.':
            break
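# Usage sketch: the same generator ships in the standard library as
# pickletools.genops (the Python 3 version takes bytes rather than str).
import pickle
import pickletools

payload = pickle.dumps({'a': 1})
for opcode, arg, pos in pickletools.genops(payload):
    print(pos, opcode.name, arg)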
import pickle


def odczyt_grup():
    '''
    Reads the groups from the file grupy.bin.
    :return: the groups as a dict
    '''
    with open("grupy.bin", 'rb') as plik:
        grupy = pickle.load(plik)
    return grupy
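# Hypothetical companion writer (not in the original), assuming the groups
# dict is pickled back to the same grupy.bin file that odczyt_grup reads:
def zapis_grup(grupy):
    with open("grupy.bin", 'wb') as plik:
        pickle.dump(grupy, plik)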
import pickle
from os import listdir


def restore():
    """
    Loads a previously made snapshot, to check whether it works.
    :return: the groups dict, or -1 if something went wrong
    """
    if "restore" in listdir("DATA"):
        snapshots = listdir("DATA/restore")
        for nr, snap_name in enumerate(snapshots):
            print(nr, '. ', snap_name)
        # wejscie_ok (defined elsewhere) validates a numeric choice in range
        # and returns -1 for invalid input.
        sel_snapshot_nr = wejscie_ok(
            "pick a file to load from the list by entering its number >>\n",
            0, len(snapshots) - 1)
        if sel_snapshot_nr != -1:
            with open("DATA/restore/" + snapshots[sel_snapshot_nr], 'rb') as plik:
                print('successfully loaded file ', snapshots[sel_snapshot_nr])
                return pickle.load(plik)
        else:
            print("invalid number!")
    else:
        print("there is no restore folder")
    return -1  # if something went wrong
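# Usage sketch (not part of the original): -1 is the error sentinel, so a
# caller would check for it before using the result.
grupy = restore()
if grupy == -1:
    print("snapshot could not be loaded")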
# Tail of postlist2LDA_Faster (its head is not part of this fragment). docs,
# dict1, ldaModel and n are assumed to be the post texts, the gensim
# dictionary, the LDA model and the topic count bound earlier in the
# function; pickle and gensim are assumed imported at the top of the file.
    gensimVecs = [ldaModel[dict1.doc2bow(text.lower().split())] for text in docs]
    vecs = [toVector(gensimVec, n) for gensimVec in gensimVecs]
    ave = sum(vecs) / len(vecs)  # was len(posts); averages the per-post vectors
    return ave


def Nposts(dTr):
    nposts = dict()
    for brand in dTr.keys():
        nposts[brand] = sum(len(Tr.getPosts()) for Tr in dTr[brand])
    return nposts


def main(indir=r"Z:\ermunds\results\1 prices paid\20 vs 40 vs 80 iters\40",
         modelName="out40iters"):  # the original def was missing this colon
    dirs = LDAdirs(indir, modelName)
    with open(dirs.dataFileName, 'rb') as file1:  # pickles need binary mode
        dTr = pickle.load(file1)
    mm = gensim.corpora.MmCorpus(dirs.corpusFname)
    dict1 = gensim.corpora.dictionary.Dictionary().load(dirs.dictFileName)
    lda = gensim.models.ldamodel.LdaModel(id2word=dict1).load(dirs.modelFname)

    d2 = dict()
    for (k, Trlist) in dTr.items():
        topicVec = toVector([], lda.num_topics)
        counter = 0
        for Tr in Trlist:
            posts = Tr.getPosts()
            # was postlist2LDA_Faster(posts, lda2); lda2 is never defined
            topicVec = topicVec + len(posts) * postlist2LDA_Faster(posts, lda)
            counter += len(posts)
        topicVec = topicVec / counter
        d2.update({k: topicVec})
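# toVector is referenced above but not defined in this fragment. A minimal
# sketch, assuming it densifies the sparse (topic_id, weight) list that a
# gensim LDA model returns into a length-n numpy array:
import numpy as np

def toVector(gensimVec, n):
    v = np.zeros(n)
    for topic_id, weight in gensimVec:
        v[topic_id] = weight
    return v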
# Tail of check_pos_tag (its head is not part of this fragment; a full
# sketch follows this snippet):
                cnt += 1
    except Exception:
        pass
    return cnt


now = datetime.datetime.now()
print(now)
df_train['noun_count'] = df_train['text'].apply(lambda x: check_pos_tag(x, 'noun'))
df_train['verb_count'] = df_train['text'].apply(lambda x: check_pos_tag(x, 'verb'))
df_train['adj_count'] = df_train['text'].apply(lambda x: check_pos_tag(x, 'adj'))
df_train['adv_count'] = df_train['text'].apply(lambda x: check_pos_tag(x, 'adv'))
df_train['pron_count'] = df_train['text'].apply(lambda x: check_pos_tag(x, 'pron'))
now2 = datetime.datetime.now()
print(now2 - now)  # was now - now2, which prints a negative elapsed time

# pickle.dump writes to an open file object and returns None; the original
# passed a path and rebound df_train to that None result.
with open(r'c:\data\temp\df_train.p', 'wb') as f:
    pickle.dump(df_train, f, protocol=pickle.DEFAULT_PROTOCOL, fix_imports=True)
with open(r'c:\data\temp\df_train.p', 'rb') as f:
    df_train = pickle.load(f)
print('word tokeniser completed')

#######
# train a LDA Model
lda_model = decomposition.LatentDirichletAllocation(n_components=20,
                                                    learning_method='online',
                                                    max_iter=20)
X_topics = lda_model.fit_transform(xtrain_count)
topic_word = lda_model.components_
vocab = count_vect.get_feature_names()

# view the topic models
n_top_words = 10
topic_summaries = []
for i, topic_dist in enumerate(topic_word):
    # presumed continuation: collect the n_top_words highest-weight words
    topic_words = np.array(vocab)[np.argsort(topic_dist)][:-(n_top_words + 1):-1]
    topic_summaries.append(' '.join(topic_words))
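# check_pos_tag is only partially visible above. A minimal sketch of the
# whole function, assuming it counts tokens of text x whose Penn Treebank
# tag belongs to the family named by flag (TextBlob and the tag lists are
# assumptions, not confirmed by the fragment):
import textblob

pos_family = {
    'noun': ['NN', 'NNS', 'NNP', 'NNPS'],
    'pron': ['PRP', 'PRP$', 'WP', 'WP$'],
    'verb': ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'],
    'adj': ['JJ', 'JJR', 'JJS'],
    'adv': ['RB', 'RBR', 'RBS', 'WRB'],
}

def check_pos_tag(x, flag):
    cnt = 0
    try:
        for _, tag in textblob.TextBlob(x).tags:
            if tag in pos_family[flag]:
                cnt += 1
    except Exception:
        pass
    return cnt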
import pickle


def read_pkl_data(path):
    with open(path, 'rb') as file:
        data = pickle.load(file)
    return data
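# Hypothetical companion writer (not part of the original):
def write_pkl_data(data, path):
    with open(path, 'wb') as file:
        pickle.dump(data, file)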
import pickle

import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
from mpl_toolkits.axes_grid1.inset_locator import inset_axes  # axes_grid is deprecated

#subjects=[6,8,10,11,12,15,16,17,18]
subjects = [6]

fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(20, 20))
isub = 8

cm = plt.get_cmap('winter')
cNorm = colors.Normalize(vmin=0, vmax=1)
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm)

predicted = []
path = '/home/genis/cluster_archive/Master_Project/meg_analysis/results'
for isub in range(len(subjects)):
    filename = path + '/subject_' + str(subjects[isub]) + '_matrix.pickle'
    # Pickles must be opened in binary mode; the original opened the file in
    # text mode, passed the filename (not the file object) to the loader,
    # and never closed the handle.
    with open(filename, 'rb') as f:
        data = pickle.load(f)

#    result = pd.read_pickle(filename)
#
#    array_re=np.array(result)
#    predicted=[]
#    predicted.append(array_re[0,:])
##    predicted.append(array_re[12,:])
#    predicted.append(array_re[25,:])
#
#    x=np.linspace(-2.5,0.5,len(predicted[0]))
#    for i in range(len(predicted)):
#        color=scalarMap.to_rgba(i)
#        axes.flat[isub].plot(x,predicted[i],'o-',color=color)
#    axes.flat[isub].plot(x,np.diagonal(array_re),'ro-')
#
#    axes.flat[isub].spines['top'].set_visible(False)