def student2enrollid_time():
    '''initial'''
    student2enrollid_time = {}
    enrollid_second = cPickle.load(open('enrollid_second.cPickle'))
    student_enrollment = cPickle.load(open('student_enrollment.cPickle'))
    for key, values in student_enrollment.items():
        # print key, values
        max_time = 0
        for enrollid in values:
            secods = enrollid_second.get(enrollid)
            print secods
            if secods > max_time:
                max_time = secods
        for enrollid in values:
            student2enrollid_time[enrollid] = max_time
        # raw_input(student2enrollid_time)
    w = open("student2enrollid_time.cPickle", 'w')
    cPickle.dump(student2enrollid_time, w)
    w.close()
    w = open('student2enrollid_time.txt', 'w')
    w.write('enrollment_id,student2enrollid_time\n')
    for key in sorted(student2enrollid_time.iterkeys()):
        w.write(str(key))
        w.write(',')
        w.write(str(student2enrollid_time[key]))
        w.write('\n')
    w.close()
def loadArray(dirpath):
    pattern = '.+\.label'
    another = 'array'
    names = os.listdir(dirpath)
    random.shuffle(names)
    for name in names:
        if re.match(pattern, name) != None:
            # print name
            folder, prename, num, suffix = name.split('.')
            target = folder + '.' + prename + '.' + num + '.' + another
            targetpath = dirpath + '/' + target
            # find another suffix data file
            # meanwhile examine the num, length of spectrogram = length of label
            if os.path.exists(targetpath):
                # extract object from a file
                with file(target, 'rb') as f:
                    spectroArray = cPickle.load(f)
                # GPU default type is float32
                spectroArray = np.float32(spectroArray)
                with file(name, 'rb') as f:
                    labelArray = cPickle.load(f)
                # label should be int type
                labelArray = np.int32(labelArray)
                yield spectroArray, labelArray, int(num)
def load_knowledge(knowledge):
    # existing naive_bayes object and keyword list
    nb = None
    kw = list()
    if knowledge is not None:
        if not os.path.isdir(knowledge):
            print("Knowledge must be a directory")
            exit()
    else:
        knowledge = os.path.expanduser('~/.shakespeare')
        # make this directory if it doesn't already exist
        if not (os.path.exists(knowledge)):
            print('Creating directory: {}'.format(knowledge))
            os.mkdir(knowledge)
    kfiles = glob.glob(knowledge + '/*')
    if os.path.join(knowledge, 'nb.p') in kfiles:
        nb = pickle.load(open(os.path.join(knowledge, 'nb.p')))
    else:
        print("Warning: knowledge dir {} does not contain nb.p (pickled naive bayes object)".format(knowledge))
    if os.path.join(knowledge, 'kw.p') in kfiles:
        kw = pickle.load(open(os.path.join(knowledge, 'kw.p')))
    else:
        print("Warning: knowledge dir {} does not contain kw.p (pickled keyword list)".format(knowledge))
    return (nb, kw, knowledge)
def load_vocabulary():
    """
    Unpickle and return the content of the file `free_associations_vocabulary`
    generated by `process_data.py`.

    Output
    ------
    W: association matrix
    id2voc: dictionary, keys are word ids and values words
    voc2id: dictionary, keys are words and values word ids
    """
    # absolute path to the free association norms data
    path = '../../data/associationmatrices/'
    filename = 'association_norms_symm'
    try:
        with open(path + filename, 'rb') as f:
            id2voc = pickle.load(f)
            voc2id = pickle.load(f)
            Wsparse = pickle.load(f)
    except IOError:
        raise IOError('Association matrix "' + filename + '" not found' +
                      ' in ' + path + '. To generate the matrix run ' +
                      'generate_association_matrix.py')
    # convert to dense matrix (stored as sparse for memory reasons)
    W = np.asarray(Wsparse.todense())
    # normalize weights to [0-1] interval
    W /= 2
    np.fill_diagonal(W, 1.)
    return W, id2voc, voc2id
def __init__(self, descrs, aggFunc='mean', caching=True):
    self.reportMissing = True
    self.caching = caching
    self.cached_file_name = None
    if isinstance(descrs, str):
        self.descrs_file = descrs
        self.descrs = pickle.load(open(self.descrs_file, 'rb'))
        self.cached_file_name = '%s-%s.pkl' % (self.descrs_file, aggFunc)
    elif isinstance(descrs, dict):
        self.descrs = descrs
    if self.caching and self.cached_file_name is not None and os.path.exists(self.cached_file_name):
        self.space = pickle.load(open(self.cached_file_name, 'rb'))
    elif aggFunc in ['mean', 'max']:
        if aggFunc == 'mean':
            f = self.aggMean
        elif aggFunc == 'max':
            f = self.aggMax
        self.space = {}
        for k in self.descrs.keys():
            vecs = self.descrs[k].values()
            if len(vecs) < 2:
                if self.reportMissing:
                    print('Warning: Not enough vectors for key %s - skipping' % k)
                continue
            self.space[k] = f(vecs)
        if self.caching and self.cached_file_name is not None:
            pickle.dump(self.space, open(self.cached_file_name, 'wb'))
def readTurbostatDataFile(filename, newFile=None, verbose=False, needsReload=False):
    if newFile == None:
        newFile = filename + "_tsdata.gz"
    if not needsReload:
        try:
            if verbose:
                print "Loading data from power file..."
            fp = gzip.open(newFile, "rb")
            data = pickle.load(fp)
            colHeaders = pickle.load(fp)
            fp.close()
        except IOError as err:
            if verbose:
                print "Does not exist (%s). Attempting to create..." % (err)
            needsReload = True
    if needsReload:
        data, colHeaders = generateTurbostatDataFile(filename, newFile, verbose)
    if verbose:
        print "Got %d blocks." % (data.shape[0])
    return (data, colHeaders)
def loadState():
    import gzip
    global outputFile
    # f = gzip.GzipFile(outputFile, 'r')
    try:
        f = open(outputFile, 'r')
    except:
        return "Humane Document file does not exist."
    try:
        state = cPickle.load(f)
        changeList = []
        while 1:
            try:
                changes = cPickle.load(f)
                changeList.extend(changes)
            except EOFError:
                break
    except:
        return "Error loading Humane Document."
    f.close()
    state.restore()
    _applyChanges(changeList)
    return "Loaded file correctly."
def open(self, dirname, filename):
    self.filename = filename
    self.dirname = dirname
    self.status.SetStatusText("Opening: {0}".format(filename), 0)
    try:
        handle = open(os.path.join(dirname, filename), 'rb')
        header = cPickle.load(handle)
        if header != FILE_HEADER:
            wx.MessageBox('Invalid or corrupted file', 'Warning',
                          wx.OK | wx.ICON_WARNING)
            self.status.SetStatusText("Open failed", 0)
            return
        _version = cPickle.load(handle)
        self.settings.start = cPickle.load(handle)
        self.settings.stop = cPickle.load(handle)
        self.spectrum = cPickle.load(handle)
    except:
        wx.MessageBox('File could not be opened', 'Warning',
                      wx.OK | wx.ICON_WARNING)
        self.status.SetStatusText("Open failed", 0)
        return
    self.isSaved = True
    self.set_range()
    self.draw_plot()
    handle.close()
    self.status.SetStatusText("Finished", 0)
def getTrainTest(flag, date='2016-01-20'):
    if configs['save_fea'] == True:
        fea = pickle.load(open(configs['train_fea'], 'r'))
        # print fea.dtypes
        test_fea = pickle.load(open(configs['test_fea'], 'r'))
        # print test_fea.dtypes
    else:
        fea = Fea.getTrainFea()
        test_fea = Fea.getTestFea()
    if flag == 'online':
        train_x = fea[fea_names].values
        train_y = fea[y_fea_names]
        test_x = test_fea[fea_names].values
        test_y = test_fea[['poi', 'key']]
        train = [train_x, train_y]
        test = [test_x, test_y]
        return train, test
    else:
        train_x = fea[fea.date < date][fea_names].values
        train_y = fea[fea.date < date][y_fea_names]
        test_x = fea[fea.date >= date][fea_names].values
        test_y = fea[fea.date >= date][y_fea_names]
        train = [train_x, train_y]
        test = [test_x, test_y]
        return train, test
def plugin_init(self, mainframe, app_init):
    self.mainframe = mainframe
    self.worker = mainframe.tm
    panel = mainframe.float_mgr.add_panel('Map', 'Show or hide the map panel (use it to view or set the location of your images)', 'picty-map')
    self.mapframe = MapFrame(self)
    panel.vbox.pack_start(self.mapframe)
    places = {'Home': (0.0, 0.0, 1)}
    latlon = None
    place = None
    source = None
    data = settings.load_addon_prefs('map_plugin_settings')
    if data:
        places = data['places']
        source = data['source']
        place = data['place']
    else:
        try:
            f = open(os.path.join(settings.data_dir, 'map-places'), 'rb')
            version = cPickle.load(f)
            places = cPickle.load(f)
            if version >= '0.1.1':
                source = cPickle.load(f)
            f.close()
        except:
            log_err('No map-places file found')
    self.mapframe.set_places(places)
    self.mapframe.set_place(place)
    if source is not None:
        self.mapframe.set_preferred_source(source)
    ##TODO: should update map images whenever there are relevant collection changes
    ## (will need to maintain list of displayed images) -- may be enough to trap
    ## view add/remove and GPS metadata changes
    self.mainframe.connect("view-rebuild-complete", self.view_rebuild_complete)
def main():
    args = parse_args()
    option = json.load(open(args.option))
    prefix = args.prefix
    sufix = args.sufix
    data_type = args.data_type
    event_fn = args.event_fn
    word2vec_file = args.word2vec
    exp_name = args.exp_name

    max_sens = option["max_sens"]
    max_words = option["max_words"]
    padding = option["padding"]

    class2id = {k.strip(): i for i, k in enumerate(open(event_fn))}
    dataset = nn.load_event_dataset(prefix, sufix)
    wf = open(word2vec_file)
    embedding = cPickle.load(wf)
    word2id = cPickle.load(wf)
    digit_dataset = nn.transform_event_dataset(dataset, word2id, class2id, data_type, max_sens, max_words, padding)

    model = GICF(option)
    model.run_experiment(digit_dataset, embedding, exp_name)
def copy_flow(self, key):
    trained_flow_path = "%s/%s.pickle" % (self.directory, "train_flow_" + key)
    prewindowing_flow_path = "%s/%s.pickle" % (self.directory, "prewindowing_flow_" + key)
    prewindowing_offline_flow_path = "%s/%s.pickle" % (self.directory, "prewindowing_offline_flow_" + key)
    prewindowed_train_flow_path = "%s/%s.pickle" % (self.directory, "prewindowed_train_flow_" + key)
    # using the trained flow for adaptation
    if os.path.exists(trained_flow_path):
        shutil.copyfile(trained_flow_path, "%s/%s.pickle" % (self.directory, "abri_flow_" + key + "_unadapted"))
    # using the prewindowing flow and prewindowed-trained flow for adaptation
    else:
        flh_1 = {}
        flh_2 = {}
        prewindowing_flow = {}
        postprocessing_flow = {}
        unadapted_flow = {}
        if os.path.exists(prewindowing_flow_path):
            flh_1[key] = open(prewindowing_flow_path, 'r')
        elif os.path.exists(prewindowing_offline_flow_path):
            flh_1[key] = open(prewindowing_offline_flow_path, 'r')
        flh_2[key] = open("%s/%s.pickle" % (self.directory, "prewindowed_train_flow_" + key), 'r')
        prewindowing_flow[key] = cPickle.load(flh_1[key])
        prewindowing_flow[key].pop(-1)
        prewindowing_flow[key].pop(-1)
        postprocessing_flow[key] = cPickle.load(flh_2[key])
        postprocessing_flow[key].pop(0)
        postprocessing_flow[key].pop(0)
        unadapted_flow[key] = prewindowing_flow[key] + postprocessing_flow[key]
        flh_1[key].close()
        flh_2[key].close()
        unadapted_file = open("%s/%s.pickle" % (self.directory, "abri_flow_" + key + "_unadapted"), 'w+')
        cPickle.dump(unadapted_flow[key], unadapted_file)
def loadTree(filename):
    # returns a tuple of the root item and tree object
    f = open(filename, "rb")
    root_item = pickle.load(f)
    tree_object = pickle.load(f)
    f.close()
    return (root_item, tree_object)
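# A minimal companion sketch (not part of the original source): loadTree reads two
# objects back-to-back from the same file, so a matching writer would simply dump
# them in the same order. The name saveTree is an assumption for illustration.
def saveTree(filename, root_item, tree_object):
    f = open(filename, "wb")
    pickle.dump(root_item, f)    # first object read back by loadTree
    pickle.dump(tree_object, f)  # second object read back by loadTree
    f.close()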
def searchNearestNeighborsOPQ(codeFilename, codebooksFilename, queriesFilename, \
                              queriesCount, k=10000, threadsCount=30):
    model = pickle.load(open(codebooksFilename, 'r'))
    codebooks = model[0]
    R = model[1]
    M = codebooks.shape[0]
    codebookDim = codebooks.shape[2]
    dim = codebookDim * M
    codebookSize = codebooks.shape[1]
    codes = pickle.load(open(codeFilename, 'r'))
    queries = readXvecs(queriesFilename, dim, queriesCount)
    queries = np.dot(queries, R.T).astype('float32')
    result = np.zeros((queriesCount, k), dtype='int32')
    codeDistances = np.zeros((M, queriesCount, codebookSize), dtype='float32')
    for m in xrange(M):
        subqueries = queries[:, m*codebookDim:(m+1)*codebookDim].copy()
        codeDistances[m, :, :] = ynumpy.cross_distances(codebooks[m], subqueries)
    nearest = np.zeros((queriesCount, k), dtype='int32')
    qidRangeSize = 1
    rangesCount = int(math.ceil(float(queriesCount) / qidRangeSize))
    pool = Pool(threadsCount)
    ans = pool.map(partial(findNearestForRangePQ, \
                           rangeSize=qidRangeSize, codebookDistances=codeDistances,
                           pointsCodes=codes, listLength=k), \
                   range(0, rangesCount))
    pool.close()
    pool.join()
    for i in xrange(len(ans)):
        if ans[i] == None:
            pass
        else:
            qidsCount = ans[i].shape[0]
            nearest[i*qidRangeSize:i*qidRangeSize+qidsCount, :] = ans[i]
    return nearest
def load_infnet_from_file(f_name=None, rng=None, Xd=None, \
                          new_params=None):
    """
    Load a clone of some previously trained model.
    """
    assert(not (f_name is None))
    pickle_file = open(f_name)
    self_dot_params = cPickle.load(pickle_file)
    if not (new_params is None):
        for k in new_params:
            self_dot_params[k] = new_params[k]
    self_dot_numpy_param_dicts = cPickle.load(pickle_file)
    self_dot_shared_param_dicts = {'shared': [], 'mu': [], 'sigma': []}
    for layer_group in ['shared', 'mu', 'sigma']:
        for numpy_dict in self_dot_numpy_param_dicts[layer_group]:
            shared_dict = {}
            for key in numpy_dict:
                val = to_fX(numpy_dict[key])
                shared_dict[key] = theano.shared(val)
            self_dot_shared_param_dicts[layer_group].append(shared_dict)
    # now, create a PeaNet with the configuration we just unpickled
    clone_net = InfNet(rng=rng, Xd=Xd, params=self_dot_params, \
                       shared_param_dicts=self_dot_shared_param_dicts)
    # helpful output
    print("==================================================")
    print("LOADED InfNet WITH PARAMS:")
    for k in self_dot_params:
        print("    {0:s}: {1:s}".format(str(k), str(self_dot_params[k])))
    print("==================================================")
    return clone_net
def mnist(datasets_dir='/Tmp/kastner'):
    try:
        import urllib
        urllib.urlretrieve('http://google.com')
    except AttributeError:
        import urllib.request as urllib
    url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
    data_file = os.path.join(datasets_dir, 'mnist.pkl.gz')
    if not os.path.exists(data_file):
        urllib.urlretrieve(url, data_file)
    print('... loading data')

    # Load the dataset
    f = gzip.open(data_file, 'rb')
    try:
        train_set, valid_set, test_set = cPickle.load(f, encoding="latin1")
    except TypeError:
        train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    test_x, test_y = test_set
    test_x = test_x.astype('float32')
    test_x = test_x.astype('float32').reshape(test_x.shape[0], 1, 28, 28)
    test_y = test_y.astype('int32')
    valid_x, valid_y = valid_set
    valid_x = valid_x.astype('float32')
    valid_x = valid_x.astype('float32').reshape(valid_x.shape[0], 1, 28, 28)
    valid_y = valid_y.astype('int32')
    train_x, train_y = train_set
    train_x = train_x.astype('float32').reshape(train_x.shape[0], 1, 28, 28)
    train_y = train_y.astype('int32')

    rval = [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]
    return rval
def main():
    args = parse_arguments()
    print "hi from making files", args.make
    if 'graph' in args.make:
        print args.pickle_file
        pham = cPickle.load(open(args.pickle_file.strip('"'), 'rb'))
        graph_start_sites(args, pham, args.dir)
    if 'starts' in args.make:
        phage_genes = cPickle.load(open(args.pickle_file.strip('"'), 'rb'))
        make_suggested_starts(phage_genes, args.phage, args.dir)
    if 'genome' in args.make:
        phage = cPickle.load(open(args.pickle_file.strip('"'), 'rb'))
        make_pham_genome(phage, args.phage, args.phage_length, args.dir)
        make_suggested_starts(phage, args.phage, args.dir)
    if 'text' in args.make:
        print args.pickle_file
        pham = cPickle.load(open(args.pickle_file.strip('"'), 'rb'))
        graph_start_sites(args, pham, args.dir)
        print "phage", args.phage
        if not args.phage:
            print "hello no phage"
            make_pham_text(args, pham, args.pham_no, args.dir, only_pham=True)
        else:
            make_pham_text(args, pham, args.pham_no, args.dir)
    if 'fasta' in args.make:
        pass
def __init__(self, b=None, eta=1., pa=None, q=None, x=None, y=None, load=False):
    self.b = b
    self.eta = eta
    self.pa = pa
    self.q = q
    self.x = x
    self.y = y
    self.nsteps = 1e3
    if load:
        import cPickle
        f = open('powerlaw.alphax', 'rb')
        self.xmodel = cPickle.load(f)
        f.close()
        f = open('powerlaw.alphay', 'rb')
        self.ymodel = cPickle.load(f)
        f.close()
    else:
        self.xmodel = None
        self.ymodel = None
def one_model():
    # load feat names
    #feat_names = config.feat_names
    feat_names = ['label']
    model_type = "extratree"
    model_param = config.param_spaces[model_type]

    ## load best params for each model (feat, model)
    #with open("%s/model_best_params" %config.data_folder) as f:
    #    param_best_dic = pickle.load(f)
    ## supply the extra parameter from config.param_spaces
    #for feat in config.feat_names:
    #    for model in config.model_list:
    #        if param_best_dic.has_key("%s_%s"%(feat, model)):
    #            param_space = config.param_spaces[model]
    #            for key in param_space.keys():
    #                if param_best_dic["%s_%s"%(feat, model)].has_key(key) is False:
    #                    param_best_dic["%s_%s"%(feat, model)][key] = param_space[key]
    #print param_best_dic

    # load feat, cross validation
    for iter in range(config.kiter):
        for fold in range(config.kfold):
            for feat in feat_names:
                print "Gen pred for (iter%d, fold%d, %s) cross validation" % (iter, fold, feat)
                with open("%s/iter%d/fold%d/train.%s.feat.pkl" % (config.data_folder, iter, fold, feat), 'rb') as f:
                    [x_train, y_train] = pickle.load(f)
                with open("%s/iter%d/fold%d/valid.%s.feat.pkl" % (config.data_folder, iter, fold, feat), 'rb') as f:
                    [x_test, y_test] = pickle.load(f)
                path = "%s/iter%d/fold%d" % (config.data_folder, iter, fold)
                #train_model(path, x_train, y_train, x_val, y_val, feat, param_best_dic)
                pred_val = hyperopt_library(model_type, model_param, x_train, y_train, x_test, y_test)
                print "ml score is %f" % ml_score(y_test, pred_val)
                break
def load(self, load_dir, load_filename='model.pkl'):
    """ load the model """
    print '... loading model'
    save_file = open(os.path.join(load_dir, load_filename), 'r')
    args = cPickle.load(save_file)
    self.__init__(
        seed_params=args['seed_params'],
        seed_noise=args['seed_noise'],
        input=args['input'],
        n_visible=args['n_visible'],
        n_hidden=args['n_hidden'],
        #tied_weights=args['tied_weights'],
        act_enc=args['act_enc'],
        act_dec=args['act_dec'],
        W=args['W'],
        W_prime=args['W_prime'],
        b=args['b'],
        b_prime=args['b_prime'])
    self.W.value = cPickle.load(save_file)
    if not self.tied_weights:
        self.W_prime.value = cPickle.load(save_file)
    self.b.value = cPickle.load(save_file)
    self.b_prime.value = cPickle.load(save_file)
    save_file.close()
def test_parser_1(self):
    """Tests the XMLDocParser and SentenceParser subclasses"""
    # Load correct parses
    with open(ROOT + '/test/data/CDR_TestSet_docs.pkl', 'rb') as f:
        gold_docs = cPickle.load(f)
    with open(ROOT + '/test/data/CDR_TestSet_sents.pkl', 'rb') as f:
        gold_sents = cPickle.load(f)
    # Set up the doc parser
    xml_parser = XMLDocParser(
        path=ROOT + '/test/data/CDR_TestSet.xml',
        doc='.//document',
        text='.//passage/text/text()',
        id='.//id/text()',
        keep_xml_tree=False)
    sent_parser = SentenceParser()
    corpus = Corpus(xml_parser, sent_parser, max_docs=20)
    print len(corpus.get_docs())
    print len(corpus.get_contexts())
    self.assertEqual(corpus.get_docs(), gold_docs)
    self.assertEqual(corpus.get_contexts(), gold_sents)
def __init__(self, discretized=False,
             discretizer=Orange.feature.discretization.ThresholdDiscretizer(threshold=0.)):
    self.ml_data = cPickle.load(file(orange_disc_filename if discretized else orange_data_filename))
    self.test_data = cPickle.load(file(orange_disc_test_filename if discretized else orange_test_filename))
    print "Check train vs. test:", self.ml_data.domain.features[0] == self.test_data.domain.features[0]
    print "Loaded:", len(self.ml_data)
    self.classes = {v.name: v for v in self.ml_data.domain.class_vars}
    self.disc_features = [discretizer.constructVariable(x) for x in self.ml_data.domain.features]
def load_nets(myglob):
    """
    Load a dictionary of trained neural networks with filenames matching a
    unix-style pattern.

    Filenames of networks trained for individual output measures should be
    named `final-OutputCol-NameOfOutputCol_net.pkl` and filenames for a
    network trained for the complete set of output measures should be
    `full_net.pkl`.

    Parameters
    ----------
    myglob : str
        the unix-style pattern (including abs or rel path) for the filenames
        you want to load. Wildcards accepted.

    Returns
    -------
    nets : dict
        a dictionary with the trained networks. Keys are the columns from
        y_train that the network is trained on.
    """
    files_list = glob.glob(myglob)
    nets = {}
    if [x for x in files_list if 'full' in x]:
        with open(files_list[0], 'rb') as pkl:
            nets['all'] = pickle.load(pkl)[0]
    else:
        for filename in files_list:
            output_col = int(filename.split('/')[-1].split('-')[1])
            with open(filename, 'rb') as pkl:
                nets[output_col] = pickle.load(pkl)[0]
    return nets
def load_db(db_file, dump_names, dump_info):
    global var_records, merge_graph, tag_source
    print "Loading " + db_file
    input = open(db_file, "rb")
    var_records = cPickle.load(input)
    if dump_names or dump_info:
        if dump_names:
            for name in var_records.keys():
                print name
        if dump_info:
            pp = pprint.PrettyPrinter(indent=4)
            pp.pprint(var_records)
            for v in var_records.keys():
                if var_records[v].is_global:
                    assert(len(var_records[v].functions) == len(var_records[v].entry_tags))
            sys.exit(0)
        sys.exit(0)
    merge_graph = cPickle.load(input)
    tag_source = cPickle.load(input)
    print "Loaded " + db_file
def read_index(index_file):
    global global_dict, k_gram, id_name
    fh = open(index_file, 'rb')
    global_dict = cPickle.load(fh)
    k_gram = cPickle.load(fh)
    id_name = cPickle.load(fh)
    fh.close()
def get_final_amar(experiment_path, member, member_path, min_samples=0):
    # print "Called with (", experiment_path, ",", member, ",", member_path, ")"
    gen = 0
    # Start searching from first generation, ignore given member_path
    member_path = experiment_path + "generation" + str(gen) + "\\" + member + "\\"
    # First, loop past the generation BEFORE the member was born
    while not os.path.isdir(member_path):
        gen += 1
        member_path = experiment_path + "generation" + str(gen) + "\\" + member + "\\"
        # Security check
        if gen >= 2000:
            print "[ERROR] Looking for member on path", member_path, "in generation 2000, member will probably not be found!"
            return 42.0
    # Then loop just past the final generation of the member
    while os.path.isdir(member_path):
        gen += 1
        member_path = experiment_path + "generation" + str(gen) + "\\" + member + "\\"
    # Then go back 1
    member_path = experiment_path + "generation" + str(gen-1) + "\\" + member + "\\"
    # Check if the member has enough samples to actually count
    samples = float(cPickle.load(open(member_path + "eval\\mar_samples.p", "rb")))
    if samples >= min_samples:
        # Retrieve AMAR
        return float(cPickle.load(open(member_path + "eval\\average_mar.p", "rb")))
    else:
        # Return high dummy value
        return 999999.9
def getData_Voxforge(): pickle_directory = prm.params["pickle_directory"].get() voxforge_directory = prm.params["voxforge_directory"].get() try: print "Loading the Data... (may take a while)" data = pickle.load(open(pickle_directory + "data_vf.p", "rb")) labels = pickle.load(open(pickle_directory + "labels_vf.p", "rb")) print "Data Loaded, Good to Go!" except Exception as excp: # data_vf.p doesnt exist print "Exception:", excp data, labels, rawData = extract_Data(voxforge_directory) print "Flattening ze Data" # flatten the data (for svm) data_flat = [] data = np.array(data) for elem in data: data_flat.append(elem.flatten()) data = data_flat data_male = [] data_female = [] for elem in zip(data, labels): if elem[1] == 1: # male data_male.append(elem[0]) else: data_female.append(elem[0]) print "Select Data Subset... (may take a while)" labels_male = np.ones(len(data_male)) labels_female = np.zeros(len(data_female)) data_male, labels_male = select_Data_Subset(data_male, labels_male, 0.1) data_female, labels_female = select_Data_Subset(data_female, labels_female, 1) print "Shapes of Data (male, female) and sum of labels", np.shape(data_male), np.shape(data_female), sum(labels) data = np.concatenate((data_male, data_female)) labels = np.concatenate((labels_male, labels_female)) print "Data Subset Selected!" dataComplete = zip(data, labels) # SHUFFLE THE IMAGES random.shuffle(dataComplete) data = [] labels = [] for elem in dataComplete: data.append(elem[0]) labels.append(elem[1]) print "Saving the Data... (may take a while)" pickle.dump(data, open(pickle_directory + "data_vf.p", "wb")) pickle.dump(labels, open(pickle_directory + "labels_vf.p", "wb")) print "Data Saved, Good to Go!" print "Shapes of Data (male, female) and Labels", np.shape(data_male), np.shape(data_female), np.shape(labels) print "Sum of labels:", sum(labels) return data, labels, rawData
def loadModel(self):
    print "Loading the unigram&bigram information......"
    inputs = open(self.path + r"bigram_feat_id.pkl", "rb")
    self.bigram_feat_id = load(inputs)
    self.bigram_feat_num = len(self.bigram_feat_id)
    inputs.close()
    inputs1 = open(self.path + r"unigram_feat_id.pkl", "rb")
    self.unigram_feat_id = load(inputs1)
    self.unigram_feat_num = len(self.unigram_feat_id)
    inputs1.close()
    inputs2 = open(self.path + r"dict_feat_id.pkl", "rb")
    self.dict_feat_id = load(inputs2)
    self.dict_feat_num = len(self.dict_feat_id)
    # print "Loading process done."
    print "Loading the prb information......"
    inputs = open(self.path + r"init_prb.pkl", "rb")
    self.init_prb = load(inputs)
    inputs.close()
    inputs1 = open(self.path + r"trans_prb.pkl", "rb")
    self.trans_prb = load(inputs1)
    inputs1.close()
    print "Loading process done."
    self.dimension = (
        self.unigram_feat_num * 5 +
        self.bigram_feat_num * 5 +
        self.dict_feat_num * 4 +
        self.type_feat_num
    )
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('in_abs', help='input abstracts file path: "../stemmed_abstracts.pickle"')
    parser.add_argument('in_wb', help='input word base file path: "../word_base.pickle"')
    parser.add_argument('out_abs', help='file path of abstracts output file: "vector_abstracts.pickle"')
    args = parser.parse_args()

    print 'loading abstracts...'
    abs_file = open(args.in_abs)
    abstracts = cPickle.load(abs_file)
    abs_file.close()

    print 'loading word_base'
    wb_file = open(args.in_wb)
    word_base = cPickle.load(wb_file)
    wb_file.close()

    vector_abstracts = abstracts_to_vector(abstracts, word_base)

    print 'persist vector abstracts'
    output_file = open(args.out_abs, 'w')
    cPickle.dump(vector_abstracts, output_file, -1)
    output_file.close()
def __init__(self, dbFilePath=None, revDbFilePath=None, pos=False):
    self.dbFilePath = dbFilePath
    self.revDbFilePath = revDbFilePath
    if pos:
        self.tagger = PerceptronTagger()
    self.pos = pos
    # try to open forward database
    if not dbFilePath:
        self.dbFilePath = os.path.join(os.path.dirname(__file__), "markovdb")
    try:
        with open(self.dbFilePath, 'rb') as dbfile:
            self.db = pickle.load(dbfile)
    except (IOError, ValueError):
        logging.warn('Database file corrupt or not found, using empty database')
        self.db = _db_factory()
    # try to open backwards database
    if not revDbFilePath:
        self.revDbFilePath = os.path.join(os.path.dirname(__file__), "revmarkovdb")
    try:
        with open(self.revDbFilePath, 'rb') as dbfile:
            self.rev_db = pickle.load(dbfile)
    except (IOError, ValueError):
        logging.warn('Database file corrupt or not found, using empty database')
        self.rev_db = _db_factory()
# -*- coding: utf-8 -*-
__author__ = 'Shane_Kao'

import cx_Oracle
import cPickle as pickle
import os

os.environ["NLS_LANG"] = ".AL32UTF8"
result = pickle.load(open("result", "r"))
dbname = 'PLMD3'
username = "******"
pwd = "lsrm"
dsn = cx_Oracle.makedsn('172.21.130.250', '1533', 'PLMD3')
db = cx_Oracle.connect(username, pwd, dsn)
cursor = db.cursor()
for i in result:
    i['text'] = i['text'].encode('utf-8')
    cursor.execute("INSERT INTO test1 VALUES (:post_id,:url,:text)", i)
db.commit()
def validate_model(mpath, rpath, data, steps=None):
    model = cPickle.load(open(mpath, 'rb'))
    predictions = model.align(data['images'], num_steps=steps, save_all=True)
    cPickle.dump(predictions, open(rpath, 'wb'), cPickle.HIGHEST_PROTOCOL)
    return predictions
def load(self, data_file):
    f = open(data_file, 'rb')
    tmp_dict = pickle.load(f)
    f.close()
    self.__dict__.update(tmp_dict)
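# A minimal companion sketch (not in the original source): since load() restores the
# instance by updating __dict__ from a pickled dict, a matching writer can simply
# pickle self.__dict__. The method name save is an assumption for illustration.
def save(self, data_file):
    f = open(data_file, 'wb')
    pickle.dump(self.__dict__, f)  # load() reads this dict back into __dict__
    f.close()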
def load_data(filename):
    with open(filename, 'rb') as f:
        data = cPickle.load(f)
    return data
import datetime

import pandas as pd
import cPickle as pickle
import patsy
import unidecode
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.feature_extraction.text import CountVectorizer

# loading data from the pickled data frame
with open('../jNotebooks/master_total_df.p', 'rb') as f:
    master_total_df = pickle.load(f)

global_start = datetime.datetime.now()
local_start = datetime.datetime.now()


def printProgress(messages):
    global global_start
    global local_start
    current = datetime.datetime.now()
    print 'task time: ', current - local_start, 'overall', current - global_start
    print '=' * 100
    print messages
    local_start = current


def uncode(x):
    # fragment: tail of a function (presumably nystrom()) that returns the embedding V
    eigs_S, U_S = np.linalg.eig(S)
    eigs_S = np.real(eigs_S)
    tmp = np.dot(np.concatenate((A, B.T), axis=0), A_neg_half)
    V = np.dot(np.dot(tmp, U_S), np.diag((eigs_S + 1e-8)**(-0.5)))
    return V


if __name__ == '__main__':
    M = 3000
    K = 4
    dataset = 'data-0.pkl.gz'
    path = '/home/bo/Data/RCV1/Processed/'
    f = gzip.open(path + dataset, 'rb')
    data = cPickle.load(f)
    f.close()

    train_x = data[0].toarray()
    train_x = train_x.astype(np.float32)
    train_y = np.asarray(data[1], dtype=np.int32)
    train_y = np.reshape(train_y, (train_y.shape[0], 1))
    dim = train_x.shape[1]
    data = np.concatenate((train_x, train_y), axis=1)
    np.random.shuffle(data)
    train_x = data[:][:, 0:dim]
    train_y = np.int32(np.squeeze(data[:][:, -1]))

    V = nystrom(train_x, M)
def load_pkl_object(filename):
    pkl_file = open(filename, 'rb')
    data = pickle.load(pkl_file)
    pkl_file.close()
    return data
                break
            except:
                print "Failed user " + user + ", retrying"
                time.sleep(4)
        for post in posts:
            url = post['href']
            user_dict[user][url] = 1.0
            all_items[url] = 1
    # Fill in missing items with 0
    for ratings in user_dict.values():
        for item in all_items:
            if item not in ratings:
                ratings[item] = 0.0


if __name__ == "__main__":
    if len(sys.argv) == 1:
        delusers = initializeUserDict('programming')
        delusers['josephmisiti'] = {}
        fillItems(delusers)
        f = open('data_top_5.dat', 'wb')
        pickle.dump(delusers, f)
        f.close()
    else:
        f = open('data_top_5.dat', 'rb')
        data = pickle.load(f)
def pickle_load(pickle_fn):
    with open(pickle_fn, 'rb') as input_file:
        A = cPickle.load(input_file)
    # print A
    return A
                    required=True,
                    dest='digitization',
                    help='digitization for gTower Et (MeV)')

# parse the arguments, throw errors if missing any
args = parser.parse_args()

startTime_wall = time.time()
startTime_processor = time.clock()

filename_id = "seed{:0.0f}_noise{:0.0f}_signal{:0.0f}_digitization{:0.0f}".format(
    args.seedEt_thresh, args.noise_filter, args.tower_thresh, args.digitization)
filename = "data/seed{:0.0f}/leading_jets_{}.pkl".format(
    args.seedEt_thresh, filename_id)
data = pickle.load(file(filename))

endTime_wall = time.time()
endTime_processor = time.clock()
print "Finished reading in data:\n\t Wall time: %0.2f s \n\t Clock Time: %0.2f s" % (
    (endTime_wall - startTime_wall), (endTime_processor - startTime_processor))

dataSetStr = plotConfigs.dataSetStr
seedCutStr = '$E_T^\mathrm{seed} >\ %d\ \mathrm{GeV}$' % args.seedEt_thresh
noiseCutStr = '$E_T^\mathrm{tower} >\ %d\ \mathrm{GeV}$' % args.noise_filter
towerThrStr = '$\\rho\left(E_T^\mathrm{tower} <\ %d\ \mathrm{GeV}\\right)$' % args.tower_thresh
helpers = PlotHelpers(dataSetStr=dataSetStr,
                      seedCutStr=seedCutStr,
                      noiseCutStr=noiseCutStr,
                      towerThrStr=towerThrStr)
import cPickle as pickle
import os
import numpy

current_data = os.listdir('data')
current_data = [data[0:-4] for data in current_data]

total = []
families = []

for family in current_data:
    file = open('data/{fam}.txt'.format(fam=family), 'rb')
    data_array = pickle.load(file)
    file.close()
    if 'eae' in family:
        families.append(data_array[0])
    total.append(data_array[0])

print(numpy.mean(total))
print(numpy.std(total))
print(numpy.mean(families))
print(numpy.std(families))
        print review
        for name, model in models.iteritems():
            print predict(model, encoding, review)[-1][0][0]
            results[name].append(predict(model, encoding, review)[-1][0][0])
    return results


if __name__ == '__main__':
    logging.debug('Loading encoding...')
    with open('data/charnet-encoding.pkl', 'rb') as fp:
        text_encoding_D = pickle.load(fp)
        text_encoding_D.include_stop_token = False
        text_encoding_D.include_start_token = False

    discriminator_0 = Sequence(Vector(len(text_encoding_D))) >> (Repeat(LSTM(1024), 2) >> Softmax(2))
    discriminator_1 = Sequence(Vector(len(text_encoding_D))) >> (Repeat(LSTM(1024), 2) >> Softmax(2))
    discriminator_2 = Sequence(Vector(len(text_encoding_D))) >> Repeat(LSTM(1024) >> Dropout(0.5), 2) >> Softmax(2)
    discriminator_3 = Sequence(Vector(len(text_encoding_D))) >> (Repeat(LSTM(1024), 2) >> Softmax(2))
    discriminator_4 = Sequence(Vector(len(text_encoding_D))) >> Repeat(LSTM(1024) >> Dropout(0.5), 2) >> Softmax(2)
    discriminator_5 = Sequence(Vector(len(text_encoding_D))) >> Repeat(LSTM(1024) >> Dropout(0.5), 2) >> Softmax(2)

    logging.debug('Loading discriminators...')
    with open('models/discriminative/discriminative-model-0.0.0.pkl', 'rb') as fp:
        state = pickle.load(fp)
        state = (state[0][0], (state[0][1], state[1]))
        discriminator_0.set_state(state)
def parse_results(openvas_results, ip=None): """ Convert the OpenVAS scan results to the GoLismero data model. :param openvas_results: OpenVAS scan results. :type openvas_results: list(OpenVASResult) :param ip: (Optional) IP address to link the vulnerabilities to. :type ip: IP | None :returns: Scan results converted to the GoLismero data model. :rtype: list(Data) """ # This is where we'll store the results. results = [] # Remember the hosts we've seen so we don't create them twice. hosts_seen = {} # Maps of OpenVAS levels to GoLismero levels. LEVELS = { 'debug': 'informational', 'log': 'informational', 'low': "low", 'medium': 'middle', 'high': "high", } RISKS = { 'none': 0, 'debug': 0, 'log': 0, 'low': 1, 'medium': 2, 'high': 3, 'critical': 4 } # Do we have the OpenVAS plugin database? if not os.path.exists(openvas_db): Logger.log_error( "OpenVAS plugin not initialized, please run setup.py") return # Load the database. with open(openvas_db, "rb") as f: use_openvas_db = Pickler.load(f) # Get the configuration. import_log = Config.audit_config.boolean( Config.plugin_args.get("import_log", "no")) import_debug = Config.audit_config.boolean( Config.plugin_args.get("import_debug", "no")) # For each OpenVAS result... for opv in openvas_results: try: # Get the host. host = opv.host # Skip if we don't have a target host. if host is None: continue # Get the threat level. threat = getattr(opv, "threat", "log").lower() # Discard log and debug entries, keep only the vulnerabilities. if threat == "log" and not import_log: continue if threat == "debug" and not import_debug: continue # Get or create the vulnerable resource. target = ip if host in hosts_seen: target = hosts_seen[host] elif not ip or ip.address != host: try: target = IP(host) except ValueError: target = Domain(host) hosts_seen[host] = target results.append(target) # Extract the relevant information from the results. nvt = opv.nvt vid = opv.id oid = int(nvt.oid.split(".")[-1]) name = getattr(nvt, "name", None) cvss_base = getattr(nvt, "cvss_base", None) level = LEVELS.get(threat, "informational") risk = RISKS.get( getattr(opv.nvt, "risk_factor", "none").lower(), 0) # Get the vulnerability description. description = opv.raw_description if not description: description = nvt.description if not description: description = nvt.summary if not description: description = None # Extract the CVEs and Bugtraq IDs. cve = nvt.cve if nvt.cve else [] if "NOCVE" in cve: cve.remove("NOCVE") bid = [] if nvt.bid: bid.extend("BID-" + x for x in nvt.bid) if nvt.bugtraq: bid.extend("BID-" + x for x in nvt.bugtraq) if "NOBID" in bid: cve.remove("NOBID") # Extract the notes and add them to the description text. if opv.notes and description is not None: description += "\n" + "\n".join( " - " + note.text for note in opv.notes ) # Extract the reference URLs from the description text. references = [] if description is not None: p = description.find("URL:") while p >= 0: p += 4 q2 = description.find("\n", p) q1 = description.find(",", p, q2) if q1 > p: q = q1 else: q = q2 if q < p: q = len(description) url = description[p:q].strip() try: url = parse_url(url).url references.append(url) except Exception: Logger.log_error(format_exc()) pass p = description.find("URL:", q) # Prepare the vulnerability properties. 
kwargs = { "title": "%s;;;%s" % (name, str(opv.port.port_name)), "description": description, "references": references, "level": level, "risk": risk, "severity": risk, "impact": risk, "cvss_base": cvss_base, "cve": cve, "bid": bid, "tool_id": "openvas_plugin_%s" % oid, "custom_id": vid, } # If we have the OpenVAS plugin database, look up the plugin ID # that reported this vulnerability and create the vulnerability # using a specific class. Otherwise use the vulnerability class # for uncategorized vulnerabilities. classname = "UncategorizedVulnerability" if oid in use_openvas_db: classname = use_openvas_db[oid][0][0] # Create the Vulnerability object. try: clazz = globals()[classname] vuln = clazz(target, **kwargs) except Exception, e: t = format_exc() Logger.log_error_more_verbose( "Could not load vulnerability of type: %s" % classname) Logger.log_error_more_verbose(t) vuln = UncategorizedVulnerability(target, **kwargs) results.append(vuln) # Skip this result on error. except Exception, e: t = format_exc() Logger.log_error_verbose( "Error parsing OpenVAS results: %s" % str(e)) Logger.log_error_more_verbose(t)
def test_SRNN(finetune_lr=0.01, pretraining_epochs=0, pretrain_lr=0.01, k=1, training_epochs=1000, # TODO 100+ dataset=DATASET, batch_size=100): """ :type learning_rate: float :param learning_rate: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type k: int :param k: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ print "loading dataset from", dataset #datasets = load_data(dataset, nframes=N_FRAMES, features='fbank', scaling='normalize', cv_frac=0.2, speakers=False, numpy_array_only=True) #datasets = load_data(dataset, nframes=N_FRAMES, features='fbank', scaling='student', cv_frac='fixed', speakers=False, numpy_array_only=True) datasets = load_data(dataset, nframes=1, features='fbank', scaling='student', cv_frac='fixed', speakers=False, numpy_array_only=True) #datasets = load_data(dataset, nframes=1, features='fbank', scaling='student', cv_frac=0.2, speakers=False, numpy_array_only=True) train_set_x, train_set_y = datasets[0] # if speakers, do test/test/test valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] print "dataset loaded!" print "train set size", train_set_x.shape[0] print "validation set size", valid_set_x.shape[0] print "test set size", test_set_x.shape[0] print "phones in train", len(set(train_set_y)) print "phones in valid", len(set(valid_set_y)) print "phones in test", len(set(test_set_y)) to_int = {} with open('timit_to_int_and_to_state_dicts_tuple.pickle') as f: # TODO to_int, _ = cPickle.load(f) train_set_iterator = DatasetSentencesIterator(train_set_x, train_set_y, to_int, N_FRAMES) valid_set_iterator = DatasetSentencesIterator(valid_set_x, valid_set_y, to_int, N_FRAMES) test_set_iterator = DatasetSentencesIterator(test_set_x, test_set_y, to_int, N_FRAMES) # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' n_outs = len(set(train_set_y)) dbn = SRNN(numpy_rng=numpy_rng, n_ins=N_FRAMES * N_FEATURES, relu_layers_sizes=[1024, 1024, 1024], n_outs=n_outs) # get the training, validation and testing function for the model print '... getting the finetuning functions' first_pass, train_fn = dbn.get_stacked_adadelta_trainer() train_scoref = dbn.score_stacked_classif(train_set_iterator) valid_scoref = dbn.score_stacked_classif(valid_set_iterator) test_scoref = dbn.score_stacked_classif(test_set_iterator) print '... finetuning the model' # early-stopping parameters patience = 1000 # look as this many examples regardless TODO patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant best_validation_error = numpy.inf test_score = 0. 
start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 avg_costs = [] for iteration, (x, y) in enumerate(train_set_iterator): #if best_validation_error < 0.5: # this is a hack: if epoch > 1: # this is a hack # TODO normally wait for total convergence and redo the # training this way (because doing 2 trainings would not # badly learn Ws and would reset Adadelta): p_y_init = numpy.zeros((x.shape[0], n_outs), dtype='float32') + 1./n_outs p_y = first_pass(x, p_y_init) if N_FRAMES_WINDOW > 0: p_y = numpy.concatenate([p_y_init[:N_FRAMES_WINDOW], p_y[:-N_FRAMES_WINDOW]]) avg_cost = train_fn(x, p_y, y) else: p_y_init = numpy.zeros((x.shape[0], n_outs), dtype='float32') avg_cost = train_fn(x, p_y_init, y) avg_costs.append(avg_cost) #print(' epoch %i, sentence %i, ' #'avg cost for this sentence %f' % \ # (epoch, iteration, avg_cost)) print(' epoch %i, avg costs %f' % \ (epoch, numpy.mean(avg_costs))) print(' epoch %i, training error %f %%' % \ (epoch, numpy.mean(train_scoref()) * 100.)) # we check the validation error on every epoch validation_errors = valid_scoref() this_validation_error = numpy.mean(validation_errors) # TODO this is a mean of means (with different lengths) print(' epoch %i, validation error %f %%' % \ (epoch, this_validation_error * 100.)) # if we got the best validation score until now if this_validation_error < best_validation_error: with open(output_file_name + '.pickle', 'w') as f: cPickle.dump(dbn, f) # improve patience if error improvement is good enough if (this_validation_error < best_validation_error * improvement_threshold): patience = max(patience, iteration * patience_increase) # save best validation score and iteration number best_validation_error = this_validation_error # test it on the test set test_errors = test_scoref() test_error = numpy.mean(test_errors) # TODO this is a mean of means (with different lengths) print((' epoch %i, test error of ' 'best model %f %%') % (epoch, test_error * 100.)) if patience <= iteration: # TODO correct that done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f %%, ' 'with test performance %f %%') % (best_validation_error * 100., test_score * 100.)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '.pickle', 'w') as f: cPickle.dump(dbn, f)
def main(args): logging.basicConfig( level=logging.DEBUG, format="%(asctime)s: %(name)s: %(levelname)s: %(message)s") state = eval(args.prototype)() timings = init_timings() # Load dictionary raw_dict = cPickle.load(open(state['dictionary'], 'r')) # Dictionaries to convert str to idx and vice-versa str_to_idx = dict([(tok, tok_id) for tok, tok_id, _, _ in raw_dict ]) #字典里的每一项包含四个字段,(字符,字符号,词频,文本频率) idx_to_str = dict([(tok_id, tok) for tok, tok_id, freq, _ in raw_dict]) category = cPickle.load(open(state['category'], 'r')) assert (len(category) == state['cnum']) model = DocumentEncoder(state) rng = model.rng model.state['run_id'] = RUN_ID logger.debug("Training using exact log-likelihood") train_batch = model.build_train_function() #训练函数,返回三个量,第一个是training_cost eval_batch = model.build_eval_function() #测试(验证)函数 logger.debug("Load data") train_data, \ valid_data, = get_train_iterator(state) train_data.start() # Start looping through the dataset step = 0 patience = state['patience'] start_time = time.time() train_cost = 0 train_variational_cost = 0 train_posterior_mean_variance = 0 train_misclass = 0 train_done = 0 train_dialogues_done = 0.0 prev_train_cost = 0 prev_train_done = 0 ex_done = 0 is_end_of_batch = True start_validation = False batch = None while (step < state['loop_iters'] and (time.time() - start_time) / 60. < state['time_stop'] and patience >= 0): # Training phase # If we are training on a primary and secondary dataset, sample at random from either of them batch = train_data.next() # Train finished if not batch: # Restart training logger.debug("Got None...") break logger.debug("[TRAIN_%d] - Got batch %d,%d" % (step, batch['x'].shape[1], batch['max_length'])) if batch['max_length'] == state['max_grad_steps']: continue x_data = batch['x'] #print 'x_data:\t',x_data x_data_reversed = batch['x_reversed'] max_length = batch['max_length'] x_cost_mask = batch['x_mask'] x_semantic = batch['x_semantic'] x_reset = batch['x_reset'] ran_cost_utterance = batch['ran_var_constutterance'] is_end_of_batch = False if numpy.sum(numpy.abs(x_reset)) < 1: #print 'END-OF-BATCH EXAMPLE!' is_end_of_batch = True idx_s = (x_data == 2).nonzero()[0][0] if x_data[1:idx_s].shape[0] < 2: continue c, variational_cost, posterior_mean_variance = train_batch( x_data, max_length) if numpy.isinf(c) or numpy.isnan(c): logger.warn("Got NaN cost .. skipping") gc.collect() continue train_cost += c train_variational_cost += variational_cost train_posterior_mean_variance += posterior_mean_variance train_done += batch['num_dialogues'] train_dialogues_done += batch['num_dialogues'] this_time = time.time() if step % state['train_freq'] == 0: elapsed = this_time - start_time # Keep track of training cost for the last 'train_freq' batches. current_train_cost = train_cost / train_done if prev_train_done >= 1: current_train_cost = float( train_cost - prev_train_cost) / float(train_done - prev_train_done) prev_train_cost = train_cost prev_train_done = train_done h, m, s = ConvertTimedelta(this_time - start_time) print ".. 
%.2d:%.2d:%.2d %4d mb # %d bs %d maxl %d acc_cost = %.4f" % (h, m, s,\ state['time_stop'] - (time.time() - start_time)/60.,\ step, \ batch['x'].shape[1], \ batch['max_length'], \ float(train_cost/train_done)) if valid_data is not None and\ step % state['valid_freq'] == 0 and step > 1: start_validation = True if start_validation and is_end_of_batch: start_validation = False valid_data.start() valid_cost = 0 valid_variational_cost = 0 valid_posterior_mean_variance = 0 valid_wordpreds_done = 0 valid_dialogues_done = 0 logger.debug("[VALIDATION START]") fw_valid = open('_VALID__%d.txt' % step, 'w') while True: batch = valid_data.next() # Train finished if not batch: break logger.debug("[VALID] - Got batch %d,%d" % (batch['x'].shape[1], batch['max_length'])) if batch['max_length'] == state['max_grad_steps']: continue x_data = batch['x'] x_data_reversed = batch['x_reversed'] max_length = batch['max_length'] x_cost_mask = batch['x_mask'] x_semantic = batch['x_semantic'] x_semantic_nonempty_indices = numpy.where(x_semantic >= 0) x_reset = batch['x_reset'] ran_cost_utterance = batch['ran_var_constutterance'] #print ' '.join([idx_to_str[id_of_w] for id_of_w in x_data.T.tolist()[0]]) idx_s = (x_data == 2).nonzero()[0][0] if x_data[1:idx_s].shape[0] < 2: continue c, c_list, variational_cost, posterior_mean_variance, Gen_pro, Tar_Y = eval_batch( x_data, max_length) if numpy.isinf(c) or numpy.isnan(c): continue valid_cost += c valid_variational_cost += variational_cost valid_posterior_mean_variance += posterior_mean_variance print 'valid_cost', valid_cost #print 'Original: ', ' '.join([idx_to_str[id_of_w] for id_of_w in list(Tar_Y.T)[0]]) #'',join([idx_to_str[id_of_w] for id_of_w in Tar_Y]) fw_valid.write('Label: ' + ' '.join( [category[id_of_w] for id_of_w in list(Tar_Y.T)[0]]) + '\r\n') Gen_pro = Gen_pro.tolist()[0] enum_ = enumerate(Gen_pro) Gen_sort = sorted(enum_, key=lambda x: x[1], reverse=True)[:30] Gen_tar = [i[0] for i in Gen_sort] #print 'Generations: ', ' '.join([idx_to_str[id_of_w] for id_of_w in Gen_tar]) fw_valid.write( 'Predict: ' + ' '.join([category[id_of_w] for id_of_w in Gen_tar]) + '\r\n') #print 'valid_variational_cost', valid_variational_cost #print 'posterior_mean_variance', posterior_mean_variance valid_wordpreds_done += batch['num_preds'] valid_dialogues_done += batch['num_dialogues'] logger.debug("[VALIDATION END]") fw_valid.close() valid_cost /= valid_wordpreds_done valid_variational_cost /= valid_wordpreds_done valid_posterior_mean_variance /= valid_dialogues_done if len(timings["valid_cost"]) == 0 or valid_cost < numpy.min( timings["valid_cost"]): patience = state['patience'] # Saving model if decrease in validation cost save(model, timings) print 'best valid_cost', valid_cost elif valid_cost >= timings["valid_cost"][-1] * state[ 'cost_threshold']: patience -= 1 save(model, timings, '_' + str(step) + '_') print "** valid cost (NLL) = %.4f, valid word-perplexity = %.4f, valid variational cost (per word) = %.8f, valid mean posterior variance (per word) = %.8f, patience = %d" % ( float(valid_cost), float( math.exp(valid_cost)), float(valid_variational_cost), float(valid_posterior_mean_variance), patience) timings["train_cost"].append(train_cost / train_done) timings["train_variational_cost"].append(train_variational_cost / train_done) timings["train_posterior_mean_variance"].append( train_posterior_mean_variance / train_dialogues_done) timings["valid_cost"].append(valid_cost) timings["valid_variational_cost"].append(valid_variational_cost) 
timings["valid_posterior_mean_variance"].append( valid_posterior_mean_variance) # Reset train cost, train misclass and train done train_cost = 0 train_done = 0 prev_train_cost = 0 prev_train_done = 0 step += 1 logger.debug("All done, exiting...")
def do_load(self, arg):
    arg = arg.strip()
    if not len(arg):
        self.help_load()
        return
    self.db = pickle.load(file(arg, 'rb'))
    cPickle.dump((mesh_terms_test, mesh_terms_pred), open('predictions_decoder.pkl', 'w'))
    return metrics


X_train, Y_train, X_test, Y_test, X_val, Y_val, num_english_words, word2index = get_data()
X_train = [X_train[i] for i in xrange(20000)]
Y_train = [Y_train[i] for i in xrange(20000)]

word_embeddings = re.read_word_embeddings(word2index)
node_embeddings = re.read_node_embeddings()

# code_2_index, index_2_code = compute_vocab_target(Y_train+Y_test+Y_val)
# code_2_index['SSOS'] = len(code_2_index.keys())
# index_2_code = index_2_code + ['SSOS']

code_2_index = cPickle.load(open('code_2_index.pkl', 'r'))
index_2_code = cPickle.load(open('index_2_code.pkl', 'r'))
seq2mesh = cPickle.load(open('seq_to_mesh.pkl', 'r'))

Y_train = get_y_index_sequences(Y_train)
Y_test = get_y_index_sequences(Y_test)
Y_val = get_y_index_sequences(Y_val)

# X_test = X_val[0:]
# Y_test = Y_val[0:]

# reduce size of validation set
X_val = [X_val[i] for i in xrange(2000)]
Y_val = [Y_val[i] for i in xrange(2000)]
                  action='store_true',
                  help='Get theoretical PSDs')
parser.add_option('--do_expected_CSDs',
                  action='store_true',
                  help='Get theoretical CSDs')

(options, args) = parser.parse_args()

if len(args) < 1:
    parser.error(
        'You must specify at least SETUP_SIMULATE_EXPECTED_SIGNAL_PSD_OR_CSD.PKL!'
    )

file = open(args[0], 'rb')
setup = cpkl.load(file)
file.close()

workdir = os.getcwd() + '/'

Nb = setup['number of batches']
Ndays = len(setup['days'])

if Ndays % Nb == 0:
    Ndb = Ndays / Nb
    days_batches = [setup['days'][b * Ndb:(b + 1) * Ndb] for b in range(Nb)]
elif Ndays % Nb > 0:
    Ndb = Ndays / (Nb - 1)
    Ndbl = Ndays % (Nb - 1)
    days_batches = [
        setup['days'][b * Ndb:(b + 1) * Ndb] for b in range(Nb - 1)
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
#
# genMetadataXML.py
#
# Created by Alvin on 10/05/2010.
#

from __future__ import with_statement
import sys
import cPickle as pickle

usage = "genMetadataXML.py JOB_REPORT_PICKLE [--new|--old]"

if len(sys.argv) < 2:
    print usage
    sys.exit(1)

with open(sys.argv[1]) as f:
    r = pickle.load(f)

try:
    optParam = sys.argv[2]
except IndexError:
    optParam = '--old'

if optParam == '--new':
    r.writeMetaDataXML_new()
elif optParam == '--old':
    r.writeMetaDataXML_old()
else:
    print usage
    sys.exit(1)

sys.exit(0)
def load_model():
    encoder, decoder = cPickle.load(open('trained_model.pkl', 'r'))
    encoder = encoder.cuda()
    decoder = decoder.cuda()
    return encoder, decoder
def voc_eval(detpath, annopath, imagesetfile, classname, cachedir, ovthresh=0.5, use_07_metric=False): """rec, prec, ap = voc_eval(detpath, annopath, imagesetfile, classname, [ovthresh], [use_07_metric]) Top level function that does the PASCAL VOC evaluation. detpath: Path to detections detpath.format(classname) should produce the detection results file. annopath: Path to annotations annopath.format(imagename) should be the xml annotations file. imagesetfile: Text file containing the list of images, one image per line. classname: Category name (duh) cachedir: Directory for caching the annotations [ovthresh]: Overlap threshold (default = 0.5) [use_07_metric]: Whether to use VOC07's 11 point AP computation (default False) """ # assumes detections are in detpath.format(classname) # assumes annotations are in annopath.format(imagename) # assumes imagesetfile is a text file with each line an image name # cachedir caches the annotations in a pickle file # first load gt if not os.path.isdir(cachedir): os.mkdir(cachedir) cachefile = os.path.join(cachedir, 'annots.pkl') # read list of images with open(imagesetfile, 'r') as f: lines = f.readlines() imagenames = [x.strip() for x in lines] if not os.path.isfile(cachefile): # load annots recs = {} for i, imagename in enumerate(imagenames): recs[imagename] = parse_rec(annopath.format(imagename)) if i % 100 == 0: print 'Reading annotation for {:d}/{:d}'.format( i + 1, len(imagenames)) # save print 'Saving cached annotations to {:s}'.format(cachefile) with open(cachefile, 'w') as f: cPickle.dump(recs, f) else: # load with open(cachefile, 'r') as f: recs = cPickle.load(f) # extract gt objects for this class class_recs = {} npos = 0 for imagename in imagenames: R = [obj for obj in recs[imagename] if obj['name'] == classname] bbox = np.array([x['bbox'] for x in R]) difficult = np.array([x['difficult'] for x in R]).astype(np.bool) det = [False] * len(R) npos = npos + sum(~difficult) class_recs[imagename] = { 'bbox': bbox, 'difficult': difficult, 'det': det } # read dets detfile = detpath.format(classname) with open(detfile, 'r') as f: lines = f.readlines() if any(lines) == 1: splitlines = [x.strip().split(' ') for x in lines] image_ids = [x[0] for x in splitlines] confidence = np.array([float(x[1]) for x in splitlines]) BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) # sort by confidence sorted_ind = np.argsort(-confidence) sorted_scores = np.sort(-confidence) BB = BB[sorted_ind, :] image_ids = [image_ids[x] for x in sorted_ind] # go down dets and mark TPs and FPs nd = len(image_ids) tp = np.zeros(nd) fp = np.zeros(nd) for d in range(nd): R = class_recs[image_ids[d]] bb = BB[d, :].astype(float) ovmax = -np.inf BBGT = R['bbox'].astype(float) if BBGT.size > 0: # compute overlaps # intersection ixmin = np.maximum(BBGT[:, 0], bb[0]) iymin = np.maximum(BBGT[:, 1], bb[1]) ixmax = np.minimum(BBGT[:, 2], bb[2]) iymax = np.minimum(BBGT[:, 3], bb[3]) iw = np.maximum(ixmax - ixmin + 1., 0.) ih = np.maximum(iymax - iymin + 1., 0.) inters = iw * ih # union uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) overlaps = inters / uni ovmax = np.max(overlaps) jmax = np.argmax(overlaps) if ovmax > ovthresh: if not R['difficult'][jmax]: if not R['det'][jmax]: tp[d] = 1. R['det'][jmax] = 1 else: fp[d] = 1. else: fp[d] = 1. 
# compute precision recall fp = np.cumsum(fp) tp = np.cumsum(tp) rec = tp / float(npos) # avoid divide by zero in case the first detection matches a difficult # ground truth prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) ap = voc_ap(rec, prec, use_07_metric) else: # rec = -1 # prec = -1 # ap = -1 rec = 0 prec = 0 ap = 0 return rec, prec, ap
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = cPickle.load(fo)
    return dict
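# Brief usage sketch (not from the original source): unpickle returns whatever
# dictionary was stored in the given file. The filename below is purely hypothetical.
batch = unpickle('data_batch_1')  # hypothetical pickle file containing a dict
print batch.keys()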
def load_param(self, fname):
    with open(fname, "r") as fid:
        params = pickle.load(fid)
    return params
def query_page_folder_phoc(queries, folder, threshold, dont_load=False, use_gt_phoc=False, filter_small=False): """ Evaluate folder of predictions given queries and threshold """ # Dicts of query words all_dists = {} all_matches = {} all_relevants = {} cache_name = 'queries_%d.pkl' % (threshold*100) c = 0 load_time = Timer() qtime = Timer() all_phocs, _ = phoc_letters_and_digits(queries) if (Path(folder) / cache_name).exists() and not dont_load: all_dists, all_matches = pklRick.load((Path(folder) / cache_name).open('rb')) else: print ('No chache found caching to %s' % cache_name) # Run over all predicted pages for page in Path(folder).glob('**/*.json'): load_time.tic() try: page_dict = json.load(page.open('rb')) except ValueError: print ('Somethin worng in %s lets see if we can go on' % page.stem) c += 1 qtime.tic() # Run all queries per page for p, query in enumerate(queries): dists, matches, word_idx, words_in_page = query_page_phoc(query, page_dict, threshold=threshold, phoc=all_phocs[p, :], use_gt_phoc=use_gt_phoc, filter_small=filter_small) tmp_dist = all_dists.get(query, []) tmp_dist.extend(dists) all_dists[query] = tmp_dist tmp_match = all_matches.get(query, []) tmp_match.extend(matches) all_matches[query] = tmp_match tmp_match = all_relevants.get(query, []) tmp_match.append(words_in_page) all_relevants[query] = tmp_match # Cache mAP base data for fast reproduction of evaluation pklRick.dump((all_dists, all_matches), (Path(folder) / cache_name).open('wb')) mAP = 0 recall = 0 accuracy = 0 n = 0 for query in queries: # Per query evaluation AP, rec, acc = _map_and_recall(all_dists[query], all_matches[query], all_relevants[query]) if AP is None or rec is None or acc is None: continue # Running means mAP = (1 / float(n+1))*AP + (float(n) / (n+1))*mAP recall = (1 / float(n+1))*rec + (float(n) / (n+1))*recall accuracy = (1 / float(n+1))*acc + (float(n) / (n+1))*accuracy n += 1 return mAP, recall, accuracy
import sys
import os

import numpy as np
from scipy.io import wavfile
import cPickle as pickle
from lasagne.updates import *

if __name__ == "__main__":
    # e.g. 1000_60sec.pkl
    in_pkl = sys.argv[1]
    out_pkl = sys.argv[2]
    if ".pkl" in in_pkl:
        with open(in_pkl) as f:
            dat = pickle.load(f)
        X_train, X_valid, X_test = dat[0]
    else:
        ctr = np.load(in_pkl)
        X_train, X_valid, X_test = ctr["arr_0"], ctr["arr_1"], ctr["arr_2"]
    sys.stderr.write("X_train shape = %s\n" % str(X_train.shape))
    sys.stderr.write("X_valid shape = %s\n" % str(X_valid.shape))
    sys.stderr.write("X_test shape = %s\n" % str(X_test.shape))

    args = dict()
    args["seed"] = 0
    args["batch_size"] = 128
    args["learning_rate"] = 0.01
    args["momentum"] = 0.9
    args["num_epochs"] = 4000
            bestmatch[1] = len(ref)
        r += bestmatch[1]
        c += len(hyp)
    # computing bleu score
    p0 = 1e-7
    bp = 1 if c > r else math.exp(1 - float(r) / float(c))
    p_ns = [float(clip_count[i]) / float(count[i] + p0) + p0 \
            for i in range(4)]
    s = math.fsum(w * math.log(p_n) \
                  for w, p_n in zip(weights, p_ns) if p_n)
    bleu = bp * math.exp(s)
    return bleu


data = pickle.load(open("needed.p"))
vocab = json.load(open("./vocab.json"))
outs = []
golds = []

domain_wise = {}
for domain in ['schedule', 'navigate', 'weather']:
    domain_wise[domain] = {}
    domain_wise[domain]['tp_prec'] = 0.0
    domain_wise[domain]['tp_recall'] = 0.0
    domain_wise[domain]['total_prec'] = 0.0
    domain_wise[domain]['total_recall'] = 0.0
    domain_wise[domain]['gold'] = []
    domain_wise[domain]['output'] = []

tp_prec = 0.0
tp_recall = 0.0
def __init__(self):
    with open('drugbank.pck', 'rb') as f:
        self.data = pickle.load(f)
        self.description = pickle.load(f)
class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))


rnn_size = 256

# load data from pickle
f = open('data.pkl', 'r')
classes = cPickle.load(f)
chars = cPickle.load(f)
char_indices = cPickle.load(f)
indices_char = cPickle.load(f)
maxlen = cPickle.load(f)
step = cPickle.load(f)
X_ind = cPickle.load(f)
y_ind = cPickle.load(f)
f.close()

[s1, s2] = X_ind.shape
X = np.zeros((s1, s2, len(chars)), dtype=np.bool)
def load_dict_data(self):
    """ Load the cached prefix-tree dictionary. """
    if os.path.exists(self.data_path):
        self.root = cPickle.load(open(self.data_path, "r"))
def process_uniprot_ids(comm, uniprot_processor_num, num_tree_servers, num_uniprot_processors, all_uniprot_ids): trees_of_uniprot_id = {} uniprot_ids_in_tree = {} uniprot_row_info = numpy.zeros(3, dtype='i') status = MPI.Status() while True: comm.Recv([uniprot_row_info, MPI.INT], source=0, tag=MPI.ANY_TAG, status = status) if status.Get_tag() == TAG_DATABASE_DONE: break else: tree_id = uniprot_row_info[0] left_id = uniprot_row_info[1] uniprot_id = uniprot_row_info[2] try: trees_of_uniprot_id[uniprot_id].add(tree_id) except KeyError: trees_of_uniprot_id[uniprot_id] = set([tree_id]) try: uniprot_ids_in_tree[tree_id].add(uniprot_id) except KeyError: uniprot_ids_in_tree[tree_id] = set([uniprot_id]) comm.Barrier() ortholog_request = numpy.zeros(2, dtype='i') uniprot_id_array = numpy.zeros(5000, dtype='i') phogs_supporting_orthology = numpy.zeros(5000, dtype='i') thresholds_of_orthology = numpy.zeros(5000, dtype='d') dir = '/clusterfs/ohana/external/genomes/QuestForOrthologs/Release5/' f = open(os.path.join(dir, "info_of_uniprot_accession.pkl")) info_of_uniprot_accession = cPickle.load(f) f.close() f = open(os.path.join(dir, "uniprot_accessions_of_uniprot_id.pkl")) uniprot_accessions_of_uniprot_id = cPickle.load(f) f.close() f = open(os.path.join('/clusterfs/vasudha/bpg/OrthologsForQuest/', 'OrthologsIn13ReferenceProteomes_%d_of_%d' % (uniprot_processor_num, num_uniprot_processors)), "w") base_tree_server_id = 1 def write_uniprot_id(uniprot_id): uniprot_accessions = uniprot_accessions_of_uniprot_id[uniprot_id] f.write("%d (%s)" % (uniprot_id, ','.join(["%s:%s" % (accession, info_of_uniprot_accession[accession]['taxon']) for accession in uniprot_accessions_of_uniprot_id[uniprot_id]]))) print "UniProt processor %d writing %d uniprot_ids" \ % (uniprot_processor_num, len(trees_of_uniprot_id)) t1 = MPI.Wtime() for uniprot_id in trees_of_uniprot_id: orthologs = {} for tree_id in trees_of_uniprot_id[uniprot_id]: tree_server_num = base_tree_server_id + tree_id % num_tree_servers ortholog_request[0] = tree_id ortholog_request[1] = uniprot_id comm.Send([ortholog_request, MPI.INT], dest = tree_server_num, tag = TAG_ORTHOLOG_REQUEST) comm.Recv([uniprot_id_array, MPI.INT], source = tree_server_num, tag = TAG_ORTHOLOG_RESPONSE, status = status) num_uniprot_ids = status.Get_count(datatype = MPI.INT) comm.Recv([phogs_supporting_orthology, MPI.INT], source = tree_server_num, tag = TAG_ORTHOLOG_RESPONSE) comm.Recv([thresholds_of_orthology, MPI.DOUBLE_PRECISION], source = tree_server_num, tag = TAG_ORTHOLOG_RESPONSE) for i in range(num_uniprot_ids): try: orthologs[uniprot_id_array[i]].add( (tree_id, phogs_supporting_orthology[i], thresholds_of_orthology[i])) except KeyError: orthologs[uniprot_id_array[i]] = set([ (tree_id, phogs_supporting_orthology[i], thresholds_of_orthology[i])]) write_uniprot_id(uniprot_id) f.write(": ") for ortholog in orthologs.keys(): f.write("{") write_uniprot_id(ortholog) f.write(" <= ") for tree_id, left_id, threshold in orthologs[ortholog]: f.write("(PHOG%07d_%05d, %f)," % (tree_id, left_id, threshold)) f.write("};") f.write("\n") f.close() t2 = MPI.Wtime() print "UniProt processor %d wrote all uniprot_ids in %g secs" \ % (uniprot_processor_num, t2-t1) comm.Send([MPI.BOTTOM, MPI.INT], dest = 0, tag = TAG_ORTHOLOG_DONE)