def katz_similarity(self, t):
    # Katz similarity for the node pair t = (i, j): sum over path lengths
    # l = 1..self.maxl of beta**l * (#paths of length l from i to j),
    # counting paths by repeated sparse row-vector × adjacency multiplication.
    # Side effects: appends (i, j, score) to self.katz_scores, marks the pair
    # in self.done, and periodically checkpoints scores to disk.
    # NOTE(review): layout reconstructed from a collapsed source line; the
    # checkpoint statements at the bottom are assumed to sit inside the
    # periodic progress guard — confirm against the original file.
    i, j = t[0], t[1]
    l = 1  # current path length
    neighbors = self.Graph[i]  # sparse row: #paths of length l from i to each node
    score = 0.0
    while l <= self.maxl:
        numberOfPaths = neighbors[0, j]  # neighbors.count(j)
        if numberOfPaths > 0:
            score += (self.beta ** l) * numberOfPaths
        l += 1
        if l <= self.maxl:
            # Advance one step: paths of length l from i = sum over current
            # endpoints k of (#paths i->k) * adjacency row of k.
            neighborsForNextLoop = csr_matrix((1, self.G.number_of_nodes()), dtype=np.uint16)
            for k in neighbors.nonzero()[1]:
                neighborsForNextLoop += (neighbors[0, k] * self.Graph[k])
            neighbors = neighborsForNextLoop
    self.katz_scores.append((i, j, score))
    self.done.add((i,j))
    # Periodic progress report + checkpoint of all scores accumulated so far.
    if i % 10 == 0 and j % 100 == 0:
        print(i, j)
        start_time = time.perf_counter()  # your code
        pickle_object(self.f_name+'matrix.p',self.katz_scores)
        elapsed_time = time.perf_counter() - start_time
        print('left ',len(self.edges) - len(self.katz_scores),'edges')
        print('it took in seconds,', elapsed_time)
def get_normalized_feature_dict(self):
    """Return the normalized graph-feature dict, using a pickle cache.

    Tries to unpickle from ``self.file_name_pickle``; on any failure the
    features are recomputed (create -> normalize -> convert to dict),
    cached back to the same pickle path, and returned.
    """
    try:
        return unpickle_object(self.file_name_pickle)
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
    # are no longer swallowed; any real unpickle failure still recomputes.
    except Exception:
        print("can't unpickle features, calculating")
        result = self.convert_features_to_dict(
            self.normalize_features(self.create_graph_features()))
        pickle_object(self.file_name_pickle, result)
        return result
def set_credentials(self, user, password, store=False):
    """Record the user/password pair in the in-memory config.

    When ``store`` is true, an encrypted copy of both values is also
    persisted to ``self.storage`` via pickle.
    """
    self.config["user"] = user
    self.config["passwd"] = password
    if store:
        encrypted = {
            "user": encrypt_string(user),
            "passwd": encrypt_string(password),
        }
        pickle_object(encrypted, self.storage)
def execute_jobs(self, append_query=None):
    """Execute every open job; jobs that raise are kept for a later retry.

    Parameters
    ----------
    append_query : optional
        Extra job to run in addition to the persisted open jobs.

    The surviving (failed) jobs are re-persisted, encrypted, to
    ``self.open_jobs``.
    """
    jobs = self.get_open_jobs()
    if append_query is not None:  # was `not append_query == None`
        # Bug fix: the original appended the undefined name `query`,
        # which raised NameError whenever append_query was supplied.
        jobs.append(append_query)
    # Bug fix: the original did `del jobs[i]` while iterating `jobs`,
    # which both skips elements and deletes wrong indices as the list
    # shrinks. Build the list of survivors instead.
    failed = []
    for job in jobs:
        try:
            self.execute_query(job)
        except Exception as e:
            print(e)
            failed.append(job)  # keep for the next run
    pickle_object([encrypt_string(j) for j in failed], self.open_jobs)
def read_release(self, version):
    """Return a ``drug_data_reader`` for *version*, preferring the pickle cache.

    When ``self.force_read`` is set, or unpickling fails, the release file
    is parsed from scratch and the resulting reader is cached.
    """
    print('reading release...')
    version = self.normalize_version(version)
    file_reader_pickle_path = self.get_file_reaeder_pickle_for_version(version)
    try:
        if self.force_read:
            raise ValueError('Forcing read')
        drug_reader = unpickle_object(file_reader_pickle_path)
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
    # are no longer swallowed; the deliberate ValueError above and any
    # unpickle failure both fall through to a full re-read.
    except Exception:
        print('failed to unpickle')
        release_path = self.get_relese_path(version)
        drug_reader = drug_data_reader(release_path)
        drug_reader.read_data_from_file()
        pickle_object(file_reader_pickle_path, drug_reader)
    # drug_id_to_name = drug_reader.drug_id_to_name
    return drug_reader
def preproc_release(self, drug_reader, version):
    """Return the preprocessed interactions for *version*, preferring the cache.

    When ``self.force_read`` is set, or unpickling fails, preprocessing is
    rerun from ``drug_reader`` and the result is cached.
    """
    print('postprocessing release...')
    version = self.normalize_version(version)
    preproc_pickle_path = self.get_preproc_pickle_for_version(version)
    try:
        if self.force_read:
            raise ValueError('Forcing read')
        preproc = unpickle_object(preproc_pickle_path)
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
    # are no longer swallowed; any failure triggers a full recompute.
    except Exception:
        print('failed to unpickle')
        print('num all drugs in reader:', len(drug_reader.all_drugs))
        preproc = drugs_preproc(
            drug_reader.drug_to_interactions, drug_reader.all_drugs)
        preproc.calc_valid_drugs_print_summary()
        preproc.create_valid_drug_interactions()
        pickle_object(preproc_pickle_path, preproc)
    return preproc
def _param_run(
        self, param_set: ParamSet) -> Tuple[ExperimentResults, RunnerUUID]:
    """Run a single parameter set, reusing cached results when they exist.

    The param set is hashed into a uuid; if results for that uuid are
    already pickled they are loaded, otherwise ``train_kd`` is run and
    its results are pickled for next time.
    """
    log(f'Running param set: {param_set}')
    uuid = hash_dict(param_set)
    results_path = self._file_path_experiment_results(uuid)
    if not self._experiment_result_exists(uuid):
        log(f'Running uuid {uuid}')
        experiment_results = train_kd(**param_set)
        pickle_object(experiment_results, results_path)
    else:
        log('Loading experiment results from cache')
        log(uuid)
        experiment_results = unpickle(results_path)
    return experiment_results, uuid
def _serialize(obj): ''' Serializes an arbitrary object for transfer Parameters ---------- obj : `obj` object to be serialized for transfer Returns ------- pickled_object : `lasso.ansa.rpc.PickledObject` protobuf serialized message Notes ----- Converts any ansa entities to remote placeholders. ''' # first convert ansa entites to fake entities if isinstance(obj, ansa.base.Entity): obj = _serialize_ansa_entity(obj) elif isinstance(obj, list): obj = [ _serialize_ansa_entity(entry) if isinstance( entry, ansa.base.Entity) else entry for entry in obj ] elif isinstance(obj, tuple): obj = tuple( _serialize_ansa_entity(entry) if isinstance( entry, ansa.base.Entity) else entry for entry in obj) elif isinstance(obj, dict): obj = { _serialize_ansa_entity(key) if isinstance(key, Entity) else key: _serialize_ansa_entity(value) if isinstance(value, Entity) else value for key, value in obj.items() } # then we pickle everything return AnsaGRPC_pb2.PickledObject(data=pickle_object(obj))
import argparse

parser = argparse.ArgumentParser()
# Bug fix: without type=int a command-line value arrives as a string and
# `i % args.save_batch_every` below raises TypeError.
parser.add_argument("--save_batch_every", type=int, default=1)
args = parser.parse_args()

if __name__ == "__main__":
    # Fit a Keras tokenizer over the corpus in batches, checkpointing the
    # tokenizer pickle every `save_batch_every` batches.
    params = Params('params.json')
    data = Corpus(params)
    iterator = iter(data.dataset)
    tokenizer = tf.keras.preprocessing.text.Tokenizer(
        filters='"()*,-/;[\]^_`{|}~', oov_token='UNK', char_level=False)
    necessary_its = (48000 // params.batch_size)
    print("Batch iterations: %d" % necessary_its)
    i = 0
    print("\nTokenizer saved as %s" % params.tokenizer_file)
    while i <= necessary_its:
        made_progress = False
        for batch, _ in tqdm(iterator):
            made_progress = True
            batch = np.char.decode(batch.numpy().astype(np.bytes_), 'UTF-8')
            tokenizer.fit_on_texts(batch)
            if i % args.save_batch_every == 0:
                pickle_object(tokenizer, params.tokenizer_file)
            i += 1
            if necessary_its // i == 2:
                print("HALFWAY DONE")
    # Bug fix: if the dataset iterator exhausts before necessary_its is
    # reached, the original while-loop spun forever over an empty iterator.
        if not made_progress:
            break
    print("DONE BUILDING TOKENIZER")
import utils

# Parse the raw daily gold CSV (index, date, price) into a DataFrame,
# plot it, and pickle it for downstream use.
fname = "../raw/gold_daily.csv"
dates = []
golds = []
# Fix: use a context manager so the file handle is always closed
# (the original opened the file and never closed it).
with open(fname, 'r') as f:
    for line in f:
        content = line.split(',')
        idx = content[0]
        if len(idx) == 0:
            # rows with an empty index column carry no data
            continue
        date = content[1]
        gold = float(content[2])
        dates.append(date)
        golds.append(gold)
df = utils.make_as_pandas_df(dates, content_name='Gold', content_list=golds)
utils.standard_plot(df)
utils.pickle_object(df, "../data/gold_daily.pkl")

if __name__ == "__main__":
    pass
#print(curr_vol) sign = np.sign(curr_vol) #print(sign) if sign == 1: count_plus += 1 elif sign == -1: count_minus += 1 elif sign == 0: count_zero += 1 dates.append(curr_date) signs.append(sign) df_sign = utils.make_as_pandas_df(dates_list=dates, content_list=signs, content_name="sign_of_volatility") utils.pickle_object(df_sign, "volatility_data/sign_daily_gold.pkl") utils.standard_plot(df_sign[:100], column_name="sign_of_volatility", scatter=True) print("count_plus: {}, count_minus: {}, conut_zero: {}".format( count_plus, count_minus, count_zero)) if __name__ == "__main__": pass
"{}-{}-{}?" \ "access_key={}&" \ "symbols={}&" \ "format=1".\ format(year, month, day, access_key, currencies) request = json.loads(requests.get(query).text) count += 1 if 'rates' not in request.keys(): print("NULL") continue usd = request['rates']['USD'] krw = request['rates']['KRW'] usd_over_krw = usd / krw krw_over_usd = krw / usd item = dict() item[date] = krw_over_usd rates.append(item) print(date, " : ", krw_over_usd) if date == "1997-12-31": pickle_object(rates, 'KRW_USD_1997.pkl') if __name__ == "__main__": pass
import numpy as np  # fix: np.array is used below but numpy was never imported
import pandas as pd

import utils

# Parse the raw monthly gold CSV (date, price) into a typed DataFrame,
# pickle it, then reload and plot it as a sanity check.
filename = "monthly"
dates = []
prices = []
# Fix: context manager — the original opened the file and never closed it.
with open("raw/{}.csv".format(filename), "r") as f:
    for line in f:
        date, price = line.split(',')
        dates.append(date)
        prices.append(price)
dates = np.array(dates, dtype='datetime64[M]')
prices = np.array(prices, dtype='float64')
data = {'Date': dates, 'Gold': prices}
df = pd.DataFrame(data)
utils.pickle_object(df, "data/Gold_{}.pkl".format(filename))
# Fix: reload the exact path just written. The original loaded
# "data/gold_monthly.pkl", which differs in case from the saved
# "data/Gold_monthly.pkl" and fails on case-sensitive filesystems.
data_path = "data/Gold_{}.pkl".format(filename)
df = utils.load_pickle(data_path)
utils.standard_plot(df)

if __name__ == "__main__":
    pass
# Parse a tab-separated CPI table (year followed by 12 monthly values)
# into a DataFrame of month-start dates and CPI readings, plot, and pickle.
filename = "cpi_monthly"
dates = []
cpis = []
# Fix: context manager — the original opened the file and never closed it.
with open("../raw/{}.txt".format(filename), "r") as f:
    lines = f.readlines()
for line in lines:
    split = line.split('\t')
    try:
        year = int(split[0])
    except ValueError:  # get rid of str column data
        continue
    print(split)
    # columns 1..12 are January..December of `year`
    for month, cpi in enumerate(split[1:13]):
        date = datetime.datetime(year, month+1, 1, 0, 0)
        dates.append(date)
        cpis.append(float(cpi))
df = utils.make_as_pandas_df(dates_list=dates, content_list=cpis, content_name='CPI')
utils.standard_plot(df, column_name='CPI')
utils.pickle_object(df, "../data/cpi_monthly.pkl")

if __name__ == "__main__":
    pass
word = values[0] coefs = np.asarray(values[1:], dtype="float32") embeddings_index[word] = coefs f.close() print("found %s word vectors" % len(embeddings_index)) # embedding matrix print("preparing embedding matrix...") words_not_found = [] embedding_matrix = np.zeros((len(word_index) + 1, 300)) for word, i in word_index.items(): embedding_vector = embeddings_index.get(word) if (embedding_vector is not None) and len(embedding_vector) > 0: embedding_matrix[i] = embedding_vector else: words_not_found.append(word) return embedding_matrix if __name__ == "__main__": params = Params("params.json") tokenizer = load_pickle(params.tokenizer_file) embeddings_matrix = load_subword_embedding(tokenizer.word_index, args.emb_path) pickle_object(embeddings_matrix, params.embedding_matrix) print("Created and Saved Embedding Matrix @ %s" % params.embedding_matrix)
# Parse the already-open silver CSV handle `f` (index, date, price rows),
# keep rows that have both an index and a price, then plot and pickle
# the resulting DataFrame.
dates = []
silvers = []
for record in f.readlines():
    fields = record.split(',')
    row_id = fields[0]
    print(row_id)
    if len(row_id) == 0:
        continue  # no index -> not a data row
    day = fields[1]
    raw_silver = fields[2].split('\n')[0]
    if len(raw_silver) == 0:
        continue  # missing price
    print(raw_silver)
    dates.append(day)
    silvers.append(float(raw_silver))
df = utils.make_as_pandas_df(dates, content_name='Silver', content_list=silvers)
utils.standard_plot(df, column_name='Silver')
utils.pickle_object(df, "../data/silver_daily.pkl")

if __name__ == "__main__":
    pass
# Driver: build a train/test split of the drug-interaction matrix, take one
# tenth of all node pairs, and compute Katz scores for that partition.
train_ratio=0.7
validation_ratio=0.0
test_ratio =0.3
#Holdout:
evaluation_method =['Retrospective', 'Holdout'][1]
new_version="5.1.1"
old_version = "5.0.0"
#spliting to train\test
if evaluation_method == 'Retrospective':
    # split by release: train on the old release, test on edges new in the new one
    m_test,m_train,evaluator,test_tuples, i2d,evaluation_type,drug_id_to_name = create_train_test_split_relese(old_relese = old_version,new_relese=new_version)
else:
    # random holdout split of a single release by the ratios above
    m_test,m_train,evaluator,test_tuples, i2d, evaluation_type,drug_id_to_name = create_train_test_split_ratio(new_version,train_ratio,validation_ratio,test_ratio)
G = nx.from_numpy_matrix(m_train)
# all unordered node pairs (i, j) with j >= i, sorted for deterministic chunking
edges = sorted([(i, j) for i in G.nodes() for j in G.nodes() if j>=i])
def split_list(alist, wanted_parts=1):
    # Partition alist into wanted_parts contiguous, near-equal slices.
    length = len(alist)
    return [ alist[i*length // wanted_parts: (i+1)*length // wanted_parts]
             for i in range(wanted_parts) ]
edges_parts = split_list(edges,wanted_parts=10)
# which tenth of the pairs this process handles (manual work distribution)
part = 2
# done: 0, 1, 4,3,5 Working: here: 5, VPN: .
katz_calc = k()
katz_calc.G = G
katz_calc.edges = set(edges_parts[part])
katz_scores = katz_calc.katz()
pickle_object(f'kats_scores_final_{part}.pickle',katz_scores)
# Compute day-over-day gold-price volatility (percent change to the next
# day) from the pickled daily series, plot it, and pickle the result.
# NOTE(review): the output name "gold_daily.pkl" matches the input's base
# name — presumably intentional, but confirm it isn't meant to be a
# volatility-specific path.
fname = "data/gold_daily.pkl"
df = utils.load_pickle(fname)
utils.standard_plot(df, column_name='Gold')
volatiles = []
dates = []
last_row = df.shape[0] - 1
for row in range(last_row):
    day = df.loc[row]["Date"]
    price_today = df.loc[row]["Gold"]
    price_next = df.loc[row + 1]["Gold"]
    # relative change, expressed in percent
    pct_change = (price_next - price_today) / price_today * 100
    print("idx: {}, vol: {}".format(day, pct_change))
    volatiles.append(pct_change)
    dates.append(day)
df_volatile = utils.make_as_pandas_df(dates, content_list=volatiles, content_name="Volatility")
utils.standard_plot(df_volatile, column_name="Volatility")
utils.pickle_object(df_volatile, "gold_daily.pkl")

if __name__ == "__main__":
    pass
# Normalize daily gold prices by the monthly CPI (gold * 100 / CPI of that
# month), keeping only dates whose month exists in the CPI table, then plot
# and pickle the normalized series.
# NOTE(review): `normal_target_dates`, `df_gold`, `df_cpi` and
# `gold_normals_date` are defined earlier in the original file (outside
# this chunk) — confirm `gold_normals_date` starts empty there.
gold_normals = []
for gold_idx, date in enumerate(normal_target_dates):
    gold = df_gold['Gold'][gold_idx]
    # CPI is monthly: look it up by the first day of this date's month
    cpi_date = datetime.datetime(date.year, date.month, 1, 0, 0)
    cpi_idx = df_cpi['Date'][df_cpi['Date'] == cpi_date].index.tolist()
    if len(cpi_idx) == 0:
        continue  # no CPI entry for this month -> skip the date
    else:
        cpi_idx = cpi_idx[0]
    # print(cpi_idx)
    cpi = df_cpi['CPI'][cpi_idx]
    print("cpi: {}, gold: {}".format(cpi, gold))
    # rescale so a CPI of 100 leaves the price unchanged
    normalizer = 100 / cpi
    gold_normal = gold * normalizer
    gold_normals.append(gold_normal)
    gold_normals_date.append(date)
data_cpi_normalized = {'Date': gold_normals_date, 'Gold_Normal': gold_normals}
df_gold_normal = pd.DataFrame(data_cpi_normalized)
utils.standard_plot(df_gold_normal, column_name="Gold_Normal")
utils.pickle_object(df_gold_normal, "../data/gold_daily_normal_cpi_monthly.pkl")

if __name__ == "__main__":
    pass
# date line = line.split(' ')[1] date = line.split('\t')[0] rate = line.split('\t')[1] date_components = date.split('/') year = date_components[0] month = date_components[1] day = date_components[2] date = year + '-' + month + '-' + day dates.append(date) # rate rate = float(rate.split('\n')[0]) rates.append(rate) dates = np.array(dates, dtype='datetime64[D]')[::-1] rates = np.array(rates, dtype='float32')[::-1] data = {'Date': dates, 'KRW/USD': rates} df = pd.DataFrame(data) utils.pickle_object(df, "data/KRW_USD_{}.pkl".format(period)) # df = utils.load_pickle("data/KRW_USD_1997.pkl") utils.standard_plot(df) if __name__ == "__main__": pass