def step(self, states, actions, rewards, next_states, dones, num_current_episode):
    """Save experience in replay memory, and use a random sample from the buffer to learn."""
    # self.memory.add(states, actions, rewards, next_states, dones)
    self.memory.add(encode(states), encode(actions), rewards, encode(next_states), dones)

    # If enough samples are in the replay memory and it is time to update
    if (len(self.memory) > BATCH_SIZE) and (num_current_episode % UPDATE_EVERY == 0):
        # Note: this code only expects 2 agents
        assert len(self.agents) == 2

        # Allow learning several times in a row in the same episode
        for i in range(MULTIPLE_LEARN_PER_UPDATE):
            # Sample a batch of experience from the replay buffer
            experiences = self.memory.sample()
            # Update Agent #0
            self.maddpg_learn(experiences, own_idx=0, other_idx=1)

            # Sample another batch of experience from the replay buffer
            experiences = self.memory.sample()
            # Update Agent #1
            self.maddpg_learn(experiences, own_idx=1, other_idx=0)
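# encode() is defined elsewhere in this repository. A minimal, hypothetical
# sketch of what it might do (flatten the per-agent arrays into a single 1-D
# vector so both agents' data fits one replay-buffer entry) -- an assumption,
# not the actual implementation:
import numpy as np

def encode_sketch(per_agent_values):
    # stack the agents' arrays and flatten them into one vector
    return np.array(per_agent_values).reshape(-1)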
def search_game(self, title):
    """Return a list of Game objects for a query for the given title."""
    all_games = []
    encoded_title = helper.encode(title)
    # full game search url
    url = helper.gamefaqsURL_search_game + encoded_title
    soup = helper.get_bs4(url)
    tags = soup.find_all("div", {"class": "pod"})
    for tag in tags:
        criteria_tag = tag.find("h2")
        # only include games under the best-matches category
        if criteria_tag and criteria_tag.text in search_matches:
            best_matches = tag.find_all("tr")
            for match in best_matches:
                # get game platform
                p = match.find("td", {"class": "rmain"}).text
                platform = p.replace(" ", "").replace("\r\n", "")
                info = match.find("td", {"class": "rtitle"})
                # get game url
                game_url = helper.gamefaqsURL_base + info.contents[1]["href"]
                # get game title
                game_title = info.text.replace("\n", "")
                obj = game.Game(game_url, dict(title=game_title, platform=platform))
                all_games.append(obj)
            break
    return all_games
def search_company(self, name, limit=40):
    """Return a list of Company objects for a query for the given name.

    The maximum number of companies to be returned is set by limit.
    """
    companies = []
    encoded_name = helper.encode(name)
    # full company search url
    url = helper.gamefaqsURL_search_company + encoded_name
    soup = helper.get_bs4(url)
    company_tags = soup.find_all("table")[1].find_all("a")
    count = 0
    for tag in company_tags:
        companies.append(company.Company(helper.gamefaqsURL_base + tag["href"],
                                         dict(name=tag.text)))
        count += 1
        if count >= limit:
            break
    return companies
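# Usage sketch for the two search methods above. This is a minimal, hypothetical
# example: the client class name `GameFAQs` and its no-argument constructor are
# assumptions, not taken from the original code.
gf = GameFAQs()
games = gf.search_game("zelda")                      # -> list of game.Game objects
companies = gf.search_company("nintendo", limit=5)   # -> at most 5 company.Company objects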
def load_files(cases_text_directory, output_dir, output_file):
    # nltk.download() only fetches corpora; load the punkt tokenizer with nltk.data.load
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    path = cases_text_directory + "/"
    filenames = os.listdir(path)
    count = 0
    arr_json_file = []
    arr_metadata = {}
    json_output_file = {}
    for filename in filenames:
        outputarray = {}
        if '.pdf.txt' in filename:
            count = count + 1
            with open(path + filename) as fp:
                # read line by line, stripping trailing newlines
                lines = [line.rstrip('\n') for line in fp]
            text = ' '.join(str(e) for e in lines)
            # replace non-ASCII characters with spaces, then collapse doubled spaces
            text = ''.join([i if ord(i) < 128 else ' ' for i in text])
            text = helper.encode(text.replace("  ", " "))
            text = text.replace("Williams, Brooke 6/26/2015 For Educational Use Only", "")
            outputarray["body"] = text
            outputarray["filename"] = filename
            outputarray["entities"] = []
            outputarray["title"] = ""
            outputarray["file_count"] = count
            arr_json_file.append(outputarray)
    arr_metadata["@count_at_loading"] = str(count)
    arr_metadata["@time_date"] = str(time.strftime('%X %x %Z'))
    arr_metadata["@input_directory"] = str(cases_text_directory)
    json_output_file["@data"] = arr_json_file
    json_output_file["@metadata"] = arr_metadata
    with open(output_dir + '/' + output_file, 'w') as datafile:
        json.dump(json_output_file, datafile, indent=4, sort_keys=True,
                  separators=(',', ':'))
number_of_feat = 0
epochs = 1
runs = 1500
scores = -999  # sentinel meaning "no best score recorded yet"
start = datetime.datetime.now()
for i in range(runs):
    clf = DecisionTreeRegressor()
    num_feat = random.randint(1, 76)
    features, train, test, y_train, y_test = helper.split_train_test(
        training_set, training_target, num_feat, i,
    )
    train_encoded, test_encoded = helper.encode(train, test)
    helper.train_model(clf, train_encoded, y_train, epochs)
    y_predicted = clf.predict(train_encoded[0:10])
    test_score = helper.rsme_eval(clf, y_test, test_encoded)
    # keep the best (lowest) test score seen so far
    if test_score < scores or scores == -999:
        bfeatures = features
        scores = test_score
        seed = i
        number_of_feat = num_feat
    # rough estimate of the remaining run time
    elapsed = (datetime.datetime.now() - start).seconds
    approx = ((elapsed / (i + 1)) * runs) - elapsed
    print("time left: %s | %s/%s | best score: %s"
          % (str(datetime.timedelta(seconds=approx)), i + 1, runs, scores))
def named_entities(START_VALUE, END_VALUE, in_dir, in_file):
    properties_json = json.load(open("properties.json"))

    # nltk sentence tokenizer (built in); nltk.download() only fetches data,
    # so load the punkt model with nltk.data.load instead
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

    # STANFORD TAGGER
    # "stanford_classifer": "stanford-ner/english.muc.7class.distsim.crf.ser.gz",
    # "stanford_jar": "stanford-ner/stanford-ner.jar"
    classifier = properties_json["stanford_classifer"]
    jar = properties_json["stanford_jar"]
    st = StanfordNERTagger(classifier, jar)

    with open(in_dir + '/' + in_file, "r") as fp:
        SUPER_JSON = json.load(fp)

    # add sentences
    counter = 0
    for items in SUPER_JSON["@data"]:
        counter = counter + 1
        items["counter"] = counter
        text = items["title"] + ". " + items["body"]
        items["entities"] = []
        SENTENCES_TO_TAG = tokenizer.tokenize(text)
        items["text"] = text
        # no longer needed: original_body
        items["original_body"] = []
        items["sentences"] = SENTENCES_TO_TAG

    print "START:"
    print datetime.datetime.now().isoformat()

    entity_tags = {"PERSON", "ORGANIZATION", "LOCATION", "DATE"}
    for i in xrange(len(SUPER_JSON["@data"])):
        if i >= START_VALUE and i < END_VALUE:
            # roughly 20 seconds for 1 file
            # try:
            list_tagged_text = []
            jsondata = []
            print i, SUPER_JSON["@data"][i]["counter"]
            for sentences in SUPER_JSON["@data"][i]["sentences"]:
                tokenized_text = word_tokenize(helper.encode(sentences))
                tagged_text = st.tag(tokenized_text)
                list_tagged_text.append(tagged_text)
                for j in xrange(len(tagged_text)):
                    for entity_tag in entity_tags:
                        if entity_tag in tagged_text[j][1]:
                            jsondictitem = {entity_tag: tagged_text[j][0]}
                            jsondata.append(jsondictitem)
            SUPER_JSON["@data"][i]["entities"] = jsondata
            # Use this for sentence entities
            # sentence_data = {"sentence": sentences, "entities": sentence_entities}
            # sentence_arr.append(sentence_data)
            # except:
            #     SUPER_JSON["@data"][i]["entities"] = [{"ERROR": "ERROR"}]

    print "END:"
    print datetime.datetime.now().isoformat()

    for i, items in enumerate(SUPER_JSON["@data"]):
        entitytagslist = get_entitytagslist(items["entities"])
        items["entities"] = get_final_entitytagslist(entitytagslist, items["entities"])

    # for j, item in enumerate(SUPER_JSON["@data"]):
    #     for j, sentence_entity in enumerate(item["sentence_entities"]):
    #         entitytagslist = get_entitytagslist(sentence_entity["entities"])
    #         sentence_entity["combined_entities"] = get_final_entitytagslist(entitytagslist, sentence_entity["entities"])

    # create new json file
    SUPER_JSON["@metadata"]["@time_date_ner"] = str(time.strftime('%X %x %Z'))
    with open(in_dir + '/' + in_file, 'w') as datafile:
        json.dump(SUPER_JSON, datafile, indent=4, sort_keys=True,
                  separators=(',', ':'))
def add_answer(self, car, road):
    # store the encoded (car, road) pair keyed by the current frame index
    ans = [encode(car), encode(road)]
    self.answer_key[self.frame_count] = ans
    self.frame_count += 1
import helper
import sklearn
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

training_set, training_target, test_set, test_ids = helper.get_data()
X_train, X_test, y_train, y_test = train_test_split(training_set, training_target,
                                                    test_size=0.2, random_state=1)
train_encoded, test_encoded = helper.encode(X_train, X_test)

parameters = {
    'criterion': ['mse', 'friedman_mse', 'mae'],
    'splitter': ['best', 'random'],
    'max_depth': [10, None],
    'max_features': ['auto', 'sqrt', 'log2', None],
    'presort': [True, False],
}

clf = GridSearchCV(DecisionTreeRegressor(), parameters)
clf.fit(train_encoded, y_train)
print('score', clf.score(train_encoded, y_train))
print(clf.best_params_)
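# helper.encode is not shown in these snippets. A minimal, hypothetical sketch
# of what such a function might do (one-hot encode the categorical columns,
# building the dummies on train and test jointly so both frames end up with
# matching columns) -- an assumption, not the repository's actual implementation:
import pandas as pd

def encode_sketch(train, test):
    # concatenate with keys so the two frames can be split apart again
    combined = pd.concat([train, test], keys=["train", "test"])
    combined = pd.get_dummies(combined)  # one-hot encode object/category columns
    return combined.xs("train"), combined.xs("test")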
feature_set_1 = ['Neighborhood', '3SsnPorch', 'HeatingQC', 'LotArea', 'SaleType', 'MasVnrType',
                 'LotShape', 'OpenPorchSF', 'FullBath', 'BsmtFinSF1', 'MSZoning', 'TotalBsmtSF',
                 'HalfBath', 'GarageType', 'BsmtFinSF2', 'BsmtUnfSF', 'RoofMatl', 'MSSubClass',
                 'Condition1', 'MasVnrArea', 'Exterior1st', 'KitchenAbvGr', 'GarageYrBlt',
                 'Foundation', 'OverallCond', 'BsmtHalfBath', 'EnclosedPorch', 'BsmtFinType2',
                 'BldgType', 'FireplaceQu', 'LotFrontage', 'LandSlope', 'GarageFinish',
                 'BedroomAbvGr', 'YearBuilt', 'MiscVal', '1stFlrSF', 'OverallQual', 'MoSold']
feature_set_2 = ['OpenPorchSF', 'GarageYrBlt', '1stFlrSF', 'SaleType', 'LandContour', 'HouseStyle',
                 'BsmtFinType1', 'WoodDeckSF', 'LotShape', 'YrSold', 'BsmtFinSF1', 'BsmtQual',
                 'YearBuilt', 'BsmtCond', 'GarageArea', 'RoofStyle', 'GrLivArea', 'Electrical',
                 'TotalBsmtSF', 'BedroomAbvGr', 'KitchenQual', 'PavedDrive', 'BsmtExposure',
                 'Fireplaces', 'YearRemodAdd', 'Heating', 'BldgType', 'OverallCond', 'LotConfig',
                 'EnclosedPorch', 'Functional', 'OverallQual', 'ExterCond', 'LandSlope',
                 'GarageType', '2ndFlrSF', 'MiscVal', 'MSSubClass', 'BsmtHalfBath', 'LotArea',
                 'MSZoning', 'LotFrontage', 'Neighborhood', 'LowQualFinSF', 'Foundation',
                 '3SsnPorch', 'FullBath', 'BsmtFinSF2', 'PoolArea', 'BsmtFinType2', 'RoofMatl',
                 'HalfBath', 'GarageCond', 'HeatingQC', 'GarageFinish', 'MasVnrArea',
                 'KitchenAbvGr', 'BsmtFullBath', 'GarageQual', 'Utilities', 'TotRmsAbvGrd']
feature_set_3 = ['ExterCond', 'BsmtUnfSF', 'LotArea', 'Condition1', 'Condition2', 'LotShape',
                 'BsmtHalfBath', 'CentralAir', 'GarageArea', 'Functional', 'WoodDeckSF',
                 'FullBath', 'BsmtFullBath', 'Neighborhood', 'EnclosedPorch', 'Electrical',
                 'GarageFinish', 'GarageType', 'Foundation', 'YearBuilt', 'BsmtFinSF1',
                 'BldgType', 'GarageYrBlt', 'MasVnrArea', 'Street', 'SaleCondition', '2ndFlrSF',
                 'LandContour', 'RoofStyle', 'MasVnrType', 'Fireplaces', 'YrSold', 'GarageCars',
                 'BsmtFinType1', 'OpenPorchSF', 'TotalBsmtSF', 'Exterior2nd', '1stFlrSF',
                 'OverallCond', 'HeatingQC', 'MoSold', 'Heating', 'KitchenQual', 'LotFrontage',
                 'RoofMatl', 'BedroomAbvGr', 'LandSlope', 'OverallQual', 'FireplaceQu',
                 '3SsnPorch', 'SaleType', 'MiscVal', 'BsmtFinType2', 'ScreenPorch', 'Exterior1st',
                 'Utilities', 'GarageQual', 'Fence', 'BsmtCond', 'GarageCond', 'HalfBath',
                 'HouseStyle', 'KitchenAbvGr', 'ExterQual', 'YearRemodAdd', 'BsmtQual',
                 'TotRmsAbvGrd', 'LotConfig', 'GrLivArea', 'MSZoning', 'PoolArea', 'BsmtFinSF2',
                 'LowQualFinSF']

xgb_v1_1 = get_v1()
xgb_v1_2 = get_v1()
xgb_v1_3 = get_v1()
xgb_v2_1 = get_v2()
xgb_v2_2 = get_v2()
xgb_v2_3 = get_v2()

train1, test1, y_train1, y_test1 = helper.evaluate_split_train_test(training_set, training_target, feature_set_1)
train2, test2, y_train2, y_test2 = helper.evaluate_split_train_test(training_set, training_target, feature_set_2)
train3, test3, y_train3, y_test3 = helper.evaluate_split_train_test(training_set, training_target, feature_set_3)

train_encoded1, test_encoded1 = helper.encode(train1, test1)
train_encoded2, test_encoded2 = helper.encode(train2, test2)
train_encoded3, test_encoded3 = helper.encode(train3, test3)

helper.train_model(xgb_v1_1, train_encoded1, y_train1, 1)
helper.train_model(xgb_v1_2, train_encoded2, y_train2, 1)
helper.train_model(xgb_v1_3, train_encoded3, y_train3, 1)
helper.train_model(xgb_v2_1, train_encoded1, y_train1, 1)
helper.train_model(xgb_v2_2, train_encoded2, y_train2, 1)
helper.train_model(xgb_v2_3, train_encoded3, y_train3, 1)

rsme = []
rsme.append(helper.rsme_eval(xgb_v1_1, y_test1, test_encoded1))
rsme.append(helper.rsme_eval(xgb_v1_2, y_test2, test_encoded2))
rsme.append(helper.rsme_eval(xgb_v1_3, y_test3, test_encoded3))
rsme.append(helper.rsme_eval(xgb_v2_1, y_test1, test_encoded1))
    18, 18, 18, 18,
]  # (tail of a parameter list defined earlier in the script)

DATA_PATH = '../nanopore_dna_storage_data/'
infile_name = DATA_PATH + 'encoded_file/data_files.tar.bz2.enc'
for i in range(13):
    print('i', i)
    print('bytes_per_oligo', bytes_per_oligo[i])
    print('RS_redundancy', RS_redundancy[i])
    print('conv_m', conv_m[i])
    print('conv_r', conv_r[i])
    print('pad', pad[i])
    # the output directory is spelled consistently ('oligo_files') so the file
    # written here is the same one read back below
    helper.encode(data_file=infile_name,
                  oligo_file=DATA_PATH + 'oligo_files/reads.' + str(i),
                  bytes_per_oligo=bytes_per_oligo[i],
                  RS_redundancy=RS_redundancy[i],
                  conv_m=conv_m[i],
                  conv_r=conv_r[i],
                  pad=pad[i])
    # prepend/append the barcodes and write a FASTA file of the oligos
    with open(DATA_PATH + 'oligo_files/reads.' + str(i)) as f_reads, open(
            DATA_PATH + 'oligo_files/oligos_' + str(i) + '.fa', 'w') as f_oligos:
        for j, line in enumerate(f_reads):
            f_oligos.write('>oligos_' + str(i) + '_' + barcode_start[i] + '_'
                           + barcode_end[i] + '_' + str(j) + '\n')
            f_oligos.write(barcode_start[i] + line.rstrip('\n') + barcode_end[i] + '\n')
def DES(filename=None, key_file=None, action="encrypt"):
    # ***** ERROR CHECKING *****
    # check that required arguments are given
    if key_file is None and filename is None:
        print("Error: need to input filename and key")
        return None
    # check that the key file name is a string
    if not isinstance(key_file, str):
        print("Error: need to input key file")
        return None
    # check that the key file (kf) exists
    try:
        kf = open(key_file, "rb")
    except OSError:
        print("Error: " + key_file + " does not exist")
        return None
    # check for an invalid file name
    if not isinstance(filename, str):
        print("Error: First input must be filename (string)")
        return None
    # check that the file exists
    try:
        f = open(filename, "rb")
    except OSError:
        print("Error: " + filename + " does not exist")
        return None

    # read the key file in chunks until EOF
    key = []
    with kf as x:
        temp = x.read()
        key.append(temp)
        while temp != b"":
            temp = x.read()
            key.append(temp)

    # strip a trailing newline (byte value 10) from the key, if present
    theKey = key[0]
    if theKey[-1] == 10:
        theKey = theKey[:-1]
    # check that the key has the correct length
    if len(theKey) != 8:
        print(theKey)
        print("Error: key must be exactly 8 bytes")
        return None

    # ***** ENCRYPTION AND DECRYPTION *****
    # read the input file two bytes at a time until EOF
    b = []  # byte chunks from the file
    with f as x:
        temp = x.read(2)
        b.append(temp)
        while temp != b"":
            temp = x.read(2)
            b.append(temp)

    bts = b""  # the actual encoded/decoded output (assumes h.encode/h.decode return bytes)
    if action == "encrypt":
        # *** ENCRYPT ***
        print("ENCRYPTING")
        # apply one pass per key byte, in order
        for i in range(0, 8):
            cnt = 0
            while b[cnt] != b"":
                b[cnt] = h.encode(b[cnt], theKey[i])
                if i == 7:
                    bts = bts + b[cnt]
                cnt = cnt + 1
    else:
        # *** DECRYPT ***
        print("DECRYPTING")
        # apply the passes in reverse key order
        for i in range(0, 8):
            cnt = 0
            while b[cnt] != b"":
                b[cnt] = h.decode(b[cnt], theKey[7 - i])
                if i == 7:
                    bts = bts + b[cnt]
                cnt = cnt + 1
    return [bts, key]
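# Illustrative call of the DES function above (a minimal sketch; the file names
# "message.txt" and "key.bin" are placeholders, not taken from the original code):
encrypted_bytes, raw_key_chunks = DES(filename="message.txt", key_file="key.bin",
                                      action="encrypt")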