def process_bind_param(self, value, dialect):
    if not value:
        return None
    try:
        parser(value).V()
    except Exception:  # narrowed from a bare except so system exits still propagate
        raise InvalidVersion(
            "{0} is not a valid version".format(value)
        )
    return value
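# A plausible enclosing type for the hook above, assuming it hangs off a
# SQLAlchemy TypeDecorator subclass (an assumption: the original class
# definition is not shown, so the class name and stored type are guesses).
from sqlalchemy import types

class VersionType(types.TypeDecorator):
    """Stores version strings as plain text, validating them on the way in."""
    impl = types.String
    cache_ok = True  # safe to cache: conversion depends only on the value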
def write_and_calc():
    """
    Write the ID3 tree and the accuracies for ID3, kNN and Naive Bayes
    to output.txt.
    :return: None
    """
    train_p = parser("./train.txt")
    dev_p = parser("./test.txt")
    knn = calc_knn(train_p, dev_p, 5)
    id3, tree = calc_ID3(train_p, dev_p)
    naive_bayes = calc_Naive_Bayse(train_p, dev_p)
    with open('output.txt', "w+") as file:
        write_tree(tree, file)
        file.write("\n" + str(id3) + "\t" + str(knn) + "\t" + str(naive_bayes))
def mparser(self, song_path, extensions=(".mp3", ".m4a")):
    # tuple default instead of a mutable list, to avoid shared-state bugs
    ext = os.path.splitext(song_path)[1]
    if ext in extensions:
        file = parser(song_path, ext, self.equivalencies, self.prop)
        if file:
            if file["title"]:
                song_title = file["title"]
            else:
                a_path, song_title = os.path.split(song_path)
            song_album = file["album"] if file["album"] else "unknown"
            song_artist = file["artist"] if file["artist"] else "unknown"
        else:
            # no tags at all: derive title/album/artist from the path
            path, song_title = os.path.split(song_path)
            path, song_album = os.path.split(path)
            path, song_artist = os.path.split(path)
        self.FileInsert.append([
            song_title.lower(),
            song_album.lower(),
            song_artist.lower(),
            song_path.decode('UTF-8'),  # one must decode using the system default encoding
        ])
def get_meta(url):
    meta = {}
    r = parser(url)
    meta['title'] = r.find(class_='core_title_txt').text
    tag = r'\[.*?\]|【.*?】'  # strip content wrapped in [] or 【】 brackets
    meta['title'] = re.sub(tag, '', meta['title'])
    meta['author'] = r.find('a', class_='p_author_name').text
    meta['original'] = url
    return meta
def get_meta(url):
    meta = {}
    r = parser(url)
    meta['title'] = r.find('h1', id='j_data').text
    tag = r'\[.*?\]|【.*?】'  # strip content wrapped in [] or 【】 brackets
    meta['title'] = re.sub(tag, '', meta['title'])
    meta['author'] = r.find('a', class_='u').text
    meta['original'] = url
    return meta
def get_meta(url):
    meta = {}
    r = parser(url)
    meta['title'] = r.find(id='thread_subject').text
    tag = r'\[.*?\]|【.*?】'  # strip content wrapped in [] or 【】 brackets
    meta['title'] = re.sub(tag, '', meta['title'])
    meta['author'] = r.find(class_='authi').find('a', class_='xw1').text
    meta['original'] = url
    return meta
def get_posts(url):
    r = parser(url)
    post = str(r.find(class_='article-holder'))
    post = post.replace('data-src', 'src')
    img_src = r"""\bsrc\b\s*=\s*[\'\"]?([^\'\"]*)[\'\"]?"""
    for img in re.findall(img_src, post):
        new_img = upload_img(img)
        post = post.replace(img, new_img)
    post = html2markdown(post)
    return post
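# Quick sanity check of the src-extraction pattern above (input is
# illustrative; the capture group grabs the URL with or without quotes):
#     >>> re.findall(img_src, '<img src="http://a/b.png">')
#     ['http://a/b.png']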
def __init__(self, ax, fname='graph.log'):
    self.ax = ax
    actions = []
    data = []
    levels = []
    nebs = {}
    parser(fname, actions, data, levels, nebs)
    self.actions = actions
    self.data = data
    self.levels = levels
    self.nebs = nebs
    self.colors = [
        'tab:blue', 'tab:orange', 'tab:green', 'tab:red',
        'tab:purple', 'tab:brown', 'tab:pink', 'tab:gray'
    ]
def extract_tweets(tweets, cmd_line=False):
    """
    Extracts and parses the text of each tweet.
    tweets: list of tweet dicts
    """
    tweet_texts = []
    for tweet in tweets:
        text = get_tweet(tweet)
        if cmd_line:
            text = text.encode('unicode-escape')
            text = ununicode(text)
        text = unescape(text)
        tweet_texts.append(parser(text))
    return tweet_texts
def baidu_crawler(keyword: str, outfile, max_page=10):
    for page in range(1, max_page + 1):
        html = get_page_code(keyword, page)
        item_list, next_page = parser(html)
        print("{} {}".format(keyword, page))
        with open(outfile, 'a', encoding='utf-8') as f:
            for item in item_list:
                f.write("{}\n".format(item))
        if not next_page:
            break
        time.sleep(random.randint(1, 4))
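# Hypothetical usage sketch: crawl up to three result pages for a keyword
# and append the extracted items to results.txt (keyword and file name are
# illustrative, not from the original code).
baidu_crawler("python", "results.txt", max_page=3)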
def connect(self):
    try:
        response_in_html = send_request(self.url)
        if not response_in_html:
            return None
        return parser(response_in_html, self.pool)
    except Exception:  # narrowed from a bare except
        traceback.print_exc()
        return None
def write_original_dataset():
    """
    Write the tree of the original dataset and the k-fold accuracies
    for ID3, kNN and Naive Bayes.
    """
    train_p = parser(argv[1])
    tree = ID3_print_Tree(copy.deepcopy(train_p))
    with open("accuracy.txt", 'w+') as file_acc:
        write_Accuracies(copy.deepcopy(train_p), file_acc)
    with open('tree.txt', "w+") as file:
        write_tree(tree, file)
def main():
    validate_call(sys.argv)
    my_list = parser(sys.argv[1])
    term_list = []
    for elem in my_list:
        term = term_splitter(elem)
        term_list.append(Term(term))
    function = Polynomial(term_list)
    print(f"The derivative of f(x) = {function}")
    print(f"f'(x) = {function.derivative()}")
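# Hypothetical entry point, assuming parser() splits a polynomial string
# passed as the first CLI argument (the exact input format, e.g.
# "3x^2+2x-5", is a guess; it is not shown in the original code):
#     $ python derivative.py "3x^2+2x-5"
if __name__ == "__main__":
    main()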
def is_local(self, other):
    other = parser(other).LOL()
    if any(other.startswith(x) for x in ('<', '>')):
        other = other.replace(' ', '')
        matchers = [
            VersionMatcher(patt)
            for patt in re.findall(relative_re, other)
        ]
        return any(
            all(matcher == VersionMatcher(v.version) for matcher in matchers)
            for v in self.versions
        )
    return any(
        VersionMatcher(other) == VersionMatcher(v.version)
        for v in self.versions
    )
def get_libor():
    df = quandl.get("FRED/USDONTD156N", authtoken="UHqpEq3-QGzP5zTUSEYr")
    dates = list(df.index)
    interest_rates = list(df.values)
    parsed_dates = []
    rates = []
    for date, rate in zip(dates, interest_rates):
        date = parser(date._date_repr)
        parsed_dates.append(date)
        rate = rate[0]
        rates.append(rate)
    data_dict = {'Date': parsed_dates, 'LIBOR': rates}
    df = pd.DataFrame(data_dict)
    return df
def get_close_data(self):
    hist_close = self.hist["Close"]
    keys = hist_close.keys()
    dates = []
    prices = []
    for key in keys:
        date = parser(key._date_repr)
        price = hist_close[key]
        dates.append(date)
        prices.append(price)
    data_dict = {'Date': dates[sg_start:], self.code: prices[sg_start:]}
    df = pd.DataFrame(data_dict)
    return df
def get_posts(url):
    r = parser(url)
    post = str(r.find(class_='quote-content'))
    img_src = r"""\bsrc\b\s*=\s*[\'\"]?([^\'\"]*)[\'\"]?"""
    post = post.replace('data-original', 'src')
    place_holder = 'https://b1.hoopchina.com.cn/web/sns/bbs/images/placeholder.png'
    for img in re.findall(img_src, post):
        if img == place_holder:  # lazy-loading placeholder image, drop it
            post = post.replace(f'src="{place_holder}"', '')
            continue
        try:
            i = img.index('?')
            img_real = img[:i]
        except ValueError:
            img_real = img
        new_img = upload_img(img_real)
        post = post.replace(img, new_img)
    post = html2markdown(post)
    return post
def main():
    print("PLEASE ENTER THE NUMBER BELOW TO SELECT THE ALGORITHM")
    print("1. Weighting rule with path-compression.")
    print("2. Weighting rule.")
    print("3. Naive algorithm.")
    print()
    selection = int(input("YOUR SELECTION IS: "))
    result_set = parser('hw4.dat')
    time_set = []
    if selection in [1, 2, 3]:
        for i in range(10):
            # run the algorithm 10 times, then take the average time
            time_set.append(test(selection, result_set))
    else:
        err_msg = "MAKE SURE YOU ENTERED 1, 2 OR 3."
        sys.exit(err_msg)
    print('The average time is: ' + str(sum(time_set) / len(time_set)) + ' second(s)')
def get_posts(url):
    def get_post(page_url):
        return parser(page_url).find_all(class_='d_post_content')

    page = parser(url).find_all(
        'li', class_='l_reply_num')[0].find_all('span')[1].text
    posts = []
    for i in range(1, int(page) + 1):
        posts += [x for x in get_post(url + '&pn=' + str(i))]
    content = ''
    img_src = r"""\bsrc\b\s*=\s*[\'\"]?([^\'\"]*)[\'\"]?"""
    for post in posts:
        post = str(post)
        post = re.sub('<div[^>]*>', '<p>', post)
        post = re.sub(r'</div[^>]*>', '</p>', post)
        for img in re.findall(img_src, post):
            new_img = upload_img(img)
            post = post.replace(img, new_img)
        content += html2markdown(post)
    return content
def save_file(src_path: str, upload_path: str):
    """
    Handles the tasks below:
    - Creates the output directory as defined in the README.md file.
    - Generates the CSV file name from a timestamp.
    - Saves the CSV file (encoding='UTF-16').
    :param src_path: path to the folder (unprocessed data)
    :param upload_path: path to the folder (processed data)
    """
    file_name = str(time.strftime("%Y%m%d_%H%M%S")) + ".csv"
    try:
        formatted_df = formatter(parser(src_path))
        df = output_dataset(formatted_df)
        for currency_code in formatted_df.currency.unique():
            path = upload_path + "/search_terms/" + currency_code + "/"
            # try/except per directory, so one existing directory does not
            # abort creation of the rest
            try:
                os.makedirs(path)
            except OSError:
                logging.info("Directory %s already exists" % path)
            else:
                logging.info("Successfully created the directory %s" % path)
        try:
            df.to_csv(path + file_name, sep='\t', index=False,
                      header=True, encoding='UTF-16')
            logging.info("Successfully created output file: %s" % file_name)
        except AttributeError as err:
            logging.info("Error, file not generated: %s" % err)
    except EmptyDataError as err:
        logging.info("Failed: %s" % err)
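# Illustrative call (both paths are hypothetical, not from the original):
# parse raw exports from ./unprocessed and write one timestamped UTF-16
# CSV under ./processed/search_terms/<currency>/.
save_file("./unprocessed", "./processed")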
def main():
    args = utils.parser()
    hidden = int(args.hidden)
    alpha = float(args.alpha)
    beta = float(args.beta)
    lmbda = float(args.lmbda)
    epoch = int(args.epoch)
    model = models.neural_network.Model()
    model.add(models.neural_network.Sigmoid(784, hidden))
    model.add(models.neural_network.Softmax(hidden, 10))
    Xtrain, Ytrain = utils.load_data('../data/digitstrain.txt', False)
    Xvalid, Yvalid = utils.load_data('../data/digitsvalid.txt', False)
    cross_train = []
    cross_valid = []
    accuracy_train = []
    accuracy_valid = []
    for n in range(epoch):
        print(n)
        for i in range(len(Ytrain)):
            x = Xtrain[i, :]
            y = Ytrain[i]
            model.train(x, y, alpha, beta, lmbda)
        calculate_error(model, cross_train, accuracy_train, Xtrain, Ytrain)
        calculate_error(model, cross_valid, accuracy_valid, Xvalid, Yvalid)
        print(cross_train[-1])
    for i in range(len(model.layers) - 1):
        W = model.layers[i].W
        utils.visualize(W)
    utils.plot_curve(cross_train, cross_valid, 'cross entropy error')
    utils.plot_curve(accuracy_train, accuracy_valid, 'classification error')
def get_date(url):
    date = parser(url).find(class_='stime').text[:10]
    return date
import aws
import utils
from reqasync import run_async
from sys import exit
from time import sleep
from datetime import datetime
from datetime import timedelta

if __name__ == '__main__':
    args = utils.parser()
    print('Reading configuration file')
    benchmark_params = utils.readJson(args.config)
    print('Starting AWS session')
    session = aws.session(benchmark_params['region'],
                          benchmark_params['access_key_id'],
                          benchmark_params['secret_access_key'])
    print('All the AWS services will be created at ' + benchmark_params['region'])
    print('Starting AWS S3 client')
    s3_client = session.client('s3')
    bucket_list = s3_client.list_buckets()
    bucket = None
    bucket = aws.createBucket(bucket_list['Buckets'],
                              benchmark_params['bucket_name'],
                              s3_client,
        if result[0] == "find":
            uf.find(result[1], True)
        elif result[0] == "union":
            uf.union(result[1][0], result[1][1])
    endtime = time.time()
    time_set.append(endtime - starttime)
    del uf
    return time_set

data_sets = [parser("{}.dat".format(i)) for i in range(1, 8)]

def test2():
    result = test(4, data_sets)

if __name__ == "__main__":
    cProfile.run("test2()", "main.prof")
    import pstats
def lz_only(url):
    suffix = parser(url).find_all(class_='authi')[1].find('a')['href']
    return f'https://bbs.saraba1st.com/2b/{suffix}'
                     explicit_mode=explicit_mode,
                     glb_maps=params['use_glb_maps'],
                     prefetch=params['use_prefetch'],
                     coloring=params['coloring'],
                     poly_order=poly_order,
                     domain=os.path.splitext(os.path.basename(mesh.name))[0],
                     function_spaces=[self.elastic.S, self.elastic.U])
        return u1, s1

if __name__ == '__main__':
    set_log_level(INFO)

    # Parse the input
    args = parser()
    params = {
        'num_unroll': args.num_unroll,
        'tile_size': args.tile_size,
        'mode': args.fusion_mode,
        'partitioning': args.part_mode,
        'coloring': args.coloring,
        'extra_halo': args.extra_halo,
        'explicit_mode': args.explicit_mode,
        'explicit_mode_id': args.explicit_mode,
        'use_glb_maps': args.glb_maps,
        'use_prefetch': args.prefetch,
        'log': args.log,
        'tofile': args.tofile,
        'verbose': args.verbose
    }
    connection, address = serv.accept()
    data = connection.recv(5096)  # first data packet
    print('First packet: ', data)
    from_client = data
    packet_counter = 1
    while True:  # keep reading for variable-size loads
        data = connection.recv(5096)
        packet_counter += 1
        print(f'Packet number {packet_counter}: {data}')
        from_client += data
        if data == b'':  # no more packets incoming
            print('no more info!')
            break
    from_client = str(from_client)[2:-1]
    print('Connection: ', connection, "Client's IP:", address)
    print(len(from_client), 'is the length of the packet')
    # write the server output (os.path.join avoids the invalid
    # backslash escapes in the original string literal)
    with open(os.path.join(os.getcwd(), 'data', 'server', 'server_output.txt'),
              mode='w', encoding='utf-8') as f:
        f.write(from_client)
    connection.close()
    break

for i in range(5):
    print('SERVER IS DONE, NOW GOING TO PARSER')
parser()
import sys

from utils.loader import *
from utils.parser import *
from utils.plotter import *
from utils.runner import *

if __name__ == '__main__':
    dim_feature = 25

    # Get arguments
    args = parser(sys.argv)
    algorithm_name, dataset_location, time_steps, output_filename, \
        alpha, num_clusters, four_cliques_epsilon, four_cliques_graph_noise = extracter(arg_options=args)

    # Load a specific dataset or directly use the artificial 4Cliques
    user_contexts, network, cluster_to_idx, idx_to_cluster = \
        load_data(
            dataset_location=dataset_location,
            four_cliques_epsilon=four_cliques_epsilon,
            four_cliques_graph_noise=four_cliques_graph_noise,
            dim_feature=dim_feature,
            num_clusters=num_clusters)

    # Identify cluster data
    if cluster_to_idx and idx_to_cluster:
        cluster_data = (cluster_to_idx, idx_to_cluster)
    else:
        cluster_data = None

    # Load a specific agent
    agent = load_agent(algorithm_name=algorithm_name,
def main():
    args = utils.parser()
    hidden = int(args.hidden)
    alpha = float(args.alpha)
    beta = float(args.beta)
    lmbda = float(args.lmbda)
    epoch = int(args.epoch)
    Xtrain, Ytrain = utils.load_data('../data/digitstrain.txt')
    Xvalid, Yvalid = utils.load_data('../data/digitsvalid.txt')

    # use a denoising autoencoder for pretraining
    denoise_auto_model = models.autoencoder.Model(784, hidden, True, 0.5)
    denoise_auto_model.add(models.autoencoder.Sigmoid(784, hidden))
    denoise_auto_model.add(models.autoencoder.Sigmoid(hidden, 784))
    denoise_auto_model.update_W()
    for n in range(epoch):
        for i in range(Xtrain.shape[0]):
            x = Xtrain[i, :]
            denoise_auto_model.train(x, x, alpha)

    # use a plain autoencoder for pretraining
    auto_model = models.autoencoder.Model(784, hidden)
    auto_model.add(models.autoencoder.Sigmoid(784, hidden))
    auto_model.add(models.autoencoder.Sigmoid(hidden, 784))
    auto_model.update_W()
    for n in range(epoch):
        for i in range(Xtrain.shape[0]):
            x = Xtrain[i, :]
            auto_model.train(x, x, alpha)

    # use an RBM for pretraining
    rbm = models.rbm.Model(hidden, 784)
    for n in range(epoch):
        for i in range(Xtrain.shape[0]):
            x = Xtrain[i, :]
            rbm.run_rbm(x, alpha, 10)

    # baseline: random weights
    model = models.neural_network.Model()
    model.add(models.neural_network.Sigmoid(784, hidden))
    model.add(models.neural_network.Softmax(hidden, 10))
    cross_valid_random = []
    accuracy_valid_random = []
    for n in range(epoch):
        print(n)
        for i in range(len(Ytrain)):
            x = Xtrain[i, :]
            y = Ytrain[i]
            model.train(x, y, alpha, beta, lmbda)
        calculate_error(model, cross_valid_random, accuracy_valid_random, Xvalid, Yvalid)
        print(cross_valid_random[-1])

    # use the denoising-autoencoder pre-trained weights
    model = models.neural_network.Model()
    model.add(models.neural_network.Sigmoid(784, hidden))
    model.add(models.neural_network.Softmax(hidden, 10))
    model.layers[0].W = denoise_auto_model.W
    cross_valid_denoise = []
    accuracy_valid_denoise = []
    for n in range(epoch):
        print(n)
        for i in range(len(Ytrain)):
            x = Xtrain[i, :]
            y = Ytrain[i]
            model.train(x, y, alpha, beta, lmbda)
        calculate_error(model, cross_valid_denoise, accuracy_valid_denoise, Xvalid, Yvalid)
        print(cross_valid_denoise[-1])

    # use the plain-autoencoder pre-trained weights
    model = models.neural_network.Model()
    model.add(models.neural_network.Sigmoid(784, hidden))
    model.add(models.neural_network.Softmax(hidden, 10))
    model.layers[0].W = auto_model.W
    cross_valid_auto = []
    accuracy_valid_auto = []
    for n in range(epoch):
        print(n)
        for i in range(len(Ytrain)):
            x = Xtrain[i, :]
            y = Ytrain[i]
            model.train(x, y, alpha, beta, lmbda)
        calculate_error(model, cross_valid_auto, accuracy_valid_auto, Xvalid, Yvalid)
        print(cross_valid_auto[-1])

    # use the RBM pre-trained weights
    model = models.neural_network.Model()
    model.add(models.neural_network.Sigmoid(784, hidden))
    model.add(models.neural_network.Softmax(hidden, 10))
    model.layers[0].W = np.transpose(rbm.W)
    cross_valid_rbm = []
    accuracy_valid_rbm = []
    for n in range(epoch):
        print(n)
        for i in range(len(Ytrain)):
            x = Xtrain[i, :]
            y = Ytrain[i]
            model.train(x, y, alpha, beta, lmbda)
        calculate_error(model, cross_valid_rbm, accuracy_valid_rbm, Xvalid, Yvalid)
        print(cross_valid_rbm[-1])

    x = np.array(range(epoch))
    # plt.hold is deprecated; repeated plot() calls overlay by default
    plt.plot(x, accuracy_valid_random, label='randomized')
    plt.plot(x, accuracy_valid_auto, label='autoencoder')
    plt.plot(x, accuracy_valid_denoise, label='denoising')
    plt.plot(x, accuracy_valid_rbm, label='RBM')
    plt.xlabel('Epoch')
    plt.ylabel('classification error')
    plt.legend(loc='upper center', shadow=True)
    plt.show()
    for item in data_set:
        result = convert(item)
        if result[0] == 'find':
            uf.find(result[1], True)
        elif result[0] == 'union':
            uf.union(result[1][0], result[1][1])
    endtime = time.time()
    uf.internalNameOfSet(1)
    del uf
    return (endtime - starttime)

result_set = parser('hw4.dat')

def main():
    print("PLEASE ENTER THE NUMBER BELOW TO SELECT THE ALGORITHM")
    print("1. Weighting rule with path-compression.")
    print("2. Weighting rule.")
    print("3. Naive algorithm.")
    print()
    selection = int(input("YOUR SELECTION IS: "))
    result_set = parser('hw4.dat')
    time_set = []
    if selection in [1, 2, 3]:
        for i in range(10):
            # run the algorithm 10 times, then take the average time
            time_set.append(test(selection, result_set))
def get_date(url):
    return parser(url).find_all(class_='authi')[1].find('em').text[4:13]
def get_post(page_url):
    return parser(page_url).find_all(class_='t_fsz')
random.seed(datetime.now())

tictactoe_values = {
    0: ['x', 'o', 'b'],
    1: ['x', 'o', 'b'],
    2: ['x', 'o', 'b'],
    3: ['x', 'o', 'b'],
    4: ['x', 'o', 'b'],
    5: ['x', 'o', 'b'],
    6: ['x', 'o', 'b'],
    7: ['x', 'o', 'b'],
    8: ['x', 'o', 'b'],
    9: ['positive', 'negative']
}
tictactoe = parser("tictactoe.csv")
tictactoe_dataset = DataSet(
    name="Tic Tac Toe",
    examples=tictactoe,
    inputs=range(len(tictactoe_values)),
    values=tictactoe_values,
    target=9,
    attrnames="TopLeft,TopCenter,TopRight,CenterL,CenterCenter,"
              "CenterRight,BottomLeft,BottomCenter,BottomRight,Result")

balance_scale_values = {
    0: ['L', 'B', 'R'],
    1: ['1', '2', '3', '4', '5'],
    2: ['1', '2', '3', '4', '5'],
    3: ['1', '2', '3', '4', '5'],
    4: ['1', '2', '3', '4', '5']
def get_post(page_url):
    return parser(page_url).find_all(class_='d_post_content')
async def parser_phrase(phrase: str):
    return parser(phrase)
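# Minimal usage sketch for the coroutine above; asyncio.run is the
# standard entry point, and the phrase is illustrative.
import asyncio

result = asyncio.run(parser_phrase("some phrase to parse"))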
def process_result_param(self, value, dialect):
    if not value:
        return None
    return parser(value).RULE()[0]
def get_date(url):
    return parser(url).find_all(class_='tail-info')[2].text[:10]
async def addfilter(client, message):
    userid = message.from_user.id if message.from_user else None
    if not userid:
        return await message.reply(f"You are anonymous admin. Use /connect {message.chat.id} in PM")
    chat_type = message.chat.type
    args = message.text.html.split(None, 1)

    if chat_type == "private":
        grpid = await active_connection(str(userid))
        if grpid is not None:
            grp_id = grpid
            try:
                chat = await client.get_chat(grpid)
                title = chat.title
            except Exception:  # narrowed from a bare except
                await message.reply_text("Make sure I'm present in your group!!", quote=True)
                return
        else:
            await message.reply_text("I'm not connected to any groups!", quote=True)
            return
    elif chat_type in ["group", "supergroup"]:
        grp_id = message.chat.id
        title = message.chat.title
    else:
        return

    st = await client.get_chat_member(grp_id, userid)
    if (
        st.status not in ("administrator", "creator")
        and str(userid) not in ADMINS
    ):
        return

    if len(args) < 2:
        await message.reply_text("Command Incomplete :(", quote=True)
        return

    extracted = split_quotes(args[1])
    text = extracted[0].lower()

    if not message.reply_to_message and len(extracted) < 2:
        await message.reply_text("Add some content to save your filter!", quote=True)
        return

    if (len(extracted) >= 2) and not message.reply_to_message:
        reply_text, btn, alert = parser(extracted[1], text)
        fileid = None
        if not reply_text:
            await message.reply_text("You cannot have buttons alone, give some text to go with it!", quote=True)
            return
    elif message.reply_to_message and message.reply_to_message.reply_markup:
        try:
            rm = message.reply_to_message.reply_markup
            btn = rm.inline_keyboard
            msg = get_file_id(message.reply_to_message)
            if msg:
                fileid = msg.file_id
                reply_text = message.reply_to_message.caption.html
            else:
                reply_text = message.reply_to_message.text.html
                fileid = None
            alert = None
        except Exception:
            reply_text = ""
            btn = "[]"
            fileid = None
            alert = None
    elif message.reply_to_message and message.reply_to_message.media:
        try:
            msg = get_file_id(message.reply_to_message)
            fileid = msg.file_id if msg else None
            reply_text, btn, alert = (
                parser(extracted[1], text)
                if message.reply_to_message.sticker
                else parser(message.reply_to_message.caption.html, text)
            )
        except Exception:
            reply_text = ""
            btn = "[]"
            alert = None
    elif message.reply_to_message and message.reply_to_message.text:
        try:
            fileid = None
            reply_text, btn, alert = parser(message.reply_to_message.text.html, text)
        except Exception:
            reply_text = ""
            btn = "[]"
            alert = None
    else:
        return

    await add_filter(grp_id, text, reply_text, btn, fileid, alert)

    await message.reply_text(
        f"Filter for `{text}` added in **{title}**",
        quote=True,
        parse_mode="md"
    )