async def process(message: types.Message, state: FSMContext):
    await bot.send_chat_action(message.from_user.id, ChatActions.TYPING)
    art_list = message.text.split()
    # Every article number must be numeric and have a length divisible by 6.
    for art in art_list:
        if not (art.isdigit() and len(art) % 6 == 0):
            await state.finish()
            # "Invalid article numbers entered!"
            return await message.answer('Неверно введены артикулы!')
    async with state.proxy() as data:
        try:
            for art in art_list:
                data['art_list'].append(art)
            print(data['art_list'])
        except KeyError:
            data['art_list'] = art_list
        data['current_art'] = art_list[0]
        data['message'] = message
    current_art = art_list[0]
    answer, images = parse(current_art)
    async with state.proxy() as data:
        data[f'article:{current_art}'] = current_art
        data[f'images:{current_art}'] = images
        data[f'count:{current_art}'] = 1
        data[f'price:{current_art}'] = 0
    count = 1
    price = 0
    # "Quantity: ... / Selling price: ...r"
    answer += bold(f'\nКоличество: {count}\nЦена продажи: {price}р')
    async with state.proxy() as data:
        data[f'title:{current_art}'] = answer
    media = [InputMediaPhoto(img) for img in images]
    await bot.send_media_group(
        message.from_user.id,
        media,
        reply_to_message_id=message.message_id,
    )
    await bot.send_message(
        message.from_user.id,
        text=emojize(answer),
        reply_markup=kb.inline_kb1,
    )
async def next_prod(call: CallbackQuery, state: FSMContext):
    # "Looking for the next product..."
    await call.answer('Ищу следующий товар...')
    async with state.proxy() as data:
        current_art = data['current_art']
        art_list = data['art_list']
        message = data['message']
        index = art_list.index(current_art)
        try:
            current_art = data['current_art'] = art_list[index + 1]
        except IndexError:
            # "All article numbers processed. You can:"
            return await bot.send_message(
                message.from_user.id,
                text='Артикулы обработаны. Вы можете:',
                reply_markup=kb.inline_finalize,
            )
    await bot.send_chat_action(message.from_user.id, ChatActions.TYPING)
    await Stage.Q1.set()
    answer, images = parse(current_art)
    async with state.proxy() as data:
        data[f'article:{current_art}'] = current_art
        data[f'images:{current_art}'] = images
        data[f'count:{current_art}'] = 1
        data[f'price:{current_art}'] = 0
    count = 1
    price = 0
    answer += bold(f'\nКоличество: {count}\nЦена продажи: {price}р')
    async with state.proxy() as data:
        data[f'title:{current_art}'] = answer
    media = [InputMediaPhoto(img) for img in images]
    await bot.send_media_group(
        message.from_user.id,
        media,
        reply_to_message_id=message.message_id,
    )
    await bot.send_message(
        message.from_user.id,
        text=emojize(answer),
        reply_markup=kb.inline_kb1,
    )
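# The two handlers above repeat the same "parse the article, cache it in FSM storage,
# send the media group and caption" steps. A minimal refactoring sketch is shown below,
# reusing only names already present in the handlers (parse, bold, emojize, bot,
# InputMediaPhoto, kb.inline_kb1); the helper name send_product_card is hypothetical.
async def send_product_card(message, state, current_art):
    answer, images = parse(current_art)
    async with state.proxy() as data:
        data[f'article:{current_art}'] = current_art
        data[f'images:{current_art}'] = images
        data[f'count:{current_art}'] = 1
        data[f'price:{current_art}'] = 0
        answer += bold('\nКоличество: 1\nЦена продажи: 0р')
        data[f'title:{current_art}'] = answer
    media = [InputMediaPhoto(img) for img in images]
    await bot.send_media_group(
        message.from_user.id,
        media,
        reply_to_message_id=message.message_id,
    )
    await bot.send_message(
        message.from_user.id,
        text=emojize(answer),
        reply_markup=kb.inline_kb1,
    )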
def get_clinical_research(medicament):
    if not check_input(medicament):
        return error()

    # Get the list of alphabet characters.
    alphabet_list = parse(get_html(BASE_URL))

    # Find the link to the page for the medicament's first letter.
    link_togo = get_first_letter_url(alphabet_list, medicament)
    if not check_input(link_togo):
        return error()

    # Get the list of medicaments.
    medicament_list = get_medicament_list(link_togo)

    # Get the list of medicaments that were found.
    medicament_link = find_medicament_matches(medicament_list, medicament)
    if not check_input(medicament_link):
        return error()

    # Build a Bootstrap table to display the data that was found.
    # Column headers: "Medicament page in the Russian register of medicines" /
    # "Clinical trial information".
    html = ("<table class='table table-hover'><thead class='thead-dark'><tr>"
            "<th scope='col'>Страница препарата в регистре лекарственных средств России</th>"
            "<th scope='col'>Информация о клинических исследованиях</th></tr></thead>")
    for i in medicament_link:
        link = "http:" + i['value']
        medicament_page = get_html(link)
        clinical_trials = parse_clinical_trials(medicament_page)
        if clinical_trials is None:
            search_result = 'Не найдена'  # "Not found"
            warning = ""
        else:
            search_result = 'Найдена'  # "Found"
            warning = "class='table-primary'"
        html += (f"<tr {warning}>"
                 f"<td align='left'><a href='{link}' target='_blank' class='text-dark'>{i['key']}</a></td>"
                 f"<td align='center'>{search_result}</td></tr>")
    html += '</table>'

    mapping = {'page': html}
    body = render("index", mapping)
    return body
def parse(p):
    needless = None
    p.contents = tabulations.parse(p.contents)
    p.windows_style = None
    # if p.contents.find('\r'):
    if e_bothering.search(p.contents):
        p.contents = p.contents.replace('\r', '')
        p.windows_style = True
    p.old_contents = p.contents
    p.characters, p.contents = characters.parse(p.contents)
    debug(p, DEBUG_CHARACTERS)
    p.comments, p.contents = comments.parse(p.contents)
    debug(p, DEBUG_COMMENTS)
    p.strings, p.contents = strings.parse(p.contents)
    debug(p, DEBUG_STRINGS)
    p.macros, p.contents = macros.parse(p.contents)
    debug(p, DEBUG_MACROS)
    p.blanks, p.contents = blanks.parse(p.contents)
    debug(p, DEBUG_BLANKS)
    p.functions, p.contents = functions.parse(p.contents)
    debug(p, DEBUG_FUNCTIONS)
    p.prototypes, p.contents = prototypes.parse(p.contents)
    debug(p, DEBUG_PROTOTYPES)
    p.types, p.contents = types.parse(p.contents)
    debug(p, DEBUG_TYPES)
    p.globals, p.contents = globals.parse(p.contents)
    debug(p, DEBUG_GLOBALS)
def __init__(self, size, initial_function=None, resolution=256, line_offset=32,
             x_range=(-5, 5), y_range=(-1, 6.5),
             draw_points=False, verbose=True):
    if not isinstance(size, abc.Iterable) or len(size) != 2:
        raise TypeError
    self.size = size
    self.image = pygame.Surface(size, pygame.SRCALPHA)
    self.function = functions.parse(initial_function)
    self.current_time = 0
    self.VERBOSE = verbose
    self.RESOLUTION = resolution
    self.LINE_OFFSET = line_offset
    self.X_RANGE = x_range
    self.Y_RANGE = y_range
    self.DRAW_POINTS = draw_points
def scraper(url):
    """Scraper function"""
    things = []
    names = []
    html = f.get_html(url)
    tags = f.parse(html)
    for tag in tags:
        things.append(f.chop(tag))
    f.remove_first_chunk(things)
    f.remove_second_chunk(things, names)
    things.clear()
    f.remove_last_chunk(names, things)
    for name in things:
        print(name, end="\n\n\n")
import functions

keywords = []
separators = []
operators = []
identifiers = []
parsed_values = []
symbol_table = []
instructions = []
assembly_instructions = []

functions.load_lists("keyword.txt", keywords)
functions.load_lists("operator.txt", operators)
functions.load_lists("separator.txt", separators)

functions.parse("input.txt", keywords, separators, operators, identifiers,
                "output.txt", parsed_values, symbol_table, instructions)
functions.gen_assy(instructions, symbol_table, assembly_instructions, identifiers)

# Truncate the output file, then append the symbol table and assembly listing.
open('output.txt', 'w').close()
output_file = open("output.txt", "a")

print("=== Start of Symbol Table ===")
output_file.write("=== Start of Symbol Table ===\n")
for x in range(0, len(symbol_table)):
    print(symbol_table[x])
    output_file.write(str(symbol_table[x]) + '\n')

print("=== Start of Assembly Code ===")
output_file.write("=== Start of Assembly Code ===\n")
for x in range(0, len(assembly_instructions)):
    print(assembly_instructions[x])
    output_file.write(str(assembly_instructions[x]) + '\n')
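# functions.load_lists belongs to this project's own functions module and is not shown
# here. A hedged sketch of what it presumably does (read one token per line from the
# given file and append the stripped tokens to the target list) might look like:
def load_lists(filename, target):
    with open(filename) as source:
        for line in source:
            token = line.strip()
            if token:
                target.append(token)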
def check():
    if os.path.exists(tf.name):
        print "There is currently a user logged on."
    else:
        print "You are currently not logged in.\n"
    functions.parse(raw_input(">> "))
#!/usr/bin/python
import functions
import lexicon
import synonyms
import config
import game

if config.DEBUG == True:
    while 1:
        playertypes = raw_input('> ')
        words = playertypes.split()
        print(words)
        sen = functions.parse(words)
        print(sen)
        s = functions.sanitize_sentence(sen)
        print(s)
        s = functions.synonymize(s)
        print("synonyms: ", s)
        functions.check_commands(s)
else:
    while 1:
        playertypes = raw_input('> ')
        words = playertypes.split()
        sen = functions.parse(words)
        s = functions.sanitize_sentence(sen)
        s = functions.synonymize(s)
        functions.check_commands(s)
import requests
from bs4 import BeautifulSoup
import pandas as pd

from functions import parse

result = pd.DataFrame()
url = 'https://dominos.by'
r = requests.get(url)
soup = BeautifulSoup(r.text, features='html.parser')
div = soup.find_all('div', {'class': 'product-card'})
for item in div:
    res = parse(item)
    result = result.append(res, ignore_index=True)
result.to_excel('result.xlsx')
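# Note: DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0. A
# hedged equivalent of the accumulation loop for current pandas, assuming parse()
# returns a dict or Series of fields for one product card (as the append call implies):
result = pd.DataFrame()
for item in div:
    res = parse(item)
    result = pd.concat([result, pd.DataFrame([res])], ignore_index=True)
result.to_excel('result.xlsx')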
def set_function(self, f):
    f = functions.parse(f, verbose=self.VERBOSE)
    if f:
        self.function = f
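# A hedged usage sketch for the two methods above, assuming they belong to the same
# plotting class (called Grapher here purely for illustration; the real class name is
# not shown) and that functions.parse accepts an expression string:
grapher = Grapher((640, 480), initial_function='sin(x)', resolution=128, verbose=False)
grapher.set_function('x**2 - 1')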
def indeed_crawl_query():
    print("Getting request...")
    values = request.get_json()
    # Expecting JSON like:
    # {
    #   locations: ['Washington, DC', 'Charlotte, NC', 'Roanoke, VA']
    #   titles: ['Data Scientist', 'Data Analyst']
    #   query: """I use python to collect and scrape data from the web. I can set up integrated data pipelines
    #              to collect data from different sources. I train machine learning models using sklearn,
    #              and tensorflow with keras. BeautifulSoup and Selenium. BeautifulSoup and Selenium.
    #              BeautifulSoup and Selenium. BeautifulSoup and Selenium. I can give results to developers using Flask apps
    #              and Flask APIs. I can access APIs and RSS feeds. I can also use SQL, particularly ElephantSQL
    #              and Postgres. I like venture capital, finance and business consulting. I love to work with
    #              natural language processing. Looking for a junior or entry-level or mid-level position.
    #              venture capital, finance and business consulting venture capital, finance and business consulting
    #              venture capital, finance and business consulting venture capital, finance and business consulting"""
    # }

    # Extract request data
    locations = values['locations']
    titles = values['titles']
    query = values['query']

    # Disable auto-complete
    print("Launching browser...")
    profile = webdriver.FirefoxProfile()
    profile.set_preference("browser.formfill.enable", "false")

    # Create a new instance of Firefox
    browser = webdriver.Firefox(
        executable_path='../../../Selenium/geckodriver')

    # Instantiate dirty_jobs
    dirty_jobs = []

    # Crawl over indeed.com
    jobs = indeed_crawl(titles, locations, dirty_jobs, browser)

    # Clean up the text from the lxml
    print('Parsing descriptions...')
    texts = parse(jobs)
    texts = [str(text)[1:-1] for text in texts]

    # Send to df
    df = pd.DataFrame(texts, columns=['description'])
    df['jobs'] = jobs

    # NLP Model
    print('Loading model...')
    nlp = spacy.load("en_core_web_md")
    print('Done loading model!')

    # Clean and tokenize the descriptions
    # df['tokens'] = [tokenize(entry, nlp) for entry in df['description'].apply(clean_description).tolist()]

    # Send clean text to list
    text = df['description'].apply(clean_description).tolist()

    # Instantiate Vectorizer
    print('Fitting vectorizer...')
    tfidf_dtm = fit_for_nn(text)

    # Create a vocab and get word counts per doc
    sparse = tfidf.fit_transform(text)

    # Send to df
    tfidf_dtm = pd.DataFrame(sparse.todense(),
                             columns=tfidf.get_feature_names())

    # Instantiate model
    print('Teaching the computer...')
    nn = NearestNeighbors(n_neighbors=20, algorithm='ball_tree')
    nn.fit(tfidf_dtm)
    print('Damn, that computer is smart.')

    # Process query for the model
    print('Asking the computer for recommendations...')
    query_dtm = process_query_for_nn(query)

    # Query for closest neighbors
    results = nn.kneighbors(query_dtm)[1][0].tolist()

    # Send to list
    job_urls = df['jobs'][results].tolist()
    print('Done!')

    return jsonify({'jobs': job_urls})
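# Note: scikit-learn renamed get_feature_names() to get_feature_names_out() in 1.0 and
# removed the old name in 1.2, so on newer versions the document-term DataFrame above
# would be built as follows (assuming tfidf is a module-level TfidfVectorizer, as the
# fit_transform call implies):
tfidf_dtm = pd.DataFrame(sparse.todense(), columns=tfidf.get_feature_names_out())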
# Extract the directory path where the vocabs are found
originDir = os.path.dirname(os.path.abspath(__file__))
dir = os.path.join(os.path.dirname(os.path.dirname(originDir)), "Resources")

# Iterate over every directory
for dirName in os.listdir(dir):
    # For each directory create a new ExcelFile
    if os.path.isdir(os.path.join(dir, dirName)):
        # Create a Pandas Excel writer using XlsxWriter as the engine.
        writer = pd.ExcelWriter(os.path.join(originDir, dirName) + ".xlsx",
                                engine='xlsxwriter',
                                options={
                                    'strings_to_urls': False,
                                    'constant_memory': True
                                })
        excel = ExcelFile(writer)

        # Get the xlsxwriter workbook and worksheet objects.
        workbook = writer.book

        # Add a worksheet with its title and a bold header row
        data = workbook.add_worksheet("Data")
        data.write_row(0, 0, ("Subject", "Predicate", "Object", "Domain"),
                       workbook.add_format({"bold": True}))
        data.set_column(0, 4, 66)

        # Add information for each file of the directory
        for fileName in os.listdir(os.path.join(dir, dirName)):
            excel = parse(dir, dirName, fileName, excel, originDir)

        # Close the Excel file
        excel.writer.book.close()
        excel.writer.save()
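# Note: on newer pandas the xlsxwriter workbook options are passed via engine_kwargs
# rather than the deprecated "options" argument, and writer.save() has been replaced by
# writer.close(). A hedged sketch of the equivalent writer construction, with the same
# workbook options as above:
writer = pd.ExcelWriter(
    os.path.join(originDir, dirName) + ".xlsx",
    engine='xlsxwriter',
    engine_kwargs={'options': {'strings_to_urls': False, 'constant_memory': True}},
)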