def insert_none_keyword(self, keyword):
    """Upsert *keyword* with a placeholder update timestamp when needed.

    The timestamp is today's year/month with a fixed '0309' (day 03,
    hour 09) suffix, pushed one month forward — mirroring the
    'yyyymm' + '0309' convention used when parsing real keyword rows.
    """
    if not self.is_keyword_need_upsert(keyword):
        return
    stamp = datetime.strptime(
        datetime.today().strftime('%Y%m0309'), '%Y%m%d%H')
    self.upsert_keyword(
        Keyword(value=keyword, update=stamp + relativedelta(months=+1)))
def init_add_keywords():
    """Load keyword definitions from initial/keywords.csv (preceded by a
    NULL placeholder entry), insert them all as Keyword rows, and commit."""
    global session, engine
    # Placeholder entry always inserted first.
    entries = [{'keyword_description': "NULL", 'keyword_regex': "NULL"}]
    # Lets import the CSV with the keywork list
    with open('initial/keywords.csv') as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        row_count = 0
        for row in reader:
            if row_count != 0:  # skip the header row
                print(f'Adding keyword {row[0]} => {row[1]}')
                entries.append({
                    'keyword_description': row[0],
                    'keyword_regex': row[1],
                })
            row_count += 1
    # NOTE(review): this logs before any insert happens and the count
    # includes the header row — message kept as-is to preserve behavior.
    logging.info('Inserted {} keywords to database'.format(row_count))
    for entry in entries:
        logging.info('{}: Adding keyword {} to the database'.format(
            sys._getframe().f_code.co_name, entry['keyword_description']))
        session.add(
            Keyword(keyword_description=entry['keyword_description'],
                    keyword_regex=entry['keyword_regex'],
                    keyword_tmodified=datetime.now(),
                    keyword_tcreate=datetime.now()))
    session.commit()
def apply(self):
    """Apply this pending change to its subject Bot and mark it executed.

    Returns True on success.  Returns False (and deletes this change
    record) when the subject no longer exists.  Raises AttributeError
    when a referenced Category/Country/Keyword cannot be found.
    """
    try:
        # Resolving self.subject may itself raise Bot.DoesNotExist.
        if self.subject is None:
            self.delete_instance()
            return False
    except Bot.DoesNotExist:
        self.delete_instance()
        return False
    if self.action == 'category':
        from models import Category
        try:
            cat = Category.get(Category.id == self.value)
            self.subject.category = cat
        except Category.DoesNotExist:
            raise AttributeError("Category to change to does not exist.")
    elif self.action == 'name':
        self.subject.name = self.value
    elif self.action == 'username':
        self.subject.username = self.value
    elif self.action == 'description':
        self.subject.description = self.value
    elif self.action == 'extra':
        self.subject.extra = self.value
    elif self.action == 'country':
        # NOTE(review): this branch reads the raw _value (other branches
        # use self.value) — presumably to inspect the unconverted stored
        # string; confirm against the model's value property.
        if self._value == 'None' or self._value is None:
            self.subject.country = None
        else:
            from models import Country
            try:
                con = Country.get(id=self._value)
                self.subject.country = con
            except Country.DoesNotExist:
                raise AttributeError(
                    "Country to change to does not exist.")
    elif self.action == 'inlinequeries':
        self.subject.inlinequeries = bool(self.value)
    elif self.action == 'official':
        self.subject.official = bool(self.value)
    elif self.action == 'offline':
        self.subject.offline = bool(self.value)
    elif self.action == 'spam':
        self.subject.spam = bool(self.value)
    elif self.action == 'add_keyword':
        kw_obj = Keyword(name=self.value, entity=self.subject)
        kw_obj.save()
    elif self.action == 'remove_keyword':
        try:
            kw_obj = Keyword.get(name=self.value, entity=self.subject)
            kw_obj.delete_instance()
        except Keyword.DoesNotExist:
            raise AttributeError(
                "Keyword to disable does not exist anymore.")
    # Persist the subject, then flag this change as applied.
    self.subject.save()
    self.executed = True
    self.save()
    return True
def add_keyword(request, room_title):
    """Attach the POSTed keyword to the room named *room_title*.

    Creates the Keyword only when the room does not already have one
    with the same (stripped) name; always responds 'OK'.
    """
    room = get_object_or_404(Room, title=room_title)
    keyword_str = request.POST['keyword'].strip()
    # exists() issues a cheap EXISTS query instead of fetching every
    # matching row just to count them.
    if not room.keyword_set.filter(name=keyword_str).exists():
        Keyword(name=keyword_str, room=room).save()
    return HttpResponse('OK')
def carga_keywords():
    """Load keywords from keywords.txt into the database; return all keywords."""
    # Open the txt file here and insert its contents into the database.
    contador = 0  # count of keywords processed (not returned)
    try:
        with open('keywords.txt') as file:
            for line in file:
                line = line.replace('\n', '').lower()
                if line != "":
                    keywords = Keyword(line)
                    keywords.save()
                    # Call the check here; it returns the position.
                    contador += 1
                    # NOTE(review): a second Keyword(line, 12) is built and
                    # update()d immediately after save() — intent unclear;
                    # confirm against the Keyword model before relying on it.
                    key = Keyword(line, 12)
                    key.update()
    except FileNotFoundError:
        print("No se encuentra Fichero keywords.txt")
    return Keyword.get_all()
def many_to_many(session):
    """Demo of the BlogPost<->Keyword many-to-many: create a post for
    wendy, tag it, then query posts back by keyword."""
    # give Wendy some blog posts
    wendy = (session.query(User)
             .filter_by(name='wendy')
             .one())
    post = BlogPost("Wendy's Blog Post", "This is a test", wendy)
    session.add(post)
    print("wendy's first post: {}".format(post))
    # create a few keywords
    for tag in ('wendy', 'firstpost'):
        post.keywords.append(Keyword(tag))
    # query posts with the 'firstpost' keyword
    first_posts = (session.query(BlogPost)
                   .filter(BlogPost.keywords.any(keyword='firstpost'))
                   .all())
    print('all the first posts: {}'.format(first_posts))
    # we can also see all of wendy's posts
    print("Wendy's posts: {}".format(wendy.posts.all()))
def get_keyword(self, name):
    """ Returns the topic if it exists, and otherwise creates it """
    existing = (self.session.query(Keyword)
                .filter(Keyword.name == name)
                .first())
    if existing:
        return existing
    # Not found: create, persist, and return the new row.
    created = Keyword(name)
    self.session.add(created)
    self.session.commit()
    return created
def set_keywords():
    """Update keyword values from the JSON request body.

    The payload maps keyword ids to lists of values: the first value
    overwrites the existing Keyword row; each additional value becomes a
    new Keyword row sharing the same description.
    """
    data = request.get_json(silent=True)
    for key, values in data['keywords'].items():
        keyword = Keyword.get(keyword_id=key)
        description = keyword.description
        last_index = len(values) - 1  # hoisted out of the loop
        # enumerate() replaces the C-style range(len(values)) index loop.
        for i, value in enumerate(values):
            keyword.value = value
            db_session.commit()
            if i != last_index:
                # Creating new keyword for multiple values
                keyword = set_attributes(Keyword(), description=description)
                db_session.add(keyword)
    return HttpResponse('Keywords set!')
def add_keywords(update, context):
    """Register each line of the incoming Telegram message as a Keyword
    for the sending admin, skipping keywords the admin already has."""
    try:
        keyword_sent_list = update.effective_message.text.split('\n')
    except ValueError:
        update.message.reply_text('ورودی اشتباه')
        return ADD_WORDS
    admin = Admin.get_by_username(update.effective_message.from_user.username)
    for keyword in keyword_sent_list:
        # 'not in' instead of 'not x in y'.  The keyword list is
        # deliberately re-queried each iteration so a value repeated
        # within one message is not inserted twice (assuming
        # get_keywords reflects the just-added row — confirm).
        if keyword not in Admin.get_keywords(admin.username):
            Keyword(name=keyword, admin_id=admin.id).add()
    return ADD_WORDS
def _get_keyword_id(self, word, type_):
    """Return the id of the (word, type_) keyword, inserting it when
    missing; returns -1 on any database failure."""
    try:
        keyword = Keyword.select(word=word, type=type_).first()
    except Exception as e:
        print(f"Failed to search keyword {word} with error: {e}")
        return -1
    if keyword is None:
        try:
            # Rebinds 'keyword'; if the constructor itself raises, the
            # message below prints the None left over from the lookup.
            keyword = Keyword(created=datetime.now(), word=word, type=type_)
            keyword.flush()
        except Exception as e:
            print(f"Failed to insert keyword {keyword} with error: {e}")
            return -1
    return keyword.id
def init_add_keywords():
    """Seed the database with the built-in exchange keyword regexes."""
    global session, SERVER_MODE, engine
    # (description, regex) pairs for the supported exchanges.
    exchange_patterns = (
        ('Binance', '(binance|bnb)'),
        ('Huobi', '(huobi)'),
        ('Bittrex', '(bittrex)'),
        ('Bitfinex', '(bitfinex)'),
        ('Coinbase', '(coinbase)'),
        ('Kraken', '(kraken)'),
        ('Poloniex', '(poloniex)'),
    )
    for description, regex in exchange_patterns:
        logging.info(
            f"{sys._getframe().f_code.co_name}: Adding keyword {description} to the database"
        )
        session.add(
            Keyword(keyword_description=description,
                    keyword_regex=regex,
                    keyword_tmodified=datetime.now(),
                    keyword_tcreate=datetime.now()))
    session.commit()
def parse_keyword(keyword, response, page, page_size):
    """Keyword parser: extract Keyword objects from one result page.

    Returns (next_page, keywords); (None, None) when the response reports
    failure or carries no data.  next_page becomes None once page >= 500.
    Raises ParseError when an expected field is missing from an item.
    """
    resp_json = response.json()
    resp_keywords = resp_json['value']['data']
    if not resp_json['successed'] or len(resp_keywords) == 0:
        return None, None
    resp_total = resp_json['value']['total']
    # Hard cap at page 500 — presumably an upstream API limit; confirm.
    next_page = None if page >= 500 else _get_next_page(1, page, page_size, resp_total)
    keywords = list()
    for item in resp_keywords:
        try:
            new_keyword = Keyword(
                value=item['keywords'],
                company_cnt=item['company_cnt'],
                showwin_cnt=item['showwin_cnt'],
                repeat_keyword=item.get('repeatKeyword', None),
                is_p4p_keyword=item.get('isP4pKeyword', None),
                # 'yyyymm' + fixed '0309' (day 03, hour 09), then +1 month.
                update=datetime.strptime(item['yyyymm']+'0309', '%Y%m%d%H') + relativedelta(months=+1),
                # Twelve months of search page views, newest first.
                srh_pv={
                    'srh_pv_this_mon': item['srh_pv_this_mon'],
                    'srh_pv_last_1mon': item['srh_pv_last_1mon'],
                    'srh_pv_last_2mon': item['srh_pv_last_2mon'],
                    'srh_pv_last_3mon': item['srh_pv_last_3mon'],
                    'srh_pv_last_4mon': item['srh_pv_last_4mon'],
                    'srh_pv_last_5mon': item['srh_pv_last_5mon'],
                    'srh_pv_last_6mon': item['srh_pv_last_6mon'],
                    'srh_pv_last_7mon': item['srh_pv_last_7mon'],
                    'srh_pv_last_8mon': item['srh_pv_last_8mon'],
                    'srh_pv_last_9mon': item['srh_pv_last_9mon'],
                    'srh_pv_last_10mon': item['srh_pv_last_10mon'],
                    'srh_pv_last_11mon': item['srh_pv_last_11mon'],
                },
            )
        except KeyError:
            raise ParseError('数据解析错误 - %s: %s' % (type(item), item))
        keywords.append(new_keyword)
    return next_page, keywords
def geo_add():
    """Handle a geo seed-form submission (Python 2 Flask view).

    Creates a GeoQuery, then one Keyword + Patch + positive Example per
    submitted patch, computes the patch feature, and initializes a
    classifier for each keyword.  Redirects to 'geo_top' either way.
    """
    form = SeedForm()
    if form.validate_on_submit():
        geo_query = GeoQuery(name=form.keyword.data)
        db.session.add(geo_query)
        seeds = json.loads(form.seeds.data)
        for blob_id in seeds.keys():
            count = 0
            for patch in seeds[blob_id]:
                # Each patch gets its own numbered keyword under the query.
                keyword = Keyword(name=form.keyword.data + str(count),
                                  geoquery=geo_query)
                count += 1
                db.session.add(keyword)
                # Clamp negative coordinates to the blob origin.
                x = max(0, patch[0])
                y = max(0, patch[1])
                size = patch[2]
                print '%s (%d, %d, %d)' % (blob_id, x, y, size)
                blob = Blob.query.get(blob_id)
                # 'patch' is rebound from the raw triple to the model row.
                patch = Patch(blob=blob, x=int(x), y=int(y), size=int(size))
                db.session.add(patch)
                seed = Example(value=True, patch=patch, keyword=keyword)
                db.session.add(seed)
                # Commit per patch so patch.id / keyword.id are assigned
                # before the feature/classifier steps below use them.
                db.session.commit()
                # calculate patch feature, save feature file with just that patch
                manage.calculate_dataset_features(
                    config.BLOB_DIR, config.FEATURES_DIR,
                    os.path.basename(blob.location), [x, y, size, patch.id])
                # initialize classifier, change to select geo_query dataset
                # NOTE(review): dataset_id=2 is hard-coded — confirm.
                manage.create_classifier(keyword_id=keyword.id, dataset_id=2,
                                         num_seeds=1)
    else:
        print 'did not validate'
        print form.keyword.errors
        print form.seeds.errors
    return redirect(url_for('geo_top'))
def create_keywords():
    """Create Keyword rows for a comma-separated 'keyword' request value.

    Returns 202 with the created keywords when none of the values exist
    yet; otherwise 302 naming the first existing keyword (and its
    category when it has one).
    """
    values = request.values['keyword'].split(',')
    keywords = Keyword.get_all(Keyword.value.in_(values))
    if not keywords:  # truthiness instead of len(...) == 0
        keywords = [add_to_db(Keyword(), value=v) for v in values]
        return HttpResponse(
            {
                'keywords': [{
                    'value': k.value,
                    'id': k.keyword_id
                } for k in keywords]
            }, 202)
    first = keywords[0]
    if first.categories:  # truthiness instead of len(...) > 0
        return HttpResponse(
            'Keyword "{0}" already exists in category "{1}"!'.format(
                first.value, first.categories[0].name), 302)
    return HttpResponse(
        'Keyword "{0}" already exists!'.format(first.value), 302)
def search(keyword):
    """Get-or-create the Keyword row, then scrape the forms page for
    *keyword* and persist each team row as an Item (Python 2)."""
    logging.info(u"Searching for keyword: {}".format(keyword))
    try:
        kw = db_session.query(Keyword).filter_by(keyword=keyword).one()
    except NoResultFound:
        kw = Keyword(keyword=keyword).save()
    page = make_request(
        "https://scrapethissite.com/pages/forms/?q={}".format(keyword),
        headers=HEADERS)
    for row in page.find_all("tr", "team"):
        i = Item()
        # NOTE(review): 'year' is filled from the "name" cell — confirm
        # this mapping is intentional.
        i.year = row.find("td", "name").text.strip()
        i.wins = row.find("td", "wins").text.strip()
        i.losses = row.find("td", "losses").text.strip()
        i.save()
        print i.to_dict()
def keyword_add(): form = SeedForm() if form.validate_on_submit(): keyword = Keyword.query.filter_by(name=form.keyword.data).first() keyword = keyword if not keyword == None else Keyword( name=form.keyword.data) db.session.add(keyword) seeds = json.loads(form.seeds.data) for blob_id in seeds.keys(): blob = Blob.query.get_or_404(blob_id) for patch in seeds[blob_id]: (x, y, size) = patch print '%s (%d, %d, %d)' % (blob_id, x, y, size) patch = Patch(blob=blob, x=int(x), y=int(y), width=int(size), height=int(size)) db.session.add(patch) seed = Example(value=True, patch=patch, keyword=keyword) db.session.add(seed) # We don't create features yet, because we don't know # what datasets, and thus what patches and features # we'll be running against. # feat = fs.create_patch_feature(patch) # db.session.add(feat) db.session.commit() return redirect(url_for('keyword', id=keyword.id)) else: print 'did not validate' print form.keyword.errors print form.seeds.errors return redirect(url_for('keyword_new')) return redirect(url_for('keyword_top'))
# --- Application bootstrap (runs as module-import side effects) ---
app.config.from_pyfile('config.py')
db.init_app(app)
app.logger.addHandler(logging.StreamHandler())
app.logger.setLevel(logging.INFO)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Seed Team documents; already-existing teams are skipped via the
# unique-index violation.
for _ in teams:
    try:
        Team(group_id=_['groupId'], name=_['name']).save()
    except NotUniqueError:
        pass

# Re-seed keywords only when the stored count differs from the source
# dict (a coarse freshness check — duplicates would raise on save).
if len(keywords.keys()) != Keyword.objects.count():
    for _ in keywords.keys():
        Keyword(keyword=_).save()


def generate_coupon(coin, description, producer):
    """Create and persist a Coupon.

    Raises Error when coin/description are missing or when the document
    fails MongoEngine validation; returns the saved Coupon otherwise.
    """
    if coin is None or description is None:
        raise Error("coin and description required")
    coupon = Coupon(coin=coin, description=description, producer=producer)
    try:
        coupon.save()
        return coupon
    except ValidationError:
        raise Error("invalid value")
def dbInsertKeywords(keywordList, lazy=False):
    """Resolve a list of keyword phrases to Keyword documents.

    With lazy=True: only look up existing documents, no inserts/updates.
    Otherwise: drive the keywordseverywhere bulk tool via Selenium to
    fetch volume/cpc/competition as CSV, then insert or update a Keyword
    document per row.  Returns the resolved Keyword objects.

    NOTE(review): relies on hard-coded Windows paths and a local
    chromedriver; indentation reconstructed from a collapsed source.
    """
    # Normalize phrases; strToKeyword may return falsy values, dropped here.
    keywordList = list(
        filter(None, [strToKeyword(phrase) for phrase in keywordList]))
    if lazy:
        if DEBUG:
            (print(
                "dbInsertKeywords: Using Lazy DB check, no inserting or updating\n"
            ))
        keywordObjects = []
        for phrase in keywordList:
            keyQuery = Keyword.objects(key=phrase)
            if (keyQuery):
                keywordObjects.append(keyQuery[0])
        if DEBUG:
            (print("dbInsertKeywords: Returning lazy DB query\n"))
        # De-duplicate before returning.
        return list(set(keywordObjects))
    if DEBUG:
        (print("dbInsertKeywords: Taking keyword string list...\n"))
    #static URL for keywordseverywhere bulk tool
    if DEBUG:
        (print(""))
    URL = "https://keywordseverywhere.com/ke/4/manual.php"
    if DEBUG:
        (print("dbInsertKeywords: Scraping for given keywords at %s...\n", URL))
    #setup browser options
    chrome_options = Options()
    chrome_options.add_argument("user-data-dir=/tmp/tarun")
    download_dir = r"C:\Users\Oliver\Desktop\Misc Code\python\KC\temp"
    preferences = {
        "download.default_directory": download_dir,
        "directory_upgrade": True,
        "safebrowsing.enabled": True
    }
    chrome_options.add_experimental_option("prefs", preferences)
    if DEBUG:
        (print("dbInsertKeywords: Getting webdriver for chrome...\n"))
    driver = webdriver.Chrome(
        executable_path=r'C:\Users\Oliver\dev\cfehome\chromedriver.exe',
        options=chrome_options)
    fileName = ''
    keywordObjectReturnList = []
    # The bulk tool takes at most ~2000 phrases per submission.
    keywordLists = chunks(list(set(keywordList)), 2000)
    for chunk in keywordLists:
        driver.get(URL)
        #find and enter to form
        if DEBUG:
            (print("dbInsertKeywords: Trying to find keyword form...\n"))
        form = driver.find_element_by_id("keywords")
        text = ", ".join(chunk)
        if DEBUG:
            (print(
                "dbInsertKeywords: Copying to clipboard with pyperclip.copy()...\n"
            ))
        pyperclip.copy(text)
        if DEBUG:
            (print("dbInsertKeywords: Pasting with os Keys...\n"))
        form.send_keys(Keys.CONTROL, 'v')
        if DEBUG:
            (print("dbInsertKeywords: Finding submit button...\n"))
        button = driver.find_element_by_id("submit")
        if DEBUG:
            (print("dbInsertKeywords: Clicking submit button...\n"))
        button.click()
        time.sleep(2)
        if DEBUG:
            (print("dbInsertKeywords: Finding CSV download button...\n"))
        CSVDownloadButton = driver.find_element_by_class_name("buttons-csv")
        if DEBUG:
            (print(
                "dbInsertKeywords: Trying to click submit button until CSV button is displayed...\n"
            ))
        # Keep re-submitting until the results table renders the CSV button.
        while not CSVDownloadButton.is_displayed():
            try:
                button.click()
                if DEBUG:
                    (print("dbInsertKeywords: clicked [SUBMIT]...\n"))
            except:
                pass
            time.sleep(1)
        if DEBUG:
            (print("dbInsertKeywords: Clicking CSV download button...\n"))
        CSVDownloadButton.click()
        time.sleep(1)
        if DEBUG:
            (print(
                "dbInsertKeywords: Trying to find file that was downloaded...\n"
            ))
        # The downloaded file is named after the first phrase in the chunk.
        name = str.lower(chunk[0]).replace("&", "").replace(
            "|", "").strip().replace(" ", "-")
        if name == "":
            name = "csv"
        fileName = "C:\\Users\\Oliver\\Desktop\\Misc Code\\python\\KC\\temp\\" + name + ".csv"
        if DEBUG:
            (print("dbInsertKeywords: Opening as CSV...\n"))
        try:
            csv_file = open(fileName)
        except NameError:
            # NOTE(review): open() raises FileNotFoundError, not NameError —
            # this handler likely never fires; confirm intent.
            print("No file found")
            return []
        reader = csv.reader(csv_file, delimiter=',')
        titleRowSkip = False
        if DEBUG:
            (print("dbInsertKeywords: Adding/updating keywords...\n"))
        for row in reader:
            if titleRowSkip:
                phrase = row[1]
                vol = row[5]
                cpc = row[6]
                competition = row[7]
                keyQuery = Keyword.objects(key=phrase)
                # Only phrases shorter than 20 chars are stored.
                if (len(phrase) < 20):
                    if (not keyQuery):
                        kw = Keyword(key=phrase,
                                     volume=vol.replace(",", ""),
                                     cpc=cpc.replace("$", ""),
                                     competition=competition)
                        kw.save()
                        keyQuery = kw
                        print("Saved New keyword:")
                        print(keyQuery.key)
                    else:
                        keyQuery.update_one(set__volume=vol.replace(",", ""))
                        keyQuery.update_one(set__cpc=cpc.replace("$", ""))
                        keyQuery.update_one(set__competition=competition)
                        keyQuery = keyQuery[0]
                        print("Updated keyword:")
                        print(keyQuery.key)
                if (keyQuery):
                    keywordObjectReturnList.append(keyQuery)
            # First iteration only flips the flag, skipping the header row.
            titleRowSkip = True
        if DEBUG:
            (print("dbInsertKeywords: Closing CSV...\n"))
        csv_file.close()
    if DEBUG:
        (print("dbInsertKeywords: Closing browser and deleting CSV file...\n")
         )
    driver.close()
    # Only the last chunk's CSV is removed here — earlier files remain.
    os.remove(fileName)
    if DEBUG:
        (print("dbInsertKeywords: Returning Keyword Object List...\n"))
    return keywordObjectReturnList
# NOTE(review): this chunk begins mid-function — the statements below are
# the tail of an evaluation loop whose enclosing def/for headers are not
# visible here; indentation reconstructed, re-check in full context.
goldLabels.append(label)
s1 = " ".join(leaves(t1))
s2 = " ".join(leaves(t2))
modelPredict = model.predict(s1, s2)
predictions.append(modelPredict)
count += 1
accuracy = accuracy_score(predictions, goldLabels)
print "Accuracy on SICK %s set: %f" % (dataSet, accuracy)


if __name__ == "__main__":
    # CLI entry point: choose a model implementation by name and time
    # its evaluation on the SICK dev set.
    parser = argparse.ArgumentParser(
        description="arguments for CioEntails system")
    parser.add_argument("--model", type=str, default="baseline",
                        help="Name of model to use for system")
    args = parser.parse_args()
    if args.model == "baseline":
        model = Baseline("cosineSimilarity", ["keyword_overlap"])
    elif args.model == "keyword":
        # Here Keyword is an entailment model class, not a DB document.
        model = Keyword("cosineSimilarity", ["keyword_overlap"])
    elif args.model == "NB":
        model = NaiveBayes("cosineSimilarity", ["keyword_overlap"])
    start = time.time()
    evaluateModel(model, args.model, sick_dev_reader)
    print "Evaluation done in %f seconds" % (time.time() - start)
def add_keyword(name):
    """Persist a new Keyword whose value is *name*."""
    from models import Keyword
    from models import add_to_db
    keyword = Keyword()
    add_to_db(keyword, value=name)
print "Adding sample Degrees..." d_a = Degree(title="computer science") d_b = Degree(title="data science") d_c = Degree(title="human computer interaction") d_d = Degree(title="predictive analytics") d_e = Degree(title="software engineering") print "Adding sample Jobs..." j_a = Job(title="software developer") j_b = Job(title="web developer") j_c = Job(title="data scientist") j_d = Job(title="network engineer") j_e = Job(title="database administrator") print "Adding sample Keywords..." k_a = Keyword(key="algorithms") k_b = Keyword(key="object-oriented programming") k_c = Keyword(key="system architecture") k_d = Keyword(key="creative design") k_e = Keyword(key="databases") k_f = Keyword(key="data visualization") k_g = Keyword(key="uml") k_h = Keyword(key="front-end") k_i = Keyword(key="back-end") k_j = Keyword(key="ui ux") k_k = Keyword(key="csharp") k_l = Keyword(key="css") k_m = Keyword(key="objective-c") k_n = Keyword(key="perl") k_o = Keyword(key="r")
def newspaperize(article_url):
    """Takes a string url that contains an article. Returns a Story object
    from models.py containing information scraped from the article located
    at the url, or None when the url cannot be downloaded or parsed."""
    article = Article(article_url)  # create Article object
    print("Downloading:", article_url)
    try:  # returns None if url fails to download
        article.download()
    # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
    # still propagate.
    except Exception:
        print("Failed to download url:", article_url)
        return None
    try:  # returns None if url cannot be parsed
        article.parse()
    except Exception:  # narrowed from bare 'except:' as above
        print("Failed to parse url:", article_url)
        return None
    article.nlp()
    # variables to hold values for Story attributes
    headline = article.title
    imageurl = article.top_image
    timestamp = article.publish_date
    content = article.text
    keywords = article.keywords
    summary = article.summary
    description = article.meta_description
    clickbait = -1  # placeholder for clickbait label
    # populates keyword object with article.keywords
    list_of_keyword_obj = []
    for word in keywords:
        if word not in stopword:  # prevents stopwords from being keywords
            k = Keyword()
            k.keyword = word
            list_of_keyword_obj.append(k)
    s = Story()  # create Story object
    # set attributes
    s.name = headline
    s.imageurl = imageurl
    s.url = article_url
    current_time = datetime.datetime.now()
    if timestamp is not None:
        s.timestamp = timestamp.isoformat()
    else:
        # generate timestamp if none found
        s.timestamp = current_time
    s.description = description
    s.keywords = list_of_keyword_obj
    s.summary = summary
    s.content = content
    s.clickbait = clickbait
    s.createtime = current_time
    return s
# querying with joins for u, a in session.query(User, Address).filter(User.id==Address.user_id).filter(Address.email_address=='*****@*****.**').all(): print(u) print(a) user_j = session.query(User).join(Address).filter(Address.email_address=='*****@*****.**').all() print(user_j) # using aliases adalias1 = aliased(Address) adalias2 = aliased(Address) for username, email1, email2 in session.query(User.name, adalias1.email_address, adalias2.email_address).\ join(User.addresses.of_type(adalias1)).join(User.addresses.of_type(adalias2)).\ filter(adalias1.email_address=='*****@*****.**').filter(adalias2.email_address=='*****@*****.**'): print(username, email1, email2) # delete session.delete(jack) q = session.query(User).filter_by(name='jack').count() print(q) addr = session.query(Address).filter(Address.email_address.in_(['*****@*****.**', '*****@*****.**'])).count() print(addr) # many to many relationship wendy = session.query(User).filter_by(name='wendy').one() post = BlogPost('Wendy\'s Blog Post', 'This is a test', wendy) session.add(post) post.keywords.append(Keyword('wendy')) post.keywords.append(Keyword('firstpost')) firstposts = session.query(BlogPost).filter(BlogPost.keywords.any(keyword='firstpost')).all() print(firstposts) wendypost = wendy.posts.filter(BlogPost.keywords.any(keyword='firstpost')).all() print(wendypost) session.commit()
deck1 = Deck(name="Washington Facts", description="Facts about the great state of Washington", visibility="Public", user_id=myself.id) deck2 = Deck(name="Tennessee Facts", description="Facts about the great state of Tennessee", visibility="Private", user_id=myself.id) deck3 = Deck(name="Kansas Facts", description="Facts about the great state of Kansas", visibility="Public", user_id=myself.id) nugget1 = Nugget(truth="Eastern Tennessee has many mountains", user_id=1) keyword1 = Keyword(word="mountains", place_in_sentence=8, instance_count="1x", my_nugget=1) keyword2 = Keyword(word="mountains", instance_count="All", my_nugget=1) fakeout1 = Fakeout(fake_word="laws against skateboarding", hypernym="some places", relationship="HasA", my_keyword_id=1) fakeout2 = Fakeout(fake_word="rivers", my_keyword_id=2) fakeout3 = Fakeout(fake_word="jungles", my_keyword_id=2) fakeout4 = Fakeout(fake_word="deserts", my_keyword_id=2) nugget2 = Nugget(truth="Tennessee is known as the Volunteer state", user_id=1) keyword3 = Keyword(word="Volunteer", instance_count="All", my_nugget=2) fakeout5 = Fakeout(fake_word="Trombone", my_keyword_id=3) fakeout6 = Fakeout(fake_word="Evergreen", my_keyword_id=3) fakeout7 = Fakeout(fake_word="Sunflower", my_keyword_id=3)
#coding=utf-8 from models import Keyword from daos import Keyword as DB_Keyword from datetime import datetime if __name__ == '__main__': keyword_list = [] file_path = '/Users/ZHU_Chenghao/Downloads/keywords.csv' with open(file_path, 'r') as reader: lines = reader.readlines()[1:] # remove the header for line in lines: elements = line.split(',') timestamp = elements[0] timestamp = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S') score = int(elements[-1].strip('\n')) keywords = elements[1:-1] for keyword in keywords: keyword = eval(keyword.strip('["]')) keyword_list.append(Keyword(cid=0, keyword=keyword, timestamp=timestamp, score=score)) print 'Parse Done!' print 'Add keywords into database...' DB_Keyword.dump_keywords(keyword_list) print 'Dumping Done!'
from models import db, Job, Keyword, Degree, JobKeywords, DegreeKeywords

#Add all the titles to the lists, just the titles
jobs = []
degrees = []
keywords = []

#Loading Loops for adding data to the database quickly
for j in jobs:
    tmp = Job(title="%s" % j.lower())
    db.session.add(tmp)
db.session.commit()

for d in degrees:
    tmp = Degree(title="%s" % d.lower())
    db.session.add(tmp)
db.session.commit()

for k in keywords:
    tmp = Keyword(key="%s" % k.lower())
    # BUG FIX: the keyword row was constructed but never added to the
    # session, so the commit below persisted nothing for keywords.
    db.session.add(tmp)
db.session.commit()