import csv
import os
import re

from time import gmtime, strftime

# Session, settings, logger, the ORM models (Articles, Analysis, Company,
# CompanyArticle, Post, User) and the helpers (striprtf, parser,
# get_company_by_code, get_data, successful, internal_server_error,
# image_format) are assumed to be imported from elsewhere in the project.


def process_file(fname):
    """Parse one RTF file and insert any articles it contains."""
    # Skip Word lock/temp files before doing any work.
    if fname.startswith('~$'):
        return None

    file_location = os.path.join(settings.RTF_DIR, fname)
    logger.info('Opening file %s...', fname)
    session = Session()

    try:
        with open(file_location, 'rb') as rtf_file:
            txt = rtf_file.read()
    except OSError:
        # Without the file contents there is nothing to parse.
        logger.warning('Cannot read from file %s', fname)
        session.close()
        return None

    clean_text = striprtf(txt)
    dicts = parser(clean_text, fname)
    if len(dicts) == 0:
        logger.error('Cannot extract articles from file %s', fname)
        session.close()
        return None

    logger.info('Found %d articles in file %s', len(dicts), fname)
    for dict_item in dicts:
        article = session.query(Articles).filter_by(id=dict_item['id']).first()
        if article:
            logger.info('Article %s already exists in database', article.id)
        else:
            article = Articles(**dict_item)
            session.add(article)
            session.commit()
    session.close()
    logger.info('Finished parsing file %s', fname)
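# A minimal driver sketch, assuming settings.RTF_DIR holds the exported RTF
# files; the name process_all_files is hypothetical.
def process_all_files():
    for fname in os.listdir(settings.RTF_DIR):
        process_file(fname)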
def insert_comp_art(organization, article):
    """Link an article to the company named in its organization tag."""
    session = Session()
    try:
        # The organization field looks like '<code>: <name>'; the code is
        # what maps to a company record.
        code = organization.split(':')[0].strip().upper()
        company = get_company_by_code(code)
        if company is None:
            session.close()
            return None

        com_art = CompanyArticle(gvkey=company.gvkey, article_id=article.id)

        # NS is a '|'-separated list of 'code:label' news-subject categories;
        # keep the labels of the first two entries whose code matches c<digits>.
        ns_list = article.NS.split('|') if article.NS is not None else []
        i = 0
        for cat in ns_list:
            cat_list = cat.split(':')
            match_cnum = re.match(r'c\d+', cat_list[0].strip())
            if i > 2 or match_cnum is None:
                continue
            i += 1
            if i == 1:
                com_art.main_category = cat_list[1].strip()
            if i == 2:
                com_art.sub_category = cat_list[1].strip()

        session.add(com_art)
        session.commit()
        logger.info('Matched company %s to article %s',
                    company.factiva_name, com_art.article_id)
        session.close()
    except Exception:
        logger.exception('Failed to match organization %s to article %s',
                         organization, article.id)
        raise
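# For illustration, a hypothetical NS value and how insert_comp_art reads it:
#
#   NS = 'c181:Acquisitions/Mergers|c152:Earnings Projections|npag:Page One'
#
# The first two entries whose code matches c<digits> supply the categories:
# main_category = 'Acquisitions/Mergers', sub_category = 'Earnings Projections';
# the entry 'npag:Page One' does not match and is skipped.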
def create_post(author_id, title, description, body):
    session = Session()
    try:
        # Timestamp the post in ISO 8601 UTC at creation time.
        post = Post(author_id, title, description, body,
                    strftime('%Y-%m-%dT%H:%M:%S', gmtime()))
        session.add(post)
        session.commit()
        return successful(post.id)
    except Exception:
        session.rollback()
        return internal_server_error
    finally:
        session.close()
def get_articles():
    """Run the text analysis for every article that lacks an Analysis row."""
    session = Session()
    articles = session.query(Articles).all()
    for article in articles:
        analysed = session.query(Analysis).filter_by(id=article.id).first()
        if analysed:
            continue
        logger.info('Analysing %s', article.id)
        # Remove a trailing literal 'None' if present; str.rstrip('None')
        # would strip the characters N/o/n/e, not the suffix.
        text = article.text
        if text.endswith('None'):
            text = text[:-len('None')]
        analyzed_dict = get_data(text)
        analyzed_dict['id'] = article.id
        # get_data returns doc_size, which Analysis does not store.
        analyzed_dict.pop('doc_size')
        a = Analysis(**analyzed_dict)
        session.add(a)
        session.commit()
    session.close()
def update_post(title, description, body, post_id):
    session = Session()
    try:
        session.query(Post).filter(Post.id == post_id).update({
            'title': title,
            'description': description,
            'body': body,
        })
        session.commit()
        return successful('Post {} updated'.format(post_id))
    except Exception:
        session.rollback()
        return internal_server_error
    finally:
        session.close()
def new_user(email, name, password_encrypted, token):
    session = Session()
    try:
        user = User(email, name, password_encrypted, token)
        session.add(user)
        session.commit()
        return successful({
            'id': user.id,
            'email': user.email,
            'password_encrypted': user.password_encrypted,
            'token': user.token,
        })
    except Exception:
        session.rollback()
        return internal_server_error
    finally:
        session.close()
def delete_post(post_id):
    session = Session()
    try:
        session.query(Post).filter(Post.id == post_id).delete()
        session.commit()
        # Remove the post's image from disk once the row is gone.
        image_path = image_format(post_id)
        if os.path.isfile(image_path):
            os.remove(image_path)
        return successful('Post {} deleted'.format(post_id))
    except Exception:
        session.rollback()
        return internal_server_error
    finally:
        session.close()
def import_data(path):
    """Load companies from a ';'-delimited CSV file into the database."""
    with open(path) as f:
        line_count = 0
        reader = csv.reader(f, delimiter=';')
        session = Session()
        for row in reader:
            if line_count == 0:
                # The first row holds the column headers.
                print(f'Column names are {", ".join(row)}')
            else:
                company = Company(
                    gvkey=row[0],
                    name=row[1],
                    factiva_name=row[2],
                    factiva_code=row[3],
                )
                session.add(company)
            # Count every row so the final summary is accurate.
            line_count += 1
        session.commit()
        session.close()
        print(f'Processed {line_count} lines.')
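# Example invocation, assuming a ';'-delimited CSV whose columns are ordered
# gvkey;name;factiva_name;factiva_code (the path below is hypothetical):
#
#   import_data('data/companies.csv')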