Пример #1
0
import pymongo
import datetime
import id_beritagar
from tqdm import tqdm
from collections import Counter

# Capture the current date once and expose its components as module globals.
today = datetime.date.today()
year, month, day = today.year, today.month, today.day

# MongoDB database and collection names defined for this module.
database, collection = 'scraper', 'topEntity'

# Object returned by id_beritagar.load() (presumably an NLP pipeline --
# confirm against the id_beritagar package).
nlp = id_beritagar.load()


class Entity(object):
    """Helper for inserting documents into a MongoDB instance on localhost."""

    def __init__(self):
        """Create the helper; no instance state is kept."""

    def insert_top_entity(self, database=None, collection=None, attr=None):
        """Insert ``attr`` into ``database.collection`` on the local MongoDB.

        Connects to ``mongodb://localhost:27017``, writes the data and prints
        a success or failure message. Write errors are reported on stdout
        rather than raised.

        Args:
            database: Name of the target database; converted with ``str()``.
            collection: Name of the target collection; converted with
                ``str()``.
            attr: A single document (mapping) or an iterable of documents.

        Returns:
            None.
        """
        from collections.abc import Mapping

        client = pymongo.MongoClient("mongodb://localhost:27017")
        try:
            col = client[str(database)][str(collection)]
            try:
                # Collection.insert() was deprecated in PyMongo 3.0 and
                # removed in 4.0. Dispatch to its replacements using the same
                # rule the legacy method applied: a mapping is one document,
                # anything else is treated as an iterable of documents.
                if isinstance(attr, Mapping):
                    col.insert_one(attr)
                else:
                    col.insert_many(attr)
                print('Insert Data into MongoDB Succesfully')
            except Exception:
                # Best-effort write: report the failure instead of raising,
                # but let KeyboardInterrupt/SystemExit propagate.
                print('Insert Data into MongoDB Failed')
        finally:
            # Release the connection pool opened for this call.
            client.close()
Пример #2
0
from textacy.preprocess import preprocess_text
from sklearn.externals import joblib
from bs4 import BeautifulSoup
from tqdm import tqdm
import id_beritagar as indo
import datetime
import requests
import id_aldo
import sys

sys.path.insert(0, '/home/lumierra/smart_news/Database/')
import dbMongo

## Load NLP
# Two loaders are used: id_aldo provides `nlp`, and id_beritagar (imported
# as `indo`) provides `nlp_ner`.
nlp = id_aldo.load()
nlp_ner = indo.load()

## Load Database Mongo
DB = dbMongo.Database()

## Load Stopword For NLP
# The list is downloaded at import time and split into one entry per line.
# The timeout keeps an unresponsive server from hanging the import forever.
stopwords = requests.get(
    "https://raw.githubusercontent.com/masdevid/ID-Stopwords/master/id.stopwords.02.01.2016.txt",
    timeout=30,
).text.split("\n")
# Extra entries appended to the downloaded list (presumably the placeholder
# tokens produced by preprocess_text -- confirm).
tambahan = ['url', 'number', 'email', 'usd']
stopwords.extend(tambahan)

## set datetime
# Current local date, with the time-of-day component dropped.
now = datetime.datetime.now().date()