class MongoHandler:
    """MongoDB access layer for the news crawler.

    Opens the collection named by ``CONFIG['collection_name']`` for writes
    and, unless ``CONFIG['is_input_keywords']`` is set, the
    ``stock_category`` collection that is read for search keywords.
    """

    def __init__(self):
        self.log = Log(MongoHandler)
        conn = MongoClient(CONFIG['DB_ip'], CONFIG['DB_port'])

        self.save_collection = conn[CONFIG['DB_name']][
            CONFIG['collection_name']]
        self.log.info("MongoDB connection to {0} collection. - {1}".format(
            CONFIG['collection_name'], self.save_collection))

        # The category collection is only opened when keywords are not
        # supplied through the configuration.
        if not CONFIG['is_input_keywords']:
            self.category_collection = conn[
                CONFIG['DB_name']]['stock_category']
            self.log.info(
                "MongoDB connection to category collection. - {0}".format(
                    self.category_collection))

    def get_search_keywords(self):
        """Return every document of the category collection as a list.

        Raises:
            RuntimeError: if ``CONFIG['is_input_keywords']`` is set, in
                which case the category collection was never opened.
        """
        if CONFIG['is_input_keywords']:
            # The logger call returns nothing raisable, so log first and
            # then raise a real exception carrying the same message.
            message = "Check is_input_keywords option"
            self.log.warning(message)
            raise RuntimeError(message)

        return list(self.category_collection.find({}))

    def add_news_data(self, news_dict):
        """Insert the given news documents into the save collection.

        Args:
            news_dict: list of documents (dicts) to insert. An empty or
                ``None`` value is ignored.
        """
        if not news_dict:
            # insert_many rejects an empty document list, so a crawl that
            # produced nothing is treated as a no-op instead of an error.
            return
        self.save_collection.insert_many(news_dict)
class DataHandler:
    """Loads the target company list and stores stock data through MongoDB.

    ``get_target_company`` is replaced per instance in ``__init__`` with the
    loader selected by ``CONFIG['company_name_location']`` (``'DB'`` or
    ``'File'``).
    """

    def __init__(self):
        self.log = Log(DataHandler)
        self.mongo = MongoHandler()
        self.company_info = None
        self.company_list = None

        check_target_location = CONFIG['company_name_location']
        # The loaders are already bound to this instance, so they can be
        # assigned directly without re-binding through types.MethodType.
        if check_target_location == 'DB':
            self.get_target_company = self._get_company_by_mongo
        elif check_target_location == 'File':
            self.get_target_company = self._get_company_by_file
        else:
            # Without a recognised location get_target_company stays a
            # no-op; say so instead of failing silently later on.
            self.log.warning(
                "Unknown company_name_location - {0}".format(
                    check_target_location))

    def get_target_company(self):
        """Default loader: does nothing until replaced in ``__init__``."""
        pass

    def save_stock_data(self, stock_df):
        """Forward ``stock_df`` to the Mongo handler's ``update_stock_data``."""
        self.mongo.update_stock_data(stock_df)

    def _get_company_by_mongo(self, obj=None):
        """Load company documents from MongoDB into ``company_info``.

        ``company_list`` is set to the ``company`` and ``code`` columns.
        ``obj`` is unused; it is kept so existing callers that pass an
        extra argument keep working.
        """
        self.log.debug("Get company information by database(MongoDB)")
        self.company_info = pd.DataFrame(self.mongo.get_company())
        self.company_list = self.company_info[['company', 'code']]

    def _get_company_by_file(self, obj=None):
        """Placeholder for the file-based loader (not implemented).

        ``obj`` is unused and kept only for call compatibility.
        """
        pass
Exemplo n.º 3
0
    def __init__(self):
        """Connect to MongoDB and open the save and target collections.

        Host, port, database and both collection names are read from
        ``CONFIG``.
        """
        self.log = Log(MongoHandler)
        client = MongoClient(CONFIG['DB_ip'], CONFIG['DB_port'])

        # Both collections live in the same configured database.
        save_db = client[CONFIG['DB_name']]
        self.save_collection = save_db[CONFIG['save_collection_name']]
        target_db = client[CONFIG['DB_name']]
        self.target_collection = target_db[CONFIG['target_collection_name']]

        message = (
            "MongoDB save collection {0}, target collection. - {1}".format(
                self.save_collection, self.target_collection))
        self.log.info(message)
    def __init__(self):
        """Create the logger and Mongo handler, then pick the company loader.

        ``CONFIG['company_name_location']`` selects which private loader is
        installed as ``get_target_company`` on this instance; any value
        other than ``'DB'`` or ``'File'`` installs nothing.
        """
        self.log = Log(DataHandler)
        self.mongo = MongoHandler()
        self.company_info = self.company_list = None

        location = CONFIG['company_name_location']
        loader = None
        if location == 'DB':
            loader = self._get_company_by_mongo
        elif location == 'File':
            loader = self._get_company_by_file

        if loader is not None:
            # Bind the chosen loader as this instance's get_target_company.
            self.get_target_company = types.MethodType(loader, self)
Exemplo n.º 5
0
class MongoHandler:
    """MongoDB access layer for stock data.

    Stock rows are written to the collection named by
    ``CONFIG['save_collection_name']``; company documents are read from the
    one named by ``CONFIG['target_collection_name']``.
    """

    # Columns copied from each DataFrame row into the stored document,
    # in the order they are written.
    _STOCK_FIELDS = ('Company', 'Type', 'Code', 'Date', 'High', 'Low',
                     'Open', 'Close', 'candleCenter', 'Volume')

    def __init__(self):
        self.log = Log(MongoHandler)
        conn = MongoClient(CONFIG['DB_ip'], CONFIG['DB_port'])

        self.save_collection = conn[CONFIG['DB_name']][
            CONFIG['save_collection_name']]
        self.target_collection = conn[CONFIG['DB_name']][
            CONFIG['target_collection_name']]
        self.log.info(
            "MongoDB save collection {0}, target collection. - {1}".format(
                self.save_collection, self.target_collection))

    def get_company(self):
        """Return every document of the target collection as a list."""
        return list(self.target_collection.find({}))

    def update_stock_data(self, stock_df):
        """Upsert one document per row of ``stock_df``.

        Each row is matched on its ``Date`` and ``Code`` values and the
        columns in ``_STOCK_FIELDS`` are written with ``$set``. All upserts
        are sent as a single bulk write.

        Args:
            stock_df: DataFrame providing the ``_STOCK_FIELDS`` columns.
        """
        updates = []
        for _, row in tqdm(stock_df.iterrows(), total=len(stock_df)):
            match_key = {'Date': row['Date'], 'Code': row['Code']}
            document = {field: row[field] for field in self._STOCK_FIELDS}
            updates.append(
                UpdateOne(match_key, {'$set': document}, upsert=True))

        self.log.debug("update list count- {0}".format(len(updates)))
        if not updates:
            # bulk_write raises on an empty request list; nothing to do.
            return
        self.save_collection.bulk_write(updates)

    def add_stock_data(self, stock_df):
        """Insert every row of ``stock_df`` as a new document.

        An empty DataFrame is ignored.
        """
        records = stock_df.to_dict('records')
        if not records:
            # insert_many rejects an empty document list.
            return
        self.save_collection.insert_many(records)
    def __init__(self):
        """Connect to MongoDB and open the collections the crawler uses.

        The save collection is always opened. The ``stock_category``
        collection is opened only when ``CONFIG['is_input_keywords']`` is
        falsy.
        """
        self.log = Log(MongoHandler)
        client = MongoClient(CONFIG['DB_ip'], CONFIG['DB_port'])

        save_db = client[CONFIG['DB_name']]
        self.save_collection = save_db[CONFIG['collection_name']]
        save_message = "MongoDB connection to {0} collection. - {1}".format(
            CONFIG['collection_name'], self.save_collection)
        self.log.info(save_message)

        # Keywords supplied by the configuration: no category lookup needed.
        if CONFIG['is_input_keywords']:
            return

        category_db = client[CONFIG['DB_name']]
        self.category_collection = category_db['stock_category']
        category_message = (
            "MongoDB connection to category collection. - {0}".format(
                self.category_collection))
        self.log.info(category_message)
class DataHandler:
    """Data helpers for the news crawler.

    Provides the search keywords, the search date range, and the Excel and
    MongoDB outputs for crawled results.
    """

    def __init__(self):
        self.log = Log(DataHandler)
        self.mongo_handler = MongoHandler()

    def get_search_keywords(self):
        """Return the search keywords stored in MongoDB.

        Returns:
            A list of ``(code, company, business_code, business)`` tuples,
            one per document returned by the Mongo handler; an empty list
            when there are no documents.
        """
        df = pd.DataFrame(self.mongo_handler.get_search_keywords())
        if df.empty:
            # No documents means no columns to select; return nothing
            # rather than failing on the column lookup below.
            search_keywords = []
        else:
            columns = ['code', 'company', 'business_code', 'business']
            search_keywords = list(zip(*(df[column] for column in columns)))
        self.log.debug("search keywords count - {0}".format(
            len(search_keywords)))

        return search_keywords

    def get_range_search_date(self):
        """Return ``(start, end)`` covering the two hours up to now.

        Both values are strings formatted as ``%Y.%m.%d.%H.%M``.
        """
        now_date = datetime.now()
        e_date = now_date.strftime('%Y.%m.%d.%H.%M')
        s_date = (now_date - timedelta(hours=2)).strftime('%Y.%m.%d.%H.%M')
        self.log.debug("start date - {0}, end date - {1}".format(
            s_date, e_date))

        return s_date, e_date

    def save_file(self, df, keyword, size):
        """Write ``df`` to ``<save_file_path>/<keyword>_<size>.xlsx``.

        The target directory (``CONFIG['save_file_path']``) is created
        when it does not exist yet.
        """
        path = CONFIG['save_file_path']
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists test.
        os.makedirs(path, exist_ok=True)

        file_name = '{0}_{1}.xlsx'.format(keyword, size)
        self.log.debug("save file name - {0}".format(file_name))
        df.to_excel(os.path.join(path, file_name))

    def save_db(self, df):
        """Store the rows of ``df`` through the Mongo handler."""
        self.mongo_handler.add_news_data(df.to_dict('records'))
Exemplo n.º 8
0
 def __init__(self, class_obj):
     """Store a ``Log`` instance built from ``class_obj`` on ``self.log``."""
     self.log = Log(class_obj)
Exemplo n.º 9
0
import yaml
from Utils.utils import Log
from Handlers.data_handler import DataHandler
from Crawlers.naver_news_crawler import NaverNewsCrawler
# Kept for PyYAML 5.x, where this call controls the YAMLLoadWarning.
yaml.warnings({'YAMLLoadWarning': False})
with open("config.yaml", "rt", encoding="utf-8") as stream:
    # safe_load replaces the Loader-less yaml.load: PyYAML 6 requires an
    # explicit Loader, and a plain configuration file needs no more than
    # the safe subset of YAML.
    CONFIG = yaml.safe_load(stream)['NewsCrawler']

if __name__ == '__main__':
    # Entry point: build the handler and crawler, resolve the keywords and
    # the date range from CONFIG, then run the crawl.
    log = Log(__name__)
    data_handler = DataHandler()
    naver_crawler = NaverNewsCrawler(data_handler)

    # Keywords come from the configuration or from the data handler.
    search_keywords = (
        CONFIG['keywords'] if CONFIG['is_input_keywords']
        else data_handler.get_search_keywords())

    # In iterate mode the data handler supplies the range; otherwise the
    # configured start and end dates are used.
    if CONFIG['iterate']:
        date_range = data_handler.get_range_search_date()
    else:
        date_range = (CONFIG['start_date'], CONFIG['end_date'])
    s_date, e_date = date_range

    url = naver_crawler.get_target_url(s_date, e_date)
    naver_crawler.execute_crawler(search_keywords, url)
 def __init__(self):
     """Create the ``Log`` and ``MongoHandler`` instances this object uses."""
     self.log = Log(DataHandler)
     self.mongo_handler = MongoHandler()