def parse_currency(currency_key, directory, name):  # Type : 1 - Currency
    print("Currency")
    col = Mongo().create_collection("Currency", FDC.get_index_models())
    with open(directory) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        print(currency_key)
        hour = -1
        fd = None
        for row in csv_reader:
            if len(row) < 2:  # Skip malformed rows
                continue
            # EURUSD files carry the full timestamp in the first column,
            # so every price column index shifts one position to the left.
            add_value = 0
            if currency_key == "EURUSD":
                date = DateHelper.str2date(row[0])
                add_value = -1
            else:
                date = DateHelper.str2date(row[0] + row[1])
            if hour != date.hour:
                # A new hour begins: persist the previous hourly record.
                hour = date.hour
                if fd is not None:
                    try:
                        col.insert(fd.get_currency())
                    except Exception:
                        Logger().get_logger().error('Insert Error', exc_info=True)
                fd = FinancialData(name, currency_key, date,
                                   row[FDLocations.Currency_Open.value + add_value],
                                   row[FDLocations.Currency_High.value + add_value],
                                   row[FDLocations.Currency_Low.value + add_value],
                                   row[FDLocations.Currency_Close.value + add_value])
            else:
                # Same hour: fold the row into the current hourly record.
                fd.add(row[FDLocations.Currency_High.value + add_value],
                       row[FDLocations.Currency_Low.value + add_value],
                       row[FDLocations.Currency_Close.value + add_value])
        if fd is not None:
            # Persist the final hourly record left in the buffer.
            try:
                col.insert(fd.get_currency())
            except Exception:
                Logger().get_logger().error('Insert Error', exc_info=True)

def parse_index_datetime(currency_key, directory, name, interval):  # Type : 4 - Index
    col = Mongo().create_collection("Index")
    with open(directory) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        print(currency_key)
        hour = -1
        hour_count = 0
        fd = None
        for row in csv_reader:
            if len(row) < 2:  # Skip malformed rows
                continue
            date = DateHelper.str2date(row[0] + row[1])
            if hour != date.hour:
                # A new hour begins: persist the previous hourly record.
                hour = date.hour
                hour_count = 0
                if fd is not None:
                    print(fd)
                    try:
                        col.insert(fd.get_index())
                    except Exception:
                        Logger().get_logger().error('Insert Error', exc_info=True)
                fd = FinancialData(name, currency_key, date,
                                   row[FDLocations.IndexDateTime_Open.value],
                                   row[FDLocations.IndexDateTime_High.value],
                                   row[FDLocations.IndexDateTime_Low.value],
                                   row[FDLocations.IndexDateTime_Close.value])
            else:
                # Same hour: fold the row into the current hourly record.
                fd.add(row[FDLocations.IndexDateTime_High.value],
                       row[FDLocations.IndexDateTime_Low.value],
                       row[FDLocations.IndexDateTime_Close.value])
                hour_count += 1
            line_count += 1
        if fd is not None:
            # Persist the final hourly record left in the buffer.
            try:
                col.insert(fd.get_index())
            except Exception:
                Logger().get_logger().error('Insert Error', exc_info=True)
        print(f'Processed {line_count} lines.')
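
A minimal driver for the two parsers above might look like the sketch below; the file paths, symbol keys, display names and the 60-minute interval are illustrative assumptions, and each CSV row is expected to carry a timestamp followed by open/high/low/close prices.

# Hypothetical invocation; paths, symbols and names are assumptions, not from the source.
parse_currency("EURUSD", "data/EURUSD_1min.csv", "Euro / US Dollar")
parse_index_datetime("SPX", "data/SPX_1min.csv", "S&P 500 Index", 60)
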
Example #3
def save_to_db(self):
    mongo = Mongo()
    try:
        mongo.insert(self.row)
    except Exception:
        print(self.row)
        Logger().get_logger().error('Insert Error', exc_info=True)
Example #4
class WikiRecorder(object):
    def __init__(self, collection_name="Wiki"):
        self.col = Mongo().create_collection(collection_name,
                                             WikiRecorder.get_index_models())
        self.preprocessor = PreProcessing()
        self.config = WikiRecorder.get_config()
        self.total = 0

    def collect_all(self):
        name_list = self.config["Wiki"]["Corporations"]
        for cor_name in name_list:
            self.collect(cor_name)

    def collect(self, title, page_id=None):
        page = Wikipedia.get_page(title, pageid=page_id)

        title = page.original_title
        title_p = self.preprocessor.preprocess(title)
        summary = page.summary
        summary_p = self.preprocessor.preprocess(summary)
        content = page.content
        page_id = page.pageid
        data = {
            'title': title,
            'title_p': title_p,
            'summary': summary,
            'summary_p': summary_p,
            'content': content,
            'page_id': page_id
        }
        print(data)
        try:
            self.col.insert(data)
        except Exception as exception:
            Logger().get_logger().error(type(exception).__name__,
                                        exc_info=True)

    @staticmethod
    def get_index_models():
        return [
            IndexModel("title", name="index_title"),
            IndexModel("page_id", name="index_page_id")
        ]

    @staticmethod
    def get_config():
        pwd = os.path.dirname(os.path.abspath(__file__))
        with open(os.path.join(pwd, 'config.json')) as config_file:
            return json.load(config_file)
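
A short usage sketch for WikiRecorder, assuming a config.json next to the module that lists corporation titles under {"Wiki": {"Corporations": [...]}}; the page title passed below is a placeholder, not taken from the source.

# Hypothetical usage; the title is only an example.
recorder = WikiRecorder(collection_name="Wiki")
recorder.collect("Example Corporation")   # fetch, preprocess and store a single page
recorder.collect_all()                    # store every corporation listed in the config
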

def parse_stock(currency_key, directory, name, interval):  # Type : 3 - Stock
    print("Stock")
    col = Mongo().create_collection("Stock", FDC.get_index_models())
    with open(directory) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        print(currency_key)
        for row in csv_reader:
            if len(row) < 2:  # Skip malformed rows
                continue
            date = DateHelper.str2date(row[0])
            if interval == 60:
                # Hourly files map one CSV row to one stored document.
                fd = FinancialData(name, currency_key, date,
                                   row[FDLocations.Stock_Open.value],
                                   row[FDLocations.Stock_High.value],
                                   row[FDLocations.Stock_Low.value],
                                   row[FDLocations.Stock_Close.value],
                                   row[FDLocations.Stock_Volume.value],
                                   row[FDLocations.Stock_Trade.value],
                                   row[FDLocations.Stock_Avg.value])
                col.insert(fd.get_stock())
            else:
                print("Not Handled !!!")  # Only 60-minute data is supported