# ===== Example 1 =====
'''
Created on Mar 23, 2015

@author: aelsalla
'''
from DatasetBuilder.DatasetBuilder import DatasetBuilder
from FeaturesExtractor.FeaturesExtractor import FeaturesExtractor
from Classifier.Classifier import Classifier
import matplotlib.pyplot as plt
import numpy as np

# Initialize the DatasetBuilder
###############################
dataSetBuilder = DatasetBuilder()
testSetShare = 0.1  # fraction of the data reserved for the test split
# Hard-coded snapshot of a single crawl run; BuildDataSet reads these paths.
dataSetBuilder.csvPricesFileName = '.\\crawler\\prices\\prices_16_4_2015_15_30_55.csv'
dataSetBuilder.csvNewsFileName = '.\\news_all.csv'
trainSet, testSet = dataSetBuilder.BuildDataSet(testSetShare)

# Recombine the two splits so the full dataset is also available downstream.
dataSet = []
dataSet.extend(trainSet)
dataSet.extend(testSet)
# NOTE(review): the triple-quoted block below is disabled code kept for
# reference; it is a bare string literal with no runtime effect.
'''
fullPrices = []
for price in dataSetBuilder.get_prices():
    fullPrices.append(float(price['value']))
'''

# Accumulators presumably filled by code later in the file — TODO confirm.
fullPrices = []
labels = []
sizes = []
# ===== Example 2 =====
    # Scan every CSV in the news crawl directory and append its headlines
    # to the module-level ``news_headlines`` list via the shared
    # DatasetBuilder instance ``d`` (both defined elsewhere in this file).
    dirName = '.\\crawler\\news'
    for file in os.listdir(dirName):
        if file.endswith(".csv"):
            full_file_name = dirName + '\\' + file

            # Point the builder at this file, then parse it.
            d.csvNewsFileName = full_file_name

            news_headlines.extend(d.get_news_headlines())


def CollectPrices():
    """Load every prices CSV from the crawl directory into the global list.

    Scans ``.\\crawler\\prices`` for ``*.csv`` files, points the shared
    DatasetBuilder ``d`` at each file in turn, and extends the module-level
    ``prices`` list with the parsed rows.  Mirrors the news-collection
    routine above.  Relies on module globals: ``d`` and ``prices``.
    """
    dirName = '.\\crawler\\prices'
    # Fix: reuse dirName here instead of repeating the path literal,
    # so the directory is defined in exactly one place.
    for file in os.listdir(dirName):
        if file.endswith(".csv"):
            full_file_name = dirName + '\\' + file
            print(full_file_name)

            d.csvPricesFileName = full_file_name

            prices.extend(d.get_prices())


# Shared DatasetBuilder instance used by the collector functions above.
d = DatasetBuilder()

# Gather all crawled CSVs into memory, then dump the merged results to
# single consolidated files.  Order matters: the collectors populate the
# lists that the Dump* calls write out.
CollectNews()
CollectPrices()
d.csvNewsFileName = 'news_all.csv'
d.DumpNewsCSV(news_headlines)
d.csvPricesFileName = 'prices_all.csv'
d.DumpPricesCSV(prices)
# ===== Example 3 =====
 # Persist each crawled headline; skip any whose text is already stored.
 # NOTE(review): ``NewsHeadline.objects.filter`` looks like a Django-style
 # ORM lookup — confirm against the models module.
 for headline in news_headlines:
     print(headline['text'] + '\n' + headline['time_stamp'])
     
     # Insert only if no row with identical text exists yet (dedupe on text).
     headline_exist = NewsHeadline.objects.filter(text=headline['text'])
     if(len(headline_exist) == 0):
         headline_entry = NewsHeadline()
         headline_entry.text = headline['text']
         headline_entry.time_stamp = headline['time_stamp']
         headline_entry.save()
 
 # Snapshot file named after the current wall-clock time.
 # NOTE(review): datetime.datetime.now() is called once per component below,
 # so the name's parts can straddle a second/minute boundary — consider
 # calling now() once and reusing the result.
 d.csvNewsFileName = '.\\crawler\\news\\news_' + str(datetime.datetime.now().day) +'_' + str(datetime.datetime.now().month) +'_' + str(datetime.datetime.now().year) +'_' + str(datetime.datetime.now().hour) +'_' + str(datetime.datetime.now().minute) + '_' + str(datetime.datetime.now().second) +'.csv'
 
 
 d.DumpNewsCSV(news_headlines)
 
 
 # Fetch prices published since priceStartDate (defined outside this view).
 prices = d.ParsePricesURL(priceStartDate)
 
 # Persist each price; skip any whose time_stamp is already stored.
 for price in prices:
     print(price['value'] + '\n' + price['time_stamp'])
     price_exist = Price.objects.filter(time_stamp=price['time_stamp'])
     if(len(price_exist) == 0):
         price_entry = Price()
         price_entry.value = price['value']
         price_entry.time_stamp = price['time_stamp']
         price_entry.save()
 
 # Same per-component timestamp naming as above (same NOTE applies).
 d.csvPricesFileName = '.\\crawler\\prices\\prices_'+ str(datetime.datetime.now().day) +'_' + str(datetime.datetime.now().month) +'_' + str(datetime.datetime.now().year) +'_' + str(datetime.datetime.now().hour) +'_' + str(datetime.datetime.now().minute) + '_' + str(datetime.datetime.now().second) +'.csv'  
 d.DumpPricesCSV(prices)
 # Crawl every hour
 time.sleep(3600)