Example #1
def main():
    today = datetime.date.today()
    start_date = today - datetime.timedelta(
        days=6)  # a 6-day offset gives a 7-day window, since the date range includes both boundaries
    data = fd.fetch_data(str(start_date), str(today))
    insertToDB(data)
    print("Sucessfully updated data to Database")
Example #2
    def getRadvizPoints(self, session, filterByTerm):
        es_info = self._esInfo(session['domainId'])
        index = es_info['activeDomainIndex']
        max_features = 200

        #session['pagesCap'] = 12
        if session.get('from') is None:
            session['from'] = 0
        date_format = '%m/%d/%Y %H:%M %Z'
        if session.get('fromDate') is not None:
            session['fromDate'] = int(
                DomainModel.convert_to_epoch(
                    datetime.strptime(session['fromDate'], date_format)))
        if session.get('toDate') is not None:
            session['toDate'] = int(
                DomainModel.convert_to_epoch(
                    datetime.strptime(session['toDate'], date_format)))
        results_data = self.getTextQuery(session)
        ddteval_data = fetch_data(results_data["results"],
                                  es_doc_type=es_doc_type,
                                  es=es)
        data = ddteval_data["data"]

        labels = ddteval_data["labels"]

        urls = ddteval_data["urls"]

        tf_v = tf_vectorizer(convert_to_ascii=True, max_features=max_features)
        [X, features] = tf_v.vectorize(data)

        matrix_transpose = np.transpose(X.todense())

        print "\n\n Number of 1-gram features = ", len(features)
        print "\n\n tf 1-gram matrix size = ", np.shape(X)

        # data = self.radviz.loadData_pkl("data/ht_data_200.pkl").todense()

        # data = np.transpose(data)

        # features = self.radviz.loadFeatures("data/ht_data_features_200.csv")
        # print features
        # print len(features)
        # labels = self.radviz.loadLabels("data/ht_data_labels_200.csv")
        # urls = self.radviz.loadSampleNames("data/ht_data_urls_200.csv")

        self.radviz = Radviz(X, features, labels, urls)

        return_obj = {}
        for i in range(0, len(features)):
            return_obj[features[i]] = matrix_transpose[i, :].tolist()[0]
        labels_urls = OrderedDict([("labels", labels), ("urls", urls),
                                   ("title", ddteval_data["title"]),
                                   ("snippet", ddteval_data["snippet"]),
                                   ("image_url", ddteval_data["image_url"])])
        od = OrderedDict(
            list(OrderedDict(sorted(return_obj.items())).items()) +
            list(labels_urls.items()))

        return od
Example #3
def index():
    form = FilterForm()
    success = True
    if form.is_submitted():
        data, headers = fetch_data(form)
        if len(data) != 0:
            return render_template("search.html", data=data, headers=headers)
        else:
            success = False
    return render_template("index.html", form=form, success=success)
Example #4
def nessus_data_clone():
    end_date = datetime.datetime.today()
    start_date = end_date - datetime.timedelta(days=1)
    while start_date > datetime.datetime(
            1950, 1, 1):  # 1950 is an old enough lower bound for the bulk fetch and store
        data = fetch_data(start_date, end_date)
        store_data(data)
        del data
        end_date = start_date
        start_date = end_date - datetime.timedelta(days=1)
Example #5
def main():
    data = fetch_data()

    warp = LightFM(loss='warp')
    logistic = LightFM(loss='logistic')
    bpr = LightFM(loss='bpr')

    warp.fit(data['matrix'], epochs=30, num_threads=2)
    logistic.fit(data['matrix'], epochs=30, num_threads=2)
    bpr.fit(data['matrix'], epochs=30, num_threads=2)

    print('Using the WARP loss function: ')
    recommendation(model=warp, data=data['matrix'], users=[20, 23, 50])
    print('\n')

    print("Using the Logistic loss function")
    recommendation(model=logistic, data=data['matrix'], users=[20, 23, 50])
    print('\n')

    print("Using the BPR loss function")
    recommendation(model=bpr, data=data['matrix'], users=[20, 23, 50])
    print('\n')
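The recommendation() helper called above is not included in this example. A minimal sketch of what it might look like, assuming data['matrix'] is a user-item interaction matrix and using LightFM's predict method; the function name, the top_n cutoff, and the output format are assumptions, not part of the original.

import numpy as np

def recommendation(model, data, users, top_n=3):
    # Hypothetical helper: score every item for each user with the fitted
    # LightFM model and print the highest-ranked item indices.
    n_items = data.shape[1]
    for user_id in users:
        scores = model.predict(user_id, np.arange(n_items))
        top_items = np.argsort(-scores)[:top_n]
        print("User %d: top items %s" % (user_id, top_items.tolist()))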
Example #6
	for j in range(len(manhattan_polygon[i])):
		manhattan_polygon[i][j] = float(manhattan_polygon[i][j])
	manhattan_polygon[i] = tuple(manhattan_polygon[i])
poly_file.close()

# Set time window for SQL query
DELTA = timedelta(seconds=30)

# Had to use a dummy date so that I could add and subtract timedeltas from times
INTERVAL_START = datetime(2013, 1, 1, 12, 0, 0)
INTERVAL_END = datetime(2013, 1, 1, 13, 59, 59)

start_datetime = datetime(2013, 1, 1, 12, 0, 0)
end_datetime = datetime(2013, 1, 1, 12, 0, 1)
i = 0

# Open CSV file to which taxi travel times will be written
outputFile = open("manhattan_rides.csv", "w")
while end_datetime <= INTERVAL_END and i < 1:
	queryString = "SELECT pickup_datetime,dropoff_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude FROM [833682135931:nyctaxi.trip_data] WHERE (FLOAT(pickup_longitude) BETWEEN -74.0382 AND -73.9030) AND (FLOAT(dropoff_longitude) BETWEEN -74.0382 AND -73.9030) AND (FLOAT(pickup_latitude) BETWEEN 40.6780 AND 40.8860) AND (FLOAT(dropoff_latitude) BETWEEN 40.6780 AND 40.8860) AND (TIME(dropoff_datetime) BETWEEN TIME(\'" + str(start_datetime) + "\') AND TIME(\'" + str(INTERVAL_END) + "\')) AND (TIME(pickup_datetime) BETWEEN TIME(\'" + str(start_datetime) + "\') AND TIME(\'" + str(end_datetime) + "\'))"
	results = fetch_data(queryString)
	for row in results:
		if point_in_poly(float(row[3]), float(row[2]), manhattan_polygon) and point_in_poly(float(row[5]), float(row[4]), manhattan_polygon):
			travel_time = datetime.strptime(row[1], "%Y-%m-%d %H:%M:%S")-datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S")
			outputFile.write(str(travel_time.total_seconds()) + ",")
			outputFile.write(",".join(row[2:6]))
			outputFile.write('\n')
	start_datetime += DELTA
	end_datetime += DELTA
	i += 1
outputFile.close()
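The dummy-date comment above reflects a standard-library constraint: datetime.time values do not support timedelta arithmetic, so the interval times are anchored to an arbitrary date. A minimal illustration of that workaround, independent of the taxi query:

from datetime import date, datetime, time, timedelta

t = time(12, 0, 0)
# t + timedelta(seconds=30) would raise TypeError: plain time objects have no arithmetic.
anchored = datetime.combine(date(2013, 1, 1), t)  # attach a dummy date
print(anchored + timedelta(seconds=30))           # 2013-01-01 12:00:30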
Example #7
import sys
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM
from fetch_data import fetch_data

#CHALLENGE part 1 of 3 - write your own fetch and format method for a different recommendation
#dataset. Here are a good few: https://gist.github.com/entaroadun/1653794
#And take a look at the fetch_movielens method to see what it's doing
#

data = fetch_data()

#fetch data and format it
data = fetch_movielens(min_rating=4.0)

#print training and testing data
#print(repr(data['train']))
#print(data['train'])
#print(repr(data['test']))

#CHALLENGE part 2 of 3 - use 3 different loss functions (so 3 different models), compare results, print results for
#the best one. - Available loss functions are warp, logistic, bpr, and warp-kos.

#create model
model = LightFM(loss='warp')
#model2 = LightFM(loss='logistic')
#model3 = LightFM(loss='bpr')
#model4 = LightFM(loss='warp-kos')
#train model
model.fit(data['train'], epochs=30, num_threads=2)
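CHALLENGE part 2 asks for a comparison of the available loss functions with the best result printed. A minimal sketch of how that comparison might look, using LightFM's built-in precision_at_k metric; the choice of k=10 and evaluating on data['test'] are assumptions, not part of the original snippet.

from lightfm.evaluation import precision_at_k

# Train one model per loss function on the same training interactions.
models = {loss: LightFM(loss=loss) for loss in ('warp', 'logistic', 'bpr', 'warp-kos')}
for loss, m in models.items():
    m.fit(data['train'], epochs=30, num_threads=2)

# Mean precision@10 on the held-out test interactions; higher is better.
scores = {loss: precision_at_k(m, data['test'], k=10).mean() for loss, m in models.items()}
best = max(scores, key=scores.get)
print('precision@10 per loss:', scores)
print('best loss function:', best)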
Example #8
def task(url, p):
    fetch_data.fetch_data(url)
    time.sleep(1)
    p.addThread()
Example #9
from markov_python.cc_markov import MarkovChain
from fetch_data import fetch_data

lyrics = fetch_data()

mc = MarkovChain()
for song in lyrics:
    mc.add_string(song)

print(mc)

# Sample generation (commented out in the original), printing five words per line:
# text = mc.generate_text(400)
# for i in range(0, 400, 5):
#     for j in text[i:i + 5]:
#         print(j, end=' ')
#     print("")
#     if i % 4 == 0:
#         print("\n")
Example #10
# Map States to number
map_state = dict({"NAME":"state","Alabama":"01","Alaska":"02","Arizona":"04","Arkansas":"05","California":"06","Colorado":"08","Connecticut":"09","Delaware":"10","District of Columbia":"11","Florida":"12","Georgia":"13","Hawaii":"15","Idaho":"16","Illinois":"17","Indiana":"18","Iowa":"19","Kansas":"20","Kentucky":"21","Louisiana":"22","Maine":"23","Maryland":"24","Massachusetts":"25","Michigan":"26","Minnesota":"27","Mississippi":"28","Missouri":"29","Montana":"30","Nebraska":"31","Nevada":"32","New Hampshire":"33","New Jersey":"34","New Mexico":"35","New York":"36","North Carolina":"37","North Dakota":"38","Ohio":"39","Oklahoma":"40","Oregon":"41","Pennsylvania":"42","Rhode Island":"44","South Carolina":"45","South Dakota":"46","Tennessee":"47","Texas":"48","Utah":"49","Vermont":"50","Virginia":"51","Washington":"53","West Virginia":"54","Wisconsin":"55","Wyoming":"56","Puerto Rico":"72"})
r_crit = map_state[r_crit_state]

# Import Modules
sys.path.append(os.path.split(os.path.realpath(__file__))[0])
import fetch_data as fd # This module fetches data from the Census Bureau
fd = reload(fd) # Make sure the newest version of the module is loaded
import construct_deathdata as cd # This module calculates rates from the input data and the fetched population data
cd = reload(cd) # Make sure the newest version of the module is loaded
import data_filter as df # This module filters the result based on the input
df = reload(df) # Make sure the newest version of the module is loaded

# Call the fetch_data function in the fd module. It returns the population matrix for each geographic unit
# and the age structure (the percentage of each age group).
[r_note_col, result, percent] = fd.fetch_data(base_year, r_crit_level, r_crit, r_year, r_geolevel, age_structure)

if partial_data == 'TRUE':
	filt_dict = df.build_filt_dict (inputdata, id_field)
	[result, r_note_col] = df.filter_with_dict (result, r_note_col, "GEOID", filt_dict)

# Write population matrix, and standard population structure into files
f = open(outputfolder + "\\" + "PopAge_structure_" + r_crit_level + r_crit + ".csv", "w")
head = True
for row in result:
	if head:
		headerline = row
		head = False
	temp_text = cd.vect_to_str(row)
	f.write(temp_text + "\n")
f.close()
Example #11
def pg2csv(database, subject_id, subject_id_hashed, data_root_dir, probe_info, runtype, server_address, usr, pwd, time_start, time_end):

    #with open(subjects_info) as csvfile:
    #    subject_info = csv.reader(csvfile, delimiter=';', quotechar='|')
    #    for row in subject_info:
    #        if len(row)==2:
    #            if row[0]==subject:
    #                subject_id = row[1]
    #                #year_start = int(row[2])
    #                #month_start = int(row[3])
    #                #day_start = int(row[4])
    #                #hour_start = int(row[5])
    #                #minute_start = int(row[6])
    #                #year_end = int(row[7])
    #                #month_end = int(row[8])
    #                #day_end = int(row[9])
    #                #hour_end = int(row[10])
    #                #minute_end = int(row[11])
    #        else:
    #            subject_id = subject
    #            print subject_id

    #Directory to put the extracted data in:
    dirname = data_root_dir+subject_id
    if os.path.exists(dirname):
        shutil.rmtree(dirname)
        os.makedirs(dirname)
    else:
        os.makedirs(dirname)

    #Set roughly the start and the end of the data timestamps
    #start_all = datetime.datetime(year_start,month_start,day_start,hour_start,minute_start,0)
    #end_all = datetime.datetime(year_end,month_end,day_end,hour_end,minute_end,59)

    #Convert to unix timestamp (seconds):
    #start_all_ts = start_all.strftime('%s')
    #end_all_ts = end_all.strftime('%s')
    import time
    start_all_ts = time.mktime(time_start.timetuple())
    end_all_ts = time.mktime(time_end.timetuple())

    #Reading probes info
    probes = []
    with open(probe_info) as f:
        reader = csv.reader(f, delimiter=';', quoting=csv.QUOTE_NONE)
        for row in reader:
            if row[0]=='1':
                probes.append(row[1:len(row)])

    # Extracting timestamps for data samples:
    # 'start': timestamps for the start of each trial (x: class; y: trial number)
    # 'end': timestamps for the end of each trial (x: class; y: trial number)
    # Numbers are stored in milliseconds here since the sensor timestamps are in ms.

    if runtype=='trial':
        triggers = fetch_data(database, subject_id_hashed, 'ActivityLog', 'FEATURE_VALUE', 'timestamp', start_all_ts, end_all_ts, False, server_address, usr, pwd)
        start = []
        end = []
        for row in triggers:
            if row[1]=='start':
                start.append(row[0])
            elif row[1]=='end':
                end.append(row[0])
        if len(start)!=len(end):
            print('Start and End triggers are inconsistent!')
            sys.exit(1)
    elif runtype=='all':
        start =  [float(start_all_ts)]
        end =  [float(end_all_ts)]
    else:
        print('Unknown Runtype '+runtype+'!')
        sys.exit(1)


    print('')
    with open('log_python.txt','a') as logfile:
        logfile.write('\n')

    # cut-off time in milliseconds at the beginning and the end
    clip_begin = 0
    clip_end = 0

    num_trials = len(start)

    for probe in probes:
        duplicate_timestamps = 0
        empty_entry = 0
        for j in range(num_trials):
            data = []
            #Setting the start and end timestamps for each trial
            t1 = start[j]+clip_begin
            t2 = end[j]-clip_end
            #Converting t1 and t2 to milliseconds for the probes that have their timestamps in ms
            if probe[4]=='ms':
                t1 = float(t1*1000.0)
                t2 = float(t2*1000.0)
            #Converting t1 and t2 to nanoseconds for the probes that have their timestamps in ns
            if probe[4]=='ns':
                t1 = float(t1*1000000000.0)
                t2 = float(t2*1000000000.0)
            #print(probe)
            data_temp = fetch_data(database, subject_id_hashed, probe[0], probe[2], probe[3], t1, t2, False, server_address, usr, pwd)
            if not data_temp:
                #print('\033[93m'+'PG2CSV: There is no data for probe \''+ probe[1] + '\'' + '\033[0m')
                msg = 'Subject '+subject_id+': There is no data for probe \''+ probe[1] + '\''
                print(msg)
                with open('log_python.txt','a') as logfile:
                    logfile.write(msg + '\n')
                logfile.close()
                continue
            num_columns = len(data_temp[0])
            for k in range(len(data_temp)):
                #The first column is the timestamp
                if probe[4]=='s':
                    time = float('%.6f'%(deepcopy(data_temp[k][0])))
                elif probe[4]=='ms':
                    time = float('%.6f'%(deepcopy(data_temp[k][0])/1000.0))
                else:
                    time = float('%.6f'%(deepcopy(data_temp[k][0])/1000000000.0))
                #Saving data sample only when it's different from the previous sample - this is a PR/PostgreSQL communication bug
                if not(len(data)==0) and time==data[len(data)-1][0] and probe[5]=='R':
                    duplicate_timestamps = duplicate_timestamps+1
                if len(data)==0 or time!=data[len(data)-1][0] or probe[5]=='N':
                    data_row = [time]
                    for kk in range(num_columns-1):
                        if not str(data_temp[k][kk+1]):
                            data_row.append('-99')
                            empty_entry = empty_entry+1
                        else:
                            data_row.append(deepcopy(data_temp[k][kk+1]))
                    #data_row.append(class_label)
                    #data_row.append(location_label)
                    data.append(data_row)
            if empty_entry>0:
                msg = 'Subject '+subject_id+': '+str(empty_entry)+' empty entries for probe \''+probe[1]+'\' replaced with \'-99\''
                print(msg)
                with open('log_python.txt','a') as logfile:
                    logfile.write(msg + '\n')
                logfile.close()
            if duplicate_timestamps>0:
                msg = 'Subject '+subject_id+': '+str(duplicate_timestamps)+'/'+str(len(data_temp))+' duplicate timestamps for probe \''+probe[1]+'\' removed'
                print(msg)
                with open('log_python.txt','a') as logfile:
                    logfile.write(msg + '\n')
                logfile.close()
            #Dumping the gathered samples
            if runtype=='trial':
                filename = dirname+'/'+probe[1]+'_trial%d.csv'%(j)
            else:
                filename = dirname+'/'+probe[1]+'.csv'
            with open(filename,'w') as csvfile:
                spamwriter = csv.writer(csvfile, delimiter='\t',quotechar='|',quoting=csv.QUOTE_MINIMAL)
                for i in range(len(data)):
                    spamwriter.writerow(data[i])
Example #12
def api(ticker):
    return fetch_data(ticker)
Example #13
def main(args):
	data = fetch_data(args)
	print(data.shape)
	data = calculate_pairs(data, mean)
	print(data.tail())
	print(data.columns)
Example #14
def pg2csv(database, subject_id, subject_id_hashed, data_root_dir, probe_info,
           runtype, server_address, usr, pwd, time_start, time_end):

    #with open(subjects_info) as csvfile:
    #    subject_info = csv.reader(csvfile, delimiter=';', quotechar='|')
    #    for row in subject_info:
    #        if len(row)==2:
    #            if row[0]==subject:
    #                subject_id = row[1]
    #                #year_start = int(row[2])
    #                #month_start = int(row[3])
    #                #day_start = int(row[4])
    #                #hour_start = int(row[5])
    #                #minute_start = int(row[6])
    #                #year_end = int(row[7])
    #                #month_end = int(row[8])
    #                #day_end = int(row[9])
    #                #hour_end = int(row[10])
    #                #minute_end = int(row[11])
    #        else:
    #            subject_id = subject
    #            print subject_id

    #Directory to put the extracted data in:
    dirname = data_root_dir + subject_id
    if os.path.exists(dirname):
        shutil.rmtree(dirname)
        os.makedirs(dirname)
    else:
        os.makedirs(dirname)

    #Set roughly the start and the end of the data timestamps
    #start_all = datetime.datetime(year_start,month_start,day_start,hour_start,minute_start,0)
    #end_all = datetime.datetime(year_end,month_end,day_end,hour_end,minute_end,59)

    #Convert to unix timestamp (seconds):
    #start_all_ts = start_all.strftime('%s')
    #end_all_ts = end_all.strftime('%s')
    import time
    start_all_ts = time.mktime(time_start.timetuple())
    end_all_ts = time.mktime(time_end.timetuple())

    #Reading probes info
    probes = []
    with open(probe_info) as f:
        reader = csv.reader(f, delimiter=';', quoting=csv.QUOTE_NONE)
        for row in reader:
            if row[0] == '1':
                probes.append(row[1:len(row)])

    # Extracting timestamps for data samples:
    # 'start': timestamps for the start of each trial (x: class; y: trial number)
    # 'end': timestamps for the end of each trial (x: class; y: trial number)
    # Numbers are stored in milliseconds here since the sensor timestamps are in ms.

    if runtype == 'trial':
        triggers = fetch_data(database, subject_id_hashed, 'ActivityLog',
                              'FEATURE_VALUE', 'timestamp', start_all_ts,
                              end_all_ts, False, server_address, usr, pwd)
        start = []
        end = []
        for row in triggers:
            if row[1] == 'start':
                start.append(row[0])
            elif row[1] == 'end':
                end.append(row[0])
        if len(start) != len(end):
            print('Start and End triggers are inconsistent!')
            sys.exit(1)
    elif runtype == 'all':
        start = [float(start_all_ts)]
        end = [float(end_all_ts)]
    else:
        print('Unknown Runtype ' + runtype + '!')
        sys.exit(1)

    print('')
    with open('log_python.txt', 'a') as logfile:
        logfile.write('\n')

    # cut-off time in milliseconds at the beginning and the end
    clip_begin = 0
    clip_end = 0

    num_trials = len(start)

    for probe in probes:
        duplicate_timestamps = 0
        empty_entry = 0
        for j in range(num_trials):
            data = []
            #Setting the start and end timestamps for each trial
            t1 = start[j] + clip_begin
            t2 = end[j] - clip_end
            #Converting t1 and t2 to milliseconds for the probes that have their timestamps in ms
            if probe[4] == 'ms':
                t1 = float(t1 * 1000.0)
                t2 = float(t2 * 1000.0)
            #Converting t1 and t2 to nanoseconds for the probes that have their timestamps in ns
            if probe[4] == 'ns':
                t1 = float(t1 * 1000000000.0)
                t2 = float(t2 * 1000000000.0)
            #print(probe)
            data_temp = fetch_data(database, subject_id_hashed, probe[0],
                                   probe[2], probe[3], t1, t2, False,
                                   server_address, usr, pwd)
            if not data_temp:
                #print('\033[93m'+'PG2CSV: There is no data for probe \''+ probe[1] + '\'' + '\033[0m')
                msg = ('Subject ' + subject_id +
                       ": There is no data for probe '" + probe[1] + "'")
                print(msg)
                with open('log_python.txt', 'a') as logfile:
                    logfile.write(msg + '\n')
                logfile.close()
                continue
            num_columns = len(data_temp[0])
            for k in range(len(data_temp)):
                #The first column is the timestamp
                if probe[4] == 's':
                    time = float('%.6f' % (deepcopy(data_temp[k][0])))
                elif probe[4] == 'ms':
                    time = float('%.6f' % (deepcopy(data_temp[k][0]) / 1000.0))
                else:
                    time = float('%.6f' %
                                 (deepcopy(data_temp[k][0]) / 1000000000.0))
                #Saving data sample only when it's different from the previous sample - this is a PR/PostgreSQL communication bug
                if len(data) != 0 and time == data[-1][0] and probe[5] == 'R':
                    duplicate_timestamps = duplicate_timestamps + 1
                if len(data) == 0 or time != data[-1][0] or probe[5] == 'N':
                    data_row = [time]
                    for kk in range(num_columns - 1):
                        if not str(data_temp[k][kk + 1]):
                            data_row.append('-99')
                            empty_entry = empty_entry + 1
                        else:
                            data_row.append(deepcopy(data_temp[k][kk + 1]))
                    #data_row.append(class_label)
                    #data_row.append(location_label)
                    data.append(data_row)
            if empty_entry > 0:
                msg = ('Subject ' + subject_id + ': ' + str(empty_entry) +
                       " empty entries for probe '" + probe[1] +
                       "' replaced with '-99'")
                print(msg)
                with open('log_python.txt', 'a') as logfile:
                    logfile.write(msg + '\n')
                logfile.close()
            if duplicate_timestamps > 0:
                msg = ('Subject ' + subject_id + ': ' + str(duplicate_timestamps) +
                       '/' + str(len(data_temp)) +
                       " duplicate timestamps for probe '" + probe[1] + "' removed")
                print(msg)
                with open('log_python.txt', 'a') as logfile:
                    logfile.write(msg + '\n')
                logfile.close()
            #Dumping the gathered samples
            if runtype == 'trial':
                filename = dirname + '/' + probe[1] + '_trial%d.csv' % (j)
            else:
                filename = dirname + '/' + probe[1] + '.csv'
            with open(filename, 'w') as csvfile:
                spamwriter = csv.writer(csvfile,
                                        delimiter='\t',
                                        quotechar='|',
                                        quoting=csv.QUOTE_MINIMAL)
                for i in range(len(data)):
                    spamwriter.writerow(data[i])
Example #15
from cc_markov import MarkovChain
from fetch_data import fetch_data

url = 'http://songmeanings.com/songs/view/3530822107859540342/'
text = fetch_data(url)
mc = MarkovChain()
mc.add_string(text)
result = mc.generate_text()
print(result)
Example #16
"""
Script to fetch data from the gateway and upload it to InfluxDB.
"""
import io
from fetch_data import fetch_data
from dump_to_influx import dump_to_influx

buffer = io.BytesIO()

for data in fetch_data():
    buffer.write(data)

buffer.seek(0)

dump_to_influx(io.TextIOWrapper(buffer))
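The dump_to_influx module is not shown. As a rough sketch only, assuming the gateway emits InfluxDB line-protocol text and using the influxdb-python client (the host, port, and database name are placeholders, not from the original):

from influxdb import InfluxDBClient

def dump_to_influx(text_stream, database='gateway'):
    # Hypothetical implementation: forward each non-empty line of the text
    # stream to InfluxDB as a line-protocol point.
    client = InfluxDBClient(host='localhost', port=8086, database=database)
    lines = [line.strip() for line in text_stream if line.strip()]
    client.write_points(lines, protocol='line')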