Example #1
 def save_canvas(self,
                 canvas,
                 sub_dir=None,
                 name=None,
                 print_names=True,
                 show=True):
     sub_dir = self.SaveDir if hasattr(
         self, 'SaveDir') and sub_dir is None else '{subdir}/'.format(
             subdir=sub_dir)
     canvas.Update()
     file_name = canvas.GetName() if name is None else name
     # file_path = '{save_dir}{res}/{{typ}}/{file}'.format(res=sub_dir, file=file_name, save_dir=self.ResultsDir)
     file_path = join(self.ResultsDir, sub_dir, '{typ}', file_name)
     ftypes = ['root', 'png', 'pdf', 'eps']
     out = 'Saving plots: {nam}'.format(nam=name)
     set_root_output(show)
     gROOT.ProcessLine("gErrorIgnoreLevel = kError;")
     for f in ftypes:
         ext = '.{typ}'.format(typ=f)
         ensure_dir(file_path.format(typ=f))
         out_file = '{fname}{ext}'.format(fname=file_path, ext=ext)
         out_file = out_file.format(typ=f)
         canvas.SaveAs(out_file)
     if print_names:
         log_message(out)
     set_root_output(True)
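Every example on this page calls a helper named ensure_dir that is not shown here. A minimal sketch of what such a helper might look like (an assumption, not any project's actual implementation), accepting either a directory path or a file path and creating the missing directory:

import os

def ensure_dir(path):
    # Hypothetical helper: make sure the directory for `path` exists.
    # Some examples pass a directory, others a file path, so strip a
    # file name (detected by its extension) first.
    directory = os.path.dirname(path) if os.path.splitext(path)[1] else path
    if directory and not os.path.exists(directory):
        os.makedirs(directory)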
Example #2
    def __init__(self, analysis):

        self.Dir = join(analysis.ProgramDir, 'pickles')
        self.RunNumber = analysis.RunNumber if hasattr(analysis,
                                                       'RunNumber') else None
        ensure_dir(self.Dir)

        self.TestCampaign = ''
        self.Path = None
Example #3
    def configure(self):
        # check if directories exist and create them if not
        ensure_dir(self.LoggingDir)
        ensure_dir(self.LogFileDir)

        file_name = '{hv}_{dev}_{mod}_{t}.log'.format(hv=self.Name, dev=self.DeviceName, mod=self.ModelName, t=strftime('%Y_%m_%d_%H_%M_%S'))
        file_path = join(self.LogFileDir, file_name)
        log_info('Creating LOGFILE: {}'.format(file_path))
        self.Logger.removeHandler(self.FileHandler)
        self.FileHandler = FileHandler(file_path)
        self.FileHandler.setLevel(INFO)
        self.FileHandler.setFormatter(Formatter('%(asctime)s %(message)s', '%H:%M:%S'))
        self.Logger.addHandler(self.FileHandler)
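Example #3 attaches a standard-library FileHandler with a Formatter and INFO level to an existing logger. A self-contained sketch of the same pattern; the 'hv_logger' name and the 'logs' directory are placeholders:

import os
from logging import getLogger, FileHandler, Formatter, INFO
from time import strftime

log_dir = 'logs'                                    # placeholder for self.LogFileDir
os.makedirs(log_dir, exist_ok=True)                 # what ensure_dir is used for above
logger = getLogger('hv_logger')                     # placeholder logger name
handler = FileHandler(os.path.join(log_dir, 'device_{}.log'.format(strftime('%Y_%m_%d_%H_%M_%S'))))
handler.setLevel(INFO)
handler.setFormatter(Formatter('%(asctime)s %(message)s', '%H:%M:%S'))
logger.addHandler(handler)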
Example #4
 def set_path(self,
              sub_dir,
              name=None,
              run='',
              ch=None,
              suf=None,
              camp=None):
     ensure_dir(join(self.Dir, sub_dir))
     name = name if name is not None else ''
     campaign = self.TestCampaign if camp is None else camp
     run = str(
         self.RunNumber).zfill(3) if self.RunNumber is not None else run
     ch = str(ch) if ch is not None else ''
     suf = str(suf) if suf is not None else ''
     tot_name = '_'.join(
         [item for item in [name, campaign, run, ch, suf] if item])
     self.Path = join(self.Dir, sub_dir, '{n}.pickle'.format(n=tot_name))
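Example #4 joins the non-empty name parts with underscores and stores the resulting pickle path. A hypothetical call with made-up attribute values shows the path it produces:

from os.path import join

# Made-up values standing in for self.Dir, self.TestCampaign and self.RunNumber.
Dir, TestCampaign, RunNumber = 'pickles', '201510', 23
sub_dir, name, ch, suf = 'Ph_fit', 'Fit', '1', '2'

run = str(RunNumber).zfill(3)
tot_name = '_'.join(item for item in [name, TestCampaign, run, ch, suf] if item)
print(join(Dir, sub_dir, '{n}.pickle'.format(n=tot_name)))
# -> pickles/Ph_fit/Fit_201510_023_1_2.pickle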
Example #5
 def prepare(time, f, target, url):
     ensure_dir(f)
     ensure_dir(target)
     p = pathlib.Path(target)
     if p.exists():
         os.remove(target)
     if (int(time.utc_datetime().strftime("%Y%m%d")) > 20150118) \
         and (int(time.utc_datetime().strftime("%Y%m%d")) <=
              int(now().utc_datetime().strftime("%Y%m%d"))):
         p = pathlib.Path(f)
         if not p.exists():
             with urllib.request.urlopen(url) as item:
                 item = item.read().decode('utf-8')
                 this = open(f, 'w')
                 this.write(item)
                 this.close()
         if int(time.utc_datetime().strftime("%H%M")) == 1800:
             os.symlink(f, target)
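Example #5 downloads the file only if it is missing and then links it into place. A compact sketch of just the download-if-absent step, with a placeholder URL and cache path:

import os
import pathlib
import urllib.request

url = 'https://example.com/data.txt'             # placeholder URL
f = 'cache/data.txt'                             # placeholder cache path

os.makedirs(os.path.dirname(f), exist_ok=True)   # what ensure_dir is used for above
if not pathlib.Path(f).exists():
    with urllib.request.urlopen(url) as response:
        with open(f, 'w') as out:
            out.write(response.read().decode('utf-8'))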
Example #6
def build_main_db():
    
    ensure_dir(main_geolifedb)
    
    connectionOrig=apsw.Connection(main_geolifedb)
    curs1orig = connectionOrig.cursor()
    create_table_SQL = 'CREATE TABLE geolife(person INT,  traj INT,  latitude REAL,  longitude REAL,  datetime TEXT);'
    
    curs1orig.execute( create_table_SQL )
    
    
    mainDirectory = geolife_zipfile_data_dir
    persons = os.listdir(mainDirectory)
    
    
    for person in persons:
    
        print "person n " + person
        files = os.listdir(mainDirectory + '/' + person + '/Trajectory')
        
        sqlList = []
        for trajectoryFile in files:
            
            id_trajectory = trajectoryFile.split('.')[0]
            print "\t" + id_trajectory
            with open(mainDirectory + '/' + person + '/Trajectory/' + trajectoryFile, 'r') as f:
                ct = 0
                for line in f:
                    if ct >= 6 :
                        data = line.split(',')           
                        sqlList.append((person,id_trajectory,data[0],data[1],data[-2] + " " + data[-1].replace('\n','').replace('\r','')))
                        
                    ct += 1
                          
                
            f.close()
    
        curs1orig.execute('PRAGMA journal_mode = OFF; ') # turn off journalling for speed
        curs1orig.execute('BEGIN') # this will disable autocommit for speed, bundling it into a single commit
        curs1orig.executemany('INSERT INTO geolife VALUES(?,?,?,?,?)', sqlList)  
        curs1orig.execute('END')
    
    connectionOrig.close()
    print "Done"
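The bulk insert above wraps executemany in an explicit BEGIN/END so the whole batch is committed in a single transaction. A minimal apsw sketch of that pattern, using an in-memory database and a made-up row:

import apsw

conn = apsw.Connection(':memory:')
cur = conn.cursor()
cur.execute('CREATE TABLE geolife(person INT, traj INT, latitude REAL, longitude REAL, datetime TEXT)')
rows = [('0', '20081023025304', '39.9847', '116.3184', '2008-10-23 02:53:04')]  # made-up batch
cur.execute('BEGIN')             # disable autocommit so the batch is a single transaction
cur.executemany('INSERT INTO geolife VALUES(?,?,?,?,?)', rows)
cur.execute('END')
conn.close()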
Example #7
def build_main_db():
    ensure_dir(main_geolifedb)
    
    connectionOrig=apsw.Connection(main_geolifedb)
    curs1orig = connectionOrig.cursor()
    create_table_SQL = 'CREATE TABLE geolife(person INT,  traj INT,  latitude REAL,  longitude REAL,  datetime TEXT);'
    
    curs1orig.execute( create_table_SQL )

    mainDirectory = geolife_zipfile_data_dir
    persons = os.listdir(mainDirectory)

    for person in persons:
        print("person n " + person)
        files = os.listdir(mainDirectory + '/' + person + '/Trajectory')
        
        sqlList = []
        for trajectoryFile in files:
            
            id_trajectory = trajectoryFile.split('.')[0]
            print("\t" + id_trajectory)
            with open(mainDirectory + '/' + person + '/Trajectory/' + trajectoryFile, 'r') as f:
                ct = 0
                for line in f:
                    if ct >= 6 :
                        data = line.split(',')           
                        sqlList.append((person,id_trajectory,data[0],data[1],data[-2] + " " + data[-1].replace('\n','').replace('\r','')))
                        
                    ct += 1

            f.close()

        curs1orig.execute('PRAGMA journal_mode = OFF; ') # turn off journalling for speed
        curs1orig.execute('BEGIN') # this will disable autocommit for speed, bundling it into a single commit
        curs1orig.executemany('INSERT INTO geolife VALUES(?,?,?,?,?)', sqlList)  
        curs1orig.execute('END')

    connectionOrig.close()
    print("Done")
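Both versions of build_main_db skip the first six header lines of each GeoLife .plt file and split the remaining comma-separated records. A small sketch of that split on a record in the documented .plt layout (latitude, longitude, flag, altitude, serial date, date, time):

# Sample GeoLife .plt record (after the 6-line file header).
line = '39.984702,116.318417,0,492,39744.1201851852,2008-10-23,02:53:04\n'
data = line.split(',')
latitude, longitude = data[0], data[1]
timestamp = data[-2] + ' ' + data[-1].replace('\n', '').replace('\r', '')
print(latitude, longitude, timestamp)   # 39.984702 116.318417 2008-10-23 02:53:04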
Example #8
    def run_convert_images():
        wave = Settings().wave
        ensure_dir(Config().tmp_dir)
        ensure_dir(Config().output_dir)
        image_earth = Config().image_earth(wave)
        image_night = Config().image_night(wave)
        t1, t2, t3 = gregorian.from_jd(wave.time.tt)
        month = date(t1, t2, t3).strftime('%m')

        if Settings().draw_earth:
            copyfile(Config().image_earth_month(month), image_earth)
            if Settings().draw_waves:
                this = "convert " +\
                    image_earth + " " +\
                    Config().image_waves_original(wave) +\
                    " -alpha on -compose Blend  -define compose:args=20 -composite " +\
                    image_earth + " "
                call(this, shell=True)
            if Settings().draw_temperature:
                this = "convert " +\
                    image_earth + " " +\
                    Config().image_temperature_original(wave) +\
                    " -alpha on -compose Blend  -define compose:args=30 -composite " +\
                    image_earth + " "
                call(this, shell=True)
            if Settings().draw_constellations:
                this = "convert " +\
                    image_earth + " " +\
                    Config().image_constellations +\
                    " -alpha on -compose Blend  -define compose:args=40 -composite " +\
                    image_earth + " "
                call(this, shell=True)
            if Settings().draw_flightpaths:
                this = "convert " +\
                    image_earth + " " +\
                    Config().image_flightpaths +\
                    " -alpha on -compose Blend  -define compose:args=50 -composite " +\
                    image_earth + " "
                call(this, shell=True)

            this = ("convert -brightness-contrast 20x20 " +
                    image_earth + " " + image_earth + "")
            call(this, shell=True)

        if Settings().draw_night:
            copyfile(Config().image_night_month(month), image_night)
            if Settings().draw_waves:
                this = "convert " +\
                    image_night + " " +\
                    Config().image_waves_original(wave) +\
                    " -alpha on -compose Blend  -define compose:args=20 -composite " +\
                    image_night + " "
                call(this, shell=True)
            if Settings().draw_temperature:
                this = "convert " +\
                    image_night + " " +\
                    Config().image_temperature_original(wave) +\
                    " -alpha on -compose Blend  -define compose:args=5 -composite " +\
                    image_night + " "
                call(this, shell=True)
            if Settings().draw_constellations:
                this = "convert " +\
                    image_night + " " +\
                    Config().image_constellations +\
                    " -alpha on -compose Blend  -define compose:args=50 -composite " +\
                    image_night + " "
                call(this, shell=True)
            if Settings().draw_flightpaths:
                this = "convert " +\
                    image_night + " " +\
                    Config().image_flightpaths +\
                    " -alpha on -compose Blend  -define compose:args=50 -composite " +\
                    image_night + " "
                call(this, shell=True)

            this = ("convert -brightness-contrast 15x15 " +
                    image_night + " " + image_night + "")
            call(this, shell=True)

        if not (Settings().draw_night and Settings().draw_earth):
            if Settings().draw_constellations:
                copyfile(Config().image_constellations, image_night)
                copyfile(Config().image_constellations, image_earth)

        if Settings().draw_clouds:
            this = (
                "convert -brightness-contrast 5x0 " +
                Config().image_cloud_original(wave) +
                " " +
                Config().image_cloud(wave) +
                "")
            call(this, shell=True)
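The overlay steps in Example #8 shell out to ImageMagick's convert with -compose Blend. A sketch of the same blend written as an argument list instead of a shell string; the file names are placeholders:

from subprocess import call

# Placeholder file names: blends overlay.png onto base.png at 20% opacity,
# mirroring the "-compose Blend -define compose:args=20 -composite" calls above.
call(['convert', 'base.png', 'overlay.png',
      '-alpha', 'on', '-compose', 'Blend', '-define', 'compose:args=20',
      '-composite', 'base.png'])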
Example #9
def buildPreprocessingTable(spatialRes, temporalRes, nest=True, personsIds=(0, 1, 2, 3, 4, 5, 7, 9, 12, 13, 14, 15,
                                                                            16, 17, 22, 24, 153, 28, 30, 35, 36, 38,
                                                                            39, 40, 43, 44, 50, 179, 52, 55, 68, 71,
                                                                            82, 84, 85, 92, 96, 101, 104, 167, 119, 126)):

    data = []
    nside = ComputeNside(spatialRes)
   
    # connection to the DataBase :
    connectionOrig=apsw.Connection(main_geolifedb)
    
    # loading it in the memory :
    writingConn=apsw.Connection(":memory:")
#     with conn.backup("main", connectionOrig, "main") as backup:
#         backup.step() # copy whole database in one go

    curs1 = connectionOrig.cursor()
    curs2 = connectionOrig.cursor()
    curs3 = connectionOrig.cursor()
    writingCurs = writingConn.cursor()
    
    # Create table :
#     sql = "DROP TABLE IF EXISTS {}".format(tableName)
#     writingCurs.execute(sql)
    writingCurs.execute("CREATE TABLE preproc (person INT, traj INT, idxPix INT, datetime TEXT)")
    
    def getIdxPix(longitude, latitude):
        return hp.ang2pix(nside, (90 - latitude) * np.pi / 180, longitude * np.pi / 180, nest)
    
    fmt = '%Y-%m-%d %H:%M:%S'
    
    if isinstance(personsIds, str) and personsIds.lower() == 'all':
        sql = "SELECT distinct person FROM geolife  GROUP BY person "
#        WHERE person = 1
        personsIds = [ x[0] for x in curs3.execute(sql) ]
    #   
#    
    for pId in personsIds :
        
        print('Considering s: {} t:{} Person {}'.format(spatialRes,temporalRes,pId))
        
        person = pId
#        person =0
        sql = "SELECT distinct traj FROM geolife WHERE person = {} AND NOT datetime = '' GROUP BY traj".format(person)
    #    AND traj = 20090426211055
        trajectories = curs2.execute(sql)
        
        
        prev = 0
        t_ct = 0
        # for each trajectories
        for t in trajectories:
            # select the first element of the tuple given by sqlite:
            traj = t[0]
            # make a sql query for
            sql = "SELECT longitude ,latitude , datetime FROM geolife WHERE person = {} AND NOT datetime = '' AND traj = {} ORDER BY datetime".format(person,traj)
            rows = curs1.execute(sql)
            
            
            
            ct = 0
            points = []
            for row in rows:
                actualTime = dt.strptime(row[2],fmt)
                
                # If this is the first trajectory, set the time origin :
                if ct==0:
                    # Next time is the next time a location has to be taken
                    if t_ct == 0 :
                        # If it's the first trajectory of the person, just set the time origin at the first point
                        nextTime = actualTime
                        points.append((person,traj,int(getIdxPix(row[0],row[1])),nextTime.strftime(fmt)))
                        nextTime += temporalRes
                        
                    else :
                        # Determine the number of symbols missing.
                        nb_loc_missing = int((actualTime- nextTime).total_seconds()//temporalRes.total_seconds()+1)
                        # Determine the next date when a location has to be taken:
                        nextTime = nb_loc_missing * temporalRes + nextTime
    #                    #If the ending and beginning locations of a gap in the GPS records are the same, 
    #                        #the user is taken as dwelling at the same location during that time.
    #                    if (int(getIdxPix(row[0],row[1])) == data[-1][-1][1]):
    #                        print("Fill blank-----------------------------------------------------", nb_loc_missing)
                # Else if the time is past the next time, this means we have to record the point
                elif actualTime > nextTime :
                    # Check which point is closer:
                    # This one:
                    if abs(nextTime - actualTime) < abs(nextTime - dt.strptime(prev[2],fmt)):
                        # record the point :
                        points.append((person,traj,int(getIdxPix(row[0],row[1])),nextTime.strftime(fmt)))
                    # Or the previous one ?
                    else :
                        # record the point :
                        points.append((person,traj,int(getIdxPix(prev[0],prev[1])),nextTime.strftime(fmt)))
                    nextTime += temporalRes

                ct += 1
                prev = row
            t_ct += 1
            if len(points) > 0 :
                data.append(points)            
                if len(points) > 1 :
                    writingCurs.executemany("INSERT INTO preproc (person,traj,idxPix,datetime) VALUES (?,?,?,?)",points)
                
            print("S: {} T: {} Person {}: {} trajectories processed".format( spatialRes,temporalRes, person, t_ct ))

    # writing the information about the sample:
    writingCurs.execute("CREATE TABLE infoSample (nside INT)")
    sql = "INSERT INTO infoSample VALUES ({})".format(nside)
    writingCurs.execute(sql)
    
    # Creating index :
    print("Creating index...")
    sql = """CREATE INDEX idx_person_preproc ON preproc(person);
         CREATE INDEX idx_person_traj_preproc ON preproc(person,traj);
         CREATE INDEX idx_traj_preproc ON preproc(traj);
         CREATE INDEX idx_traj_datetime_preproc ON preproc(traj,datetime);"""
    writingCurs.execute(sql)

    print("Cleaning up...")
    writingCurs.execute("vacuum")
    writingCurs.close()
   
    print("Writing out the database file...")

    # Create the database file
    filename = "S{}T{}.sqlite".format(spatialRes, temporalRes)
    path = preprocessing_dir + "/" + filename
    ensure_dir(path)
    f = open(path, 'w')
    f.close()   
    # Now write out the database back to a file in one go

    connection=apsw.Connection(path)
    with connection.backup("main", writingConn, "main") as backup:
        backup.step() # copy whole database in one go

    print("Done")
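buildPreprocessingTable quantises each GPS fix into a HEALPix pixel with hp.ang2pix after converting latitude/longitude to colatitude/longitude in radians. A standalone sketch of that single call; the nside value and the coordinate are made up:

import numpy as np
import healpy as hp

nside = 64                                  # made up; the example derives it via ComputeNside(spatialRes)
latitude, longitude = 39.9847, 116.3184     # sample coordinate in degrees
theta = (90.0 - latitude) * np.pi / 180.0   # colatitude in radians
phi = longitude * np.pi / 180.0             # longitude in radians
print(hp.ang2pix(nside, theta, phi, nest=True))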
Example #10
from datetime import datetime as dt
from Utils import ensure_dir
import os
from multiprocessing import Pool, cpu_count
from datetime import timedelta
fmt = '%Y-%m-%d %H:%M:%S'

# ========
# PATHS
# ========

main_geolifedb = '../DataGeolife/geolife.sqlite'    # will be created if it doesn't exist
preprocessing_dir = '../DataGeolife/Preproc'    # folder for temporary databases for quantisation
geolife_zipfile_data_dir = '~/Downloads/Geolife Trajectories 1.3/Data'  # original data, builds databases if don't exist

ensure_dir(main_geolifedb)
ensure_dir(preprocessing_dir)


if '~' in geolife_zipfile_data_dir:
    geolife_zipfile_data_dir = os.path.expanduser(geolife_zipfile_data_dir)

# ========


def build_specific_cache( spatialRes_temporalRes_pair ):
    spatialRes = spatialRes_temporalRes_pair[0]
    temporalRes = spatialRes_temporalRes_pair[1]
    
    print('Processing spatial res {}, temporal res {}'.format( spatialRes, temporalRes ))
    
Example #11
def run( group = "All",scale = None, output_dir = './ResultsLoP_replication/final_graphs', bulk_build_preprocessing = False):
    """
    Generates a single heatmap for a given list of Geolife ids, for a given method of computing the upper bound on
    the upper limit of predictability.
    
    :param group: ["id_str",[list of ids in the geolife dataset]]
    :type group: Nested list
    :param scale: [min_z, max_z, step]  Set the scale of the heatmap z
    :type scale: Float array
    """
    t = time.time()
    
    #Group setting
    if(group == "All"):
        suffix = "All"
        persons = "All"
    else:
        suffix = "Grp{}".format(group[0])
        persons = group[1]
    
    
    if not output_dir[-1] == '/':
        output_dir = output_dir + '/'
        
    file_name = "{}Heatmap_{}".format(output_dir,suffix)
    
    ensure_dir(file_name)
    
    print "Calculating the LoP for {}".format(suffix)
    
    if bulk_build_preprocessing:
        # will attempt to bulk build the cache using multiple CPU cores
        # will skip caches if already built.
        # if this option is not specified and a cache does not exist
        # it will be built when required, using a single CPU core.
        GeolifeSymbolisation.bulk_build_resolution_cache(listSpatialRes, listTemporalRes)
    
    mlab.openPool()
    failed_ids = set()
    LoP_RL = []
    LoP_DL = []
    LoP_failed_ct = []
    passed_norm_test = []
    for spatialRes in listSpatialRes:
        LoP_RL.append([])
        LoP_DL.append([])
        LoP_failed_ct.append([])
        passed_norm_test.append([])
        for temporalRes in listTemporalRes:
            
            #Compute data

            #---------------------------------------------
            #Load data from an existing preproc database, this will have been created
            # earlier if it did not exist.    
            data, person_ids = get_geolife_data(spatialRes, temporalRes,persons)
            #---------------------------------------------
            
            # Sanity check on loading
            for person in data:
                if len(person) == 0:
                    raise Exception("One or more person's trajectory was not loaded/created correctly.")
            # End sanity check
            
            S_RL, N_RL = empiricalEntropyRate(data,'RL')
            S_DL, N_DL = empiricalEntropyRate(data,'DL')
                    
            #Save the average:

            tmpG_RL = list(mlab.ParLoP(S_RL, N_RL)[0])
            tmpG_DL = list(mlab.ParLoP(S_DL, N_DL)[0])
            
            #-88 real fail in solve
            #-99 known fail in solve when S > log2(N)
            # See the Matlab script (ParLoP.m) for more details
            
            if (np.asarray(tmpG_RL)==-88).any():
                raise Exception("ERROR: (RL) Matlab failed the solve, but the entropy was in the correct range. Therefore an unknown error has occurred.")
            
            if (np.asarray(tmpG_DL)==-88).any():
                raise Exception("ERROR: (DL) Matlab failed the solve, but the entropy was in the correct range. Therefore an unknown error has occurred.")
            
            
            # Replace known solve fails. These are the cases when an entropy is found that is too high.
            # This means the LZ entropy rate estimate is wrong (the estimator has failed to converge)
            # There is no way to correct this, without collecting more data from the individual.
            # While excluding the individual is not ideal it is better than including a value that is 
            # *known* to be erroneous. Therefore we discard the individual. 
            tmpG_RL = np.asarray(tmpG_RL)
            tmpG_DL = np.asarray(tmpG_DL)
            
            tmpG_RL_known_fail_mask = tmpG_RL < -1
            tmpG_DL_known_fail_mask = tmpG_DL < -1
            

            # To be comparable we must arrive at a consistent set of individuals from which to compare both 
            # methods. 
            tmpG_known_fail_mask = np.asarray(tmpG_RL_known_fail_mask) | np.asarray(tmpG_DL_known_fail_mask)
            
            #print tmpG_known_fail_mask
            
            failed_ct = len(tmpG_RL[tmpG_known_fail_mask])
            
            for p in np.asarray(person_ids)[tmpG_known_fail_mask]:
                failed_ids.add(p)
            

            # Filter out known solve fails.
            tmpG_RL = list(np.asarray(tmpG_RL)[~tmpG_known_fail_mask])
            tmpG_DL = list(np.asarray(tmpG_DL)[~tmpG_known_fail_mask])
            
            if not len(tmpG_RL) == len(tmpG_DL):
                raise Exception("SHOULD NOT OCCUR 5g4dfg65")
            
            if (np.asarray(tmpG_RL) < 0).any():
                raise Exception("ERROR. lsdkfal")
            
                
            LoP_RL[-1].append(np.average(tmpG_RL))
            LoP_DL[-1].append(np.average(tmpG_DL))
            LoP_failed_ct[-1].append( failed_ct )
                

            
    mlab.closePool()
    

    
    save_results( file_name, LoP_RL, 'RL')
    save_results( file_name, LoP_DL, 'DL')
    
    f2 = file(file_name + "_failed_ct.csv", 'w')
    print 'failed_ids = {}.'.format( failed_ids )
    
    np.savetxt(f2, LoP_failed_ct,fmt ="%.5f")
    f2.close()
    
    print "Done in {} seconds".format(time.time() - t)
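Both run variants drop individuals for whom either estimator returned a known solve failure (the -99 sentinel), taking the union of the two failure masks so the same people are kept for both methods. A small NumPy illustration with made-up values:

import numpy as np

tmpG_RL = np.array([0.91, -99.0, 0.85, 0.88])   # made-up limits of predictability
tmpG_DL = np.array([0.90, 0.87, -99.0, 0.86])

known_fail = (tmpG_RL < -1) | (tmpG_DL < -1)    # union: drop a person if either method failed
print(tmpG_RL[~known_fail])                     # [0.91 0.88]
print(tmpG_DL[~known_fail])                     # [0.9  0.86]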
Example #12
def run(group="All",
        scale=None,
        output_dir='./ResultsLoP_replication/final_graphs',
        bulk_build_preprocessing=False):
    """
    Generates a single heatmap for a given list of Geolife ids, for a given method of computing the upper bound on
    the upper limit of predictability.
    
    :param group: ["id_str",[list of ids in the geolife dataset]]
    :type group: Nested list
    :param scale: [min_z, max_z, step]  Set the scale of the heatmap z
    :type scale: Float array
    """
    t = time.time()

    #Group setting
    if (group == "All"):
        suffix = "All"
        persons = "All"
    else:
        suffix = "Grp{}".format(group[0])
        persons = group[1]

    if not output_dir[-1] == '/':
        output_dir = output_dir + '/'

    file_name = "{}Heatmap_{}".format(output_dir, suffix)

    ensure_dir(file_name)

    print "Calculating the LoP for {}".format(suffix)

    if bulk_build_preprocessing:
        # will attempt to bulk build the cache using multiple CPU cores
        # will skip caches if already built.
        # if this option is not specified and a cache does not exist
        # it will be built when required, using a single CPU core.
        GeolifeSymbolisation.bulk_build_resolution_cache(
            listSpatialRes, listTemporalRes)

    mlab.openPool()
    failed_ids = set()
    LoP_RL = []
    LoP_DL = []
    LoP_failed_ct = []
    passed_norm_test = []
    for spatialRes in listSpatialRes:
        LoP_RL.append([])
        LoP_DL.append([])
        LoP_failed_ct.append([])
        passed_norm_test.append([])
        for temporalRes in listTemporalRes:

            #Compute data

            #---------------------------------------------
            #Load data from an existing preproc database, this will have been created
            # earlier if it did not exist.
            data, person_ids = get_geolife_data(spatialRes, temporalRes,
                                                persons)
            #---------------------------------------------

            # Sanity check on loading
            for person in data:
                if len(person) == 0:
                    raise Exception(
                        "One or more person's trajectory was not loaded/created correctly."
                    )
            # End sanity check

            S_RL, N_RL = empiricalEntropyRate(data, 'RL')
            S_DL, N_DL = empiricalEntropyRate(data, 'DL')

            #Save the average:

            tmpG_RL = list(mlab.ParLoP(S_RL, N_RL)[0])
            tmpG_DL = list(mlab.ParLoP(S_DL, N_DL)[0])

            #-88 real fail in solve
            #-99 known fail in solve when S > log2(N)
            # See the Matlab script (ParLoP.m) for more details

            if (np.asarray(tmpG_RL) == -88).any():
                raise Exception(
                    "ERROR: (RL) Matlab failed the solve, but the entropy was in the correct range. Therefore an unknown error has occurred."
                )

            if (np.asarray(tmpG_DL) == -88).any():
                raise Exception(
                    "ERROR: (DL) Matlab failed the solve, but the entropy was in the correct range. Therefore an unknown error has occurred."
                )

            # Replace known solve fails. These are the cases when an entropy is found that is too high.
            # This means the LZ entropy rate estimate is wrong (the estimator has failed to converge)
            # There is no way to correct this, without collecting more data from the individual.
            # While excluding the individual is not ideal it is better than including a value that is
            # *known* to be erroneous. Therefore we discard the individual.
            tmpG_RL = np.asarray(tmpG_RL)
            tmpG_DL = np.asarray(tmpG_DL)

            tmpG_RL_known_fail_mask = tmpG_RL < -1
            tmpG_DL_known_fail_mask = tmpG_DL < -1

            # To be comparable we must arrive at a consistent set of individuals from which to compare both
            # methods.
            tmpG_known_fail_mask = np.asarray(
                tmpG_RL_known_fail_mask) | np.asarray(tmpG_DL_known_fail_mask)

            #print tmpG_known_fail_mask

            failed_ct = len(tmpG_RL[tmpG_known_fail_mask])

            for p in np.asarray(person_ids)[tmpG_known_fail_mask]:
                failed_ids.add(p)

            # Filter out known solve fails.
            tmpG_RL = list(np.asarray(tmpG_RL)[~tmpG_known_fail_mask])
            tmpG_DL = list(np.asarray(tmpG_DL)[~tmpG_known_fail_mask])

            if not len(tmpG_RL) == len(tmpG_DL):
                raise Exception("SHOULD NOT OCCUR 5g4dfg65")

            if (np.asarray(tmpG_RL) < 0).any():
                raise Exception("ERROR. lsdkfal")

            LoP_RL[-1].append(np.average(tmpG_RL))
            LoP_DL[-1].append(np.average(tmpG_DL))
            LoP_failed_ct[-1].append(failed_ct)

    mlab.closePool()

    save_results(file_name, LoP_RL, 'RL')
    save_results(file_name, LoP_DL, 'DL')

    f2 = file(file_name + "_failed_ct.csv", 'w')
    print 'failed_ids = {}.'.format(failed_ids)

    np.savetxt(f2, LoP_failed_ct, fmt="%.5f")
    f2.close()

    print "Done in {} seconds".format(time.time() - t)
def buildPreprocessingTable(spatialRes,temporalRes,nest = True, personsIds = [0, 1, 2, 3, 4, 5, 7, 9, 12, 13, 14, 15, 16, 17, 22, 24, 153, 28, 30, 35, 36, 38, 39, 40, 43, 44, 50, 179, 52, 55, 68, 71, 82, 84, 85, 92, 96, 101, 104, 167, 119, 126]):

    data = []
    nside = ComputeNside(spatialRes)
   
    #connection to the DataBase :
    connectionOrig=apsw.Connection(main_geolifedb)
    
    #loading it in the memory :
    writingConn=apsw.Connection(":memory:")
#     with conn.backup("main", connectionOrig, "main") as backup:
#         backup.step() # copy whole database in one go
    
    
    curs1 = connectionOrig.cursor()
    curs2 = connectionOrig.cursor()
    curs3 = connectionOrig.cursor()
    writingCurs = writingConn.cursor()
    
    #Create table :
#     sql = "DROP TABLE IF EXISTS {}".format(tableName)
#     writingCurs.execute(sql)
    writingCurs.execute("CREATE TABLE preproc (person INT, traj INT, idxPix INT, datetime TEXT)")
    
    def getIdxPix(longitude, latitude):
        return hp.ang2pix(nside, (90 - latitude) * np.pi / 180, longitude * np.pi / 180, nest)
    
    fmt = '%Y-%m-%d %H:%M:%S'
    
    if isinstance(personsIds, str) and personsIds.lower() == 'all':
        sql = "SELECT distinct person FROM geolife  GROUP BY person "
#        WHERE person = 1
        personsIds = [ x[0] for x in curs3.execute(sql) ]
    #   
#    
    for pId in personsIds :
        
        print 'Considering s: {} t:{} Person {}'.format(spatialRes,temporalRes,pId)
        
        person = pId
#        person =0
        sql = "SELECT distinct traj FROM geolife WHERE person = {} AND NOT datetime = '' GROUP BY traj".format(person)
    #    AND traj = 20090426211055
        trajectories = curs2.execute(sql)
        
        
        prev = 0
        t_ct = 0
        #for each trajectories
        for t in trajectories:
            #select the first element of the tuple given by sqlite:
            traj = t[0]
            #make a sql query for 
            sql = "SELECT longitude ,latitude , datetime FROM geolife WHERE person = {} AND NOT datetime = '' AND traj = {} ORDER BY datetime".format(person,traj)
            rows = curs1.execute(sql)
            
            
            
            ct = 0
            points = []
            for row in rows:
                actualTime = dt.strptime(row[2],fmt)
                
                #If this is the first trajectory, set the time origin :
                if ct==0:
                    #Next time is the next time a location has to be taken
                    if t_ct == 0 :
                        #If it's the first trajectory of the person, just set the time origin at the first point
                        nextTime = actualTime
                        points.append((person,traj,int(getIdxPix(row[0],row[1])),nextTime.strftime(fmt)))
                        nextTime += temporalRes
                        
                    else :
                        #Determine the number of symbols missing. 
                        nb_loc_missing = int((actualTime- nextTime).total_seconds()//temporalRes.total_seconds()+1)
                        #Determine the next date when a location has to be taken:
                        nextTime = nb_loc_missing * temporalRes + nextTime
    #                    #If the ending and beginning locations of a gap in the GPS records are the same, 
    #                        #the user is taken as dwelling at the same location during that time.
    #                    if (int(getIdxPix(row[0],row[1])) == data[-1][-1][1]):
    #                        print "Fill blank-----------------------------------------------------", nb_loc_missing
                #Else if the time is past the next time, this means we have to record the point
                elif actualTime > nextTime :
                    #Check which point is closer:
                    #This one: 
                    if abs(nextTime - actualTime) < abs(nextTime - dt.strptime(prev[2],fmt)):
                        #record the point :
                        points.append((person,traj,int(getIdxPix(row[0],row[1])),nextTime.strftime(fmt)))
                    #Or the previous one ?
                    else :
                        #record the point :
                        points.append((person,traj,int(getIdxPix(prev[0],prev[1])),nextTime.strftime(fmt)))
                    nextTime += temporalRes
                
                    
                ct += 1
                prev = row
            t_ct += 1
            if len(points) > 0 :
                data.append(points)            
                if len(points) > 1 :
                    writingCurs.executemany("INSERT INTO preproc (person,traj,idxPix,datetime) VALUES (?,?,?,?)",points)
                
            print "S: {} T: {} Person {}: {} trajectories processed".format( spatialRes,temporalRes, person, t_ct )

    #writing the information about the sample:
    writingCurs.execute("CREATE TABLE infoSample (nside INT)")
    sql = "INSERT INTO infoSample VALUES ({})".format(nside)
    writingCurs.execute(sql)
    
    #Creating index :
    print "Creating index..."
    sql = """CREATE INDEX idx_person_preproc ON preproc(person);
         CREATE INDEX idx_person_traj_preproc ON preproc(person,traj);
         CREATE INDEX idx_traj_preproc ON preproc(traj);
         CREATE INDEX idx_traj_datetime_preproc ON preproc(traj,datetime);"""
    writingCurs.execute(sql)
   
   
    print "Cleaning up..."
    writingCurs.execute("vacuum")
    writingCurs.close()
   
    print "Writing out the database file..."
    
    
    #Create the database file 
    filename = "S{}T{}.sqlite".format(spatialRes, temporalRes)
    path = preprocessing_dir + "/" + filename
    ensure_dir(path)
    f = open(path, 'w')
    f.close()   
    # Now write out the database back to a file in one go
    

    connection=apsw.Connection(path)
    with connection.backup("main", writingConn, "main") as backup:
        backup.step() # copy whole database in one go

    print "Done"
import os
from multiprocessing import Pool, cpu_count
from datetime import timedelta
fmt = '%Y-%m-%d %H:%M:%S'

#========
# PATHS
#========

main_geolifedb = '../DataGeolife/geolife.sqlite' # will be created if it doesn't exist
preprocessing_dir = '../DataGeolife/Preproc' # folder for semi-temporary databases for specific quantisations, will be created if they don't exist
geolife_zipfile_data_dir = '~/Downloads/Geolife Trajectories 1.3/Data' # folder for the original data. Used to build the above databases if they don't exist.



ensure_dir(main_geolifedb)
ensure_dir(preprocessing_dir)


if '~' in geolife_zipfile_data_dir:
    geolife_zipfile_data_dir = os.path.expanduser(geolife_zipfile_data_dir)

#========

def build_specific_cache( spatialRes_temporalRes_pair ):
    spatialRes = spatialRes_temporalRes_pair[0]
    temporalRes = spatialRes_temporalRes_pair[1]
    
    print 'Processing spatial res {}, temporal res {}'.format( spatialRes, temporalRes )
    
    if not os.path.exists( "{}/S{}T{}.sqlite".format(preprocessing_dir,spatialRes, temporalRes) ):