Пример #1
0
def derive_features_par(source_ids,
                        noise_dict,
                        cursor,
                        connection,
                        cadence_dict={},
                        number_processors=1,
                        delete_existing=True):
    """Look up the requested sources and derive their features in parallel.

    Fetches ``source_id, original_source_id, noisification, noise_args``
    from the ``sources`` table for every id in ``source_ids``, then starts
    ``number_processors`` worker processes running ``derive_features`` and
    blocks until every worker has exited.

    Args:
        source_ids: iterable of ``source_id`` values to process. Values are
            bound as SQL parameters, so they must be of a type the database
            driver can bind (and hashable, for de-duplication).
        noise_dict: forwarded unchanged to ``derive_features``.
        cursor: DB-API cursor used for the lookup (``?`` placeholders);
            also forwarded to every worker.
        connection: forwarded unchanged to ``derive_features``.
        cadence_dict: forwarded unchanged to ``derive_features``; this
            function itself never modifies it.
        number_processors: number of worker processes to start.
        delete_existing: forwarded unchanged to ``derive_features``.
    """
    features_columns = create_database.get_pragma(cursor)

    # Drop repeated ids (keeping first-seen order) so that a duplicate which
    # lands in two different batches cannot return the same row twice.
    seen = set()
    unique_ids = []
    for source_id in source_ids:
        if source_id not in seen:
            seen.add(source_id)
            unique_ids.append(source_id)

    # SQLite caps the number of bound parameters per statement (999 by
    # default in older builds).  Querying in batches keeps the statement
    # parameterized instead of pasting repr(tuple(...)) into the SQL text,
    # which also produced invalid SQL such as "IN (5,)" for a single id.
    max_bound_parameters = 999
    select_prefix = ("SELECT source_id, original_source_id, noisification, "
                     "noise_args FROM sources WHERE source_id IN (")
    source_info = []
    for start in range(0, len(unique_ids), max_bound_parameters):
        batch = unique_ids[start:start + max_bound_parameters]
        placeholders = ",".join("?" * len(batch))
        cursor.execute(select_prefix + placeholders + ")", batch)
        # Rows are accumulated batch by batch, so their overall order
        # follows the batch order rather than one global table scan.
        source_info.extend(cursor.fetchall())

    # set up multiprocessing
    # Shared integer (initially 0) and lock handed to every worker;
    # presumably used by derive_features to claim the next entry of
    # source_info -- confirm there.
    sourcenumber = Value('i', 0)
    lock = Lock()
    worker_args = (source_info, cursor, connection, sourcenumber, lock,
                   delete_existing, features_columns,
                   noise_dict, cadence_dict)
    workers = []
    for _ in range(number_processors):
        worker = Process(target=derive_features, args=worker_args)
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()
    print("done extracting LS features")
Пример #2
0
def tfeOutput(source_ids, cursor, filename, table_name="measurements"):
    """Dump the rows of ``table_name`` for the given sources to a text file.

    The file starts with a header line holding the table's column names (as
    returned by ``create_database.get_pragma``), followed by one line per
    selected row.  Fields are separated by ``'; '``.

    Args:
        source_ids: iterable of source ids to select.  It is left
            unmodified.  Each id is rendered with ``repr()`` and
            interpolated into the SQL text, so only pass trusted values.
        cursor: DB-API cursor used to run the query.
        filename: path of the file to write; an existing file is
            overwritten.
        table_name: table to read from.  It is interpolated into the SQL
            text and must therefore be a trusted identifier.
    """
    # Render the ids into a new list: rewriting the caller's list in place
    # would leave the caller holding repr() strings instead of its ids.
    id_literals = [repr(source_id) for source_id in source_ids]

    ## now using function from create_database to get
    ## pragma of the requested table
    columns_to_get = create_database.get_pragma(cursor, table=table_name)

    # get desired rows
    sql_cmd = ("SELECT * FROM " + table_name
               + " WHERE source_id IN (" + ",".join(id_literals) + ")")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()

    # now write to file; the with-block closes it even if a write fails
    with open(filename, 'w') as out:
        out.write('; '.join(columns_to_get) + '\n')
        for row in db_info:
            out.write('; '.join([str(value) for value in row]) + '\n')
Пример #3
0
def tfeOutput(source_ids, cursor, filename, table_name="measurements"):
    """Write the rows of ``table_name`` for the given sources to a file.

    Output format: a header line with the table's column names (taken from
    ``create_database.get_pragma``), then one line per selected row, with
    fields separated by ``'; '``.

    Args:
        source_ids: iterable of source ids to select.  The argument is not
            modified.  Ids are rendered with ``repr()`` and pasted into the
            SQL text, so they must come from a trusted source.
        cursor: DB-API cursor used to run the query.
        filename: path of the output file; overwritten if it exists.
        table_name: table to read from; pasted into the SQL text, so it
            must be a trusted identifier.
    """
    # Build the SQL literals in a separate list so the caller's list keeps
    # its original values instead of being overwritten with strings.
    id_literals = [repr(source_id) for source_id in source_ids]

    ## column names of the requested table, used as the header line
    columns_to_get = create_database.get_pragma(cursor, table=table_name)

    # get desired rows
    sql_cmd = ("SELECT * FROM " + table_name
               + " WHERE source_id IN (" + ",".join(id_literals) + ")")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()

    # The context manager guarantees the file is closed on any error.
    with open(filename, 'w') as out:
        out.write('; '.join(columns_to_get) + '\n')
        for row in db_info:
            out.write('; '.join([str(value) for value in row]) + '\n')
Пример #4
0
def derive_features_par(source_ids, noise_dict, cursor, connection,
                        cadence_dict={}, number_processors=1,
                        delete_existing=True):
    """Fetch source metadata and run ``derive_features`` in worker processes.

    Selects ``source_id, original_source_id, noisification, noise_args``
    from the ``sources`` table for each id in ``source_ids``, launches
    ``number_processors`` processes that each run ``derive_features`` on
    the fetched rows, and waits for all of them to finish.

    Args:
        source_ids: iterable of ``source_id`` values.  They are passed as
            bound SQL parameters, so they must be bindable by the database
            driver and hashable (duplicates are dropped).
        noise_dict: passed through to ``derive_features``.
        cursor: DB-API cursor (``?`` placeholder style) used for the
            lookup and passed through to the workers.
        connection: passed through to ``derive_features``.
        cadence_dict: passed through to ``derive_features``; not modified
            by this function.
        number_processors: how many worker processes to start.
        delete_existing: passed through to ``derive_features``.
    """
    features_columns = create_database.get_pragma(cursor)

    # De-duplicate while preserving order; otherwise an id repeated across
    # two batches would be returned (and processed) twice.
    seen = set()
    unique_ids = []
    for source_id in source_ids:
        if source_id not in seen:
            seen.add(source_id)
            unique_ids.append(source_id)

    # Parameterized lookup in batches.  SQLite limits how many parameters
    # one statement may bind (999 by default in older builds), which is why
    # the ids used to be pasted in via repr(tuple(...)).  That form is open
    # to injection and breaks for a single id, where it yields "IN (5,)".
    max_bound_parameters = 999
    select_prefix = ("SELECT source_id, original_source_id, noisification, "
                     "noise_args FROM sources WHERE source_id IN (")
    source_info = []
    for start in range(0, len(unique_ids), max_bound_parameters):
        batch = unique_ids[start:start + max_bound_parameters]
        placeholders = ",".join("?" * len(batch))
        cursor.execute(select_prefix + placeholders + ")", batch)
        # Result order follows the batches, not a single table scan.
        source_info.extend(cursor.fetchall())

    # set up multiprocessing
    # A shared integer starting at 0 plus a lock go to every worker;
    # presumably derive_features uses them to hand out rows of source_info
    # -- confirm there.
    sourcenumber = Value('i', 0)
    lock = Lock()
    worker_args = (source_info, cursor, connection, sourcenumber, lock,
                   delete_existing, features_columns,
                   noise_dict, cadence_dict)
    workers = []
    for _ in range(number_processors):
        worker = Process(target=derive_features, args=worker_args)
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()
    print("done extracting LS features")
Пример #5
0
def outputIntervals(source_ids, cursor, filename, features_to_remove=[]):
    """Write per-original-source feature minima and maxima to a data file.

    The selected ``features`` rows are joined to ``sources`` and grouped by
    ``sources.original_source_id``.  For each group the output holds the
    minimum and maximum of every retained feature column, followed by
    ``sources.classification`` and ``sources.original_source_id``.

    The file is ``';'``-separated.  Its header names the minimum columns
    ``<feature>L``, the maximum columns ``<feature>U``, then
    ``classification`` and ``source_id`` (the label written for the
    ``original_source_id`` column).

    Args:
        source_ids: iterable of source ids to include.  It is left
            unmodified.  Ids are rendered with ``repr()`` and interpolated
            into the SQL text, so only pass trusted values.
        cursor: DB-API cursor used to run the query.
        filename: path of the file to write; overwritten if it exists.
        features_to_remove: feature column names to leave out.  The list
            is only read, never modified.

    Raises:
        ValueError: if a name in ``features_to_remove`` (or ``source_id``)
            is not among the columns reported for the ``features`` table.
    """
    # Keep the caller's list intact; only a rendered copy is needed here.
    id_literals = [repr(source_id) for source_id in source_ids]

    ## get names of features (copied so the removals below stay local)
    feature_names = list(create_database.get_pragma(cursor, table='features'))
    for unwanted in features_to_remove:
        feature_names.remove(unwanted)
    feature_names.remove('source_id')

    qualified = ['features.' + name for name in feature_names]
    select_exprs = (['min(' + column + ')' for column in qualified]
                    + ['max(' + column + ')' for column in qualified]
                    + ['sources.classification',
                       'sources.original_source_id'])

    # get desired rows in features and sources table
    sql_cmd = ("SELECT " + ', '.join(select_exprs)
               + " FROM sources, features WHERE sources.source_id = features.source_id AND features.source_id IN ("
               + ','.join(id_literals)
               + ") GROUP BY sources.original_source_id")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()

    ## header labels, built straight from the feature names rather than by
    ## slicing the SQL expressions back apart
    header = ([name + 'L' for name in feature_names]
              + [name + 'U' for name in feature_names]
              + ['classification', 'source_id'])

    ## write to file; the with-block closes it even if a write fails
    with open(filename, 'w') as out:
        out.write(';'.join(header) + '\n')
        for row in db_info:
            out.write(';'.join([str(value) for value in row]) + '\n')
Пример #6
0
def outputRfile(source_ids, cursor, filename):
    """Write features joined with source metadata to a ';'-separated file.

    Selects every column reported for the ``features`` table plus a fixed
    set of ``sources`` columns, for the rows whose ``features.source_id``
    is in ``source_ids``.  The header line lists the columns in their
    table-qualified form exactly as used in the query (``features.<name>``,
    ``sources.<name>``); one line per row follows.

    Args:
        source_ids: iterable of source ids to include.  It is left
            unmodified.  Ids are rendered with ``repr()`` and interpolated
            into the SQL text, so only pass trusted values.
        cursor: DB-API cursor used to run the query.
        filename: path of the file to write; overwritten if it exists.
    """
    # Keep the caller's list intact; only a rendered copy is needed here.
    id_literals = [repr(source_id) for source_id in source_ids]

    # get column names from features table
    columns_to_get = ['features.' + name for name in
                      create_database.get_pragma(cursor, table='features')]
    columns_to_get.extend([
        'sources.xml_filename',
        'sources.original_source_id',
        'sources.noisification',
        'sources.noise_args',
        'sources.true_period',
        'sources.classification',
        'sources.survey',
        'sources.c1',
        'sources.c2',
        'sources.e1',
        'sources.e2',
    ])

    # get desired rows in features and sources table
    sql_cmd = ("SELECT " + ', '.join(columns_to_get)
               + " FROM sources, features WHERE sources.source_id = features.source_id AND features.source_id IN ("
               + ','.join(id_literals) + ")")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()

    # The two extra full-table "SELECT source_id" queries that used to run
    # here were dropped: their results were never read.

    # now write to file; the with-block closes it even if a write fails
    with open(filename, 'w') as out:
        out.write(';'.join(columns_to_get) + '\n')
        for row in db_info:
            out.write(';'.join([str(value) for value in row]) + '\n')
Пример #7
0
def outputIntervals(source_ids, cursor, filename, features_to_remove=[]):
    """Output feature intervals (min and max per original source) to a file.

    Joins the selected ``features`` rows to ``sources``, groups them by
    ``sources.original_source_id`` and, per group, writes the minimum and
    maximum of each retained feature column followed by
    ``sources.classification`` and ``sources.original_source_id``.

    The output is ``';'``-separated.  Header labels are ``<feature>L`` for
    minima, ``<feature>U`` for maxima, then ``classification`` and
    ``source_id`` (the label used for the ``original_source_id`` column).

    Args:
        source_ids: iterable of source ids to include; not modified.  Ids
            are rendered with ``repr()`` and pasted into the SQL text, so
            they must come from a trusted source.
        cursor: DB-API cursor used to run the query.
        filename: path of the output file; overwritten if it exists.
        features_to_remove: names of feature columns to exclude; only
            read, never modified.

    Raises:
        ValueError: if ``source_id`` or any entry of ``features_to_remove``
            is missing from the columns reported for ``features``.
    """
    # Render ids into a fresh list so the caller's list is not overwritten.
    id_literals = [repr(source_id) for source_id in source_ids]

    ## get names of features (a copy, so removals do not leak out)
    feature_names = list(create_database.get_pragma(cursor, table='features'))
    for unwanted in features_to_remove:
        feature_names.remove(unwanted)
    feature_names.remove('source_id')

    qualified = ['features.' + name for name in feature_names]
    select_exprs = (['min(' + column + ')' for column in qualified]
                    + ['max(' + column + ')' for column in qualified]
                    + ['sources.classification',
                       'sources.original_source_id'])

    # get desired rows in features and sources table
    sql_cmd = ("SELECT " + ', '.join(select_exprs)
               + " FROM sources, features WHERE sources.source_id = features.source_id AND features.source_id IN ("
               + ','.join(id_literals)
               + ") GROUP BY sources.original_source_id")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()

    ## header labels come directly from the feature names; this avoids
    ## re-parsing the "min(...)"/"max(...)" SQL expressions with slices
    header = ([name + 'L' for name in feature_names]
              + [name + 'U' for name in feature_names]
              + ['classification', 'source_id'])

    ## write to file; closed automatically, including on errors
    with open(filename, 'w') as out:
        out.write(';'.join(header) + '\n')
        for row in db_info:
            out.write(';'.join([str(value) for value in row]) + '\n')
Пример #8
0
def outputRfile(source_ids, cursor, filename):
    """Export features plus source metadata as a ';'-separated text file.

    The query returns all columns reported for the ``features`` table
    together with a fixed list of ``sources`` columns, restricted to rows
    whose ``features.source_id`` is in ``source_ids``.  The header line
    uses the table-qualified names from the query (``features.<name>``,
    ``sources.<name>``) and is followed by one line per row.

    Args:
        source_ids: iterable of source ids to include; not modified.  Ids
            are rendered with ``repr()`` and pasted into the SQL text, so
            they must come from a trusted source.
        cursor: DB-API cursor used to run the query.
        filename: path of the output file; overwritten if it exists.
    """
    # Render ids into a fresh list so the caller's list is not overwritten.
    id_literals = [repr(source_id) for source_id in source_ids]

    # get column names from features table
    columns_to_get = ['features.' + name for name in
                      create_database.get_pragma(cursor, table='features')]
    columns_to_get.extend([
        'sources.xml_filename',
        'sources.original_source_id',
        'sources.noisification',
        'sources.noise_args',
        'sources.true_period',
        'sources.classification',
        'sources.survey',
        'sources.c1',
        'sources.c2',
        'sources.e1',
        'sources.e2',
    ])

    # get desired rows in features and sources table
    sql_cmd = ("SELECT " + ', '.join(columns_to_get)
               + " FROM sources, features WHERE sources.source_id = features.source_id AND features.source_id IN ("
               + ','.join(id_literals) + ")")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()

    # Two further full-table "SELECT source_id" scans were removed from
    # this point because nothing ever used their results.

    # now write to file; closed automatically, including on errors
    with open(filename, 'w') as out:
        out.write(';'.join(columns_to_get) + '\n')
        for row in db_info:
            out.write(';'.join([str(value) for value in row]) + '\n')
Пример #9
0
def outputOriginalOnly(source_ids, cursor, filename, features_to_remove=[]):
    """Write feature values and classification of sources to a data file.

    Selects the retained ``features`` columns plus
    ``sources.classification`` for every row whose ``features.source_id``
    is in ``source_ids``.  The output is ``';'``-separated; the header
    holds the bare column names (without the table prefix), ending with
    ``classification``.

    Args:
        source_ids: iterable of source ids to include.  It is left
            unmodified.  Ids are rendered with ``repr()`` and interpolated
            into the SQL text, so only pass trusted values.
        cursor: DB-API cursor used to run the query.
        filename: path of the file to write; overwritten if it exists.
        features_to_remove: feature column names to leave out.  The list
            is only read, never modified.

    Raises:
        ValueError: if a name in ``features_to_remove`` is not among the
            columns reported for the ``features`` table.
    """
    # Keep the caller's list intact; only a rendered copy is needed here.
    id_literals = [repr(source_id) for source_id in source_ids]

    ## get names of features (copied so the removals below stay local)
    feature_names = list(create_database.get_pragma(cursor, table='features'))
    for unwanted in features_to_remove:
        feature_names.remove(unwanted)

    select_exprs = ['features.' + name for name in feature_names]
    select_exprs.append('sources.classification')

    # get desired rows in features and sources table
    sql_cmd = ("SELECT " + ', '.join(select_exprs)
               + " FROM sources, features WHERE sources.source_id = features.source_id AND features.source_id IN ("
               + ','.join(id_literals) + ")")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()

    ## header uses the unqualified column names
    header = feature_names + ['classification']

    ## write to file; the with-block closes it even if a write fails
    with open(filename, 'w') as out:
        out.write(';'.join(header) + '\n')
        for row in db_info:
            out.write(';'.join([str(value) for value in row]) + '\n')
Пример #10
0
def outputOriginalOnly(source_ids, cursor, filename, features_to_remove=[]):
    """Export feature values with their classification to a text file.

    For each row whose ``features.source_id`` is in ``source_ids`` the
    retained ``features`` columns and ``sources.classification`` are
    written, ``';'``-separated.  The header line carries the column names
    without their table prefix and ends with ``classification``.

    Args:
        source_ids: iterable of source ids to include; not modified.  Ids
            are rendered with ``repr()`` and pasted into the SQL text, so
            they must come from a trusted source.
        cursor: DB-API cursor used to run the query.
        filename: path of the output file; overwritten if it exists.
        features_to_remove: names of feature columns to exclude; only
            read, never modified.

    Raises:
        ValueError: if any entry of ``features_to_remove`` is missing from
            the columns reported for ``features``.
    """
    # Render ids into a fresh list so the caller's list is not overwritten.
    id_literals = [repr(source_id) for source_id in source_ids]

    ## get names of features (a copy, so removals do not leak out)
    feature_names = list(create_database.get_pragma(cursor, table='features'))
    for unwanted in features_to_remove:
        feature_names.remove(unwanted)

    select_exprs = ['features.' + name for name in feature_names]
    select_exprs.append('sources.classification')

    # get desired rows in features and sources table
    sql_cmd = ("SELECT " + ', '.join(select_exprs)
               + " FROM sources, features WHERE sources.source_id = features.source_id AND features.source_id IN ("
               + ','.join(id_literals) + ")")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()

    ## header uses the unqualified column names
    header = feature_names + ['classification']

    ## write to file; closed automatically, including on errors
    with open(filename, 'w') as out:
        out.write(';'.join(header) + '\n')
        for row in db_info:
            out.write(';'.join([str(value) for value in row]) + '\n')
Пример #11
0
# Interactive-session style script: inspect the sources table, remove the
# derived rows, then prepare the inputs for creating noisified versions.
# Relies on `cursor` and `create_database` already being in scope.

# Print a summary row (id, survey, number of points, class, period) for
# every entry in the sources table.
sql_cmd = """SELECT source_id,survey,number_points,classification,true_period FROM sources"""
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
for i in db_info:
    print i

# Bare expression: the row count is only displayed when run interactively.
len(db_info)



## create noisified versions
# Pick up any edits made to the create_database module during the session.
reload(create_database)
# Remove every row whose source_id differs from its original_source_id
# (presumably previously generated noisified copies -- confirm).
# NOTE(review): no commit follows this DELETE in the visible excerpt.
sql_cmd = """DELETE from sources WHERE source_id != original_source_id"""
cursor.execute(sql_cmd)

# Column names of the sources table, with 'raw_xml' taken out.
source_pragma = create_database.get_pragma(cursor,table='sources')
del source_pragma[source_pragma.index('raw_xml')]
# Bare expression: shows the remaining column names when run interactively.
source_pragma
# Settings for the noisification step below; their exact meaning is
# defined by create_database.noisify_unsmoothed_sources.
n_points = [10,20,30,40,50]
n_versions_first = 1
n_versions_random = 0
# Collect the ids of all sources remaining after the DELETE above.
sql_cmd = """SELECT source_id FROM sources"""
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
len(db_info)
# `tolist` is not defined in this excerpt; presumably it flattens the
# 1-tuples returned by fetchall() into a plain list of ids -- confirm.
db_info = tolist(db_info)

for i in db_info:
   create_database.noisify_unsmoothed_sources(cursor,
                                              i,
                                              source_pragma,
Пример #12
0
# Interactive-session style script: reset the sources table to its original
# entries and create noisified versions of each one.  Relies on `cursor`,
# `connection`, `math` and `create_database` already being in scope.

# Count the rows currently in the measurements table.
sql_cmd = """SELECT count(*) FROM measurements"""
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
# Bare expression: the result is only displayed when run interactively.
db_info




# Remove every row whose source_id differs from its original_source_id
# (presumably previously generated noisified copies -- confirm), and make
# the deletion permanent.
sql_cmd = """DELETE FROM sources WHERE source_id != original_source_id"""
cursor.execute(sql_cmd)
connection.commit()


## create noisified prototypes
# Column names of the sources table, with 'raw_xml' taken out.
source_pragma = create_database.get_pragma(cursor,table='sources')
del source_pragma[source_pragma.index('raw_xml')]
# Settings forwarded to noisify_unsmoothed_sources below; their exact
# meaning is defined by that function.
n_versions_random = 5
n_versions_first = 0
# Fetch (source_id, number_points) for every row that is its own original.
sql_cmd = """SELECT source_id, number_points FROM sources WHERE source_id = original_source_id"""
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
len(db_info)
for i in db_info:
    # i[0] is source_id and i[1] is number_points (see the SELECT above);
    # a single n_points value is used: half the points, rounded up.
    # NOTE(review): under Python 2 math.ceil returns a float -- confirm
    # that noisify_unsmoothed_sources accepts a float here.
    n_points = [math.ceil(i[1] / 2.)]
    create_database.noisify_unsmoothed_sources(cursor,
                                               i[0],
                                               source_pragma,
                                               n_points,
                                               n_versions_first=n_versions_first,
                                               n_versions_random=n_versions_random)