def derive_features_par(source_ids, noise_dict, cursor, connection, cadence_dict={}, number_processors=1, delete_existing=True): features_columns = create_database.get_pragma(cursor) # obtain information about source_ids you are deriving features for # should the following two lines to avoid injection attacks # but sqlite put max of 1000 on this #sql_cmd = """SELECT source_id, original_source_id, noisification, noise_args FROM sources WHERE source_id IN (""" + (len(source_ids) * "?,")[:-1] + ")" #cursor.execute(sql_cmd,source_ids) # so instead we use "dangerous" form sql_cmd = """SELECT source_id, original_source_id, noisification, noise_args FROM sources WHERE source_id IN """ + repr( tuple(source_ids)) cursor.execute(sql_cmd) source_info = cursor.fetchall() # set up multiprocessing sourcenumber = Value('i', 0) l = Lock() l1 = [] for i in np.arange(number_processors): l1.append(Process(target=derive_features, args=(source_info, \ cursor,connection,sourcenumber,l, \ delete_existing,features_columns, \ noise_dict,cadence_dict))) l1[i].start() for i in np.arange(number_processors): l1[i].join() print "done extracting LS features"
def tfeOutput(source_ids, cursor, filename, table_name="measurements"):
    """Dump the rows of ``table_name`` for the given sources to a file.

    Writes a header of column names, then one semicolon-separated row
    per database record.

    Parameters
    ----------
    source_ids : list
        source_id values to export; entries are replaced in place by
        their string form (historical behaviour, preserved for callers
        that rely on it)
    cursor :
        open sqlite3 cursor
    filename : str
        output path; overwritten if it exists
    table_name : str
        table to export (default "measurements")
    """
    # Replace each id with its string representation, in place.
    # (The old comment said "convert to integers", but repr() makes
    # strings -- kept, since sqlite's type affinity still matches.)
    for j, sid in enumerate(source_ids):
        source_ids[j] = repr(sid)
    # Column names via the measurements-table pragma.
    columns_to_get = create_database.get_pragma(cursor, table=table_name)
    # Parameterize the id list instead of pasting it into the SQL text;
    # chunk to respect sqlite's bound-variable cap (historically 999).
    # table_name cannot be bound, so the (trusted) name is interpolated.
    db_info = []
    sql_base = "SELECT * FROM %s WHERE source_id IN (%s)"
    chunk = 999
    for start in range(0, len(source_ids), chunk):
        ids = source_ids[start:start + chunk]
        cursor.execute(sql_base % (table_name, ','.join('?' * len(ids))),
                       ids)
        db_info.extend(cursor.fetchall())
    # Write header + rows; "with" closes the file even on write errors.
    with open(filename, 'w') as out:
        out.write('; '.join(columns_to_get) + '\n')
        for row in db_info:
            out.write('; '.join(str(v) for v in row) + '\n')
def tfeOutput(source_ids, cursor, filename, table_name="measurements"):
    """Dump the rows of ``table_name`` for the given sources to a file.

    Writes a header of column names, then one semicolon-separated row
    per database record.

    Parameters
    ----------
    source_ids : list
        source_id values to export; entries are replaced in place by
        their string form (historical behaviour, preserved for callers
        that rely on it)
    cursor :
        open sqlite3 cursor
    filename : str
        output path; overwritten if it exists
    table_name : str
        table to export (default "measurements")
    """
    # Replace each id with its string representation, in place.
    # (The old comment said "convert to integers", but repr() makes
    # strings -- kept, since sqlite's type affinity still matches.)
    for j, sid in enumerate(source_ids):
        source_ids[j] = repr(sid)
    # Column names via the measurements-table pragma.
    columns_to_get = create_database.get_pragma(cursor, table=table_name)
    # Parameterize the id list instead of pasting it into the SQL text;
    # chunk to respect sqlite's bound-variable cap (historically 999).
    # table_name cannot be bound, so the (trusted) name is interpolated.
    db_info = []
    sql_base = "SELECT * FROM %s WHERE source_id IN (%s)"
    chunk = 999
    for start in range(0, len(source_ids), chunk):
        ids = source_ids[start:start + chunk]
        cursor.execute(sql_base % (table_name, ','.join('?' * len(ids))),
                       ids)
        db_info.extend(cursor.fetchall())
    # Write header + rows; "with" closes the file even on write errors.
    with open(filename, 'w') as out:
        out.write('; '.join(columns_to_get) + '\n')
        for row in db_info:
            out.write('; '.join(str(v) for v in row) + '\n')
def derive_features_par(source_ids,noise_dict,cursor,connection,cadence_dict={},number_processors=1,delete_existing=True): features_columns = create_database.get_pragma(cursor) # obtain information about source_ids you are deriving features for # should the following two lines to avoid injection attacks # but sqlite put max of 1000 on this #sql_cmd = """SELECT source_id, original_source_id, noisification, noise_args FROM sources WHERE source_id IN (""" + (len(source_ids) * "?,")[:-1] + ")" #cursor.execute(sql_cmd,source_ids) # so instead we use "dangerous" form sql_cmd = """SELECT source_id, original_source_id, noisification, noise_args FROM sources WHERE source_id IN """ + repr(tuple(source_ids)) cursor.execute(sql_cmd) source_info = cursor.fetchall() # set up multiprocessing sourcenumber = Value('i',0) l = Lock() l1 = [] for i in np.arange(number_processors): l1.append(Process(target=derive_features, args=(source_info, \ cursor,connection,sourcenumber,l, \ delete_existing,features_columns, \ noise_dict,cadence_dict))) l1[i].start() for i in np.arange(number_processors): l1[i].join() print "done extracting LS features"
def outputIntervals(source_ids, cursor, filename, features_to_remove=None):
    """Output per-object feature intervals (min/max) to a data file.

    For each original source, aggregates its noisified versions'
    features with min()/max() and writes one semicolon-separated line,
    plus the classification and original source id.

    Parameters
    ----------
    source_ids : list
        source_id values to include; entries are replaced in place by
        their string form (historical behaviour, preserved)
    cursor :
        open sqlite3 cursor
    filename : str
        output path; overwritten if it exists
    features_to_remove : list of str, optional
        feature-column names to exclude from the output
    """
    # Avoid the shared-mutable-default pitfall of ``=[]``.
    if features_to_remove is None:
        features_to_remove = []
    # Replace each id with its string representation, in place.
    for j, sid in enumerate(source_ids):
        source_ids[j] = repr(sid)
    # Feature column names from the features-table pragma.
    feature_names = create_database.get_pragma(cursor, table='features')
    # Drop unwanted columns; tolerate names that are already absent
    # (resolves the old TODO about try/except).
    for name in list(features_to_remove) + ['source_id']:
        try:
            feature_names.remove(name)
        except ValueError:
            pass
    qualified = ['features.' + name for name in feature_names]
    # min() columns first, then max(), then the metadata columns --
    # order must match the header renaming below.
    columns_to_get = (['min(' + c + ')' for c in qualified]
                      + ['max(' + c + ')' for c in qualified])
    columns_to_get.append('sources.classification')
    columns_to_get.append('sources.original_source_id')
    # NOTE(review): ids are interpolated into the SQL text (they are
    # repr()s of trusted values); a parameterized IN list cannot be
    # chunked here because GROUP BY must see all rows at once.
    sql_cmd = ("SELECT " + ', '.join(columns_to_get)
               + " FROM sources, features"
               + " WHERE sources.source_id = features.source_id"
               + " AND features.source_id IN ("
               + ','.join(source_ids) + ")"
               + " GROUP BY sources.original_source_id")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()
    # Build the header: min(...) -> <feature>L, max(...) -> <feature>U,
    # strip the table qualifier, and rename the last column.
    header = []
    for col in columns_to_get:
        if col.startswith('min('):
            col = col[4:-1] + 'L'
        elif col.startswith('max('):
            col = col[4:-1] + 'U'
        header.append(col.split('.')[1])
    header[-1] = 'source_id'
    # Write header + rows; "with" closes the file even on write errors.
    with open(filename, 'w') as out:
        out.write(';'.join(header) + '\n')
        for row in db_info:
            out.write(';'.join(str(v) for v in row) + '\n')
def outputRfile(source_ids, cursor, filename):
    """Write all features plus source metadata for the given sources
    to a semicolon-delimited text file (for loading into R).

    Parameters
    ----------
    source_ids : list
        source_id values to export; entries are replaced in place by
        their string form (historical behaviour, preserved)
    cursor :
        open sqlite3 cursor
    filename : str
        output path; overwritten if it exists
    """
    # Replace each id with its string representation, in place.
    for j, sid in enumerate(source_ids):
        source_ids[j] = repr(sid)
    # All feature columns plus selected metadata columns from sources.
    feature_cols = create_database.get_pragma(cursor, table='features')
    columns_to_get = ['features.' + c for c in feature_cols]
    columns_to_get.extend(['sources.xml_filename',
                           'sources.original_source_id',
                           'sources.noisification',
                           'sources.noise_args',
                           'sources.true_period',
                           'sources.classification',
                           'sources.survey',
                           'sources.c1',
                           'sources.c2',
                           'sources.e1',
                           'sources.e2'])
    # NOTE(review): ids are interpolated into the SQL text (they are
    # repr()s of trusted values, so injection is not in play here).
    sql_cmd = ("SELECT " + ', '.join(columns_to_get)
               + " FROM sources, features"
               + " WHERE sources.source_id = features.source_id"
               + " AND features.source_id IN ("
               + ','.join(source_ids) + ")")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()
    # (Two dead SELECTs whose results -- db_info2/db_info3 -- were
    # never used have been removed.)
    # Write header + rows; "with" closes the file even on write errors.
    with open(filename, 'w') as out:
        out.write(';'.join(columns_to_get) + '\n')
        for row in db_info:
            out.write(';'.join(str(v) for v in row) + '\n')
def outputIntervals(source_ids, cursor, filename, features_to_remove=None):
    """Output per-object feature intervals (min/max) to a data file.

    For each original source, aggregates its noisified versions'
    features with min()/max() and writes one semicolon-separated line,
    plus the classification and original source id.

    Parameters
    ----------
    source_ids : list
        source_id values to include; entries are replaced in place by
        their string form (historical behaviour, preserved)
    cursor :
        open sqlite3 cursor
    filename : str
        output path; overwritten if it exists
    features_to_remove : list of str, optional
        feature-column names to exclude from the output
    """
    # Avoid the shared-mutable-default pitfall of ``=[]``.
    if features_to_remove is None:
        features_to_remove = []
    # Replace each id with its string representation, in place.
    for j, sid in enumerate(source_ids):
        source_ids[j] = repr(sid)
    # Feature column names from the features-table pragma.
    feature_names = create_database.get_pragma(cursor, table='features')
    # Drop unwanted columns; tolerate names that are already absent
    # (resolves the old TODO about try/except).
    for name in list(features_to_remove) + ['source_id']:
        try:
            feature_names.remove(name)
        except ValueError:
            pass
    qualified = ['features.' + name for name in feature_names]
    # min() columns first, then max(), then the metadata columns --
    # order must match the header renaming below.
    columns_to_get = (['min(' + c + ')' for c in qualified]
                      + ['max(' + c + ')' for c in qualified])
    columns_to_get.append('sources.classification')
    columns_to_get.append('sources.original_source_id')
    # NOTE(review): ids are interpolated into the SQL text (they are
    # repr()s of trusted values); a parameterized IN list cannot be
    # chunked here because GROUP BY must see all rows at once.
    sql_cmd = ("SELECT " + ', '.join(columns_to_get)
               + " FROM sources, features"
               + " WHERE sources.source_id = features.source_id"
               + " AND features.source_id IN ("
               + ','.join(source_ids) + ")"
               + " GROUP BY sources.original_source_id")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()
    # Build the header: min(...) -> <feature>L, max(...) -> <feature>U,
    # strip the table qualifier, and rename the last column.
    header = []
    for col in columns_to_get:
        if col.startswith('min('):
            col = col[4:-1] + 'L'
        elif col.startswith('max('):
            col = col[4:-1] + 'U'
        header.append(col.split('.')[1])
    header[-1] = 'source_id'
    # Write header + rows; "with" closes the file even on write errors.
    with open(filename, 'w') as out:
        out.write(';'.join(header) + '\n')
        for row in db_info:
            out.write(';'.join(str(v) for v in row) + '\n')
def outputRfile(source_ids, cursor, filename):
    """Write all features plus source metadata for the given sources
    to a semicolon-delimited text file (for loading into R).

    Parameters
    ----------
    source_ids : list
        source_id values to export; entries are replaced in place by
        their string form (historical behaviour, preserved)
    cursor :
        open sqlite3 cursor
    filename : str
        output path; overwritten if it exists
    """
    # Replace each id with its string representation, in place.
    for j, sid in enumerate(source_ids):
        source_ids[j] = repr(sid)
    # All feature columns plus selected metadata columns from sources.
    feature_cols = create_database.get_pragma(cursor, table='features')
    columns_to_get = ['features.' + c for c in feature_cols]
    columns_to_get.extend(['sources.xml_filename',
                           'sources.original_source_id',
                           'sources.noisification',
                           'sources.noise_args',
                           'sources.true_period',
                           'sources.classification',
                           'sources.survey',
                           'sources.c1',
                           'sources.c2',
                           'sources.e1',
                           'sources.e2'])
    # NOTE(review): ids are interpolated into the SQL text (they are
    # repr()s of trusted values, so injection is not in play here).
    sql_cmd = ("SELECT " + ', '.join(columns_to_get)
               + " FROM sources, features"
               + " WHERE sources.source_id = features.source_id"
               + " AND features.source_id IN ("
               + ','.join(source_ids) + ")")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()
    # (Two dead SELECTs whose results -- db_info2/db_info3 -- were
    # never used have been removed.)
    # Write header + rows; "with" closes the file even on write errors.
    with open(filename, 'w') as out:
        out.write(';'.join(columns_to_get) + '\n')
        for row in db_info:
            out.write(';'.join(str(v) for v in row) + '\n')
def outputOriginalOnly(source_ids, cursor, filename, features_to_remove=None):
    """Write features plus classification for the given sources to a
    semicolon-delimited text file (no min/max aggregation).

    Parameters
    ----------
    source_ids : list
        source_id values to export; entries are replaced in place by
        their string form (historical behaviour, preserved)
    cursor :
        open sqlite3 cursor
    filename : str
        output path; overwritten if it exists
    features_to_remove : list of str, optional
        feature-column names to exclude from the output
    """
    # Avoid the shared-mutable-default pitfall of ``=[]``.
    if features_to_remove is None:
        features_to_remove = []
    # Replace each id with its string representation, in place.
    for j, sid in enumerate(source_ids):
        source_ids[j] = repr(sid)
    # Feature column names from the features-table pragma.
    feature_cols = create_database.get_pragma(cursor, table='features')
    # Drop unwanted columns; tolerate names that are already absent
    # (resolves the old TODO about try/except).
    for name in features_to_remove:
        try:
            feature_cols.remove(name)
        except ValueError:
            pass
    columns_to_get = ['features.' + c for c in feature_cols]
    columns_to_get.append('sources.classification')
    # NOTE(review): ids are interpolated into the SQL text (they are
    # repr()s of trusted values, so injection is not in play here).
    sql_cmd = ("SELECT " + ', '.join(columns_to_get)
               + " FROM sources, features"
               + " WHERE sources.source_id = features.source_id"
               + " AND features.source_id IN ("
               + ','.join(source_ids) + ")")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()
    # Header drops the table qualifier ("features.foo" -> "foo").
    header = [c.split('.')[1] for c in columns_to_get]
    # Write header + rows; "with" closes the file even on write errors.
    with open(filename, 'w') as out:
        out.write(';'.join(header) + '\n')
        for row in db_info:
            out.write(';'.join(str(v) for v in row) + '\n')
def outputOriginalOnly(source_ids, cursor, filename, features_to_remove=None):
    """Write features plus classification for the given sources to a
    semicolon-delimited text file (no min/max aggregation).

    Parameters
    ----------
    source_ids : list
        source_id values to export; entries are replaced in place by
        their string form (historical behaviour, preserved)
    cursor :
        open sqlite3 cursor
    filename : str
        output path; overwritten if it exists
    features_to_remove : list of str, optional
        feature-column names to exclude from the output
    """
    # Avoid the shared-mutable-default pitfall of ``=[]``.
    if features_to_remove is None:
        features_to_remove = []
    # Replace each id with its string representation, in place.
    for j, sid in enumerate(source_ids):
        source_ids[j] = repr(sid)
    # Feature column names from the features-table pragma.
    feature_cols = create_database.get_pragma(cursor, table='features')
    # Drop unwanted columns; tolerate names that are already absent
    # (resolves the old TODO about try/except).
    for name in features_to_remove:
        try:
            feature_cols.remove(name)
        except ValueError:
            pass
    columns_to_get = ['features.' + c for c in feature_cols]
    columns_to_get.append('sources.classification')
    # NOTE(review): ids are interpolated into the SQL text (they are
    # repr()s of trusted values, so injection is not in play here).
    sql_cmd = ("SELECT " + ', '.join(columns_to_get)
               + " FROM sources, features"
               + " WHERE sources.source_id = features.source_id"
               + " AND features.source_id IN ("
               + ','.join(source_ids) + ")")
    cursor.execute(sql_cmd)
    db_info = cursor.fetchall()
    # Header drops the table qualifier ("features.foo" -> "foo").
    header = [c.split('.')[1] for c in columns_to_get]
    # Write header + rows; "with" closes the file even on write errors.
    with open(filename, 'w') as out:
        out.write(';'.join(header) + '\n')
        for row in db_info:
            out.write(';'.join(str(v) for v in row) + '\n')
# --- Interactive/script fragment: inspect sources, then rebuild the
# noisified versions.  Relies on session-level ``cursor``,
# ``create_database``, and ``tolist`` being in scope.
# NOTE(review): this fragment is TRUNCATED -- the final call to
# noisify_unsmoothed_sources has no closing argument list; recover the
# missing arguments from version control or from the similar fragment
# below before running.
sql_cmd = """SELECT source_id,survey,number_points,classification,true_period FROM sources"""
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
# Print every source row for inspection (Python 2 print statement).
for i in db_info:
    print i
# Bare expression: interactive-session leftover (shows row count in a REPL).
len(db_info)
## create noisified versions
reload(create_database)
# Remove every derived (noisified) source, keeping only originals.
sql_cmd = """DELETE from sources WHERE source_id != original_source_id"""
cursor.execute(sql_cmd)
# Column names of the sources table, minus the bulky raw_xml column.
source_pragma = create_database.get_pragma(cursor,table='sources')
del source_pragma[source_pragma.index('raw_xml')]
# Bare expression: interactive-session leftover.
source_pragma
# Noisification settings: one deterministic ("first") version at each
# of these point counts, no random versions.
n_points = [10,20,30,40,50]
n_versions_first = 1
n_versions_random = 0
sql_cmd = """SELECT source_id FROM sources"""
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
# Bare expression: interactive-session leftover.
len(db_info)
db_info = tolist(db_info)
# Noisify each remaining (original) source.
for i in db_info:
    create_database.noisify_unsmoothed_sources(cursor, i, source_pragma,
# --- Interactive/script fragment: reset derived sources and create
# noisified prototypes.  Relies on session-level ``cursor``,
# ``connection``, ``create_database``, and ``math`` being in scope.
sql_cmd = """SELECT count(*) FROM measurements"""
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
# Bare expression: interactive-session leftover (shows count in a REPL).
db_info
# Remove every derived (noisified) source, keeping only originals.
sql_cmd = """DELETE FROM sources WHERE source_id != original_source_id"""
cursor.execute(sql_cmd)
connection.commit()
## create noisified prototypes
# Column names of the sources table, minus the bulky raw_xml column.
source_pragma = create_database.get_pragma(cursor,table='sources')
del source_pragma[source_pragma.index('raw_xml')]
# Noisification settings: five random versions per source, no
# deterministic "first" versions.
n_versions_random = 5
n_versions_first = 0
sql_cmd = """SELECT source_id, number_points FROM sources WHERE source_id = original_source_id"""
cursor.execute(sql_cmd)
db_info = cursor.fetchall()
# Bare expression: interactive-session leftover.
len(db_info)
# For each original source, noisify down to half its point count
# (rounded up; float division via the trailing '.').
for i in db_info:
    n_points = [math.ceil(i[1] / 2.)]
    create_database.noisify_unsmoothed_sources(cursor, i[0], source_pragma, n_points, n_versions_first=n_versions_first, n_versions_random=n_versions_random)