def create_jobs_for_new_files():
    """Create one job per complete group of files not yet tied to a job.

    Steps, all driven through the job-tracker DB:

    1. Select the filenames of files whose status is 'downloaded' or
       'added' and that have no matching row in job_files.
    2. Group those filenames with datafile.simple_group_files and keep
       only the groups SPAN512_job.is_complete accepts.
    3. For every kept group, insert a row into jobs (status 'new',
       task 'rfifind') and link the group's files to it via job_files.

    Returns None. Nothing is written or logged when no complete group
    is found.
    """
    unassigned_files_sql = (
        "SELECT filename FROM files "
        "LEFT JOIN job_files "
        "ON job_files.file_id=files.id "
        "WHERE files.status IN ('downloaded', 'added') "
        "AND job_files.id IS NULL"
    )
    insert_job_sql = (
        "INSERT INTO jobs ("
        "created_at, "
        "details, "
        "status, "
        "task, "
        "updated_at) "
        "VALUES ('%s', '%s', '%s', '%s', '%s')"
    )
    link_files_sql = (
        "INSERT INTO job_files ("
        "file_id, "
        "created_at, "
        "job_id, "
        "updated_at) "
        "SELECT id, '%s', (SELECT LAST_INSERT_ID()), '%s' "
        "FROM files "
        "WHERE filename IN ('%s')"
    )

    # Files that have not been associated with a job yet.
    filenames = []
    for record in jobtracker.query(unassigned_files_sql):
        filenames.append(str(record["filename"]))

    # Group files that belong together, dropping groups with missing files.
    ready_groups = []
    for candidate in datafile.simple_group_files(filenames):
        if SPAN512_job.is_complete(candidate):
            ready_groups.append(candidate)

    if not ready_groups:
        return

    jobpool_cout.outs(
        "Inserting %d new entries into jobs table" % len(ready_groups)
    )

    # Every new job starts at the first task.
    first_task = "rfifind"

    for members in ready_groups:
        create_job = insert_job_sql % (
            jobtracker.nowstr(),
            "Newly created job",
            "new",
            first_task,
            jobtracker.nowstr(),
        )
        # NOTE(review): filenames are spliced into the IN (...) list without
        # escaping; a name containing a single quote would break the
        # statement. Confirm whether jobtracker.query offers bound parameters.
        attach_files = link_files_sql % (
            jobtracker.nowstr(),
            jobtracker.nowstr(),
            "', '".join(members),
        )
        # LAST_INSERT_ID() in the second statement must see the job row
        # created by the first, so both go to jobtracker.query in one call.
        # Presumably that runs them back-to-back on one connection -- confirm.
        jobtracker.query([create_job, attach_files])