-
Notifications
You must be signed in to change notification settings - Fork 2
/
job.py
274 lines (226 loc) · 11 KB
/
job.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
import os
import sqlite3
import subprocess
import wget
from flask import request, flash, Flask
from flask_mail import Message, Mail
from datetime import datetime, timedelta
from werkzeug.utils import secure_filename
from forms import ALLOWED_EXTENSIONS
# Module-level Flask app and Flask-Mail instance; the e-mail helpers in this
# file enter ``j.app_context()`` before calling ``mail.send``.
j = Flask(__name__)
# All mail settings are applied in one call, before Mail(j) is constructed.
j.config.update(
    MAIL_SERVER='smtp.nyu.edu',
    MAIL_PORT=25,
    MAIL_USERNAME='reform-test@nyu.edu',
    MAIL_PASSWORD='',
    MAIL_USE_TLS=False,
    MAIL_USE_SSL=False,
)
mail = Mail(j)
def redisjob(target_dir, timestamp, email, chrom, upstream_fasta, downstream_fasta, position, ref_fastaURL, ref_gffURL,
             in_fasta, in_gff):
    """Run ``run.sh`` for one submission and e-mail the outcome.

    ``run.sh`` receives, in order: target_dir, timestamp, email, chrom,
    ref_fastaURL, ref_gffURL, in_fasta, in_gff, followed by ``position``
    when it is truthy, otherwise by ``upstream_fasta`` and
    ``downstream_fasta``.

    On success the result e-mail is sent and the submission's status is set
    to "complete". If the script exits non-zero, or any later step raises,
    the error e-mail is sent instead.
    """
    argv = ["bash", "./run.sh", target_dir, timestamp, email, chrom,
            ref_fastaURL, ref_gffURL, in_fasta, in_gff]
    if position:
        argv.append(position)
    else:
        argv.extend([upstream_fasta, downstream_fasta])

    try:
        # An argument list (no shell) means every value is passed as exactly
        # one argument, so spaces or shell metacharacters in user-supplied
        # fields cannot change the command. check=True turns a non-zero exit
        # status into an exception so the failure branch below is reachable.
        subprocess.run([str(arg) for arg in argv], check=True)
        os.system("echo Emailing")
        send_email(email, timestamp)
        os.system("echo Emailed")
        db_update(timestamp, "status", "complete")
    except Exception:
        os.system("echo Command Failed")
        send_email_error(email)
def redisjob1(target_dir, timestamp, email, chrom, upstream_fasta, downstream_fasta, position, ref_fastaURL, ref_gffURL,
              in_fasta, in_gff):
    """Download the reference files, run reform.py and record the outcome.

    The submission's status is set to "failed to download references" when
    either reference cannot be fetched or unpacked, to "failed running
    reform" when the run or the result e-mail raises, and to "complete"
    otherwise.
    """
    # (4) Download files from user provided URLs to server
    try:
        ref_fasta = _fetch_reference(target_dir, ref_fastaURL)
        ref_gff = _fetch_reference(target_dir, ref_gffURL)
    except Exception:
        # TODO: e-mail of failure
        print("ERROR: ")
        db_update(timestamp, "status", "failed to download references")
        # Stop here: without both references there is nothing to run.
        return

    # (5) Run the reform.py
    try:
        runReform(target_dir, ref_fasta, ref_gff, timestamp, position, chrom, in_fasta, in_gff, upstream_fasta,
                  downstream_fasta)
        send_email(email, timestamp)
        db_update(timestamp, "status", "complete")
    except Exception:
        print("ERROR: ")
        db_update(timestamp, "status", "failed running reform")


def _fetch_reference(target_dir, url):
    """Download ``url`` into ``target_dir``, gunzip a ``.gz`` file, return the local path.

    Raises ValueError when ``download`` returns nothing, and
    subprocess.CalledProcessError when ``gunzip`` exits non-zero.
    """
    path = download(target_dir, url)
    if not path:
        raise ValueError("no file downloaded for reference URL {!r}".format(url))
    # Only a trailing ".gz" marks a gzip file; a bare "gz" elsewhere in the
    # path must not trigger decompression or suffix stripping.
    if path.endswith(".gz"):
        subprocess.run(["gunzip", path], check=True)
        path = path[:-3]
    return path
def allowed_file(filename):
    """Return True when ``filename`` has a dot and its final extension,
    lower-cased, is in ALLOWED_EXTENSIONS."""
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
def verify_uploads(file):
    """Validate the upload stored under form field ``file``.

    Flashes an error and returns False when no file was selected or the
    file's extension is not allowed; returns True otherwise.
    """
    uploaded = request.files[file]

    if uploaded.filename == '':
        flash('No ' + file + ' file selected for uploading', 'error')
        return False

    if not (uploaded and allowed_file(uploaded.filename)):
        flash('Invalid File Type for ' + file, 'error')
        return False

    return True
# verify upload files for test site
def verify_test_uploads(file):
    """Validate the upload stored under form field ``file`` for the test site.

    An empty filename is accepted (the default file is used in that case).
    A file with a disallowed extension flashes an error and returns False.
    """
    uploaded = request.files[file]
    nothing_selected = uploaded.filename == ''

    # If no file is uploaded then the default file is used
    if nothing_selected or (uploaded and allowed_file(uploaded.filename)):
        return True

    flash('Invalid File Type for ' + file, 'error')
    return False
def upload(target_dir, file):
    """Save the upload stored under form field ``file`` into ``target_dir``.

    ``target_dir`` is created (with parents) if it does not exist. The file
    is written under its ``secure_filename``-sanitised name.
    """
    fileObj = request.files[file]
    # Create the directory without going through a shell, so characters in
    # target_dir are never interpreted as shell syntax.
    os.makedirs(target_dir, exist_ok=True)
    # save the file
    fileObj.save(os.path.join(target_dir, secure_filename(fileObj.filename)))
# upload file function for test site
def upload_test(target_dir, file_key, default_files):
    """Store the upload for ``file_key`` in ``target_dir``, or link the default file.

    When the request carries a usable upload under ``file_key`` it is saved
    under its ``secure_filename``-sanitised name. Otherwise a symlink to
    ``default_files[file_key]`` is created inside ``target_dir``.

    Returns the name of the entry that now exists in ``target_dir`` (the
    sanitised upload name, or the default file's basename).
    """
    # Missing field -> None; a falsy upload object is treated the same way.
    fileObj = request.files.get(file_key)
    os.makedirs(target_dir, exist_ok=True)  # dirs for upload files

    if fileObj:
        filename = secure_filename(fileObj.filename)
        fileObj.save(os.path.join(target_dir, filename))
        # Return the name the file was actually written under; the raw client
        # name may differ from it after sanitisation.
        return filename

    # Use the default file if no file was uploaded, pass realpath
    src = os.path.abspath(default_files[file_key])
    link_name = os.path.basename(src)
    try:
        os.symlink(src, os.path.join(target_dir, link_name))  # Create a soft link
    except FileExistsError:
        # Something already occupies the link name (an earlier link, possibly
        # dangling, or a concurrent request created it first): keep it.
        pass
    return link_name
def download(target_dir, URL):
    """Fetch ``URL`` into ``target_dir`` with wget and return wget's result.

    Returns None without downloading when ``URL`` is empty or None.
    """
    if not URL:
        return None
    return wget.download(URL, target_dir)
def runReform(target_dir, ref_fasta, ref_gff, timestamp, position, chrom, in_fasta, in_gff, upstream_fasta,
              downstream_fasta):
    """Run reform.py for one submission and archive its output directory.

    The insertion site is given by ``--position`` when ``position`` is
    truthy, otherwise by ``--upstream_fasta`` / ``--downstream_fasta``.
    Uploaded file names are resolved inside ``target_dir`` after
    ``secure_filename`` sanitisation. Output goes to ``./results/<timestamp>/``
    and is then packed into ``results/<timestamp>/<timestamp>.tar.gz``.

    Raises subprocess.CalledProcessError when reform.py exits non-zero.
    """
    def staged(name):
        # Path of an uploaded file inside the submission's directory.
        return os.path.join(target_dir, secure_filename(name))

    results_dir = "results/" + timestamp

    argv = ["python", "reform.py", "--chrom", str(chrom)]
    if position:
        argv += ["--position", str(position)]
    else:
        argv += ["--upstream_fasta", staged(upstream_fasta),
                 "--downstream_fasta", staged(downstream_fasta)]
    argv += ["--in_fasta", staged(in_fasta),
             "--in_gff", staged(in_gff),
             "--ref_fasta", str(ref_fasta),
             "--ref_gff", str(ref_gff),
             "--output_dir", "./" + results_dir + "/"]

    os.makedirs(results_dir, exist_ok=True)
    # Argument lists (no shell): user-supplied values such as chrom and
    # position are passed verbatim as single arguments. check=True makes a
    # failed run raise so callers wrapping this in try/except can see it.
    subprocess.run(argv, check=True)
    # The archive is written into the directory being archived; tar's exit
    # status is deliberately not checked, as in the previous implementation.
    subprocess.run(["tar", "-czf", results_dir + "/" + timestamp + ".tar.gz",
                    "-C", results_dir + "/", "."])
def send_email(email, timestamp):
    """E-mail the download link and the worker logs for a finished job.

    Reads ``./downloads/<timestamp>/<timestamp>-worker-err.log`` and
    ``...-worker-out.log``, embeds their text in the HTML message together
    with a download deadline 72 hours from now, sends the message to
    ``email`` and then deletes both log files.

    Raises whatever ``open`` raises when a log file cannot be read (for
    example FileNotFoundError); nothing is sent in that case.
    """
    # Local import: only this function needs it.
    from html import escape

    # calculate 72h DDL
    deadline = datetime.now() + timedelta(hours=72)
    deadline_str = deadline.strftime('%Y-%m-%d %H:%M:%S')

    # paths to the log files
    err_log_path = f"./downloads/{timestamp}/{timestamp}-worker-err.log"
    out_log_path = f"./downloads/{timestamp}/{timestamp}-worker-out.log"

    # Read the logs and HTML-escape them: they are arbitrary text placed
    # inside <pre> elements, so "<", ">" and "&" must not be parsed as markup.
    with open(err_log_path, 'r') as file:
        err_log_content = escape(file.read())
    with open(out_log_path, 'r') as file:
        out_log_content = escape(file.read())

    with j.app_context():
        subject = f"Reform Results - Download Deadline: {deadline_str}"
        msg = Message(subject, sender='reform@nyu.edu', recipients=[email])
        msg.html = f"""Reform job complete.
<a href='https://reform.bio.nyu.edu/download/{timestamp}'>Click here to download results and related log files</a>.
The file will be available for the next 72 hours.
The deadline to download the file is {deadline_str}.
If you do not download the file before this time, it will be deleted. <br><br>
<b>Reform.py Output Log:</b><br><pre>{err_log_content}</pre><br>
<b>Worker Output Log:</b><br><pre>{out_log_content}</pre>
"""
        mail.send(msg)

    # Remove the log files from the download folder
    os.remove(err_log_path)
    os.remove(out_log_path)
def send_email_error(email):
    """Send the fixed "job had an error" notice to ``email``."""
    with j.app_context():
        notice = Message(
            'reform results - error',
            sender='reform@nyu.edu',
            recipients=[email],
        )
        notice.html = "reform job had an error. Please resubmit."
        mail.send(notice)
def db_create():
    """Create the ``submissions`` table in ``database.db``.

    Raises sqlite3.OperationalError if the table already exists. The
    connection is closed whether or not the statement succeeds.
    """
    db = sqlite3.connect('database.db')
    try:
        db.execute(
            'CREATE TABLE submissions (jobID TEXT, timestamp TEXT, email TEXT, status TEXT, chrom TEXT, upstream_fasta '
            'TEXT, downstream_fasta TEXT, position TEXT, ref_fasta TEXT, ref_gff TEXT, in_fasta TEXT, in_gff TEXT)')
        db.commit()
    finally:
        # Always release the handle, including when CREATE TABLE fails.
        db.close()
def db_submit(request, timestamp):
    """Insert a new "submitted" row for this request into ``submissions``.

    ``request`` must provide ``form`` (email, chrom, position, ref_fasta,
    ref_gff) and ``files`` (upstream_fasta, downstream_fasta, in_fasta,
    in_gff; each with a ``filename``). ``jobID`` is stored as "none".

    Any failure (missing field, database error) is reported with a flashed
    error message instead of being raised.
    """
    try:
        record = (
            "none",
            timestamp,
            request.form['email'],
            "submitted",
            request.form['chrom'],
            request.files['upstream_fasta'].filename,
            request.files['downstream_fasta'].filename,
            request.form['position'],
            request.form['ref_fasta'],
            request.form['ref_gff'],
            request.files['in_fasta'].filename,
            request.files['in_gff'].filename,
        )
        con = sqlite3.connect("database.db")
        try:
            # "with con" commits on success and rolls back on error; it does
            # not close the connection, hence the explicit close below.
            with con:
                con.execute(
                    'INSERT INTO submissions (jobID, timestamp, email, status, chrom, upstream_fasta, '
                    'downstream_fasta, position, ref_fasta, ref_gff, in_fasta, in_gff ) VALUES(?, ?, ?, ?, ?, ?, '
                    '?, ?, ?, ?, ?, ?)',
                    record,
                )
        finally:
            con.close()
    except Exception:
        flash("error in insert operation ", 'error')
def db_update(timestamp, set_id, set_value):
    """Set column ``set_id`` to ``set_value`` on the rows matching ``timestamp``.

    ``set_id`` is a column name of ``submissions`` and must be a plain
    identifier; ``set_value`` and ``timestamp`` are bound as parameters.

    Raises ValueError when ``set_id`` is not a plain identifier.
    """
    # A column name cannot be bound as a "?" parameter, so it is validated
    # and quoted before being placed in the statement text.
    if not isinstance(set_id, str) or not set_id.isidentifier():
        raise ValueError("invalid column name: {!r}".format(set_id))

    db = sqlite3.connect('database.db')
    try:
        db.execute('UPDATE submissions SET "' + set_id + '"=? where timestamp=? ', (set_value, timestamp))
        db.commit()
    finally:
        # Close the connection even when the UPDATE fails.
        db.close()