-
Notifications
You must be signed in to change notification settings - Fork 2
/
uploader.py
376 lines (315 loc) · 12.9 KB
/
uploader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
import os
import cgi
import pylons
import datetime
import logging
import ckan.lib.munge as munge
import ckan.logic as logic
import ckan.plugins as plugins
#Home office import start
from ofs import get_impl
import requests
import boto
from boto.s3.key import Key
#Home office import end
# Pylons configuration object used for every option lookup in this module.
config = pylons.config
# Module-level logger named after this module.
log = logging.getLogger(__name__)
# Cache filled in by get_storage_path(): None means it has not been set yet,
# False means no usable storage path was found in the config.
_storage_path = None
# Caches filled in by get_max_resource_size() and get_max_image_size();
# None until the corresponding config option has been read once.
_max_resource_size = None
_max_image_size = None
#Home office method start
def scan_file(fileLocation):
    '''Send a file to the ClamAV HTTP service and report whether it is clean.

    The service URL is read from the ``ckan.datacatalogue.clamav.url`` config
    option, falling back to the in-cluster default. The file is posted as a
    multipart upload whose field name is the file location itself.

    The check fails closed: any error while opening the file or talking to
    the scanner, and any non-200 response, is reported as ``False``.

    :param fileLocation: path on local disk of the file to scan
    :returns: ``True`` only when the scanner answers HTTP 200 and the
        response body, after its first 18 characters, is ``true``;
        ``False`` otherwise
    :rtype: bool
    '''
    clamav_url = config.get(
        'ckan.datacatalogue.clamav.url',
        'https://clamav.platform-services.svc.cluster.local/scan')
    log.debug('Sending file %s for virus scan', fileLocation)

    try:
        # The context manager guarantees the handle is closed once the
        # request has been sent, whether or not the request succeeds.
        with open(fileLocation, 'rb') as scan_handle:
            # NOTE(review): verify=False disables TLS certificate checks for
            # the scanner endpoint - confirm this is still required.
            response = requests.post(
                clamav_url, files={fileLocation: scan_handle}, verify=False)
    except Exception:
        # Deliberately broad: an upload must never be accepted because the
        # scanner could not be reached, so every failure means "not clean".
        log.exception('There was an error at clamav')
        return False

    log.debug('clamav responded with status %s', response.status_code)
    if response.status_code != 200:
        return False

    # The verdict follows an 18 character prefix in the response body.
    answer = response.content[18:].strip()
    log.debug('clamav answer: %r', answer)
    # ``content`` is a byte string; b'true' compares correctly on both
    # Python 2 (where it equals 'true') and Python 3.
    return answer == b'true'
class VirusFileError(Exception):
    '''Error raised when an uploaded file does not pass the virus scan.

    :param value: description of the failure; stored on ``self.value`` and
        also handed to ``Exception`` so that ``args`` is populated
    '''

    def __init__(self, value):
        # Initialise the base class so ``args`` carries the value; generic
        # exception handling (logging, pickling, re-raising) relies on it.
        super(VirusFileError, self).__init__(value)
        self.value = value

    def __str__(self):
        '''Return the ``repr`` of the stored value.'''
        return repr(self.value)
def move_file_into_store(tmpFile, filepath):
    '''Move a temporary upload into its permanent store.

    When the ``ofs.impl`` config option is ``s3`` the file is uploaded to the
    bucket named by ``ofs.s3.bucket`` under the key ``filepath`` (with
    ``encrypt_key=True``), and the local temporary file is then removed.
    For any other value the file is renamed to ``filepath`` on local disk.

    :param tmpFile: path of the temporary file on local disk
    :param filepath: destination path (local storage) or key name (S3)
    :raises KeyError: if one of the ``ofs.s3.*`` options is missing while
        ``ofs.impl`` is ``s3``
    '''
    ofs_impl = config.get('ofs.impl')
    log.debug('ofs_impl is %s', ofs_impl)

    if ofs_impl != 's3':
        # Anything that is not S3 is treated as local storage.
        os.rename(tmpFile, filepath)
        return

    aws_access_key_id = config['ofs.s3.aws_access_key_id']
    aws_secret_access_key = config['ofs.s3.aws_secret_access_key']
    bucket_name = config['ofs.s3.bucket']

    connection = boto.connect_s3(aws_access_key_id, aws_secret_access_key)
    bucket = connection.get_bucket(bucket_name)
    s3_key = Key(bucket)
    s3_key.key = filepath
    log.debug('Sending file %s to AWS bucket %s', filepath, bucket_name)
    s3_key.set_contents_from_filename(tmpFile, encrypt_key=True)

    # The local branch consumes the temporary file via rename; do the same
    # here so that temporary copies do not pile up on disk after an upload.
    try:
        os.remove(tmpFile)
    except OSError:
        log.warning('Could not remove temporary file %s after S3 upload',
                    tmpFile)
#Home office method end
def get_uploader(upload_to, old_filename=None):
    '''Query IUploader plugins and return an uploader instance for general
    files.

    Plugins are asked in turn and the first one that supplies an uploader
    wins; a later plugin returning ``None`` can no longer discard it. When
    no plugin supplies one, the default :class:`Upload` is used.

    :param upload_to: name of the upload subdirectory / object type
    :param old_filename: name of the previously uploaded file, if any
    :returns: the uploader instance to use
    '''
    upload = None
    for plugin in plugins.PluginImplementations(plugins.IUploader):
        upload = plugin.get_uploader(upload_to, old_filename)
        if upload is not None:
            break

    # default uploader
    if upload is None:
        upload = Upload(upload_to, old_filename)

    return upload
def get_resource_uploader(data_dict):
    '''Query IUploader plugins and return a resource uploader instance.

    Plugins are asked in turn and the first one that supplies an uploader
    wins; a later plugin returning ``None`` can no longer discard it. When
    no plugin supplies one, the default :class:`ResourceUpload` is used.

    :param data_dict: the resource dict being created or updated
    :returns: the resource uploader instance to use
    '''
    upload = None
    for plugin in plugins.PluginImplementations(plugins.IUploader):
        upload = plugin.get_resource_uploader(data_dict)
        if upload is not None:
            break

    # default uploader
    if upload is None:
        upload = ResourceUpload(data_dict)

    return upload
def get_storage_path():
    '''Return the directory uploads are stored under, caching the result.

    The options are consulted in this order:

    1. ``ofs.storage_dir`` when ``ofs.impl`` is ``s3``
    2. ``ckan.storage_path``
    3. ``ofs.storage_dir`` when ``ofs.impl`` is ``pairtree`` (logs a warning)

    If none applies a critical message is logged and ``False`` is cached.

    :returns: the storage path, or ``False`` when none is configured
    '''
    global _storage_path

    # None means it has not been set. False means not in config.
    if _storage_path is not None:
        return _storage_path

    storage_path = config.get('ckan.storage_path')
    ofs_impl = config.get('ofs.impl')
    ofs_storage_dir = config.get('ofs.storage_dir')

    if ofs_impl == 's3' and ofs_storage_dir:
        log.debug('Setting storage path from ofs.storage_dir for s3')
        _storage_path = ofs_storage_dir
    elif storage_path:
        _storage_path = storage_path
    elif ofs_impl == 'pairtree' and ofs_storage_dir:
        log.warning('Please use config option ckan.storage_path instead of '
                    'ofs.storage_dir')
        _storage_path = ofs_storage_dir
    elif ofs_impl:
        log.critical('We only support local file storage from version 2.2 '
                     'of ckan please specify ckan.storage_path in your '
                     'config for your uploads')
        _storage_path = False
    else:
        log.critical('Please specify a ckan.storage_path in your config '
                     'for your uploads')
        _storage_path = False

    return _storage_path
def get_max_image_size():
    '''Return the ``ckan.max_image_size`` config option as an int.

    The option defaults to 2 and is read only once; later calls return the
    cached value. (Presumably a size in megabytes - confirm against the
    callers.)
    '''
    global _max_image_size
    if _max_image_size is not None:
        return _max_image_size
    configured = config.get('ckan.max_image_size', 2)
    _max_image_size = int(configured)
    return _max_image_size
def get_max_resource_size():
    '''Return the ``ckan.max_resource_size`` config option as an int.

    The option defaults to 10 and is read only once; later calls return the
    cached value. (Presumably a size in megabytes - confirm against the
    callers.)
    '''
    global _max_resource_size
    if _max_resource_size is not None:
        return _max_resource_size
    configured = config.get('ckan.max_resource_size', 10)
    _max_resource_size = int(configured)
    return _max_resource_size
class Upload(object):
    '''Uploader for general files, stored under
    ``<storage path>/storage/uploads/<object_type>``.

    Typical use is ``update_data_dict()`` before validation followed by
    ``upload()`` once the data has been validated.
    '''

    def __init__(self, object_type, old_filename=None):
        ''' Setup upload by creating a subdirectory of the storage directory
        of name object_type. old_filename is the name of the file in the url
        field last time'''
        self.storage_path = None
        self.filename = None
        self.filepath = None
        # Set these before the early return below so that the other methods
        # never hit a missing attribute when no storage path is configured.
        self.object_type = object_type
        self.old_filename = old_filename
        self.old_filepath = None
        self.clear = None

        path = get_storage_path()
        if not path:
            return

        self.storage_path = os.path.join(path, 'storage',
                                         'uploads', object_type)
        try:
            os.makedirs(self.storage_path)
        except OSError as e:
            # errno 17 is file already exists
            if e.errno != 17:
                raise

        if old_filename:
            self.old_filepath = os.path.join(self.storage_path, old_filename)

    def update_data_dict(self, data_dict, url_field, file_field, clear_field):
        ''' Manipulate data from the data_dict.  url_field is the name of the
        field where the upload is going to be. file_field is name of the key
        where the FieldStorage is kept (i.e the field where the file data
        actually is). clear_field is the name of a boolean field which
        requests the upload to be deleted.  This needs to be called before
        it reaches any validators'''
        self.url = data_dict.get(url_field, '')
        self.clear = data_dict.pop(clear_field, None)
        self.file_field = file_field
        self.upload_field_storage = data_dict.pop(file_field, None)

        if not self.storage_path:
            return

        if isinstance(self.upload_field_storage, cgi.FieldStorage):
            # Prefix the name with the current UTC timestamp before munging.
            self.filename = self.upload_field_storage.filename
            self.filename = str(datetime.datetime.utcnow()) + self.filename
            self.filename = munge.munge_filename_legacy(self.filename)
            self.filepath = os.path.join(self.storage_path, self.filename)
            data_dict[url_field] = self.filename
            self.upload_file = self.upload_field_storage.file
            self.tmp_filepath = self.filepath + '~'
        # keep the file if there has been no change
        elif self.old_filename and not self.old_filename.startswith('http'):
            if not self.clear:
                data_dict[url_field] = self.old_filename
            if self.clear and self.url == self.old_filename:
                data_dict[url_field] = ''

    def upload(self, max_size=2):
        ''' Actually upload the file.
        This should happen just before a commit but after the data has
        been validated and flushed to the db. This is so we do not store
        anything unless the request is actually good.
        max_size is size in MB maximum of the file

        :raises ckan.logic.ValidationError: if the file exceeds max_size
        :raises VirusFileError: if the file does not pass the virus scan
        '''
        if not self.storage_path:
            # Nothing can have been stored without a storage path.
            return

        if self.filename:
            self._write_tmp_file(max_size)

            # Home Office addition: the file must pass the virus scan before
            # it is moved into the permanent store.
            if not scan_file(self.tmp_filepath):
                message = ("The file " + self.tmp_filepath +
                           " has tested positive for a virus")
                log.warning(message)
                # Do not leave the rejected upload lying in the store.
                try:
                    os.remove(self.tmp_filepath)
                except OSError:
                    pass
                raise VirusFileError(message)

            move_file_into_store(self.tmp_filepath, self.filepath)
            # A new file replaces the old one, so the old one is cleared.
            self.clear = True

        if (self.clear and self.old_filename
                and not self.old_filename.startswith('http')):
            try:
                os.remove(self.old_filepath)
            except OSError:
                pass

    def _write_tmp_file(self, max_size):
        '''Copy the uploaded stream to self.tmp_filepath in 1 MB chunks,
        removing the partial file and raising ValidationError as soon as
        more than max_size chunks have been written.'''
        self.upload_file.seek(0)
        too_large = False
        # The context manager closes the handle on every path, including
        # the oversize one, before the partial file is removed.
        with open(self.tmp_filepath, 'wb') as output_file:
            chunks_read = 0
            while True:
                chunks_read += 1
                # MB chunks
                data = self.upload_file.read(2 ** 20)
                if not data:
                    break
                output_file.write(data)
                if chunks_read > max_size:
                    too_large = True
                    break

        if too_large:
            os.remove(self.tmp_filepath)
            raise logic.ValidationError(
                {self.file_field: ['File upload too large']}
            )
class ResourceUpload(object):
    '''Uploader for resource files, stored under
    ``<storage path>/resources/<id[0:3]>/<id[3:6]>/<id[6:]>``.
    '''

    def __init__(self, resource):
        '''Prepare the upload described by the resource dict.

        Pops ``upload`` and ``clear_upload`` from ``resource``. When
        ``upload`` is a ``cgi.FieldStorage`` the resource's ``url``,
        ``url_type`` and ``last_modified`` are updated for the new file;
        when only ``clear_upload`` is set ``url_type`` is blanked.
        Nothing is popped or changed if no storage path is configured.
        '''
        # Defaults first, so every attribute exists even on the early return.
        self.filename = None
        self.clear = None
        self.upload_file = None

        path = get_storage_path()
        if not path:
            self.storage_path = None
            return

        self.storage_path = os.path.join(path, 'resources')
        try:
            os.makedirs(self.storage_path)
        except OSError as e:
            # errno 17 is file already exists
            if e.errno != 17:
                raise

        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if isinstance(upload_field_storage, cgi.FieldStorage):
            self.filename = upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            resource['last_modified'] = datetime.datetime.utcnow()
            self.upload_file = upload_field_storage.file
        elif self.clear:
            resource['url_type'] = ''

    def get_directory(self, id):
        '''Return the directory holding the file of the resource ``id``:
        the storage path plus the first two three-character groups of the
        id.'''
        directory = os.path.join(self.storage_path,
                                 id[0:3], id[3:6])
        return directory

    def get_path(self, id):
        '''Return the full path of the file of the resource ``id``: its
        directory plus the id from the seventh character onwards.'''
        directory = self.get_directory(id)
        filepath = os.path.join(directory, id[6:])
        return filepath

    def upload(self, id, max_size=10):
        '''Actually upload the file.

        If a file is being uploaded it is written to a temporary file,
        virus scanned and moved into the store. Otherwise, if the upload
        was cleared, the previously stored file is removed.

        :param id: the resource id, used to derive the storage location
        :param max_size: maximum size of the file in MB
        :returns: ``None`` in every case
        :raises ckan.logic.ValidationError: if the file exceeds max_size
        :raises VirusFileError: if the file does not pass the virus scan
        '''
        if not self.storage_path:
            return

        # Get directory and filepath on the system
        # where the file for this resource will be stored
        directory = self.get_directory(id)
        filepath = self.get_path(id)

        # If a filename has been provided (a file is being uploaded)
        # we write it to the filepath (and overwrite it if it already
        # exists). This way the uploaded file will always be stored
        # in the same location
        if self.filename:
            try:
                os.makedirs(directory)
            except OSError as e:
                # errno 17 is file already exists
                if e.errno != 17:
                    raise
            tmp_filepath = filepath + '~'
            self._write_tmp_file(tmp_filepath, max_size)

            # Home Office addition: the file must pass the virus scan before
            # it is moved into the permanent store.
            if not scan_file(tmp_filepath):
                message = ("The file " + tmp_filepath +
                           " has tested positive for a virus")
                log.warning(message)
                # Do not leave the rejected upload lying in the store.
                try:
                    os.remove(tmp_filepath)
                except OSError:
                    pass
                raise VirusFileError(message)

            move_file_into_store(tmp_filepath, filepath)
            return

        # The resource form only sets self.clear (via the input clear_upload)
        # to True when an uploaded file is not replaced by another uploaded
        # file, only if it is replaced by a link to file.
        # If the uploaded file is replaced by a link, we should remove the
        # previously uploaded file to clean up the file system.
        if self.clear:
            try:
                os.remove(filepath)
            except OSError:
                pass

    def _write_tmp_file(self, tmp_filepath, max_size):
        '''Copy the uploaded stream to tmp_filepath in 1 MB chunks, removing
        the partial file and raising ValidationError as soon as more than
        max_size chunks have been written.'''
        too_large = False
        # The context manager closes the handle on every path, including
        # the oversize one, before the partial file is removed.
        with open(tmp_filepath, 'wb+') as output_file:
            self.upload_file.seek(0)
            chunks_read = 0
            while True:
                chunks_read += 1
                # MB chunks
                data = self.upload_file.read(2 ** 20)
                if not data:
                    break
                output_file.write(data)
                if chunks_read > max_size:
                    too_large = True
                    break

        if too_large:
            os.remove(tmp_filepath)
            raise logic.ValidationError(
                {'upload': ['File upload too large']}
            )