-
Notifications
You must be signed in to change notification settings - Fork 0
/
encryption.py
executable file
·192 lines (170 loc) · 6.89 KB
/
encryption.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import binascii, string, re
from Crypto.Cipher import AES
from Crypto.Hash import HMAC
from Crypto.Hash import SHA256
from Crypto.Protocol.KDF import PBKDF2
import Crypto.Random
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText
# Lower-case filler words (articles, conjunctions, mail prefixes such as
# "re"/"fw"/"fwd") that Encryption.__split_element skips when it builds the
# token set for a header field.
stopwords = {
    "a", "an", "and", "be", "for",
    "from", "fw", "fwd", "if", "is",
    "it", "of", "on", "or", "re",
    "the", "than", "that", "this", "then",
}
class Encryption:
    """Searchable encryption of email headers and bodies.

    Every field is encrypted with AES-GCM and accompanied by short keyed
    tags (truncated HMAC-SHA256 values), one per token of the plaintext,
    so that a search term can be matched against stored mail by computing
    the same tag with create_tag().

    Encoded field formats produced by encrypt_and_tag():
        without snippet:  digest.ciphertext.tag1.tag2. ... .
        with snippet:     s_digest.snippet.c_digest.ciphertext.tag1. ... .
    All digests and ciphertexts are hex encoded.
    """

    # Tokenization methods:
    TOKENIZE_EMAIL_ADDRESSES = 0  # Used for header fields like From, To, CC
    TOKENIZE_BLANK_SPACES = 1  # Tokenize just on blank spaces (default)
    # Headers that encrypt_email()/decrypt_email() process.
    TOKENIZE_HEADER_PART = ['From', 'To', 'Cc', 'Bcc', 'Subject']
    # Number of hex characters kept from each HMAC to form a tag
    __tag_length = 4

    def __init__(self, password, salt):
        """Derive the cipher key and the tagging key from the password.

        password: The user's password
        salt: Salt for PBKDF2 or in our case, the user's email address
        """
        prf = lambda password, salt: HMAC.new(password, salt, SHA256).digest()
        # 48 bytes of key material, 10000 PBKDF2 iterations.
        key = PBKDF2(password, salt, 48, 10000, prf)
        # First 128 bits are cipher key, last 256 bits are tagging key
        self.__cipher_key = key[:16]
        self.__prf_key = key[16:]

    def __PRF(self, tag):
        """Return the keyed tag for a token.

        tag: the token to tag
        Returns the first 'self.__tag_length' hex characters of the
        HMAC-SHA256 of 'tag' under the tagging key.
        """
        hmac = HMAC.new(key=self.__prf_key, msg=tag, digestmod=SHA256)
        return hmac.hexdigest()[:self.__tag_length]

    def __encrypt(self, IV, plaintext):
        """Encrypt 'plaintext' with AES in GCM mode.

        IV: value passed to AES.new() as the GCM nonce
        plaintext: plain text to encrypt
        Returns a pair of hex strings: (ciphertext, authentication tag).

        The plaintext is extended with null characters up to the next
        multiple of 16 (a full 16 when it is already aligned).  GCM does
        not need this, but decrypt() strips null characters and the stored
        format depends on the padded lengths, so the padding is kept.
        """
        cipher = AES.new(self.__cipher_key, AES.MODE_GCM, IV)
        plaintext += chr(0) * (16 - (len(plaintext) % 16))
        return (binascii.hexlify(cipher.encrypt(plaintext)),
                binascii.hexlify(cipher.digest()))

    def __split_element(self, data):
        """Tokenize a header value that holds email addresses.

        The value is lower-cased and split on whitespace.  Stopwords and
        tokens shorter than two characters are dropped, surrounding angle
        brackets are removed, and each remaining token is added together
        with the pieces obtained by splitting it on '@' and on '.'.
        Returns the tokens as a set.
        """
        tokens = set()
        for tok in data.strip().lower().split():
            if tok in stopwords or len(tok) < 2:
                continue
            if tok.startswith("<") and tok.endswith(">"):
                tok = tok[1:-1]
            if "@" in tok:
                tokens.update(tok.split("@"))
            if "." in tok:
                tokens.update(tok.split("."))
            tokens.add(tok)
        return tokens

    def __get_unique_words(self, tokens):
        """Return the distinct items of 'tokens' as a list.

        The first occurrence of each item is kept, in iteration order.
        """
        seen = set()
        unique = []
        for token in tokens:
            if token not in seen:
                seen.add(token)
                unique.append(token)
        return unique

    def encrypt_and_tag(self, IV, plaintext, tokenFlag, snippetBlocks):
        """Create the full encoded string for one field.

        IV: 32 bytes; the first 16 are used for the full text and the
            last 16 for the snippet (see _create_IV)
        plaintext: the plaintext of the field
        tokenFlag: What kind of tokenization for the tags. (Use class
            constants)
        snippetBlocks: If zero, do not make a preview, otherwise also
            encrypt a preview of the first snippetBlocks*15 characters,
            used for body and subject.

        Returned format is digest.ciphertext.tag1.tag2. ... . and, when a
        snippet is requested,
        s_digest.snippet.c_digest.ciphertext.tag1.tag2. ... .
        """
        if tokenFlag == self.TOKENIZE_EMAIL_ADDRESSES:
            tokens = self.__get_unique_words(self.__split_element(plaintext))
        else:
            tokens = self.__get_unique_words(plaintext.split(' '))
        tags = [self.__PRF(token) for token in tokens]

        if snippetBlocks > 0:
            # 15 characters per block: each 16-byte AES block then keeps
            # at least one byte of room for the null padding.
            snippet, s_digest = self.__encrypt(
                IV[16:], plaintext[:snippetBlocks * 15])
            ctext, c_digest = self.__encrypt(IV[:16], plaintext)
            digest = s_digest + '.' + snippet
            ciphertext = c_digest + '.' + ctext
        else:
            ciphertext, digest = self.__encrypt(IV[:16], plaintext)
        return digest + '.' + ciphertext + '.' + '.'.join(tags) + '.'

    def create_tag(self, searchterm):
        """Create an encrypted tag from the given search term."""
        return self.__PRF(searchterm)

    def decrypt(self, IV, ciphertext):
        """Decrypt a 'digest.ciphertext[.tags...]' string.

        The tags can be attached at the end or not, it won't affect the
        result.  Null characters are removed from the decrypted text.
        Returns the plaintext, or the string "Message was corrupted" when
        the authentication tag does not verify.
        Raises IndexError when 'ciphertext' contains no '.' separator.
        """
        sections = ciphertext.split('.')
        cipher = AES.new(self.__cipher_key, AES.MODE_GCM, IV)
        plaintext = cipher.decrypt(
            binascii.unhexlify(sections[1])).replace(chr(0), '')
        try:
            cipher.verify(binascii.unhexlify(sections[0]))
        except ValueError:
            return "Message was corrupted"
        return plaintext

    def _full_text_section(self, value):
        """Return 'c_digest.ciphertext' from a snippet-format field.

        'value' has the layout s_digest.snippet.c_digest.ciphertext.tags;
        the result is directly usable as input to decrypt().  Fields are
        located by splitting on '.' instead of fixed offsets because the
        snippet is 32 hex characters for texts shorter than 16 characters
        and longer otherwise.
        Raises IndexError when fewer than four fields are present.
        """
        fields = value.split('.')
        return fields[2] + '.' + fields[3]

    def encrypt_email(self, plain_email):
        """Return an encrypted copy of 'plain_email'.

        plain_email: message object supporting header lookup by name,
            walk() and get_payload()
        Returns a new MIMEMultipart whose headers listed in
        TOKENIZE_HEADER_PART are encrypted and tagged and whose non-
        multipart parts are attached as encrypted MIMEText parts.  A fresh
        random salt (32 hex characters) is generated per message and
        stored as a prefix of the From header, followed by '.'.
        Raises ValueError when the From header is missing or empty, since
        there would be no place to store the salt.
        """
        if plain_email['From'] in (None, ''):
            raise ValueError(
                "cannot encrypt an email without a From header")

        salt = binascii.hexlify(Crypto.Random.get_random_bytes(16))
        enc_email = MIMEMultipart()
        for header in self.TOKENIZE_HEADER_PART:
            value = plain_email[header]
            if value is None:
                continue
            IV = self._create_IV(header, salt)
            if header == 'Subject':
                # The subject gets a one-block preview snippet.
                enc_email[header] = self.encrypt_and_tag(
                    IV, value, self.TOKENIZE_BLANK_SPACES, 1)
            elif value != '':
                enc_email[header] = self.encrypt_and_tag(
                    IV, value, self.TOKENIZE_EMAIL_ADDRESSES, 0)

        # NOTE(review): every body part is encrypted with the same IV, so
        # a message with several non-multipart parts reuses a GCM nonce
        # under the same key.  Changing the derivation would break
        # decryption of already stored mail, so it is only flagged here.
        body_IV = self._create_IV('Body', salt)
        for part in plain_email.walk():
            if (part.get_content_maintype() == 'multipart') and (
                    part.get_content_subtype() != 'plain'):
                continue
            body = part.get_payload()
            if body is None:
                # Skip parts without payload; keep processing the rest.
                continue
            enc_body = self.encrypt_and_tag(
                body_IV, body, self.TOKENIZE_BLANK_SPACES, 2)
            enc_email.attach(MIMEText(enc_body))

        enc_email.replace_header('From', salt + '.' + enc_email['From'])
        return enc_email

    def _create_IV(self, header, salt):
        """Derive the 32-byte IV material for a field.

        The first 16 bytes come from SHA-256(salt + header) and are used
        for the full text; the last 16 bytes come from
        SHA-256(salt + header + 'Snippet') and are used for the snippet.
        """
        IV = SHA256.new(salt + header).digest()[:16]
        IV += SHA256.new(salt + header + 'Snippet').digest()[:16]
        return IV

    def decrypt_email(self, enc_email):
        """Return a decrypted copy of an email made by encrypt_email().

        enc_email: message object supporting header lookup by name,
            walk() and get_payload(); it is not modified
        Returns a new MIMEMultipart holding the decrypted headers listed
        in TOKENIZE_HEADER_PART (absent headers are skipped) and one
        MIMEText part per decrypted body part.  Processing of body parts
        stops at the first part whose payload is None or empty.
        Fields that fail authentication decrypt to the string
        "Message was corrupted" (see decrypt()).
        """
        sender = enc_email['From']
        # The first 32 characters of From are the hex salt, then a '.'.
        salt = sender[:32]
        plain_email = MIMEMultipart()
        for header in self.TOKENIZE_HEADER_PART:
            value = enc_email[header]
            if value is None:
                continue
            if header == 'From':
                value = sender[33:]
            elif header == 'Subject':
                value = self._full_text_section(value)
            IV = self._create_IV(header, salt)[:16]
            plain_email[header] = self.decrypt(IV, value)

        body_IV = self._create_IV('Body', salt)[:16]
        for part in enc_email.walk():
            if (part.get_content_maintype() == 'multipart') and (
                    part.get_content_subtype() != 'plain'):
                continue
            body = part.get_payload()
            if body is None or body == '':
                return plain_email
            plain_body = self.decrypt(
                body_IV, self._full_text_section(body))
            plain_email.attach(MIMEText(plain_body))
        return plain_email