#!/usr/bin/env python
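"""Dump the databases of a MySQL replica to S3 as lzop-compressed CSV.

Each database is dumped table by table through SELECT ... INTO OUTFILE into
a fifo, escaped and compressed, then streamed to S3. Example invocations
(my_db is a placeholder database name):

    mysql_backup_csv.py
    mysql_backup_csv.py --db my_db --force_reupload --dev_bucket
"""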
import argparse
import datetime
import json
import logging
import multiprocessing
import os
import subprocess
import threading
import time
import traceback
import uuid
import boto
import _mysql_exceptions
import psutil
import safe_uploader
import mysql_backup_status
from lib import backup
from lib import environment_specific
from lib import host_utils
from lib import mysql_lib
ACTIVE = 'active'
CSV_BACKUP_LOCK_TABLE_NAME = 'backup_locks'
CSV_BACKUP_LOCK_TABLE = """CREATE TABLE IF NOT EXISTS {db}.{tbl} (
`lock_identifier` varchar(36) NOT NULL,
`lock_active` enum('active') DEFAULT 'active',
`created_at` datetime NOT NULL,
`expires` datetime DEFAULT NULL,
`released` datetime DEFAULT NULL,
`db` varchar(64) NOT NULL,
`hostname` varchar(90) NOT NULL DEFAULT '',
`port` int(11) NOT NULL DEFAULT '0',
PRIMARY KEY (`lock_identifier`),
UNIQUE KEY `lock_active` (`db`,`lock_active`),
INDEX `backup_location` (`hostname`, `port`),
INDEX `expires` (`expires`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1"""
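# The UNIQUE KEY on (db, lock_active) allows at most one 'active' lock row
# per db; a competing INSERT in take_backup_lock fails with an
# IntegrityError until the lock is released or expires.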
MAX_THREAD_ERROR = 5
# how long a backup lock is held before it must be extended
LOCKS_HELD_TIME = '5 MINUTE'
# how often the lock-extension thread refreshes the lock, in seconds
LOCK_EXTEND_FREQUENCY = 10
PATH_PITR_DATA = 'pitr/{replica_set}/{db_name}/{date}'
SUCCESS_ENTRY = 'YAY_IT_WORKED'
log = logging.getLogger(__name__)
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--db',
default=None,
help='DB to export, default is all databases.')
parser.add_argument('--force_table',
default=None,
help='Table to export, default is all tables.')
parser.add_argument('--force_reupload',
default=False,
action='store_true',
                        help='Ignore existing uploads, reupload everything')
parser.add_argument('--loglevel',
default='INFO',
help='Change logging verbosity',
choices=set(['INFO', 'DEBUG']))
parser.add_argument('--dev_bucket',
default=False,
action='store_true',
help='Use the dev bucket, useful for testing')
args = parser.parse_args()
logging.basicConfig(level=getattr(logging, args.loglevel.upper(), None))
    # If we ever want to run multi instance, this will need to be updated
backup_obj = mysql_backup_csv(host_utils.HostAddr(host_utils.HOSTNAME),
args.db, args.force_table,
args.force_reupload, args.dev_bucket)
backup_obj.backup_instance()
class mysql_backup_csv:
def __init__(self, instance,
db=None, force_table=None,
force_reupload=False, dev_bucket=False):
""" Init function for backup, takes all args
Args:
instance - A hostAddr obect of the instance to be baced up
db - (option) backup only specified db
force_table - (option) backup only specified table
force_reupload - (optional) force reupload of backup
"""
self.instance = instance
self.timestamp = datetime.datetime.utcnow()
# datestamp is for s3 files which are by convention -1 day
self.datestamp = (self.timestamp - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
self.dbs_to_backup = multiprocessing.Queue()
if db:
self.dbs_to_backup.put(db)
else:
for db in mysql_lib.get_dbs(self.instance):
self.dbs_to_backup.put(db)
self.force_table = force_table
self.force_reupload = force_reupload
if dev_bucket:
self.upload_bucket = environment_specific.S3_CSV_BUCKET_DEV
else:
self.upload_bucket = environment_specific.S3_CSV_BUCKET
def backup_instance(self):
""" Back up a replica instance to s3 in csv """
host_lock_handle = None
try:
log.info('Backup for instance {i} started at {t}'
''.format(t=str(self.timestamp),
i=self.instance))
            log.info('Checking heartbeat to make sure replication is not too '
                     'lagged.')
self.check_replication_for_backup()
log.info('Taking host backup lock')
host_lock_handle = host_utils.take_flock_lock(backup.BACKUP_LOCK_FILE)
log.info('Setting up export directory structure')
self.setup_and_get_tmp_path()
log.info('Will temporarily dump inside of {path}'
''.format(path=self.dump_base_path))
log.info('Releasing any invalid shard backup locks')
self.ensure_backup_locks_sanity()
log.info('Deleting old expired locks')
self.purge_old_expired_locks()
log.info('Stopping replication SQL thread to get a snapshot')
mysql_lib.stop_replication(self.instance, mysql_lib.REPLICATION_THREAD_SQL)
workers = []
            # use half the cores for dump workers, but always at least one
            for _ in range(max(1, multiprocessing.cpu_count() // 2)):
proc = multiprocessing.Process(target=self.mysql_backup_csv_dbs)
proc.daemon = True
proc.start()
workers.append(proc)
# throw in a sleep to make sure all threads have started dumps
time.sleep(2)
log.info('Restarting replication')
mysql_lib.start_replication(self.instance, mysql_lib.REPLICATION_THREAD_SQL)
for worker in workers:
worker.join()
if not self.dbs_to_backup.empty():
raise Exception('All worker processes have completed, but '
'work remains in the queue')
log.info('CSV backup is complete, will run a check')
mysql_backup_status.verify_csv_backup(self.instance.replica_type,
self.datestamp,
self.instance)
finally:
if host_lock_handle:
log.info('Releasing general host backup lock')
host_utils.release_flock_lock(host_lock_handle)
def mysql_backup_csv_dbs(self):
""" Worker for backing up a queue of dbs """
proc_id = multiprocessing.current_process().name
conn = mysql_lib.connect_mysql(self.instance, backup.USER_ROLE_MYSQLDUMP)
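        # Each worker opens its snapshot while the main process still has the
        # replication SQL thread stopped (see backup_instance), so all
        # workers dump from the same point in time.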
mysql_lib.start_consistent_snapshot(conn, read_only=True)
pitr_data = mysql_lib.get_pitr_data(self.instance)
err_count = 0
while not self.dbs_to_backup.empty():
db = self.dbs_to_backup.get()
try:
self.mysql_backup_csv_db(db, conn, pitr_data)
            except Exception:
self.dbs_to_backup.put(db)
log.error('{proc_id}: Could not dump {db}, '
'error: {e}'.format(db=db,
e=traceback.format_exc(),
proc_id=proc_id))
err_count = err_count + 1
if err_count > MAX_THREAD_ERROR:
log.error('{proc_id}: Error count in thread > MAX_THREAD_ERROR. '
'Aborting :('.format(proc_id=proc_id))
return
def mysql_backup_csv_db(self, db, conn, pitr_data):
""" Back up a single db
Args:
db - the db to be backed up
        conn - a connection to the mysql instance
pitr_data - data describing the position of the db data in replication
"""
proc_id = multiprocessing.current_process().name
if not self.force_reupload and self.already_backed_up(db):
log.info('{proc_id}: {db} is already backed up, skipping'
''.format(proc_id=proc_id,
db=db))
return
# attempt to take lock by writing a lock to the master
tmp_dir_db = None
lock_identifier = None
extend_lock_thread = None
        try:
            self.release_expired_locks()
            lock_identifier = self.take_backup_lock(db)
            if not lock_identifier:
                # another backup already holds the lock for this db
                return
            extend_lock_stop_event = threading.Event()
            extend_lock_thread = threading.Thread(target=self.extend_backup_lock,
                                                  args=(lock_identifier,
                                                        extend_lock_stop_event))
            extend_lock_thread.daemon = True
            extend_lock_thread.start()
log.info('{proc_id}: {db} db backup start'
''.format(db=db,
proc_id=proc_id))
tmp_dir_db = os.path.join(self.dump_base_path, db)
if not os.path.exists(tmp_dir_db):
os.makedirs(tmp_dir_db)
host_utils.change_owner(tmp_dir_db, 'mysql', 'mysql')
self.upload_pitr_data(db, pitr_data)
for table in self.get_tables_to_backup(db):
self.mysql_backup_csv_table(db, table, tmp_dir_db, conn)
log.info('{proc_id}: {db} db backup complete'
''.format(db=db,
proc_id=proc_id))
finally:
if extend_lock_thread:
extend_lock_stop_event.set()
                log.debug('{proc_id}: {db} waiting for lock expiry thread to '
                          'end'.format(db=db,
                                       proc_id=proc_id))
extend_lock_thread.join()
if lock_identifier:
log.debug('{proc_id}: {db} releasing lock'
''.format(db=db,
proc_id=proc_id))
self.release_db_backup_lock(lock_identifier)
def mysql_backup_csv_table(self, db, table, tmp_dir_db, conn):
""" Back up a single table of a single db
Args:
db - the db to be backed up
table - the table to be backed up
tmp_dir_db - temporary storage used for all tables in the db
        conn - a connection to the mysql instance
"""
proc_id = multiprocessing.current_process().name
(_, data_path, _) = environment_specific.get_csv_backup_paths(
self.datestamp, db, table,
self.instance.replica_type,
self.instance.get_zk_replica_set()[0])
log.debug('{proc_id}: {db}.{table} dump to {path} started'
''.format(proc_id=proc_id,
db=db,
table=table,
path=data_path))
self.upload_schema(db, table, tmp_dir_db)
fifo = os.path.join(tmp_dir_db, table)
procs = dict()
try:
# giant try so we can try to clean things up in case of errors
self.create_fifo(fifo)
# Start creating processes
procs['cat'] = subprocess.Popen(['cat', fifo],
stdout=subprocess.PIPE)
procs['nullescape'] = subprocess.Popen(['nullescape'],
stdin=procs['cat'].stdout,
stdout=subprocess.PIPE)
procs['lzop'] = subprocess.Popen(['lzop'],
stdin=procs['nullescape'].stdout,
stdout=subprocess.PIPE)
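            # dump pipeline: SELECT ... INTO OUTFILE writes the table to the
            # fifo, cat reads it back, nullescape escapes NUL characters,
            # lzop compresses, and safe_uploader streams the result to S3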
# Start dump query
return_value = set()
query_thread = threading.Thread(target=self.run_dump_query,
args=(db, table, fifo,
conn, procs['cat'], return_value))
query_thread.daemon = True
query_thread.start()
# And run the upload
safe_uploader.safe_upload(precursor_procs=procs,
stdin=procs['lzop'].stdout,
bucket=self.upload_bucket,
key=data_path,
check_func=self.check_dump_success,
check_arg=return_value)
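            # safe_upload uses check_func(check_arg) to verify the dump query
            # reported success before the upload is treated as good (see
            # check_dump_success)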
os.remove(fifo)
log.debug('{proc_id}: {db}.{table} clean up complete'
''.format(proc_id=proc_id,
db=db,
table=table))
except:
log.debug('{proc_id}: in exception handling for failed table upload'
''.format(proc_id=proc_id))
if os.path.exists(fifo):
self.cleanup_fifo(fifo)
safe_uploader.kill_precursor_procs(procs)
raise
def create_fifo(self, fifo):
""" Create a fifo to be used for dumping a mysql table
Args:
fifo - The path to the fifo
"""
if os.path.exists(fifo):
self.cleanup_fifo(fifo)
log.debug('{proc_id}: creating fifo {fifo}'
''.format(proc_id=multiprocessing.current_process().name,
fifo=fifo))
os.mkfifo(fifo)
# Could not get os.mkfifo(fifo, 0777) to work due to umask
host_utils.change_owner(fifo, 'mysql', 'mysql')
def cleanup_fifo(self, fifo):
""" Safely cleanup a fifo that is an unknown state
Args:
fifo - The path to the fifo
"""
log.debug('{proc_id}: Cleanup of {fifo} started'
''.format(proc_id=multiprocessing.current_process().name,
fifo=fifo))
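        # drain the fifo so any writer blocked on it can exit; the timeout
        # covers the case where no writer is attached and cat would hang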
cat_proc = subprocess.Popen('timeout 5 cat {fifo} >/dev/null'.format(fifo=fifo),
shell=True)
cat_proc.wait()
os.remove(fifo)
log.debug('{proc_id}: Cleanup of {fifo} complete'
''.format(proc_id=multiprocessing.current_process().name,
fifo=fifo))
def run_dump_query(self, db, table, fifo, conn, cat_proc, return_value):
""" Run a SELECT INTO OUTFILE into a fifo
Args:
db - The db to dump
table - The table of the db to dump
        fifo - The fifo to dump db.table into
conn - The connection to MySQL
cat_proc - The process reading from the fifo
        return_value - A set used to communicate success. This is a
                       semi-ugly hack, required because a thread cannot
                       return a value but can modify a mutable object
                       (such as a set).
"""
log.debug('{proc_id}: {db}.{table} dump started'
''.format(proc_id=multiprocessing.current_process().name,
db=db,
table=table))
sql = ("SELECT * "
"INTO OUTFILE '{fifo}' "
"FROM {db}.{table} "
"").format(fifo=fifo,
db=db,
table=table)
cursor = conn.cursor()
try:
cursor.execute(sql)
        except Exception as detail:
            # if we have not output any data, then the cat proc will never
            # receive an EOF, so we will be stuck
            if psutil.pid_exists(cat_proc.pid):
                cat_proc.kill()
            log.error('{proc_id}: dump query encountered an error: {er}'
                      ''.format(er=detail,
                                proc_id=multiprocessing.current_process().name))
            # without SUCCESS_ENTRY in return_value, check_dump_success will
            # treat this dump as failed and abort the upload
            return
log.debug('{proc_id}: {db}.{table} dump complete'
''.format(proc_id=multiprocessing.current_process().name,
db=db,
table=table))
return_value.add(SUCCESS_ENTRY)
def check_dump_success(self, return_value):
""" Check to see if a dump query succeeded
Args:
        return_value - A set; the dump query succeeded if and only if the
                       set contains SUCCESS_ENTRY
"""
if SUCCESS_ENTRY not in return_value:
raise Exception('{proc_id}: dump failed'
''.format(proc_id=multiprocessing.current_process().name))
def upload_pitr_data(self, db, pitr_data):
""" Upload a file of PITR data to s3 for each schema
Args:
db - the db that was backed up.
pitr_data - a dict of various data that might be helpful for running a
PITR
"""
s3_path = PATH_PITR_DATA.format(replica_set=self.instance.get_zk_replica_set()[0],
date=self.datestamp,
db_name=db)
log.debug('{proc_id}: {db} Uploading pitr data to {s3_path}'
''.format(s3_path=s3_path,
proc_id=multiprocessing.current_process().name,
db=db))
boto_conn = boto.connect_s3()
bucket = boto_conn.get_bucket(self.upload_bucket, validate=False)
key = bucket.new_key(s3_path)
key.set_contents_from_string(json.dumps(pitr_data))
def upload_schema(self, db, table, tmp_dir_db):
""" Upload the schema of a table to s3
Args:
db - the db to be backed up
table - the table to be backed up
tmp_dir_db - temporary storage used for all tables in the db
"""
(schema_path, _, _) = environment_specific.get_csv_backup_paths(
self.datestamp, db, table,
self.instance.replica_type,
self.instance.get_zk_replica_set()[0])
create_stm = mysql_lib.show_create_table(self.instance, db, table)
log.debug('{proc_id}: Uploading schema to {schema_path}'
''.format(schema_path=schema_path,
proc_id=multiprocessing.current_process().name))
boto_conn = boto.connect_s3()
bucket = boto_conn.get_bucket(self.upload_bucket, validate=False)
key = bucket.new_key(schema_path)
key.set_contents_from_string(create_stm)
def take_backup_lock(self, db):
""" Write a lock row on to the master
Args:
db - the db to be backed up
Returns:
a uuid lock identifier
"""
zk = host_utils.MysqlZookeeper()
(replica_set, _) = self.instance.get_zk_replica_set()
master = zk.get_mysql_instance_from_replica_set(replica_set,
host_utils.REPLICA_ROLE_MASTER)
master_conn = mysql_lib.connect_mysql(master, role='scriptrw')
cursor = master_conn.cursor()
lock_identifier = str(uuid.uuid4())
log.debug('Taking backup lock: {replica_set} {db} '
''.format(replica_set=replica_set,
db=db))
params = {'lock': lock_identifier,
'db': db,
'hostname': self.instance.hostname,
'port': self.instance.port,
'active': ACTIVE}
sql = ("INSERT INTO {db}.{tbl} "
"SET "
"lock_identifier = %(lock)s, "
"lock_active = %(active)s, "
"created_at = NOW(), "
"expires = NOW() + INTERVAL {locks_held_time}, "
"released = NULL, "
"db = %(db)s,"
"hostname = %(hostname)s,"
"port = %(port)s"
"").format(db=mysql_lib.METADATA_DB,
tbl=CSV_BACKUP_LOCK_TABLE_NAME,
locks_held_time=LOCKS_HELD_TIME)
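        # the UNIQUE KEY on (db, lock_active) makes this INSERT fail with an
        # IntegrityError if another active lock exists for the same db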
try:
cursor.execute(sql, params)
master_conn.commit()
except _mysql_exceptions.IntegrityError:
lock_identifier = None
sql = ("SELECT hostname, port, expires "
"FROM {db}.{tbl} "
"WHERE "
" lock_active = %(active)s AND "
" db = %(db)s"
"").format(db=mysql_lib.METADATA_DB,
tbl=CSV_BACKUP_LOCK_TABLE_NAME)
cursor.execute(sql,
{'db': db, 'active': ACTIVE})
ret = cursor.fetchone()
log.debug('DB {db} is already being backed up on {hostname}:{port}, '
'lock will expire at {expires}.'
''.format(db=db,
hostname=ret['hostname'],
port=ret['port'],
expires=str(ret['expires'])))
log.debug(cursor._executed)
return lock_identifier
def extend_backup_lock(self, lock_identifier, extend_lock_stop_event):
""" Extend a backup lock. This is to be used by a thread
Args:
        lock_identifier - Corresponds to a lock identifier row in the
                          CSV_BACKUP_LOCK_TABLE_NAME table.
extend_lock_stop_event - An event that will be used to inform this
thread to stop extending the lock
"""
        # Assumption is that this is called right after creating the lock
last_update = time.time()
        while not extend_lock_stop_event.is_set():
if (time.time() - last_update) > LOCK_EXTEND_FREQUENCY:
zk = host_utils.MysqlZookeeper()
(replica_set, _) = self.instance.get_zk_replica_set()
master = zk.get_mysql_instance_from_replica_set(replica_set, host_utils.REPLICA_ROLE_MASTER)
master_conn = mysql_lib.connect_mysql(master, role='scriptrw')
cursor = master_conn.cursor()
params = {'lock_identifier': lock_identifier}
sql = ('UPDATE {db}.{tbl} '
'SET expires = NOW() + INTERVAL {locks_held_time} '
'WHERE lock_identifier = %(lock_identifier)s'
'').format(db=mysql_lib.METADATA_DB,
tbl=CSV_BACKUP_LOCK_TABLE_NAME,
locks_held_time=LOCKS_HELD_TIME)
cursor.execute(sql, params)
master_conn.commit()
log.debug(cursor._executed)
last_update = time.time()
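            # wake up frequently so the stop event is noticed promptly; the
            # lock row itself is only refreshed every LOCK_EXTEND_FREQUENCY
            # seconds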
extend_lock_stop_event.wait(.5)
def release_db_backup_lock(self, lock_identifier):
""" Release a backup lock created by take_backup_lock
Args:
lock_identifier - a uuid to identify a lock row
"""
zk = host_utils.MysqlZookeeper()
(replica_set, _) = self.instance.get_zk_replica_set()
master = zk.get_mysql_instance_from_replica_set(replica_set, host_utils.REPLICA_ROLE_MASTER)
master_conn = mysql_lib.connect_mysql(master, role='scriptrw')
cursor = master_conn.cursor()
params = {'lock_identifier': lock_identifier}
sql = ('UPDATE {db}.{tbl} '
'SET lock_active = NULL, released = NOW() '
'WHERE lock_identifier = %(lock_identifier)s AND '
' lock_active is NOT NULL'
'').format(db=mysql_lib.METADATA_DB,
tbl=CSV_BACKUP_LOCK_TABLE_NAME)
cursor.execute(sql, params)
master_conn.commit()
log.debug(cursor._executed)
def ensure_backup_locks_sanity(self):
""" Release any backup locks that aren't sane. This means locks
created by the same host as the caller. The instance level flock
should allow this assumption to be correct.
"""
zk = host_utils.MysqlZookeeper()
(replica_set, _) = self.instance.get_zk_replica_set()
master = zk.get_mysql_instance_from_replica_set(replica_set, host_utils.REPLICA_ROLE_MASTER)
master_conn = mysql_lib.connect_mysql(master, role='scriptrw')
cursor = master_conn.cursor()
if not mysql_lib.does_table_exist(master, mysql_lib.METADATA_DB,
CSV_BACKUP_LOCK_TABLE_NAME):
log.debug('Creating missing metadata table')
cursor.execute(CSV_BACKUP_LOCK_TABLE.format(db=mysql_lib.METADATA_DB,
tbl=CSV_BACKUP_LOCK_TABLE_NAME))
params = {'hostname': self.instance.hostname,
'port': self.instance.port}
sql = ('UPDATE {db}.{tbl} '
'SET lock_active = NULL, released = NOW() '
'WHERE hostname = %(hostname)s AND '
' port = %(port)s'
'').format(db=mysql_lib.METADATA_DB,
tbl=CSV_BACKUP_LOCK_TABLE_NAME)
cursor.execute(sql, params)
master_conn.commit()
def release_expired_locks(self):
""" Release any expired locks """
zk = host_utils.MysqlZookeeper()
(replica_set, _) = self.instance.get_zk_replica_set()
master = zk.get_mysql_instance_from_replica_set(replica_set, host_utils.REPLICA_ROLE_MASTER)
master_conn = mysql_lib.connect_mysql(master, role='scriptrw')
cursor = master_conn.cursor()
sql = ('UPDATE {db}.{tbl} '
'SET lock_active = NULL, released = NOW() '
'WHERE expires < NOW()'
'').format(db=mysql_lib.METADATA_DB,
tbl=CSV_BACKUP_LOCK_TABLE_NAME)
cursor.execute(sql)
master_conn.commit()
log.debug(cursor._executed)
def purge_old_expired_locks(self):
""" Delete any locks older than a week """
zk = host_utils.MysqlZookeeper()
(replica_set, _) = self.instance.get_zk_replica_set()
master = zk.get_mysql_instance_from_replica_set(replica_set, host_utils.REPLICA_ROLE_MASTER)
master_conn = mysql_lib.connect_mysql(master, role='scriptrw')
cursor = master_conn.cursor()
sql = ('DELETE FROM {db}.{tbl} '
'WHERE expires < NOW() - INTERVAL 1 WEEK AND '
' lock_active is NOT NULL '
'').format(db=mysql_lib.METADATA_DB,
tbl=CSV_BACKUP_LOCK_TABLE_NAME)
cursor.execute(sql)
master_conn.commit()
log.debug(cursor._executed)
def already_backed_up(self, db):
""" Check to see if a db has already been uploaded to s3
Args:
db - The db to check for being backed up
Returns:
bool - True if the db has already been backed up, False otherwise
"""
boto_conn = boto.connect_s3()
bucket = boto_conn.get_bucket(self.upload_bucket, validate=False)
for table in self.get_tables_to_backup(db):
(_, data_path, _) = environment_specific.get_csv_backup_paths(
self.datestamp, db, table,
self.instance.replica_type,
self.instance.get_zk_replica_set()[0])
if not bucket.get_key(data_path):
return False
return True
def get_tables_to_backup(self, db):
""" Determine which tables should be backed up in a db
Args:
db - The db for which we need a list of tables eligible for backup
Returns:
a set of table names
"""
tables = environment_specific.filter_tables_to_csv_backup(
self.instance, db,
mysql_lib.get_tables(self.instance, db, skip_views=True))
if not self.force_table:
return tables
if self.force_table not in tables:
raise Exception('Requested table {t} is not available to backup'
''.format(t=self.force_table))
else:
return set([self.force_table])
def check_replication_for_backup(self):
""" Confirm that replication is caught up enough to run """
while True:
heartbeat = mysql_lib.get_heartbeat(self.instance)
            if heartbeat.date() < self.timestamp.date():
                log.warning('Replication is too lagged ({cur}) to run the '
                            'daily backup, sleeping'.format(cur=heartbeat))
                time.sleep(10)
            elif heartbeat.date() > self.timestamp.date():
                raise Exception('Replication is ahead of the expected day')
            else:
                log.info('Replication is ok ({cur}) to run daily backup'
                         ''.format(cur=heartbeat))
                return
def setup_and_get_tmp_path(self):
""" Figure out where to temporarily store csv backups,
and clean it up
"""
tmp_dir_root = os.path.join(host_utils.find_root_volume(),
'csv_export',
str(self.instance.port))
if not os.path.exists(tmp_dir_root):
os.makedirs(tmp_dir_root)
host_utils.change_owner(tmp_dir_root, 'mysql', 'mysql')
self.dump_base_path = tmp_dir_root
if __name__ == "__main__":
environment_specific.initialize_logger()
main()