This is a follow-up to System backup on Linux. I've done a lot to my script since I asked my first question, and I think I improved a lot while doing it. The script still does the same thing, only a bit neater and cleaner. So let me explain what I changed, and in some cases why I changed it.
First, I changed all the names of my variables and constants (I hope I understood constants correctly: values that always stay the same, no matter on which system/host the script is executed). They are all in snake_case now, and the constants are also in uppercase.
I started using the power of os.path.join instead of string concatenation. But there is one problem: os.path.join doesn't put a trailing / on the file path, so I had to add the DIR_PATH constant to every path that uses the backup_dir key. I'm still looking for a better solution for that.
I also created a function for every major task to get a better structure in my script; it also makes it a lot easier for me to debug. I want to make one function out of the two functions for getting the metadata and the old backups, because they do practically the same thing except for the file extension, but I'm not sure whether that's a good/better idea than using two. I'm also not sure about the return values. (One possible way to merge them is sketched below.)
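Something like this is what I had in mind for merging them (just a sketch, not code I'm running yet; get_backup_files and the extension parameter are names I made up here, and it relies on host, os and glob from the script). A nice side effect is that os.path.join inserts the separator itself, so the extra DIR_PATH isn't needed:

def get_backup_files(backup_dir, extension):
    """Returns the basenames of backup files named host.YYYY-MM-DD.<extension>"""
    pattern = os.path.join(backup_dir,
                           host + '.[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9].' + extension)
    return [os.path.basename(x) for x in glob.glob(pattern)]

# Usage, replacing the two separate functions:
# fnames = get_backup_files(backup_dir, 'tar.bz2')
# metadata = get_backup_files(backup_dir, 'metadata')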
The biggest change I've made is in my backup function. I factored out the nesting that checked the file age and added those files as parameters to tar. The main reason for that was that I suddenly had around 120 MB of parameters to pass to tar, and the limit on how many parameters tar can process lies somewhere below that. Now I'm using some nice tar features to make real incremental backups of my systems.
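For context, this is the tar behaviour the script relies on: with --listed-incremental, tar writes a full (level-0) archive when the snapshot file doesn't exist yet, and archives only files changed since the last run when the same snapshot file is passed again. A minimal, stand-alone sketch (the paths are made up and it isn't the script's actual command):

import subprocess

snapshot = '/tmp/example.metadata'   # hypothetical snapshot file
archive = '/tmp/example.tar.bz2'     # hypothetical archive path

# First call: snapshot doesn't exist yet -> tar creates a full backup.
# Later calls with the same snapshot file -> only files changed since then.
# (Backing up /etc just as a small example; run with enough privileges to read it.)
subprocess.check_call(['tar', '--create', '--bzip2', '--preserve-permissions',
                       '--listed-incremental=' + snapshot,
                       '--file=' + archive,
                       '/etc'])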
I'm really happy with what I did to my code; it looks pretty good and also works really well. But I guess there is still some room to improve it further, so I would appreciate any further feedback on my code!
import datetime
import os
import subprocess
import socket
import glob
import time
from stat import *
host = socket.gethostname()
date = datetime.datetime.today().strftime('%Y-%m-%d')
MOUNT_POINT = '/mnt/linuxbackup/'
NAS = '//172.19.3.5/backup/linuxserver/'
CREDENTIALS = 'sec=ntlm,credentials=/root/.backup.secret'
DIR_PATH = '/'
# configuration
backup_config = {
'backup_dir' : os.path.join(MOUNT_POINT, host),
'backup_name' : host + '.' + date,
'backups_to_keep' : 4,
'exclude_dirs' : ('/proc',
'/lost+found',
'/sys',
'/media',
'/var/cache',
'/var/log',
'/mnt',
'/dev',
'/run',
'/tmp'
),
}
# build the --exclude=... options that tar expects from the configured directories
exclude_dirs = ''
for a in backup_config['exclude_dirs']:
    exclude_dirs = exclude_dirs + '--exclude=%s ' % a
backup_config['exclude_dirs'] = exclude_dirs
def mount(mount_point, credentials, nas, backup_dir):
"""Checks if the NFS share for the backup
exists (if not it creates it) and mounts it"""
if os.path.exists(mount_point):
if not os.path.ismount(mount_point):
subprocess.check_call(['mount', '-t', 'cifs', '-o', credentials, nas, mount_point])
if not os.path.exists(backup_dir):
os.makedirs(backup_dir)
else:
os.makedirs(mount_point)
subprocess.check_call(['mount', '-t', 'cifs', '-o', credentials, nas, mount_point])
def unmount(mount_point):
"""Unmounts the mounted backup directory"""
if os.path.ismount(mount_point):
subprocess.check_call(['umount', mount_point])
def get_previous_backups(backup_dir):
"""Returns a list of tar.bz2 compressed backup files matching the date format"""
return [os.path.basename(x) for x in
glob.glob(backup_dir + DIR_PATH + host + '.[0-9][0-9][0-9][0-9]' +
'-[0-9][0-9]' + '-[0-9][0-9]' + '.tar.bz2')]
def get_metafile(backup_dir):
"""Returns the metadata file for incremental tar backups"""
return [os.path.basename(x) for x in
glob.glob(backup_dir + DIR_PATH + host + '.[0-9][0-9][0-9][0-9]' +
'-[0-9][0-9]' + '-[0-9][0-9]' + '.metadata')]
def remove_old_backups(backup_dir, backups_to_keep, backup_name):
"""Gets a list of old backups and deletes the old ones"""
fnames = get_previous_backups(backup_dir)
metadata = get_metafile(backup_dir)
print fnames
while len(fnames) > backups_to_keep:
print 'more than {} file(s) here!'.format(backups_to_keep)
print 'Removing oldest file'
file_times = {}
access_times = []
for file_name in fnames:
print backup_dir + file_name
mode = os.stat(backup_dir + DIR_PATH + file_name)[ST_MTIME]
file_times[mode] = file_name
access_times.append(mode)
print mode
access_times.sort()
print file_times
print access_times
file_to_delete = file_times[access_times[0]]
print 'Deleting file %s' %file_to_delete
try:
os.remove(backup_dir + DIR_PATH + file_to_delete)
os.remove(backup_dir + DIR_PATH + file_to_delete[:-7] + 'log')
except Exception as inst:
print inst
print '%s%s' %(backup_dir, backup_name)
def backup(backup_dir, backup_name):
"""is taking care of both, the incremental and the full backup,
of a linux system"""
fnames = get_previous_backups(backup_dir)
metadata = get_metafile(backup_dir)
print 'fnames = {}'.format(fnames)
print 'metadata = {}'.format(metadata)
print 'Backing up the system'
print 'Working on %s' %backup_dir
# Full Backup
if datetime.date.today().strftime('%A') == 'Sunday':
print 'doing complete backup of %s' %host
        command = ('nice -n 10 tar --create --verbose --preserve-permissions --bzip2 \
--file={0}_FULL.tar.bz2 --listed-incremental={0}.metadata {1} {2} > {3}.log 2>&1'
                   .format(os.path.join(backup_dir, backup_name),
                           backup_config['exclude_dirs'],
                           DIR_PATH,
                           os.path.join(backup_dir, backup_name)))
# Daily Backup
else:
print 'doing incremental backup of %s' %host
command = ('nice -n 10 tar --create --verbose --preserve-permissions --bzip2 \
--file={0}.tar.bz2 --listed-incremental={1} {2} {3} > {0}.log 2>&1'
.format(os.path.join(backup_dir, backup_name),
os.path.join(backup_dir, metadata[0]),
backup_config['exclude_dirs'],
DIR_PATH))
print command
os.system(command)
mount(MOUNT_POINT, CREDENTIALS, NAS, backup_config['backup_dir'])
backup(backup_config['backup_dir'], backup_config['backup_name'])
remove_old_backups(backup_config['backup_dir'], backup_config['backups_to_keep'], backup_config['backup_name'])
unmount(MOUNT_POINT)