Sync files… the Python way!

What it does

  • copies missing files/modified files from source to target
  • does not copy if file is newer at target
  • use -r for a report mode and -s for the actual sync mode

Why?

  • I could not use rsync or other solutions for this situation
  • I needed a scriptable solution: there were too many directories to sync

Note: it worked for me, but it might not work in other cases. More testing is needed.

import os, sys, shutil,time
import cPickle
from ListFiles import listFiles

def cleanDir(dname, dirlist):
	"""Return the paths in ``dirlist`` with the leading root ``dname`` stripped.

	Only a prefix is removed.  The previous ``str.replace`` version deleted
	every occurrence of ``dname``, so a path such as ``src/lib/src/f.py``
	with root ``src`` lost its nested ``src`` component as well.

	Paths that do not start with ``dname`` are returned unchanged.  The input
	list is not modified; a new list is returned.
	"""
	prefix_len = len(dname)
	return [path[prefix_len:] if path.startswith(dname) else path
		for path in dirlist]
def removeItem(item, dirlist):
	"""Delete the first entry equal to ``item`` from ``dirlist`` in place.

	The same list object is returned.  ``ValueError`` is raised when
	``item`` is not present.
	"""
	position = dirlist.index(item)
	del dirlist[position]
	return dirlist

def syncFiles(dname, files):
	"""Copy the entries in ``files`` from the source tree to the target tree.

	Each entry is a path relative to the roots; it is appended to the
	module-level globals ``dir1`` (source root) and ``dir2`` (target root),
	which the script's main section sets before calling this function.
	``dname`` is accepted for interface compatibility but is not used here.

	Directories are created at the target and given the source's permission
	bits; regular files are copied with ``shutil.copy2``.  Missing parent
	directories are created first, so the result does not depend on the
	order of ``files``.  Entries whose source is neither a directory nor a
	regular file are skipped without being reported.

	Returns a tuple ``(error_logs, not_copied, done_logs)`` of lists of
	strings, each string already terminated by a newline:
	  error_logs -- ``target_path + TAB + error text`` per failed entry
	  not_copied -- source path of every failed entry
	  done_logs  -- the same message that was printed for each success
	"""
	error_logs = []
	not_copied = []
	done_logs = []
	for rel_path in files:
		source_name = dir1 + rel_path
		target_name = dir2 + rel_path
		try:
			if os.path.isdir(source_name):
				# The directory may already exist if a child entry was
				# handled first; makedirs also creates missing parents.
				if not os.path.isdir(target_name):
					os.makedirs(target_name)
				shutil.copymode(source_name, target_name)
				message = 'created directory ' + target_name
			elif os.path.isfile(source_name):
				parent = os.path.dirname(target_name)
				if parent and not os.path.isdir(parent):
					os.makedirs(parent)
				shutil.copy2(source_name, target_name)
				message = 'copied file ' + source_name + ' to ' + target_name
			else:
				continue
			# Single-argument print(...) behaves the same on Python 2 and 3.
			print(message)
			done_logs.append(message + '\n')
		except Exception as why:
			# Best effort: record the failure and carry on with the next entry.
			error_logs.append(target_name + '\t' + str(why) + '\n')
			not_copied.append(source_name + '\n')
	return error_logs, not_copied, done_logs

def _loadOrBuildListing(cache_path, root):
	"""Return the listing for ``root``, reusing the pickle at ``cache_path`` if it exists.

	When no cache file exists, the listing is produced by ``listFiles`` and
	pickled to ``cache_path`` for later runs.
	"""
	if os.path.exists(cache_path):
		# NOTE(review): unpickling can execute arbitrary code; only use cache
		# files that this script wrote itself.  A stale cache also means the
		# listing may no longer match what is on disk.
		with open(cache_path, 'r') as fin:
			return cPickle.load(fin)
	# presumably returns files and folders found under root -- confirm in ListFiles
	listing = listFiles(root, "*",return_folders="1")
	with open(cache_path, 'w') as fout:
		cPickle.dump(listing, fout)
	return listing


def _writeSection(log, header, entries):
	"""Write ``header`` and then each entry to ``log``, one per line.

	Nothing is written for an empty ``entries``.  Returns True when the
	section was written, False otherwise.
	"""
	if not entries:
		return False
	log.write(header + '\n')
	for entry in entries:
		log.write(entry + '\n')
	return True


if __name__ == "__main__":
	# Command line: directory1 directory2 dname -r|-s
	#   dname is the prefix of the cache files (<dname>_source.txt,
	#   <dname>_target.txt) and of the log file (<dname>_log.txt).
	#   -r only reports differences, -s copies them.
	if len(sys.argv) < 5 or sys.argv[4] not in ('-r', '-s'):
		print('Usage: ' + sys.argv[0] + ' directory1 directory2 dname -r|-s')
		# A bare `exit` is only a name lookup; actually stop here.
		sys.exit(1)

	# dir1 and dir2 must stay module-level names: syncFiles reads them as globals.
	dir1 = sys.argv[1]
	dir2 = sys.argv[2]
	dname = sys.argv[3]
	do_sync = sys.argv[4] == '-s'

	flist1 = _loadOrBuildListing(dname + '_source.txt', dir1)
	flist2 = _loadOrBuildListing(dname + '_target.txt', dir2)

	clean_dir1_list = cleanDir(dir1, flist1)
	clean_dir2_list = cleanDir(dir2, flist2)
	# Set for O(1) membership tests instead of scanning the list each time.
	target_entries = set(clean_dir2_list)

	#open main log (closed automatically, even if a step below fails)
	with open(dname + '_log.txt', 'w') as log:

		#missing files
		missing_files = [item for item in clean_dir1_list if item not in target_entries]
		if do_sync:
			(missing_error, missing_not_copied, missing_done) = syncFiles(dname, missing_files)
			_writeSection(log, '\nerrors occured while copying the missing files from ' + dir1 + ' to ' + dir2 + '\n', missing_error)
			_writeSection(log, '\nmissing files not copied\n', missing_not_copied)
			_writeSection(log, '\nmissing files copied\n', missing_done)
		elif _writeSection(log, '\nmissing in ' + dir2 + ' compared to ' + dir1 + '\n', missing_files):
			print('list of missing files written to ' + dname + '_log.txt')

		#modified files
		# The list is no longer mutated while it is being iterated; removing
		# entries inside the loop made the iterator skip the following item.
		modified_files = []
		for item in clean_dir1_list:
			if item not in target_entries or not os.path.isfile(dir1 + item):
				continue
			f1time = os.path.getmtime(dir1 + item)
			f2time = os.path.getmtime(dir2 + item)
			# Equal timestamps, or a source exactly one hour ahead, count as
			# unchanged.  NOTE(review): the 3600 s case looks like a
			# daylight-saving/timezone workaround -- confirm.
			if (f1time - f2time == 3600.0) or (f2time - f1time == 0.0):
				continue
			if f2time > f1time:
				log.write('file ' + item + ' is newer at ' + dir2 + '. will not be copied\n' + '\n')
			else:
				modified_files.append(item)

		if do_sync:
			(modified_error, modified_not_copied, modified_done) = syncFiles(dname, modified_files)
			_writeSection(log, '\nerrors while copying the modified files from ' + dir1 + ' to ' + dir2 + '\n', modified_error)
			_writeSection(log, '\nmodified files not copied\n', modified_not_copied)
			_writeSection(log, '\nmodified files copied\n', modified_done)
		elif _writeSection(log, '\nthe following are modified in ' + dir1 + ' compared to ' + dir2 + '\n', modified_files):
			print('list of modified files written to ' + dname + '_log.txt')

0 Responses to “Sync files… the Python way!”



  1. No Comments Yet

Leave a Reply