Scripting and System Automation with Python
Python is the scripting language of choice on both Unix/Linux and Windows systems. Its standard library covers everything from running shell commands to building full CLIs and monitoring the filesystem.
subprocess — run external commands
import subprocess
# Recommended: subprocess.run (blocks until the command has finished)
result = subprocess.run(
    ['ls', '-la', '/tmp'],  # use a list — never shell=True with external input
    capture_output=True,    # captures stdout and stderr
    text=True,              # decodes bytes to str
    timeout=10,             # maximum 10 seconds
    check=True,             # raises CalledProcessError if returncode != 0
)
print(result.stdout)
print("Exit code:", result.returncode)

# Capture stderr separately: with capture_output=True the two streams end up
# in different attributes (result2.stdout / result2.stderr).
result2 = subprocess.run(
    ['python3', '-c', 'import sys; sys.stderr.write("error!\\n")'],
    capture_output=True,
    text=True,
    timeout=10,  # same guards as the first call: never hang, never fail silently
    check=True,
)
print("stderr:", result2.stderr)

# Pipe data to stdin: `input` is sent to the child's standard input
result3 = subprocess.run(
    ['sort'],
    input='banana\napple\ncherry\n',
    capture_output=True,
    text=True,
    timeout=10,
    check=True,
)
print(result3.stdout)
Long-running processes with Popen
import subprocess
def run_streaming(command: list[str]) -> int:
    """Run *command* and echo its output line by line as it is produced.

    stderr is merged into stdout, so both streams are printed interleaved in
    the order the child wrote them.

    Args:
        command: Program and arguments as a list (no shell is involved).

    Returns:
        The exit code of the finished process.
    """
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr into stdout
        text=True,
        bufsize=1,  # line-buffered
    ) as proc:
        for line in proc.stdout:
            print(line, end='', flush=True)
        # returncode is None until the child has been reaped; wait explicitly
        # so the function can never hand back None instead of an int.
        return proc.wait()
# Demo: stream the output of a ping run, then report how it exited.
ping_command = ['ping', '-c', '4', '8.8.8.8']
code = run_streaming(ping_command)
print(f"Exited with code: {code}")
Pipelines — chaining commands
# Equivalent to: ps aux | grep python | wc -l
# Both Popen objects are used as context managers so that their pipes are
# closed and the children waited for even if a later stage raises.
with subprocess.Popen(['ps', 'aux'], stdout=subprocess.PIPE) as ps:
    with subprocess.Popen(
        ['grep', 'python'],
        stdin=ps.stdout,
        stdout=subprocess.PIPE,
    ) as grep:
        ps.stdout.close()  # allow ps to receive SIGPIPE if grep exits first
        wc = subprocess.run(
            ['wc', '-l'],
            stdin=grep.stdout,
            capture_output=True,
            text=True,
            timeout=10,  # never hang on a stuck pipeline
            check=True,  # surface a failing final stage
        )
print(f"Python processes: {wc.stdout.strip()}")
os and pathlib — file system operations
import pathlib
# pathlib is the modern, object-oriented API (preferred over os.path)
p = pathlib.Path('/home/user/project')

# Introspection — each call queries the filesystem
p.exists()
p.is_dir()
p.is_file()
info = p.stat()
info.st_size   # size in bytes
info.st_mtime  # modification timestamp

# Navigation — pure string operations, no filesystem access
p.parent  # /home/user
p.name    # 'project'
p.stem    # 'project' (no extension)
p.suffix  # '' or '.txt'

# Build paths with the / operator (portable across OSes)
config = p / 'config' / 'settings.json'

# Create directories (missing parents included; no error if already present)
logs_dir = p / 'logs'
logs_dir.mkdir(parents=True, exist_ok=True)

# Find files recursively
for source_file in p.glob('**/*.py'):
    print(source_file)

# Only Python files modified in the last 24 hours
import time

SECONDS_PER_DAY = 86400
cutoff = time.time() - SECONDS_PER_DAY
recent = []
for candidate in p.glob('**/*.py'):
    if candidate.stat().st_mtime > cutoff:
        recent.append(candidate)
shutil — high-level file operations
import shutil
import pathlib
src = pathlib.Path('source_dir')
dest = pathlib.Path('dest_dir')

# Copy entire directory tree (an already existing dest is accepted)
shutil.copytree(src, dest, dirs_exist_ok=True)

# Copy a single file (preserving metadata)
file_from = src / 'file.txt'
file_to = dest / 'file.txt'
shutil.copy2(file_from, file_to)

# Move file or directory
move_from = str(src / 'old.txt')
move_to = str(dest / 'new.txt')
shutil.move(move_from, move_to)

# Remove directory tree (errors during removal are ignored)
shutil.rmtree(dest, ignore_errors=True)

# Create a ZIP archive
shutil.make_archive('backup_2024', 'zip', root_dir='project/', base_dir='.')

# Extract an archive
shutil.unpack_archive('backup_2024.zip', extract_dir='restored/')

# Disk space
usage = shutil.disk_usage('/')
total, used, free = usage
free_gb = free / 1024**3
print(f"Free: {free_gb:.1f} GB")

# Find executable in PATH
python_path = shutil.which('python3')
print(f"Python at: {python_path}")
argparse — build robust CLIs
import argparse
import pathlib
def make_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``converter`` command-line tool.

    Returns:
        A parser with two positional paths (``input``, ``output``) and the
        options ``-q/--quality``, ``-v/--verbose``, ``--format`` and
        ``--resize``.
    """
    usage_examples = 'Examples:\n converter image.jpg output.png\n converter -v -q 85 photo.jpg out.webp'
    cli = argparse.ArgumentParser(
        prog='converter',
        description='Convert files between formats.',
        epilog=usage_examples,
        # Raw formatter: keep the epilog's line breaks in the --help output.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Positional arguments, both converted to pathlib.Path
    for arg_name, arg_help in (('input', 'Input file'), ('output', 'Output file')):
        cli.add_argument(arg_name, type=pathlib.Path, help=arg_help)

    # Options
    allowed_quality = range(1, 101)
    cli.add_argument(
        '-q', '--quality',
        type=int,
        default=85,
        choices=allowed_quality,
        metavar='1-100',
        help='Compression quality (default: 85)',
    )
    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Enable verbose output',
    )
    cli.add_argument(
        '--format',
        choices=['jpg', 'png', 'webp', 'avif'],
        help='Force output format',
    )
    cli.add_argument(
        '--resize',
        nargs=2,
        type=int,
        metavar=('WIDTH', 'HEIGHT'),
        help='Resize to WIDTH×HEIGHT pixels',
    )
    return cli
def main():
    """Entry point of the converter CLI: parse arguments and report progress.

    Exits through ``parser.error`` when the input file does not exist.
    """
    cli = make_parser()
    options = cli.parse_args()

    input_missing = not options.input.exists()
    if input_missing:
        cli.error(f"File '{options.input}' does not exist")

    if options.verbose:
        # NOTE(review): the size line is treated as part of the verbose
        # output — confirm against the original layout.
        details = [
            f"Converting {options.input} → {options.output}",
            f"Quality: {options.quality}",
        ]
        if options.resize:
            details.append(f"Size: {options.resize[0]}×{options.resize[1]}")
        for detail in details:
            print(detail)

    print(f"Conversion complete: {options.output}")


if __name__ == '__main__':
    main()
Subcommands (git-style CLI)
import argparse
parser = argparse.ArgumentParser(prog='mytool')
# dest='command' stores the chosen subcommand name on the parsed namespace;
# required=True makes calling the tool without a subcommand an error.
subparsers = parser.add_subparsers(dest='command', required=True)

# 'convert' subcommand: a positional file plus a mandatory --format
convert_parser = subparsers.add_parser('convert', help='Convert a file')
convert_parser.add_argument('file')
convert_parser.add_argument('--format', required=True)

# 'list' subcommand: optional --status filter
list_parser = subparsers.add_parser('list', help='List jobs')
list_parser.add_argument('--status', choices=['all', 'pending', 'done'])

args = parser.parse_args()
selected = args.command
if selected == 'convert':
    print(f"Converting {args.file} to {args.format}")
Schedule recurring tasks
import schedule # pip install schedule
import time
def clean_temp_files(tmp_dir='/tmp/myapp'):
    """Empty a scratch directory by deleting it and creating it afresh.

    Args:
        tmp_dir: Directory to reset. Defaults to ``/tmp/myapp``, so callers
            that pass no argument (such as a scheduler job) behave as before.
    """
    import pathlib
    import shutil

    tmp = pathlib.Path(tmp_dir)
    if tmp.exists():
        shutil.rmtree(tmp)
    # parents/exist_ok: tolerate a missing parent directory and a directory
    # that was recreated between the removal above and this call.
    tmp.mkdir(parents=True, exist_ok=True)
    print("Temp files cleaned")
def daily_report():
    """Print a notice that the daily report is being generated."""
    notice = "Generating daily report..."
    print(notice)
# Register the jobs: cleanup every 10 minutes, the report every day at 08:00
# and additionally every Monday at 09:00.
schedule.every(10).minutes.do(clean_temp_files)
schedule.every().day.at("08:00").do(daily_report)
schedule.every().monday.at("09:00").do(daily_report)

# Poll once per second and run whatever jobs are due.
POLL_INTERVAL_SECONDS = 1
while True:
    schedule.run_pending()
    time.sleep(POLL_INTERVAL_SECONDS)
watchdog — monitor filesystem changes
from watchdog.observers import Observer # pip install watchdog
from watchdog.events import FileSystemEventHandler
import time
import pathlib
# Lower-case file suffixes that are treated as images.
IMAGE_EXTENSIONS = {'.jpg', '.png', '.tiff', '.bmp', '.webp'}


class ImageHandler(FileSystemEventHandler):
    """Event handler that reports modifications and processes new images."""

    def on_created(self, event):
        """Process a newly created file when its suffix is an image suffix."""
        if event.is_directory:
            return
        created = pathlib.Path(event.src_path)
        # Compare case-insensitively so e.g. '.JPG' matches as well.
        is_image = created.suffix.lower() in IMAGE_EXTENSIONS
        if not is_image:
            return
        print(f"New image detected: {created.name}")
        self.process(created)

    def on_modified(self, event):
        """Print the name of whatever path was modified."""
        changed = pathlib.Path(event.src_path)
        print(f"Modified: {changed.name}")

    def process(self, path: pathlib.Path):
        """Placeholder for the conversion step; currently only prints."""
        print(f" → Processing {path.name}...")
        # conversion logic here
def watch_folder(folder: str, recursive: bool = False):
    """Watch *folder* with an ImageHandler until interrupted with Ctrl+C.

    Args:
        folder: Directory to observe.
        recursive: Passed through to the observer's ``schedule`` call.
    """
    handler = ImageHandler()
    observer = Observer()
    observer.schedule(handler, folder, recursive=recursive)
    observer.start()
    print(f"Watching {folder}... (Ctrl+C to stop)")
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass  # Ctrl+C is the normal way to end the watch
    finally:
        # Stop and join on every exit path, not only on Ctrl+C, so the
        # observer is never left running behind a propagating exception.
        observer.stop()
        observer.join()
# watch_folder('/path/to/input', recursive=True)
Complete automated backup script
#!/usr/bin/env python3
"""
backup.py — Incremental backup with rotation and structured logging.
Usage: python backup.py --source /data --dest /backups --days 7
"""
import argparse
import logging
import pathlib
import shutil
import time
from datetime import datetime
# Every record goes both to backup.log and to the console stream.
LOG_FORMAT = '%(asctime)s [%(levelname)s] %(message)s'
log_handlers = [
    logging.FileHandler('backup.log'),
    logging.StreamHandler(),
]
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, handlers=log_handlers)

# Logger used by the backup functions in this script.
log = logging.getLogger('backup')
def make_backup(source: pathlib.Path, dest: pathlib.Path) -> pathlib.Path:
    """Copy *source* into a new ``backup_<timestamp>`` directory under *dest*.

    Args:
        source: Directory tree to back up.
        dest: Directory in which the timestamped backup is created.

    Returns:
        Path of the backup directory that was created.

    Raises:
        Whatever ``shutil.copytree`` raises, e.g. when *source* is missing or
        a backup directory for the current second already exists.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_path = dest / f"backup_{timestamp}"
    log.info("Starting backup: %s → %s", source, backup_path)
    t0 = time.perf_counter()
    shutil.copytree(source, backup_path)
    # copytree copies the source directory's timestamps onto the copy. Reset
    # the mtime to "now" so that mtime-based rotation sees when the backup
    # was made rather than when the source was last touched.
    backup_path.touch()
    log.info("Backup done in %.1fs", time.perf_counter() - t0)
    return backup_path
def rotate_backups(dest: pathlib.Path, keep_days: int):
    """Delete ``backup_*`` directories under *dest* older than *keep_days* days.

    A backup's age is read from the timestamp in its
    ``backup_YYYYmmdd_HHMMSS`` directory name. The directory mtime is not
    reliable for this: ``shutil.copytree`` copies the source directory's
    times onto the copy, so a brand-new backup of a long-untouched source
    would look old and be deleted right away. The mtime is used only as a
    fallback for ``backup_*`` directories whose name has no parsable
    timestamp.

    Args:
        dest: Directory that contains the backup directories.
        keep_days: Retention period in days.
    """
    cutoff = time.time() - keep_days * 86400
    removed = 0
    for d in sorted(dest.glob('backup_*')):
        if not d.is_dir():
            continue
        try:
            created = datetime.strptime(d.name, 'backup_%Y%m%d_%H%M%S').timestamp()
        except (ValueError, OverflowError, OSError):
            # Name does not carry a usable timestamp: fall back to the mtime.
            created = d.stat().st_mtime
        if created < cutoff:
            shutil.rmtree(d)
            log.info("Removed old backup: %s", d.name)
            removed += 1
    if removed:
        log.info("Rotation: %d backup(s) removed", removed)
def main():
    """Command-line entry point: create one backup, then rotate old ones.

    Exits with status 1 (after logging the traceback) if the backup or the
    rotation raises.
    """
    cli = argparse.ArgumentParser(description='Backup with rotation')
    cli.add_argument('--source', type=pathlib.Path, required=True)
    cli.add_argument('--dest', type=pathlib.Path, required=True)
    cli.add_argument('--days', type=int, default=7, help='Retention days')
    options = cli.parse_args()

    source_dir = options.source
    backup_root = options.dest
    retention_days = options.days

    # The destination may not exist yet on a first run.
    backup_root.mkdir(parents=True, exist_ok=True)
    try:
        latest = make_backup(source_dir, backup_root)
        rotate_backups(backup_root, retention_days)
        log.info(f"All done. Latest backup: {latest}")
    except Exception:
        log.exception("Backup failed")
        raise SystemExit(1)


if __name__ == '__main__':
    main()
Best practices
- Use a list in `subprocess.run()`, never a string with `shell=True`, when arguments come from user input — this prevents command injection.
- Prefer `pathlib.Path` over `os.path` — more readable, object-oriented, and portable across operating systems.
- Always pass `check=True` to `subprocess.run()` so command errors don't go unnoticed.
- Always pass `timeout=` to external process calls — prevents scripts from hanging indefinitely.
- Call `shutil.which('ffmpeg')` at script startup to verify external dependencies are installed before they are needed.
- Always include `if __name__ == '__main__':` so scripts can be safely imported in tests without running the main logic.
Related conversions
Frequent conversions across the catalogue: