File Format Conversion with Python
Python is the ideal platform for building file-conversion pipelines thanks to its specialized libraries and its ability to orchestrate external tools like FFmpeg. This guide covers the most common conversions with production-ready code.
Image conversion with Pillow
pip install Pillow
from PIL import Image, ImageOps
import pathlib
def convert_image(
    source: str,
    dest: str,
    *,
    max_size: tuple[int, int] | None = None,
    quality: int = 85,
    strip_exif: bool = True,
) -> pathlib.Path:
    """
    Convert an image to the format implied by the extension of ``dest``.

    Palette images are expanded to RGBA for PNG output; for JPEG output,
    transparent and palette images are composited onto a white background.

    Args:
        source: Path of the image to read.
        dest: Path to write; its suffix selects the output format.
        max_size: Optional ``(width, height)`` box the image is shrunk to fit.
        quality: Encoder quality, applied to JPEG, WebP and AVIF output.
        strip_exif: When true, the image is saved from a fresh copy whose
            ``info`` dict keeps only the ``transparency`` entry, so EXIF and
            other metadata loaded from the source are not carried over.

    Returns:
        The destination path.
    """
    src_path = pathlib.Path(source)
    dst_path = pathlib.Path(dest)
    fmt = dst_path.suffix.lstrip('.').upper()
    is_jpeg = fmt in ('JPG', 'JPEG')

    # exif_transpose hands back a separate, loaded image, so the source file
    # can be closed right away instead of staying open until garbage collection.
    with Image.open(src_path) as src:
        img = ImageOps.exif_transpose(src)  # fix EXIF rotation

    if max_size:
        img.thumbnail(max_size, Image.LANCZOS)

    # JPEG has no alpha channel: flatten onto white
    if is_jpeg and img.mode in ('RGBA', 'LA', 'P'):
        bg = Image.new('RGB', img.size, (255, 255, 255))
        if img.mode == 'P':
            img = img.convert('RGBA')
        bg.paste(img, mask=img.split()[-1] if 'A' in img.mode else None)
        img = bg
    if fmt == 'PNG' and img.mode == 'P':
        img = img.convert('RGBA')

    kwargs: dict = {'optimize': True}
    if is_jpeg:
        kwargs['quality'] = quality
        kwargs['progressive'] = True
    elif fmt == 'WEBP':
        kwargs['quality'] = quality
        kwargs['method'] = 6
    elif fmt == 'AVIF':
        # Without this branch the caller's quality was silently ignored.
        kwargs['quality'] = quality
    elif fmt == 'PNG':
        kwargs['compress_level'] = 9

    if strip_exif:
        # copy() keeps pixels and palette; rebuilding through getdata()/putdata()
        # was far slower and lost the palette of 'P' images.
        clean_img = img.copy()
        # 'transparency' describes the pixels rather than the file, so keep it.
        clean_img.info = {k: v for k, v in img.info.items() if k == 'transparency'}
        img = clean_img

    img.save(dst_path, **kwargs)
    print(f"Converted: {src_path.name} → {dst_path.name} ({dst_path.stat().st_size/1024:.1f} KB)")
    return dst_path
# Typical one-off conversions: PNG → JPEG (downscaled), JPEG → WebP, PNG → AVIF
convert_image(source='photo.png', dest='photo.jpg', max_size=(1920, 1080), quality=90)
convert_image(source='logo.jpg', dest='logo.webp', quality=80)
convert_image(source='icon.png', dest='icon.avif', quality=75)
Batch image conversion
import pathlib
from PIL import Image, ImageOps
from concurrent.futures import ThreadPoolExecutor, as_completed
def convert_folder(
    input_folder: str,
    output_folder: str,
    target_format: str = 'webp',
    max_size: tuple[int, int] = (2000, 2000),
    quality: int = 82,
    workers: int = 4,
):
    """
    Convert every image found under ``input_folder`` into ``output_folder``.

    The input tree is searched recursively and its sub-folder layout is
    reproduced under the output folder. Each file is converted with
    ``convert_image`` on a thread pool; failures are printed and counted
    rather than aborting the batch.

    Args:
        input_folder: Root folder to scan for images.
        output_folder: Root folder for the converted files (created if missing).
        target_format: Extension (without dot) given to every output file.
        max_size: Bounding box forwarded to ``convert_image``.
        quality: Encoder quality forwarded to ``convert_image``.
        workers: Number of worker threads.

    Note:
        Inputs that differ only by extension (``a.jpg`` and ``a.png``) map to
        the same destination path.
    """
    inp = pathlib.Path(input_folder)
    out = pathlib.Path(output_folder)
    out.mkdir(parents=True, exist_ok=True)
    exts = {'.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif'}
    # is_file() keeps out directories that merely carry an image-like name.
    files = [
        f for f in inp.rglob('*')
        if f.is_file() and f.suffix.lower() in exts
    ]
    ok = errors = 0

    def process(f):
        """Convert one file; return ``(succeeded, message)``."""
        dest = out / f.relative_to(inp).with_suffix(f'.{target_format}')
        dest.parent.mkdir(parents=True, exist_ok=True)
        try:
            convert_image(str(f), str(dest), max_size=max_size, quality=quality)
            return True, f.name
        except Exception as e:
            # Best-effort batch: report the failure and keep going.
            return False, f"{f.name}: {e}"

    with ThreadPoolExecutor(max_workers=workers) as ex:
        futures = [ex.submit(process, f) for f in files]
        for future in as_completed(futures):
            ok_flag, info = future.result()
            if ok_flag:
                ok += 1
            else:
                errors += 1
                print(f" ERROR: {info}")
    print(f"\nBatch done: {ok} OK, {errors} errors out of {len(files)} files")
# Convert a whole photo library to WebP, mirroring its folder layout
convert_folder(
    input_folder='original_photos/',
    output_folder='webp_photos/',
    target_format='webp',
)
Video and audio conversion with FFmpeg
import subprocess
import pathlib
import json
def media_info(path: str) -> dict:
    """
    Return ffprobe's JSON report (streams and container format) for a file.

    Args:
        path: Media file to inspect.

    Returns:
        The parsed JSON document printed by ffprobe.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
    """
    cmd = [
        'ffprobe', '-v', 'quiet',
        '-print_format', 'json',
        '-show_streams', '-show_format',
        path,
    ]
    # Hand the raw bytes to json.loads so decoding does not depend on the
    # process locale (text=True would use the locale's preferred encoding).
    raw = subprocess.run(cmd, capture_output=True, check=True).stdout
    return json.loads(raw)
def convert_video(
    source: str,
    dest: str,
    *,
    video_codec: str = 'libx264',
    crf: int = 23,
    preset: str = 'medium',
    audio_codec: str = 'aac',
    audio_bitrate: str = '128k',
    resolution: str | None = None,  # e.g. '1280:720'
    fps: int | None = None,
) -> None:
    """
    Transcode a video file with FFmpeg.

    Args:
        source: Input file.
        dest: Output file; an existing file is overwritten (``-y``).
        video_codec: Value for ``-c:v``.
        crf: Value for ``-crf``.
        preset: Value for ``-preset``.
        audio_codec: Value for ``-c:a``.
        audio_bitrate: Value for ``-b:a``.
        resolution: Optional ``'width:height'`` passed to the ``scale`` filter.
        fps: Optional output frame rate (``-r``).

    Raises:
        RuntimeError: If FFmpeg exits with a non-zero status; the message
            carries the last 2000 characters of its stderr.
    """
    cmd = ['ffmpeg', '-i', source, '-y']
    cmd += ['-c:v', video_codec, '-crf', str(crf), '-preset', preset]
    if resolution:
        cmd += ['-vf', f'scale={resolution}']
    if fps:
        cmd += ['-r', str(fps)]
    cmd += ['-c:a', audio_codec, '-b:a', audio_bitrate]
    cmd.append(dest)

    # Decode stderr as UTF-8 with replacement: with plain text=True a byte the
    # locale codec cannot decode raises UnicodeDecodeError and hides the
    # actual FFmpeg outcome.
    result = subprocess.run(
        cmd, capture_output=True, encoding='utf-8', errors='replace'
    )
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg error:\n{result.stderr[-2000:]}")
    size = pathlib.Path(dest).stat().st_size
    print(f"Converted: {dest} ({size/1024**2:.1f} MB)")
# Common conversions
convert_video('video.avi', 'video.mp4', video_codec='libx264', crf=22)
# WebM only accepts Vorbis or Opus audio, so the default AAC codec must be
# overridden or FFmpeg refuses to write the file.
convert_video('video.mov', 'video.webm', video_codec='libvpx-vp9', crf=30,
              audio_codec='libopus')
convert_video('video.mp4', 'video_720p.mp4', resolution='1280:720', crf=24)
def extract_audio(source: str, dest: str, fmt: str = 'mp3', bitrate: str = '192k'):
    """
    Extract the audio track from a video file.

    Args:
        source: Input media file.
        dest: Output audio file; an existing file is overwritten (``-y``).
        fmt: Target format name (``mp3``, ``ogg``, ``opus``, ``m4a``, ``wav``)
            or, for anything else, an FFmpeg encoder name used as given.
        bitrate: Value for ``-b:a``.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    # Format names are not always encoder names: '-c:a ogg' or '-c:a wav'
    # is rejected by FFmpeg, so translate the common ones first.
    encoders = {
        'mp3': 'libmp3lame',
        'ogg': 'libvorbis',
        'opus': 'libopus',
        'm4a': 'aac',
        'wav': 'pcm_s16le',
    }
    codec = encoders.get(fmt.lower(), fmt)
    cmd = ['ffmpeg', '-i', source, '-vn',
           '-c:a', codec,
           '-b:a', bitrate, '-y', dest]
    subprocess.run(cmd, check=True, capture_output=True)
    print(f"Audio extracted: {dest}")
# Pull the soundtrack out of a movie as a 320 kbit/s MP3
extract_audio(source='movie.mp4', dest='soundtrack.mp3', bitrate='320k')
Audio conversion
def convert_audio(source: str, dest: str, bitrate: str = '192k') -> None:
    """
    Convert between audio formats: MP3, FLAC, OGG, WAV, AAC, OPUS.

    The encoder options are picked from the extension of ``dest``; an
    extension that is not listed falls back to copying the audio stream.
    ``bitrate`` is used only by the MP3, Opus and AAC encoders.
    """
    # Encoder options per destination extension
    options_by_suffix = {
        '.mp3': ['-c:a', 'libmp3lame', '-b:a', bitrate],
        '.ogg': ['-c:a', 'libvorbis', '-q:a', '5'],
        '.opus': ['-c:a', 'libopus', '-b:a', bitrate],
        '.flac': ['-c:a', 'flac', '-compression_level', '8'],
        '.aac': ['-c:a', 'aac', '-b:a', bitrate],
        '.wav': ['-c:a', 'pcm_s16le'],
    }
    suffix = pathlib.Path(dest).suffix.lower()
    codec_options = options_by_suffix.get(suffix, ['-c:a', 'copy'])

    command = ['ffmpeg', '-i', source, '-y', *codec_options, dest]
    subprocess.run(command, check=True, capture_output=True)
    print(f"Audio converted: {dest}")
# Lossless → MP3, MP3 → Ogg Vorbis, WAV → low-bitrate Opus
convert_audio(source='song.flac', dest='song.mp3', bitrate='320k')
convert_audio(source='podcast.mp3', dest='podcast.ogg')
convert_audio(source='voice.wav', dest='voice.opus', bitrate='64k')
PDF conversion with PyMuPDF
pip install pymupdf
import fitz # PyMuPDF
import pathlib
def pdf_to_images(pdf_path: str, output_dir: str, dpi: int = 150) -> list[str]:
    """
    Export each PDF page as a PNG image.

    Args:
        pdf_path: PDF file to render.
        output_dir: Folder for the images (created if missing).
        dpi: Render resolution; pages are scaled by ``dpi / 72``.

    Returns:
        Paths of the written files, ``page_001.png``, ``page_002.png``, ...
    """
    out = pathlib.Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)
    mat = fitz.Matrix(dpi / 72, dpi / 72)
    paths = []
    # The context manager closes the document even when rendering or saving
    # a page raises.
    with fitz.open(pdf_path) as doc:
        for n, page in enumerate(doc, start=1):
            pix = page.get_pixmap(matrix=mat, alpha=False)
            path = str(out / f"page_{n:03d}.png")
            pix.save(path)
            paths.append(path)
    print(f"PDF exported: {len(paths)} pages → {output_dir}")
    return paths
def extract_pdf_text(pdf_path: str) -> str:
    """Return the text of every page of a PDF, pages separated by a blank line."""
    # The context manager closes the document even if text extraction raises.
    with fitz.open(pdf_path) as doc:
        return '\n\n'.join(page.get_text() for page in doc)
def images_to_pdf(image_paths: list[str], output_pdf: str) -> None:
    """
    Combine images into a single PDF, one page per image.

    Args:
        image_paths: Image files, in page order.
        output_pdf: Path of the PDF to write.
    """
    # Open the output document in a context manager so it is closed once
    # saved, or if adding a page fails.
    with fitz.open() as doc:
        for img_path in image_paths:
            # Opening the image as a document gives its page rectangle,
            # which becomes the size of the new PDF page.
            with fitz.open(img_path) as img_doc:
                rect = img_doc[0].rect
            page = doc.new_page(width=rect.width, height=rect.height)
            page.insert_image(rect, filename=img_path)
        doc.save(output_pdf, garbage=4, deflate=True)
    print(f"PDF created: {output_pdf}")
# Render a report to PNGs, pull the text of a contract, stitch scans into a PDF
pages = pdf_to_images(pdf_path='report.pdf', output_dir='pages/', dpi=200)
text = extract_pdf_text(pdf_path='contract.pdf')
images_to_pdf(
    image_paths=['p1.png', 'p2.png', 'p3.png'],
    output_pdf='combined.pdf',
)
Word document conversion with python-docx
pip install python-docx
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
def docx_to_text(path: str) -> str:
    """
    Extract all text from a .docx file.

    Non-blank paragraphs come first, one per line, followed by every table
    row with its cell texts stripped and joined by tabs.
    """
    document = Document(path)

    collected = []
    for paragraph in document.paragraphs:
        if paragraph.text.strip():
            collected.append(paragraph.text)

    for tbl in document.tables:
        collected.extend(
            '\t'.join(cell.text.strip() for cell in tbl_row.cells)
            for tbl_row in tbl.rows
        )
    return '\n'.join(collected)
def create_docx_from_data(data: list[dict], output: str) -> None:
    """
    Create a formatted .docx from a list of records.

    The document gets a centered heading and, when ``data`` is not empty,
    a grid table whose columns are the keys of the first record.

    Args:
        data: Records to tabulate. Values are looked up by the first record's
            keys; a key missing from a later record yields an empty cell and
            keys not present in the first record are ignored.
        output: Path of the .docx file to write.
    """
    doc = Document()
    title = doc.add_heading('Conversion Report', level=1)
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    if data:
        columns = list(data[0])
        table = doc.add_table(rows=1, cols=len(columns))
        table.style = 'Table Grid'
        for cell, key in zip(table.rows[0].cells, columns):
            cell.text = str(key).replace('_', ' ').title()
        for record in data:
            cells = table.add_row().cells
            # Fill by key, not by position: records whose keys come in a
            # different order or number would otherwise land in the wrong
            # column or overflow the row.
            for cell, key in zip(cells, columns):
                cell.text = str(record.get(key, ''))
    doc.save(output)
    print(f"DOCX created: {output}")
# Read a contract as plain text, then write a small report table
text = docx_to_text(path='contract.docx')
create_docx_from_data(
    data=[
        {'file': 'photo.jpg', 'target': 'WebP', 'size_kb': 45, 'saved': '68%'},
        {'file': 'video.avi', 'target': 'MP4', 'size_kb': 12340, 'saved': '45%'},
    ],
    output='conversion_report.docx',
)
Best practices
- Check for FFmpeg at startup: `shutil.which('ffmpeg')` — fail fast with a clear message rather than a cryptic error later.
- `capture_output=True` on FFmpeg calls — FFmpeg stderr is very verbose and will pollute logs if not captured.
- `ImageOps.exif_transpose(img)` before any manipulation — fixes EXIF rotation that cameras and phones embed.
- CRF in FFmpeg: 18-22 for high quality, 23-28 for size/quality balance, 28-35 for maximum compression.
- WebP or AVIF for web images — save 30-50% vs JPEG/PNG with similar or better visual quality.
- `ProcessPoolExecutor` for CPU-intensive conversions (images, video), `ThreadPoolExecutor` for I/O-bound work (reading/writing without heavy processing).