Python Generators and Iterators: yield, itertools and Memory-Efficient Patterns
Generators are functions that produce values one at a time, on demand, without storing them all in memory. They are ideal for processing large datasets, data streams or infinite sequences.
1. Iterator protocol: iter and next
class Range:
    """Manual iterator that yields the same values as ``range()``.

    Args:
        start: First value produced.
        stop: Exclusive bound; iteration ends before reaching it.
        step: Increment between values. May be negative to count down.

    Raises:
        ValueError: If ``step`` is zero (mirrors ``range()``).
    """

    def __init__(self, start, stop, step=1):
        if step == 0:
            # A zero step would never reach ``stop`` and loop forever.
            raise ValueError("Range() step must not be zero")
        self.current = start
        self.stop = stop
        self.step = step

    def __iter__(self):
        # The object is its own iterator, so it can be consumed only once.
        return self

    def __next__(self):
        # The direction of the bound check depends on the sign of the step.
        if self.step > 0:
            exhausted = self.current >= self.stop
        else:
            exhausted = self.current <= self.stop
        if exhausted:
            raise StopIteration
        value = self.current
        self.current += self.step
        return value
# A for loop calls iter() once and then next() until StopIteration is raised.
range_iterator = Range(0, 5)
for n in range_iterator:
    print(n, end=" ")  # 0 1 2 3 4
it = iter([10, 20, 30])
# Bare next(): would raise StopIteration once the iterator is exhausted.
for _ in range(2):
    print(next(it))  # 10, then 20
# next() with a default: returns the default instead of raising StopIteration.
for _ in range(2):
    print(next(it, "done"))  # 30, then done
2. Generators with yield
def count_up_to(n):
    """Yield 0, 1, 2, ... for as long as the value stays below ``n``."""
    current = 0
    while True:
        if not current < n:
            return
        yield current
        current += 1
# Calling a generator function runs none of its body; it returns a generator object.
gen = count_up_to(5)
generator_type = type(gen)
print(generator_type)  # <class 'generator'>
# Iterating is what actually drives the function body, one yield at a time.
for n in gen:
    print(n, end=" ")  # 0 1 2 3 4
# Memory comparison: list vs generator
import sys
lst = [i * i for i in range(100_000)]  # All 100,000 squares exist right now
gen = (i * i for i in range(100_000))  # Generator expression: nothing computed yet
# sys.getsizeof is shallow: it measures the container, not the ints it refers to.
list_bytes = sys.getsizeof(lst)
generator_bytes = sys.getsizeof(gen)
print(f"List : {list_bytes:,} bytes")  # ~800 KB
print(f"Generator: {generator_bytes} bytes")  # ~200 bytes
3. Data processing pipelines
def read_lines(path):
    """Lazily yield each line of the UTF-8 text file at ``path``.

    Trailing newline characters are removed; other whitespace is kept.
    The file stays open only while the generator is being consumed.
    """
    with open(path, "r", encoding="utf-8") as handle:
        yield from (raw.rstrip("\n") for raw in handle)
def skip_empty(lines):
    """Yield only the lines that contain something other than whitespace."""
    for candidate in lines:
        if not candidate.strip():
            continue
        yield candidate
def to_upper(lines):
    """Yield each incoming line converted to upper case."""
    yield from (text.upper() for text in lines)
def take(n, gen):
    """Yield at most the first ``n`` items of ``gen``, then stop.

    Exactly the yielded items are pulled from ``gen``: nothing is read when
    ``n`` is zero or negative, and no extra item is consumed after the
    ``n``-th one, so the source can be resumed afterwards without a gap.

    Args:
        n: Maximum number of items to yield.
        gen: Any iterable or iterator to read from.
    """
    if n <= 0:
        return
    for count, item in enumerate(gen, start=1):
        yield item
        if count >= n:
            # Stop here, before asking the source for another item.
            return
# Chained pipeline — each stage pulls one line at a time from the stage before it
raw_lines = read_lines("file.txt")
non_empty = skip_empty(raw_lines)
shouted = to_upper(non_empty)
pipeline = take(5, shouted)
for line in pipeline:
    print(line)
4. yield from: delegate to sub-generators
def evens(n):
    """Yield the even numbers 0, 2, 4, ... that are below ``n``."""
    yield from range(0, n, 2)
def odds(n):
    """Yield the odd numbers 1, 3, 5, ... that are below ``n``."""
    yield from range(1, n, 2)
def all_numbers(n):
    """Yield every even number below ``n``, followed by every odd number below ``n``."""
    for source in (evens, odds):
        # Delegate to each sub-generator in turn until it is exhausted.
        yield from source(n)
# Evens come out first, then odds:
# [0, 2, 4, 6, 8, 1, 3, 5, 7, 9]
print([*all_numbers(10)])
def flatten(structure):
    """Yield the leaf items of arbitrarily nested lists, depth-first.

    Only ``list`` instances are descended into; any other item, including
    tuples and strings, is yielded unchanged.
    """
    for element in structure:
        if not isinstance(element, list):
            yield element
            continue
        # Hand control to a recursive sub-generator for the nested list.
        yield from flatten(element)
nested = [1, [2, [3, 4]], [5, 6], 7]
print(list(flatten(nested)))
# [1, 2, 3, 4, 5, 6, 7]
5. send() and generator coroutines
def accumulator():
    """Coroutine-style generator that keeps a running total of sent values.

    Each ``send(value)`` adds ``value`` to the total and returns the new total.
    Sending ``None`` (which is also what a plain ``next()`` does) ends the
    generator.
    """
    running = 0
    received = yield running
    while received is not None:
        running += received
        received = yield running
gen = accumulator()
next(gen)  # Prime the generator: run it up to its first yield
for amount in (10, 20, 15):
    print(gen.send(amount))  # 10, then 30, then 45
6. itertools: standard iteration tools
import itertools
# count: infinite counter (bounded here with islice)
odd_counter = itertools.count(start=1, step=2)
for n in itertools.islice(odd_counter, 5):
    print(n, end=" ")  # 1 3 5 7 9
print()
# cycle: loop indefinitely
colors = ["red", "green", "blue"]
# cycle() never ends on its own, so islice() caps it at six items instead of
# a hand-written counter and break.
for color in itertools.islice(itertools.cycle(colors), 6):
    print(color, end=" ")  # red green blue red green blue
print()
# chain: concatenate iterables
segments = ([1, 2], [3, 4], [5, 6])
result = [*itertools.chain.from_iterable(segments)]
print(result)  # [1, 2, 3, 4, 5, 6]
# islice: slice without a list
naturals = itertools.count()
first_10 = [*itertools.islice(naturals, 10)]
print(first_10)  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
data = [1, 3, 5, 2, 4, 6, 8]
# takewhile: stops for good at the first element that fails the predicate (5)
print(list(itertools.takewhile(lambda x: x < 5, data)))  # [1, 3]
# dropwhile: skips only the leading run that passes; from the first failure (5)
# onward every element is kept, including later ones below 5
print(list(itertools.dropwhile(lambda x: x < 5, data)))  # [5, 2, 4, 6, 8]
7. Combinatorics and groupby
import itertools
# Cartesian product of "AB" with itself
letter_pairs = itertools.product("AB", repeat=2)
for combo in letter_pairs:
    word = "".join(combo)
    print(word, end=" ")  # AA AB BA BB
print()
items = ["a", "b", "c"]
# Same call shape for all three: pick 2 elements out of ``items``.
pair_builders = (
    itertools.permutations,  # ordered pairs, no element reused
    itertools.combinations,  # unordered pairs, no element reused
    itertools.combinations_with_replacement,  # unordered pairs, reuse allowed
)
for build_pairs in pair_builders:
    print(list(build_pairs(items, 2)))
# groupby: group by key (data must be sorted first)
files = [
    {"type": "image", "name": "photo.jpg"},
    {"type": "image", "name": "logo.png"},
    {"type": "video", "name": "clip.mp4"},
    {"type": "image", "name": "banner.webp"},
]


def file_type(entry):
    """Return the grouping key of one file record."""
    return entry["type"]


# groupby only merges adjacent items with equal keys, hence the sort on the same key.
files.sort(key=file_type)
for ftype, group in itertools.groupby(files, key=file_type):
    names = [entry["name"] for entry in group]
    print(f"{ftype}: {names}")
8. Useful infinite generators
import itertools
def fibonacci():
    """Yield the Fibonacci numbers 0, 1, 1, 2, 3, ... without end."""
    current, following = 0, 1
    while True:
        yield current
        upcoming = current + following
        current = following
        following = upcoming
# The generator is infinite; islice takes just the first ten values.
first_ten = itertools.islice(fibonacci(), 10)
print(list(first_ten))
# [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
def powers_of_2():
    """Yield 1, 2, 4, 8, ... (successive powers of two) without end."""
    exponent = 0
    while True:
        yield 1 << exponent
        exponent += 1
# takewhile ends the infinite stream at the first power that reaches 1000.
below_1000 = itertools.takewhile(lambda x: x < 1000, powers_of_2())
for p in below_1000:
    print(p, end=" ")  # 1 2 4 8 16 32 64 128 256 512
9. Processing large CSV files
import csv
from pathlib import Path
def read_csv(path, encoding="utf-8"):
    """Lazily yield one dict per data row of the CSV file at ``path``.

    The first line is used as the header, so each row maps column name to
    the cell's string value. The file stays open only while the generator
    is being consumed.
    """
    with open(path, newline="", encoding=encoding) as handle:
        yield from csv.DictReader(handle)
def filter_by_field(rows, field, value):
    """Yield only the rows whose ``field`` entry equals ``value``.

    Rows that lack ``field`` are compared as ``None``.
    """
    yield from (record for record in rows if record.get(field) == value)
def transform(rows):
    """Convert each row's ``"price"`` to a float and yield the row.

    Each row dict is updated in place and the same object is yielded.
    A price that is absent, ``None`` (what ``csv.DictReader`` gives for a
    short row) or blank (an empty cell) is treated as ``0.0``.

    Raises:
        ValueError: If a non-blank price cannot be parsed as a number.
    """
    for row in rows:
        raw_price = row.get("price")
        is_blank = raw_price is None or (
            isinstance(raw_price, str) and not raw_price.strip()
        )
        # float("") and float(None) raise, so blanks take the default explicitly.
        row["price"] = 0.0 if is_blank else float(raw_price)
        yield row
# Each stage is lazy: rows flow through one at a time while sum() consumes them.
sales_rows = read_csv("sales.csv")
electronics = filter_by_field(sales_rows, field="category", value="electronics")
pipeline = transform(electronics)
total = sum(row["price"] for row in pipeline)
print(f"Electronics total: {total:.2f}")
10. Best practices
- Use generators when the dataset is large or potentially infinite.
- Generator expression `(x for x in ...)` is lazy; list comprehension `[x for x in ...]` stores everything.
- Use `itertools.islice` to limit infinite generators without manual loops.
- `yield from` simplifies recursion and delegation to sub-generators.
- A generator can only be consumed once — create a new one if you need to iterate again.
- `itertools.groupby` requires data sorted by the same key used for grouping.
itertools quick reference
| Function | Description |
|---|---|
| `count(n)` | Infinite counter from n |
| `cycle(it)` | Loop over iterable indefinitely |
| `islice(it, n)` | Take first n elements |
| `chain(*its)` | Concatenate iterables |
| `takewhile(f, it)` | Take while condition is true |
| `dropwhile(f, it)` | Skip while condition is true |
| `groupby(it, key)` | Group by key |
| `product(*its)` | Cartesian product |
| `combinations(it, r)` | Combinations without replacement |
| `permutations(it, r)` | Permutations |
Related conversions
Frequent conversions across the catalogue: