Python Type Hints and Dataclasses: Cleaner, Safer Code
Type hints (type annotations) are not enforced at runtime in Python, but they dramatically improve readability, error detection and IDE support. Dataclasses reduce boilerplate when defining classes that primarily store data.
1. Basic type annotations
# Without annotations
def calculate_price(quantity, unit_price, discount):
    """Return the total price for ``quantity`` units after ``discount``.

    ``discount`` is a fraction of the total (``0.25`` means 25% off).
    Deliberately left unannotated: nothing here tells the reader or a
    type checker which types the arguments are expected to have.
    """
    return quantity * unit_price * (1 - discount)
# With annotations
def calculate_price(
    quantity: int,
    unit_price: float,
    discount: float = 0.0,
) -> float:
    """Return the total price for ``quantity`` units after ``discount``.

    Args:
        quantity: Number of units.
        unit_price: Price of a single unit.
        discount: Fraction taken off the total (``0.25`` means 25% off).

    Returns:
        ``quantity * unit_price * (1 - discount)``.
    """
    return quantity * unit_price * (1 - discount)
# Variables
name: str = "Alice"
age: int = 28
active: bool = True
price: float = 19.99
# Collection types (Python 3.9+)
names: list[str] = ["Alice", "Bob"]
coords: tuple[float, float] = (40.416, -3.703)
config: dict[str, int] = {"timeout": 30}
tags: set[str] = {"python", "web"}
2. Optional, Union and compound types
from typing import Optional, Union
# Optional[X] = X | None
def find_user(id: int) -> Optional[dict]:
    """Look up a user by ``id``; this stub always returns ``None``.

    The ``Optional[dict]`` return type tells callers that the result
    may be ``None`` and must be checked before use.
    """
    return None
# Python 3.10+ syntax
def find_user_v2(id: int) -> dict | None:
return None
# Union: accept multiple types
def format_value(v: Union[int, float, str]) -> str:
    """Return ``str(v)`` for an int, float or str argument."""
    return str(v)
# Python 3.10+
def format_v2(v: int | float | str) -> str:
return str(v)
from typing import Any
def log(data: Any) -> None:
    """Print ``data`` to standard output.

    ``Any`` switches off type checking for the argument, so every
    value is accepted.
    """
    print(data)
3. Callable, Sequence, Mapping
from typing import Callable, Sequence, Mapping, Iterable
def apply(func: Callable[[int, int], int], a: int, b: int) -> int:
    """Call ``func`` with ``a`` and ``b`` and return its result.

    ``Callable[[int, int], int]`` describes a callable taking two ints
    and returning an int.
    """
    return func(a, b)
result = apply(lambda x, y: x + y, 3, 4)
def count(items: Sequence[str]) -> int:
    """Return the number of elements in ``items``.

    ``Sequence`` accepts any sized, indexable collection (list, tuple,
    ...), not just ``list``.
    """
    return len(items)
def get_value(data: Mapping[str, int], key: str) -> int:
    """Return ``data[key]``, or ``0`` when ``key`` is missing.

    ``Mapping`` is the read-only view of a dict-like object, so the
    signature promises not to modify ``data``.
    """
    return data.get(key, 0)
def first_n(it: Iterable[int], n: int) -> list[int]:
    """Return the first ``n`` items of ``it`` as a list.

    Returns fewer items when ``it`` yields fewer than ``n``, and an
    empty list when ``n`` is zero or negative.  At most ``n`` items are
    pulled from ``it``, so the caller can keep consuming a one-shot
    iterator from where this function stopped.
    """
    from itertools import islice

    # islice rejects negative counts; clamp so that n <= 0 yields [].
    return list(islice(it, max(n, 0)))
4. TypeVar and generics
from typing import TypeVar, Generic
T = TypeVar("T")


def first(lst: list[T]) -> T:
    """Return the first element of ``lst``.

    The type variable ``T`` links the element type of the argument to
    the return type: a ``list[int]`` gives back an ``int``, a
    ``list[str]`` a ``str``.

    Raises:
        IndexError: If ``lst`` is empty.
    """
    return lst[0]
print(first([1, 2, 3])) # int
print(first(["a", "b"])) # str
class Stack(Generic[T]):
    """A last-in, first-out stack whose items all have type ``T``."""

    def __init__(self) -> None:
        """Create an empty stack."""
        self._items: list[T] = []

    def push(self, item: T) -> None:
        """Put ``item`` on top of the stack."""
        self._items.append(item)

    def pop(self) -> T:
        """Remove and return the most recently pushed item.

        Raises:
            IndexError: If the stack is empty.
        """
        return self._items.pop()

    def __len__(self) -> int:
        """Return the number of items currently on the stack."""
        return len(self._items)
stack: Stack[int] = Stack()
stack.push(1)
stack.push(2)
print(stack.pop()) # 2
5. dataclasses: data classes without boilerplate
from dataclasses import dataclass, field
@dataclass
class Product:
    """A product with a price and a stock level.

    ``@dataclass`` generates ``__init__``, ``__repr__`` and ``__eq__``
    from the annotated fields below.
    """

    name: str
    price: float
    stock: int = 0
    active: bool = True

    def price_with_tax(self, rate: float = 0.2) -> float:
        """Return ``price`` increased by ``rate`` (``0.2`` means 20%)."""
        return self.price * (1 + rate)

    def __str__(self) -> str:
        """Return a short label such as ``Keyboard ($49.99)``."""
        return f"{self.name} (${self.price:.2f})"
p = Product("Keyboard", 49.99, stock=100)
print(p) # Keyboard ($49.99)
print(p.price_with_tax()) # 59.988
print(p == Product("Keyboard", 49.99, 100)) # True (auto __eq__)
Fields with complex defaults
from dataclasses import dataclass, field
@dataclass
class Order:
    """A customer order whose mutable fields get a fresh default per instance."""

    id: int
    customer: str
    # NEVER write ``items: list = []``: dataclasses reject list/dict/set
    # defaults with a ValueError, because a single object would be shared
    # by every instance.  ``default_factory`` builds a new one each time.
    items: list[str] = field(default_factory=list)
    discount: float = field(default=0.0)
    tags: set[str] = field(default_factory=set)
    # Excluded from __init__ and __repr__.
    _cache: float = field(default=0.0, init=False, repr=False)
order = Order(id=1, customer="Alice")
order.items.append("Keyboard")
print(order)
# Order(id=1, customer='Alice', items=['Keyboard'], discount=0.0, tags=set())
6. frozen, order and slots
from dataclasses import dataclass
# frozen=True: immutable (like namedtuple but with methods)
@dataclass(frozen=True)
class Point:
    """An immutable 2-D point.

    ``frozen=True`` makes attribute assignment raise
    ``FrozenInstanceError`` and, together with the generated
    ``__eq__``, makes instances hashable.
    """

    x: float
    y: float

    def distance_to_origin(self) -> float:
        """Return the Euclidean distance from this point to ``(0, 0)``."""
        return (self.x**2 + self.y**2) ** 0.5
p = Point(3.0, 4.0)
print(p.distance_to_origin()) # 5.0
# p.x = 10 → FrozenInstanceError
# order=True: enables <, <=, >, >=
@dataclass(order=True)
class Temperature:
    """A temperature reading that supports ``<``, ``<=``, ``>`` and ``>=``.

    With ``order=True`` instances compare like tuples of their fields
    in declaration order: first ``value``, then ``unit``.
    """

    value: float
    unit: str = "C"
temps = [Temperature(30), Temperature(25), Temperature(35)]
print(sorted(temps))
# slots=True (Python 3.10+): memory efficient
@dataclass(slots=True)
class Sensor:
id: int
value: float
7. post_init: validation after creation
from dataclasses import dataclass, field
@dataclass
class Range:
    """A numeric range with a lower and an upper bound, validated on creation.

    Raises:
        ValueError: If ``minimum`` is greater than ``maximum``.
    """

    minimum: float
    maximum: float
    # Computed in __post_init__.  Declared as a field so type checkers
    # know the attribute exists, but kept out of __init__, __repr__ and
    # comparisons because it is derived from the two bounds.
    span: float = field(init=False, repr=False, compare=False)

    def __post_init__(self) -> None:
        """Validate the bounds, then compute ``span``."""
        if self.minimum > self.maximum:
            raise ValueError(
                f"minimum ({self.minimum}) cannot exceed maximum ({self.maximum})"
            )
        self.span = self.maximum - self.minimum
r = Range(0.0, 100.0)
print(r.span)  # 100.0

# An inverted range is rejected inside __post_init__.
try:
    Range(50.0, 10.0)
except ValueError as e:
    print(e)
8. TypedDict for typed dictionaries
from typing import TypedDict, NotRequired
class DBConfig(TypedDict):
    """Database connection settings as a plain dict with typed keys.

    All five keys are required.
    """

    host: str
    port: int
    name: str
    user: str
    password: str
class AppConfig(TypedDict):
    """Application settings; ``database`` nests a ``DBConfig`` dict."""

    database: DBConfig
    debug: bool
    port: NotRequired[int]  # Key may be omitted (NotRequired needs Python 3.11+)
config: AppConfig = {
"database": {
"host": "localhost", "port": 5432,
"name": "mydb", "user": "admin", "password": "secret",
},
"debug": False,
}
9. Type checking with mypy
pip install mypy
mypy my_module.py --strict
def greet(name: str) -> str:
    """Return a greeting for ``name``, e.g. ``"Hello, Alice"``."""
    return "Hello, " + name
result = greet(42)
# mypy error: Argument 1 to "greet" has incompatible type "int"; expected "str"
# mypy.ini
[mypy]
strict = True
ignore_missing_imports = True
10. Best practices
- Annotate public functions at minimum with parameter and return types.
- Use `Optional[X]` / `X | None` whenever a function may return None.
- Don't over-annotate: obvious local variables don't need annotations.
- `dataclass` vs `namedtuple`: dataclass is mutable and supports methods; namedtuple is immutable and hashable.
- Use `frozen=True` when you need immutability and hashability.
- Avoid `Any`: it's an escape hatch, not a type.
- Run mypy in CI/CD to catch type errors automatically.
Comparison
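| Feature | Plain class | dataclass | namedtuple | TypedDict |
|---|---|---|---|---|
| Auto `__init__` | No | Yes | Yes | N/A |
| Auto `__repr__` | No | Yes | Yes | N/A |
| Auto `__eq__` | No | Yes | Yes | N/A |
| Mutable | Yes | Yes | No | N/A |
| Hashable | No* | No* | Yes | N/A |
| Methods | Yes | Yes | Limited | No |

\* A plain class is hashable by identity until it defines `__eq__` without `__hash__`; a dataclass becomes hashable with `frozen=True` (or `unsafe_hash=True`).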
Related conversions
Frequent conversions across the catalogue: