Python Data Validation with Pydantic: Models, Validators and Parsing
Pydantic is the most popular data validation library in the Python ecosystem. It converts and validates data automatically, generates JSON schemas and powers FastAPI, SQLModel and many modern tools.
1. Installation and first model
pip install pydantic # Pydantic v2
from pydantic import BaseModel, Field
from typing import Optional
from datetime import datetime
class User(BaseModel):
    # Account record: plain type hints are all Pydantic needs to validate
    # and coerce incoming values.

    # Required fields (no default value).
    id: int
    name: str
    email: str
    age: int

    # Fields with defaults.
    active: bool = True
    # default_factory is called for each new instance, so the timestamp is
    # taken at construction time rather than at class-definition time.
    created_at: datetime = Field(default_factory=datetime.now)
    bio: Optional[str] = None
# Values are validated and coerced on construction: the string "28" passed
# for `age` is converted to the int 28.
user = User(id=1, name="Alice Smith", email="alice@example.com", age="28")

print(user.name)  # Alice Smith
print(type(user.age))  # <class 'int'>
print(user.active)  # True (default)

# Export as a plain dict
print(user.model_dump())

# Export as a JSON string
print(user.model_dump_json(indent=2))
2. Field: detailed constraints
from pydantic import BaseModel, Field
from typing import Annotated
class Product(BaseModel):
    # Catalogue item; every field carries its own declarative constraint.

    # Pydantic v2 configuration. The nested `class Config` is the v1 style
    # and is deprecated in v2 (it triggers a deprecation warning), so the
    # setting lives in `model_config` instead. A plain dict is accepted here,
    # which avoids needing an extra import for ConfigDict.
    # populate_by_name lets callers pass either `cost_price` or `costPrice`.
    model_config = {"populate_by_name": True}

    id: int = Field(gt=0)
    name: str = Field(min_length=1, max_length=200)
    price: float = Field(gt=0, le=99999.99)
    stock: int = Field(ge=0, default=0)
    discount: float = Field(ge=0.0, le=1.0, default=0.0)
    # Two uppercase letters followed by exactly six digits.
    sku: str = Field(pattern=r"^[A-Z]{2}\d{6}$")
    # Alias for camelCase APIs
    cost_price: float = Field(alias="costPrice", gt=0)
# Reusable constrained types built with Annotated (the style Pydantic v2
# recommends): the constraint travels with the type alias.
PositivePrice = Annotated[float, Field(gt=0, le=99999.99)]
ShortName = Annotated[str, Field(min_length=1, max_length=100)]


class ProductV2(BaseModel):
    # Same idea as Product, with the constraints supplied by the aliases above.
    name: ShortName
    price: PositivePrice
3. Custom validators
from pydantic import BaseModel, field_validator, model_validator
from typing import Optional
class Order(BaseModel):
    # Order whose final_total is derived from total and discount once the
    # individual fields have been validated.
    customer: str
    total: float
    discount: float = 0.0
    final_total: Optional[float] = None

    @field_validator("customer")
    @classmethod
    def name_not_empty(cls, v: str) -> str:
        """Strip surrounding whitespace, reject blank names, title-case the rest."""
        cleaned = v.strip()
        if cleaned:
            return cleaned.title()
        raise ValueError("Customer name cannot be empty")

    @field_validator("total", "discount")
    @classmethod
    def must_be_positive(cls, v: float, info) -> float:
        """Reject negative amounts; round accepted values to two decimals."""
        if v < 0:
            # info.field_name names the field currently being validated.
            message = f"{info.field_name} cannot be negative"
            raise ValueError(message)
        return round(v, 2)

    @model_validator(mode="after")
    def compute_final_total(self) -> "Order":
        """Store the total reduced by the discount fraction in final_total."""
        remaining_fraction = 1 - self.discount
        self.final_total = round(self.total * remaining_fraction, 2)
        return self
# The customer validator strips and title-cases the name; the model
# validator then fills in final_total.
order = Order(
    customer=" alice ",
    total=100.0,
    discount=0.15,
)
print(order.customer)  # Alice
print(order.final_total)  # 85.0
4. Nested models
from pydantic import BaseModel
from typing import List, Optional
from datetime import datetime
class Address(BaseModel):
    # Postal address; country falls back to "US" when omitted.
    street: str
    city: str
    postal_code: str
    country: str = "US"
class OrderLine(BaseModel):
    # A single product line within an order.
    product_id: int
    name: str
    quantity: int
    unit_price: float

    @property
    def subtotal(self) -> float:
        """Line amount: quantity multiplied by unit price."""
        units, price = self.quantity, self.unit_price
        return units * price
class FullOrder(BaseModel):
    # Order composed of nested models: one Address and a list of OrderLine.
    # Plain dicts passed for these fields are validated into the nested models.
    id: int
    customer: str
    address: Address
    lines: List[OrderLine]
    created_at: Optional[datetime] = None
    notes: Optional[str] = None

    @property
    def total(self) -> float:
        """Sum of the subtotals of every line in the order."""
        return sum(line.subtotal for line in self.lines)
# Nested dicts are turned into Address / OrderLine instances during validation.
order = FullOrder(
    id=1,
    customer="Alice",
    address={
        "street": "123 Main St",
        "city": "New York",
        "postal_code": "10001",
    },
    lines=[
        {"product_id": 10, "name": "Keyboard", "quantity": 2, "unit_price": 49.99},
        {"product_id": 20, "name": "Mouse", "quantity": 1, "unit_price": 29.99},
    ],
)

print(order.total)  # 129.97
print(order.address.city)  # New York
5. Serialization and deserialization
from pydantic import BaseModel
from datetime import date
import json
class Employee(BaseModel):
    # Employee record used to demonstrate dumping and parsing.
    id: int
    name: str
    salary: float
    # Accepts a date object or an ISO-formatted string such as "2024-06-01".
    hire_date: date
emp = Employee(
    id=1,
    name="Alice",
    salary=75000.0,
    hire_date=date(2023, 1, 15),
)

# model_dump: model → Python dict
d = emp.model_dump()

# model_dump_json: model → JSON string
json_str = emp.model_dump_json()
print(json_str)  # {"id":1,"name":"Alice","salary":75000.0,"hire_date":"2023-01-15"}

# model_validate: dict → model (with coercion)
data = {
    "id": 2,
    "name": "Bob",
    "salary": "65000",
    "hire_date": "2024-06-01",
}
emp2 = Employee.model_validate(data)
print(emp2.salary)  # 65000.0 (str → float)
print(emp2.hire_date)  # 2024-06-01 (str → date)

# model_validate_json: JSON string → model
emp3 = Employee.model_validate_json(json_str)

# JSON Schema generation
print(json.dumps(Employee.model_json_schema(), indent=2))
6. model_config: advanced configuration
from pydantic import BaseModel
from pydantic.config import ConfigDict
class AppConfig(BaseModel):
    # Application settings model demonstrating model_config options.
    model_config = ConfigDict(
        str_strip_whitespace=True,  # Trim leading/trailing whitespace on str fields
        frozen=True,  # Immutable: attribute assignment raises
        extra="forbid",  # Reject undeclared fields
        validate_assignment=True,  # Re-validate values on attribute assignment
        populate_by_name=True,  # Accept field names as well as aliases
    )

    host: str
    port: int
    debug: bool = False
# With extra="forbid", an undeclared keyword makes construction fail.
try:
    AppConfig(host="localhost", port=8000, unknown_field="x")
except Exception as exc:
    print(exc)  # Extra inputs are not permitted

# With frozen=True, assigning to a field after construction fails.
config = AppConfig(host="localhost", port=8000)
try:
    config.host = "other"
except Exception as exc:
    print(exc)
7. Discriminated unions
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field
class Circle(BaseModel):
    # Shape variant tagged with type == "circle".
    type: Literal["circle"] = "circle"
    radius: float

    def area(self) -> float:
        """Return the area of the circle (pi times radius squared)."""
        import math

        return math.pi * self.radius ** 2
class Rectangle(BaseModel):
    # Shape variant tagged with type == "rectangle".
    type: Literal["rectangle"] = "rectangle"
    width: float
    height: float

    def area(self) -> float:
        """Return the area of the rectangle (width times height)."""
        return self.width * self.height
class Triangle(BaseModel):
    # Shape variant tagged with type == "triangle".
    type: Literal["triangle"] = "triangle"
    base: float
    height: float

    def area(self) -> float:
        """Return the area of the triangle (half of base times height)."""
        return 0.5 * self.base * self.height
# Discriminated (tagged) union: the `type` literal declared on each model is
# the discriminator, so Pydantic selects the matching class from that value
# instead of trying every union member. A bare `...` as Annotated metadata
# configures no discriminator at all.
Shape = Annotated[
    Union[Circle, Rectangle, Triangle],
    Field(discriminator="type"),
]
class Drawing(BaseModel):
    # Container holding any mix of the Shape variants.
    shapes: list[Shape]
# Each dict is validated into the shape model matching its "type" value.
drawing = Drawing(
    shapes=[
        {"type": "circle", "radius": 5},
        {"type": "rectangle", "width": 4, "height": 3},
    ]
)

for shape in drawing.shapes:
    print(f"{shape.type}: area = {shape.area():.2f}")
8. Validate external API data
import requests
from pydantic import BaseModel, field_validator
from typing import Optional, List
class Repository(BaseModel):
    # Fields read from each repository object in the API response; the
    # description and language may be absent or null.
    id: int
    name: str
    full_name: str
    description: Optional[str] = None
    stargazers_count: int
    language: Optional[str] = None
    html_url: str

    @field_validator("name")
    @classmethod
    def valid_name(cls, v: str) -> str:
        """Accept a non-empty name of at most 100 characters; reject anything else."""
        if v and len(v) <= 100:
            return v
        raise ValueError("Invalid repository name")
def get_user_repos(username: str) -> List[Repository]:
    """Fetch `/users/{username}/repos` from the GitHub API and validate each entry.

    Any error raised by `raise_for_status()` or by validating an entry
    propagates to the caller.
    """
    url = f"https://api.github.com/users/{username}/repos"
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return [Repository.model_validate(item) for item in response.json()]
repos = get_user_repos("torvalds")

# Print the three repositories with the most stars, highest first.
for repo in sorted(
    repos,
    key=lambda item: item.stargazers_count,
    reverse=True,
)[:3]:
    print(f"{repo.name}: {repo.stargazers_count} stars")
9. Best practices
- Use `Field()` whenever you need constraints, aliases or complex defaults.
- Prefer `model_validate()` over the constructor for data from external sources.
- Use `extra="forbid"` in APIs to reject unexpected fields and catch bugs early.
- Use `frozen=True` for configuration models or cache keys.
- Reuse types with `Annotated` to avoid repeating the same constraints.
- Pydantic v2 (default since 2023) uses `model_dump()` and `model_validate()` — not `.dict()` and `.parse_obj()` from v1.
Main methods (Pydantic v2)
| Method | Description |
|---|---|
| `Model(**data)` | Create instance with validation |
| `model_validate(dict)` | dict → model |
| `model_validate_json(str)` | JSON string → model |
| `model_dump()` | model → dict |
| `model_dump_json()` | model → JSON string |
| `model_json_schema()` | Generate JSON schema |
| `model_copy(update={})` | Copy with changes |
Related conversions
Frequent conversions across the catalogue: