Introduction
This tutorial covers querying in SQLAlchemy ORM, from basic selects to complex joins and aggregations:- Building SELECT statements with
select() - Filtering with
where()andfilter_by() - Joining tables and relationships
- Ordering, grouping, and limiting results
- Aggregations and subqueries
- Query performance optimization
Setup: Example Models
from typing import List, Optional
from datetime import datetime
from sqlalchemy import create_engine, String, Text, DateTime, ForeignKey, func
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship, Session
class Base(DeclarativeBase):
pass
class Author(Base):
__tablename__ = "author"
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str] = mapped_column(String(100))
email: Mapped[str] = mapped_column(String(100))
bio: Mapped[Optional[str]] = mapped_column(Text)
books: Mapped[List["Book"]] = relationship(back_populates="author")
def __repr__(self) -> str:
return f"Author(id={self.id!r}, name={self.name!r})"
class Book(Base):
__tablename__ = "book"
id: Mapped[int] = mapped_column(primary_key=True)
title: Mapped[str] = mapped_column(String(200))
isbn: Mapped[str] = mapped_column(String(13))
published_date: Mapped[datetime] = mapped_column(DateTime)
price: Mapped[float]
author_id: Mapped[int] = mapped_column(ForeignKey("author.id"))
author: Mapped["Author"] = relationship(back_populates="books")
reviews: Mapped[List["Review"]] = relationship(back_populates="book")
def __repr__(self) -> str:
return f"Book(id={self.id!r}, title={self.title!r})"
class Review(Base):
__tablename__ = "review"
id: Mapped[int] = mapped_column(primary_key=True)
book_id: Mapped[int] = mapped_column(ForeignKey("book.id"))
reviewer_name: Mapped[str] = mapped_column(String(100))
rating: Mapped[int] # 1-5 stars
comment: Mapped[str] = mapped_column(Text)
review_date: Mapped[datetime] = mapped_column(DateTime, default=datetime.now)
book: Mapped["Book"] = relationship(back_populates="reviews")
engine = create_engine("sqlite://", echo=True)
Base.metadata.create_all(engine)
Basic SELECT Queries
Select All Entities
from sqlalchemy import select
from sqlalchemy.orm import Session
with Session(engine) as session:
# Select all authors
stmt = select(Author)
for author in session.scalars(stmt):
print(author)
# Get all results as a list
authors = session.scalars(select(Author)).all()
print(f"Found {len(authors)} authors")
Use
session.scalars() when selecting complete ORM entities. Use session.execute() when selecting specific columns or tuples.Select Specific Columns
with Session(engine) as session:
# Select specific columns
stmt = select(Author.name, Author.email)
for row in session.execute(stmt):
print(f"{row.name}: {row.email}")
# Access by index
for row in session.execute(stmt):
print(f"{row[0]}: {row[1]}")
Select with Column Aliases
from sqlalchemy import label
with Session(engine) as session:
stmt = select(
Author.name.label('author_name'),
func.count(Book.id).label('book_count')
).join(Book).group_by(Author.id)
for row in session.execute(stmt):
print(f"{row.author_name} has written {row.book_count} book(s)")
Filtering Results
WHERE Clause with where()
with Session(engine) as session:
# Simple equality
stmt = select(Author).where(Author.name == "J.K. Rowling")
author = session.scalars(stmt).one()
# Multiple conditions (AND)
stmt = select(Book).where(
Book.price > 20,
Book.published_date > datetime(2020, 1, 1)
)
# Using and_() explicitly
from sqlalchemy import and_
stmt = select(Book).where(
and_(
Book.price > 20,
Book.price < 50
)
)
# OR conditions
from sqlalchemy import or_
stmt = select(Author).where(
or_(
Author.name.like('J%'),
Author.email.like('%@gmail.com')
)
)
filter_by() for Equality
with Session(engine) as session:
# filter_by uses keyword arguments
author = session.scalars(
select(Author).filter_by(name="George Orwell")
).one()
# Multiple conditions
books = session.scalars(
select(Book).filter_by(author_id=1, price=29.99)
).all()
Use
filter_by() for simple equality checks. Use where() for complex conditions, comparisons, and SQL expressions.Common Filter Patterns
- String Matching
- Numeric Comparisons
- NULL Handling
- Date/Time
with Session(engine) as session:
# LIKE pattern
stmt = select(Book).where(Book.title.like('%Python%'))
# Case-insensitive LIKE (ilike)
stmt = select(Book).where(Book.title.ilike('%python%'))
# Starts with
stmt = select(Book).where(Book.title.startswith('The'))
# Ends with
stmt = select(Book).where(Book.title.endswith('Guide'))
# Contains
stmt = select(Book).where(Book.title.contains('SQL'))
with Session(engine) as session:
# Greater than
stmt = select(Book).where(Book.price > 30)
# Less than or equal
stmt = select(Book).where(Book.price <= 25)
# Between
stmt = select(Book).where(Book.price.between(20, 40))
# IN clause
stmt = select(Book).where(Book.id.in_([1, 2, 3, 4, 5]))
# NOT IN
stmt = select(Book).where(Book.id.not_in([1, 2, 3]))
with Session(engine) as session:
# IS NULL
stmt = select(Author).where(Author.bio.is_(None))
# IS NOT NULL
stmt = select(Author).where(Author.bio.is_not(None))
# Alternative syntax
stmt = select(Author).where(Author.bio == None)
stmt = select(Author).where(Author.bio != None)
from datetime import datetime, timedelta
with Session(engine) as session:
# Books published this year
current_year = datetime.now().year
stmt = select(Book).where(
func.extract('year', Book.published_date) == current_year
)
# Books published in last 30 days
cutoff = datetime.now() - timedelta(days=30)
stmt = select(Book).where(Book.published_date >= cutoff)
# Date range
stmt = select(Book).where(
Book.published_date.between(
datetime(2020, 1, 1),
datetime(2023, 12, 31)
)
)
Ordering Results
ORDER BY
with Session(engine) as session:
# Ascending (default)
stmt = select(Book).order_by(Book.title)
# Descending
stmt = select(Book).order_by(Book.price.desc())
# Multiple columns
stmt = select(Book).order_by(
Book.author_id,
Book.published_date.desc()
)
# Nulls first/last
stmt = select(Author).order_by(Author.bio.nulls_first())
stmt = select(Author).order_by(Author.bio.nulls_last())
Case-Insensitive Ordering
with Session(engine) as session:
# Order by lowercase version
stmt = select(Author).order_by(func.lower(Author.name))
Limiting and Pagination
LIMIT and OFFSET
with Session(engine) as session:
# First 10 results
stmt = select(Book).order_by(Book.title).limit(10)
books = session.scalars(stmt).all()
# Skip first 10, get next 10 (pagination)
stmt = select(Book).order_by(Book.title).offset(10).limit(10)
# Get single result
stmt = select(Author).where(Author.id == 1)
author = session.scalars(stmt).one() # Raises if not exactly one
# Get first result
stmt = select(Book).order_by(Book.price.desc())
most_expensive = session.scalars(stmt).first()
Use
one() when you expect exactly one result. Use first() to get the first result or None. Use one_or_none() for one result or None.Pagination Helper
def paginate(session: Session, stmt, page: int = 1, per_page: int = 20):
"""Paginate query results."""
offset = (page - 1) * per_page
# Get total count
count_stmt = select(func.count()).select_from(stmt.subquery())
total = session.scalar(count_stmt)
# Get page results
page_stmt = stmt.offset(offset).limit(per_page)
items = session.scalars(page_stmt).all()
return {
'items': items,
'total': total,
'page': page,
'per_page': per_page,
'total_pages': (total + per_page - 1) // per_page
}
# Usage
with Session(engine) as session:
stmt = select(Book).order_by(Book.title)
result = paginate(session, stmt, page=2, per_page=10)
print(f"Page {result['page']} of {result['total_pages']}")
for book in result['items']:
print(book.title)
Joins
Inner Join
with Session(engine) as session:
# Join using relationship
stmt = select(Book).join(Book.author)
# Join with filter
stmt = select(Book).join(Book.author).where(
Author.name == "Stephen King"
)
# Select from both entities
stmt = select(Book, Author).join(Book.author)
for book, author in session.execute(stmt):
print(f"{book.title} by {author.name}")
Explicit Join Conditions
with Session(engine) as session:
# Explicit join condition
stmt = select(Book).join(Author, Book.author_id == Author.id)
# Join from specific entity
stmt = select(Author).join(Book, Author.id == Book.author_id)
Outer Join
with Session(engine) as session:
# Left outer join - includes authors without books
stmt = select(Author).outerjoin(Author.books)
# Get authors with book count (including 0)
stmt = (
select(Author, func.count(Book.id).label('book_count'))
.outerjoin(Author.books)
.group_by(Author.id)
)
for author, count in session.execute(stmt):
print(f"{author.name}: {count} book(s)")
Multiple Joins
with Session(engine) as session:
# Join through relationships
stmt = (
select(Review, Book, Author)
.join(Review.book)
.join(Book.author)
)
for review, book, author in session.execute(stmt):
print(f"{review.rating}/5 - {book.title} by {author.name}")
Self-Referential Joins
# Define a tree structure
from sqlalchemy import aliased
with Session(engine) as session:
# Create aliases for self-join
parent_author = aliased(Author)
# If we had a mentor relationship:
# stmt = select(Author, parent_author).join(
# parent_author, Author.mentor_id == parent_author.id
# )
Aggregations
COUNT, SUM, AVG, MIN, MAX
with Session(engine) as session:
# Count all books
count = session.scalar(select(func.count(Book.id)))
print(f"Total books: {count}")
# Average price
avg_price = session.scalar(select(func.avg(Book.price)))
print(f"Average price: ${avg_price:.2f}")
# Min and max
min_price = session.scalar(select(func.min(Book.price)))
max_price = session.scalar(select(func.max(Book.price)))
# Sum
total_value = session.scalar(select(func.sum(Book.price)))
GROUP BY
with Session(engine) as session:
# Books per author
stmt = (
select(
Author.name,
func.count(Book.id).label('book_count')
)
.join(Book)
.group_by(Author.id, Author.name)
)
for name, count in session.execute(stmt):
print(f"{name}: {count} book(s)")
HAVING Clause
with Session(engine) as session:
# Authors with more than 5 books
stmt = (
select(
Author.name,
func.count(Book.id).label('book_count')
)
.join(Book)
.group_by(Author.id, Author.name)
.having(func.count(Book.id) > 5)
)
for name, count in session.execute(stmt):
print(f"{name}: {count} books")
Complex Aggregations
with Session(engine) as session:
# Average rating per book with review count
stmt = (
select(
Book.title,
func.avg(Review.rating).label('avg_rating'),
func.count(Review.id).label('review_count'),
func.min(Review.rating).label('lowest_rating'),
func.max(Review.rating).label('highest_rating')
)
.join(Book.reviews)
.group_by(Book.id, Book.title)
.having(func.count(Review.id) >= 5) # At least 5 reviews
.order_by(func.avg(Review.rating).desc())
)
for row in session.execute(stmt):
print(f"{row.title}:")
print(f" Average: {row.avg_rating:.1f}/5")
print(f" Reviews: {row.review_count}")
print(f" Range: {row.lowest_rating}-{row.highest_rating}")
Subqueries
Scalar Subqueries
with Session(engine) as session:
# Get authors with their average book price
avg_price_subq = (
select(func.avg(Book.price))
.where(Book.author_id == Author.id)
.scalar_subquery()
)
stmt = select(Author.name, avg_price_subq.label('avg_price'))
for name, avg_price in session.execute(stmt):
print(f"{name}: ${avg_price:.2f} avg")
Correlated Subqueries
with Session(engine) as session:
# Authors who have books more expensive than their average
avg_author_price = (
select(func.avg(Book.price))
.where(Book.author_id == Author.id)
.scalar_subquery()
)
stmt = (
select(Author)
.join(Book)
.where(Book.price > avg_author_price)
.distinct()
)
authors = session.scalars(stmt).all()
EXISTS Subqueries
from sqlalchemy import exists
with Session(engine) as session:
# Authors who have at least one 5-star review
has_five_star = (
exists()
.where(Review.book_id == Book.id)
.where(Review.rating == 5)
.where(Book.author_id == Author.id)
)
stmt = select(Author).where(has_five_star)
for author in session.scalars(stmt):
print(f"{author.name} has a 5-star review!")
FROM Subquery
with Session(engine) as session:
# Create subquery
book_stats = (
select(
Book.author_id,
func.count(Book.id).label('book_count'),
func.avg(Book.price).label('avg_price')
)
.group_by(Book.author_id)
.subquery()
)
# Query from subquery
stmt = (
select(Author.name, book_stats.c.book_count, book_stats.c.avg_price)
.join(book_stats, Author.id == book_stats.c.author_id)
.where(book_stats.c.book_count > 3)
)
for name, count, avg in session.execute(stmt):
print(f"{name}: {count} books, ${avg:.2f} avg")
Common Table Expressions (CTEs)
with Session(engine) as session:
# Define CTE
prolific_authors = (
select(Author.id, Author.name)
.join(Book)
.group_by(Author.id, Author.name)
.having(func.count(Book.id) >= 10)
.cte('prolific_authors')
)
# Use CTE in main query
stmt = (
select(
prolific_authors.c.name,
func.avg(Book.price).label('avg_price')
)
.join(Book, Book.author_id == prolific_authors.c.id)
.group_by(prolific_authors.c.name)
)
for name, avg_price in session.execute(stmt):
print(f"{name}: ${avg_price:.2f}")
UNION Queries
from sqlalchemy import union_all
with Session(engine) as session:
# Books over $50 OR with 5-star average rating
expensive = select(Book.id).where(Book.price > 50)
highly_rated = (
select(Book.id)
.join(Review)
.group_by(Book.id)
.having(func.avg(Review.rating) >= 4.5)
)
combined = union_all(expensive, highly_rated)
# Get the actual books
stmt = select(Book).where(Book.id.in_(combined))
books = session.scalars(stmt).all()
Window Functions
from sqlalchemy import over
with Session(engine) as session:
# Rank books by price within each author
stmt = select(
Book.title,
Author.name,
Book.price,
func.rank().over(
partition_by=Book.author_id,
order_by=Book.price.desc()
).label('price_rank')
).join(Book.author)
for title, author, price, rank in session.execute(stmt):
print(f"#{rank}: {title} by {author} - ${price}")
Query Performance Tips
Use SELECT only what you need
# Bad - loads all columns
books = session.scalars(select(Book)).all()
# Good - only needed columns
titles = session.scalars(select(Book.title)).all()
Use joins instead of multiple queries
# Bad - N+1 query problem
for book in session.scalars(select(Book)):
print(book.author.name) # Lazy load!
# Good - single query with join
stmt = select(Book).join(Book.author)
for book in session.scalars(stmt):
print(book.author.name)
Complete Example: Book Analytics
from sqlalchemy import select, func
from sqlalchemy.orm import Session
def get_top_rated_books(session: Session, min_reviews: int = 5, limit: int = 10):
"""Get books with highest average rating."""
stmt = (
select(
Book.title,
Author.name.label('author'),
func.avg(Review.rating).label('avg_rating'),
func.count(Review.id).label('review_count')
)
.join(Book.author)
.join(Book.reviews)
.group_by(Book.id, Book.title, Author.name)
.having(func.count(Review.id) >= min_reviews)
.order_by(func.avg(Review.rating).desc())
.limit(limit)
)
return session.execute(stmt).all()
def get_author_statistics(session: Session, author_id: int):
"""Get comprehensive statistics for an author."""
stats_stmt = select(
func.count(Book.id).label('total_books'),
func.avg(Book.price).label('avg_price'),
func.sum(Book.price).label('total_value'),
func.min(Book.published_date).label('first_published'),
func.max(Book.published_date).label('last_published')
).where(Book.author_id == author_id)
stats = session.execute(stats_stmt).one()
# Get review statistics
review_stmt = (
select(
func.avg(Review.rating).label('avg_rating'),
func.count(Review.id).label('total_reviews')
)
.join(Book)
.where(Book.author_id == author_id)
)
review_stats = session.execute(review_stmt).one()
return {
'books': stats.total_books,
'avg_price': float(stats.avg_price or 0),
'total_value': float(stats.total_value or 0),
'first_published': stats.first_published,
'last_published': stats.last_published,
'avg_rating': float(review_stats.avg_rating or 0),
'total_reviews': review_stats.total_reviews
}
def search_books(session: Session, query: str, filters: dict = None):
"""Advanced book search with filters."""
stmt = select(Book).join(Book.author)
# Text search
if query:
stmt = stmt.where(
or_(
Book.title.ilike(f'%{query}%'),
Author.name.ilike(f'%{query}%')
)
)
# Apply filters
if filters:
if 'min_price' in filters:
stmt = stmt.where(Book.price >= filters['min_price'])
if 'max_price' in filters:
stmt = stmt.where(Book.price <= filters['max_price'])
if 'author_id' in filters:
stmt = stmt.where(Book.author_id == filters['author_id'])
if 'year' in filters:
stmt = stmt.where(
func.extract('year', Book.published_date) == filters['year']
)
return session.scalars(stmt).all()
# Usage
with Session(engine) as session:
# Top rated books
print("Top Rated Books:")
for title, author, rating, reviews in get_top_rated_books(session):
print(f"{title} by {author}: {rating:.1f}/5 ({reviews} reviews)")
# Author statistics
stats = get_author_statistics(session, author_id=1)
print(f"\nAuthor Stats: {stats}")
# Search
books = search_books(
session,
query='python',
filters={'min_price': 20, 'max_price': 50}
)
print(f"\nFound {len(books)} books")
Next Steps
Working with Relationships
Learn about relationship loading strategies and advanced patterns
Key Takeaways
- Use
select()to build queries; usewhere()for filtering session.scalars()for entities,session.execute()for tuples- Join using relationship attributes for cleaner code
- Use aggregations with
func.count(),func.avg(), etc. - Group with
group_by(), filter groups withhaving() - Subqueries and CTEs help build complex queries step by step
- Always consider query performance - avoid N+1 problems