Python

Terms defined: cursor, object-relational mapper (ORM), Uniform Resource Identifier (URI)

Querying from Python

import sqlite3
import sys

db_path = sys.argv[1]
connection = sqlite3.connect(db_path)
cursor = connection.execute("select count(*) from penguins;")
rows = cursor.fetchall()
print(rows)
[(344,)]

Incremental Fetch

import sqlite3
import sys

db_path = sys.argv[1]
connection = sqlite3.connect(db_path)
cursor = connection.cursor()
cursor = cursor.execute("select species, island from penguins limit 5;")
while row := cursor.fetchone():
    print(row)
('Adelie', 'Torgersen')
('Adelie', 'Torgersen')
('Adelie', 'Torgersen')
('Adelie', 'Torgersen')
('Adelie', 'Torgersen')

Insert, Delete, and All That

import sqlite3

connection = sqlite3.connect(":memory:")
cursor = connection.cursor()
cursor.execute("create table example(num integer);")

cursor.execute("insert into example values (10), (20);")
print("after insertion", cursor.execute("select * from example;").fetchall())

cursor.execute("delete from example where num < 15;")
print("after deletion", cursor.execute("select * from example;").fetchall())
after insertion [(10,), (20,)]
after deletion [(20,)]

Interpolating Values

import sqlite3

connection = sqlite3.connect(":memory:")
cursor = connection.cursor()
cursor.execute("create table example(num integer);")

cursor.executemany("insert into example values (?);", [(10,), (20,)])
print("after insertion", cursor.execute("select * from example;").fetchall())
after insertion [(10,), (20,)]
XKCD cartoon showing a mother scolding a school for not being more careful about SQL injection attacks
XKCD Exploits of a Mom

Exercise

Write a Python script that takes island, species, sex, and other values as command-line arguments and inserts an entry into the penguins database.

Script Execution

import sqlite3

SETUP = """\
drop table if exists example;
create table example(num integer);
insert into example values (10), (20);
"""

connection = sqlite3.connect(":memory:")
cursor = connection.cursor()
cursor.executescript(SETUP)
print("after insertion", cursor.execute("select * from example;").fetchall())
after insertion [(10,), (20,)]

SQLite Exceptions in Python

import sqlite3

SETUP = """\
create table example(num integer check(num > 0));
insert into example values (10);
insert into example values (-1);
insert into example values (20);
"""

connection = sqlite3.connect(":memory:")
cursor = connection.cursor()
try:
    cursor.executescript(SETUP)
except sqlite3.Error as exc:
    print(f"SQLite exception: {exc}")
print("after execution", cursor.execute("select * from example;").fetchall())
SQLite exception: CHECK constraint failed: num > 0
after execution [(10,)]

Python in SQLite

import sqlite3

SETUP = """\
create table example(num integer);
insert into example values (-10), (10), (20), (30);
"""


def clip(value):
    if value < 0:
        return 0
    if value > 20:
        return 20
    return value


connection = sqlite3.connect(":memory:")
connection.create_function("clip", 1, clip)
cursor = connection.cursor()
cursor.executescript(SETUP)
for row in cursor.execute("select num, clip(num) from example;").fetchall():
    print(row)
(-10, 0)
(10, 10)
(20, 20)
(30, 20)

Handling Dates and Times

from datetime import date
import sqlite3


# Convert date to ISO-formatted string when writing to database
def _adapt_date_iso(val):
    return val.isoformat()


sqlite3.register_adapter(date, _adapt_date_iso)


# Convert ISO-formatted string to date when reading from database
def _convert_date(val):
    return date.fromisoformat(val.decode())


sqlite3.register_converter("date", _convert_date)

SETUP = """\
create table events(
    happened date not null,
    description text not null
);
"""

connection = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES)
cursor = connection.cursor()
cursor.execute(SETUP)

cursor.executemany(
    "insert into events values (?, ?);",
    [(date(2024, 1, 10), "started tutorial"), (date(2024, 1, 29), "finished tutorial")],
)

for row in cursor.execute("select * from events;").fetchall():
    print(row)
(datetime.date(2024, 1, 10), 'started tutorial')
(datetime.date(2024, 1, 29), 'finished tutorial')

Exercise

Write a Python adapter that truncates real values to two decimal places as they are being written to the database.

SQL in Jupyter Notebooks

pip install jupysql
%load_ext sql
%sql sqlite:///db/penguins.db
Connecting to 'sqlite:///data/penguins.db'
%%sql
select species, count(*) as num
from penguins
group by species;
Running query in 'sqlite:///data/penguins.db'
species num
Adelie 152
Chinstrap 68
Gentoo 124

Pandas and SQL

pip install pandas
import pandas as pd
import sqlite3
import sys

db_path = sys.argv[1]
connection = sqlite3.connect(db_path)
query = "select species, count(*) as num from penguins group by species;"
df = pd.read_sql(query, connection)
print(df)
     species  num
0     Adelie  152
1  Chinstrap   68
2     Gentoo  124

Exercise

Write a command-line Python script that uses Pandas to re-create the penguins database.

Polars and SQL

pip install polars pyarrow adbc-driver-sqlite
import polars as pl
import sys

db_path = sys.argv[1]
uri = "sqlite:///{db_path}"
query = "select species, count(*) as num from penguins group by species;"
df = pl.read_database_uri(query, uri, engine="adbc")
print(df)
shape: (3, 2)
┌───────────┬─────┐
│ species   ┆ num │
│ ---       ┆ --- │
│ str       ┆ i64 │
╞═══════════╪═════╡
│ Adelie    ┆ 152 │
│ Chinstrap ┆ 68  │
│ Gentoo    ┆ 124 │
└───────────┴─────┘

Exercise

Write a command-line Python script that uses Polars to re-create the penguins database.

Object-Relational Mappers

from sqlmodel import Field, Session, SQLModel, create_engine, select
import sys


class Department(SQLModel, table=True):
    ident: str = Field(default=None, primary_key=True)
    name: str
    building: str


db_uri = f"sqlite:///{sys.argv[1]}"
engine = create_engine(db_uri)
with Session(engine) as session:
    statement = select(Department)
    for result in session.exec(statement).all():
        print(result)
building='Chesson' name='Genetics' ident='gen'
building='Fashet Extension' name='Histology' ident='hist'
building='Chesson' name='Molecular Biology' ident='mb'
building='TGVH' name='Endocrinology' ident='end'

Exercise

Write a command-line Python script that uses SQLModel to re-create the penguins database.

Relations with ORMs

class Staff(SQLModel, table=True):
    ident: str = Field(default=None, primary_key=True)
    personal: str
    family: str
    dept: Optional[str] = Field(default=None, foreign_key="department.ident")
    age: int


db_uri = f"sqlite:///{sys.argv[1]}"
engine = create_engine(db_uri)
SQLModel.metadata.create_all(engine)
with Session(engine) as session:
    statement = select(Department, Staff).where(Staff.dept == Department.ident)
    for dept, staff in session.exec(statement):
        print(f"{dept.name}: {staff.personal} {staff.family}")
Histology: Divit Dhaliwal
Molecular Biology: Indrans Sridhar
Molecular Biology: Pranay Khanna
Histology: Vedika Rout
Genetics: Abram Chokshi
Histology: Romil Kapoor
Molecular Biology: Ishaan Ramaswamy
Genetics: Nitya Lal