Python: Cache Functions to Disk
If you have a long-running or expensive function that ought not be re-invoked with the same arguments, then cache its output to disk.
Note: sqlite3 ships with Python's standard library, so this code has no third-party dependencies.
import functools
import hashlib
import json
import sqlite3
def memoize_to_sqlite(filename: str = "cache.db"):
    """
    Memoization decorator that caches the output of a function in a SQLite
    database.

    The cache key is the SHA-256 hex digest of
    ``<function name>:<repr of the positional-argument tuple>``. When keyword
    arguments are passed, the repr of their name-sorted items is appended, so
    positional-only calls keep the same key they always had. Results are
    stored as JSON text.

    Args:
        filename: Path of the SQLite database to use. The connection is
            opened once, when the decorator is created, and is shared by
            every call of the decorated function.

    Returns:
        A decorator that wraps a function with the SQLite-backed cache.

    Raises:
        TypeError: Raised from a call of the decorated function when its
            result cannot be serialized by ``json.dumps``.

    Note:
        * The key is built from ``func.__name__`` only, so two functions with
          the same name sharing one database also share cache entries.
        * ``f(1, 2)`` and ``f(1, b=2)`` produce different keys because the
          arguments are not normalized against the signature.
        * A cache hit returns the JSON round-tripped value (e.g. a tuple
          comes back as a list), while the first call returns the original
          object unchanged.
    """
    db_conn = sqlite3.connect(filename)
    db_conn.execute(
        "CREATE TABLE IF NOT EXISTS cache (hash TEXT PRIMARY KEY, result TEXT)"
    )

    def memoize(func):
        # Preserve the wrapped function's __name__, __doc__, etc.
        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            # Compute the hash of <function name>:<arguments>. The kwargs part
            # is only appended when present so keys written by positional-only
            # calls stay valid.
            key_text = f"{func.__name__}:{repr(tuple(args))}"
            if kwargs:
                # Sort by name so the key does not depend on call-site order.
                key_text += f":{repr(sorted(kwargs.items()))}"
            arg_hash = hashlib.sha256(key_text.encode("utf-8")).hexdigest()

            # Check if the result is already cached
            row = db_conn.execute(
                "SELECT result FROM cache WHERE hash = ?", (arg_hash,)
            ).fetchone()
            if row is not None:
                print(f"Cached result found for {arg_hash}. Returning it.")
                return json.loads(row[0])

            # Compute the result and cache it. Serialize before touching the
            # database so a non-serializable result never opens a transaction.
            result = func(*args, **kwargs)
            payload = json.dumps(result)
            # The connection context manager commits on success and rolls
            # back on error. OR REPLACE tolerates the key having been written
            # (e.g. by another process) since the lookup above.
            with db_conn:
                db_conn.execute(
                    "INSERT OR REPLACE INTO cache (hash, result) VALUES (?, ?)",
                    (arg_hash, payload),
                )
            return result

        return wrapped

    return memoize
Usage
Prepend it to any function.
@memoize_to_sqlite(filename="cache.db")
def expensive_method(s: str, n: int):
    """Placeholder example; the decorator caches its results in cache.db."""
    # Long running slow stuff here
    ...
Caveats
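Ensure your arguments have a stable __repr__, because the cache key is built from it (the default object repr embeds a memory address, which changes between runs and would never hit the cache).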
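Ensure your output can be serialized as JSON. Cached results are returned after a JSON round trip, so a tuple comes back as a list and non-string dictionary keys come back as strings.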
Back to posts