Python shelve module question

Question:

Does the Python shelve module have any protection built in to make sure two processes aren’t writing to a file at the same time?

Asked By: Evan Fosmark

||

Answers:

The shelve module uses an underlying database package (such as dbm, gdbm or bsddb).

The restrictions paragraph says (my emphasis):

The shelve module does not support concurrent read/write access to shelved objects. (Multiple simultaneous read accesses are safe.) When a program has a shelf open for writing, no other program should have it open for reading or writing. Unix file locking can be used to solve this, but this differs across Unix versions and requires knowledge about the database implementation used.

Conclusion: it depends on OS and the underlying DB. To keep things portable, do not build on concurrency.

Answered By: gimel

As per the top answer, it’s not safe to have multiple writers to the shelve. My approach to making shelves safer is to write a wrapper that takes care of opening and accessing shelve elements. The wrapper code looks something like this:

def open(self, mode=READONLY):
    """Open the shelf after taking an fcntl lock on a sidecar .lck file.

    READWRITE takes an exclusive lock and opens the shelf writable ('c');
    any other mode takes a shared lock and opens it read-only ('r').

    NOTE(review): READONLY/READWRITE, shelvefile, fcntl and the LOCK_*
    constants are assumed to be defined/imported elsewhere in the module.
    As written this only works as a *method* (inside a class the bare
    open() call below resolves to the builtin); at module level the name
    would recurse into this function — TODO confirm the enclosing class.
    """
    if mode is READWRITE:
        lockfilemode = "a"
        lockmode = LOCK_EX
        shelve_mode = 'c'
    else:
        lockfilemode = "r"
        lockmode = LOCK_SH
        shelve_mode = 'r'
    self.lockfd = open(shelvefile + ".lck", lockfilemode)
    # LOCK_NB makes this raise (instead of block) if the lock is held.
    fcntl.flock(self.lockfd.fileno(), lockmode | LOCK_NB)
    # Fixed: the original line had an unbalanced trailing ')' (SyntaxError).
    self.shelve = shelve.open(shelvefile, flag=shelve_mode,
                              protocol=pickle.HIGHEST_PROTOCOL)
def close(self):
    """Close the shelf, release the fcntl lock, then close the lock file."""
    self.shelve.close()
    fcntl.flock(self.lockfd.fileno(), LOCK_UN)
    # Fixed: the original called lockfd.close() — a NameError, since the
    # lock file object is stored on self.
    self.lockfd.close()
Answered By: Ivo Bosticky

I’ve implemented Ivo’s approach as a context manager, for anyone interested:

from contextlib import contextmanager
from fcntl import flock, LOCK_SH, LOCK_EX, LOCK_UN
import shelve

@contextmanager
def locking(lock_path, lock_mode):
    """Hold an fcntl lock on *lock_path* for the duration of the with-block.

    Blocks until the lock is acquired; always releases the lock and closes
    the lock file on exit, even if the body raises.
    """
    lock_file = open(lock_path, 'w')
    try:
        flock(lock_file.fileno(), lock_mode)  # blocks until acquired
        try:
            yield
        finally:
            flock(lock_file.fileno(), LOCK_UN)  # release
    finally:
        lock_file.close()

class DBManager(object):
    """Shelve-backed store; every operation runs under an fcntl file lock."""

    def __init__(self, db_path):
        self.db_path = db_path

    def read(self):
        """Return a plain-dict snapshot of the shelf (shared lock)."""
        lock_name = "%s.lock" % self.db_path
        with locking(lock_name, LOCK_SH), shelve.open(self.db_path, "r", 2) as db:
            return dict(db)

    def cas(self, old_db, new_db):
        """Compare-and-swap under an exclusive lock.

        Replaces the shelf contents with *new_db* only if they still equal
        *old_db*; returns True on success, False if the shelf changed.
        """
        lock_name = "%s.lock" % self.db_path
        with locking(lock_name, LOCK_EX), shelve.open(self.db_path, "c", 2) as db:
            if dict(db) != old_db:
                return False
            db.clear()
            db.update(new_db)
            return True
Answered By: Samus_

Building on Ivo’s and Samus_’s approaches, I’ve implemented an even simpler wrapper for shelve.open:

import fcntl
import shelve
import contextlib
import typing


@contextlib.contextmanager
def open_safe_shelve(db_path: str, flag: typing.Literal["r", "w", "c", "n"] = "c", protocol=None, writeback=False):
    """Open a shelve database under an fcntl file lock on `<db_path>.lock`.

    Write-capable flags ("w", "c", "n") take an exclusive lock; "r" takes a
    shared lock so concurrent readers are allowed. Blocks until the lock is
    acquired.

    Raises:
        ValueError: if *flag* is not one of "r", "w", "c", "n".
    """
    if flag in ("w", "c", "n"):
        lockfile_lock_mode = fcntl.LOCK_EX
    elif flag == "r":
        lockfile_lock_mode = fcntl.LOCK_SH
    else:
        raise ValueError(f"Invalid mode: {flag}, only 'r', 'w', 'c', 'n' are allowed.")

    # According to https://docs.python.org/3/library/fcntl.html#fcntl.flock,
    # the lock file must be opened in write mode on some systems.
    with open(f"{db_path}.lock", "w") as lock:
        fcntl.flock(lock.fileno(), lockfile_lock_mode)  # Block until lock is acquired.
        try:
            db = shelve.open(db_path, flag=flag, protocol=protocol, writeback=writeback)
            try:
                yield db
            finally:
                # Fixed: the original never closed the shelf, so buffered
                # writes could stay unflushed and the shelf outlived the
                # lock. Close it before releasing the lock.
                db.close()
        finally:
            fcntl.flock(lock.fileno(), fcntl.LOCK_UN)  # Release lock

This avoids having to check if the dict has changed since the last time, like in Samus_’s cas() method.

Note that this will block until the lock can be obtained. If you instead want to throw an exception if the lock is already taken, use lockfile_lock_mode | fcntl.LOCK_NB as the lock flag.

It can be used in the same way shelve would normally be used. For example:

import time
import multiprocessing

def read(db_path: str):
    """Hold a shared lock on the shelf and print 'foo' before and after a 10 s pause."""
    print("Reading wants lock")
    with open_safe_shelve(db_path, "r") as db:
        print("Reading has lock")
        value = db.get('foo', None)
        print(f"foo: {value}")
        time.sleep(10)
        value = db.get('foo', None)
        print(f"foo: {value}")
        print("Reading giving up lock")


def write(db_path: str):
    """Take an exclusive lock on the shelf and store foo=bar."""
    print("Writing wants lock")
    with open_safe_shelve(db_path) as shelf:
        print("Writing has lock")
        shelf["foo"] = "bar"
        print("Writing giving up lock")


if __name__ == "__main__":
    # Demo: start a reader first so it holds the shared lock, then show the
    # writer blocking until the reader finishes.
    db_path = "test_database"
    processes = [
        multiprocessing.Process(target=read, args=(db_path,)),
        multiprocessing.Process(target=write, args=(db_path,)),
    ]
    processes[0].start()
    time.sleep(1)  # give the reader a head start on the lock
    processes[1].start()
    for process in processes:
        process.join()

will output (assuming test_database.db already exists):

Reading wants lock
Reading has lock
foo: None
Writing wants lock
# (sleeps for around 9 seconds)
foo: None
Reading giving up lock
Writing has lock
Writing giving up lock
Answered By: Robin
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.