Customising pickle with copyreg
The copyreg module provides a way to customize how Python’s pickle module serializes and deserializes objects. When you register pickling functions with copyreg, pickle knows exactly how to handle your custom classes, making them portable across Python versions and implementations.
Why copyreg?
By default, pickle can serialize most Python objects by storing their __dict__. However, this approach has limitations:
- It may not work correctly for objects with complex internal state
- It can be inefficient for objects that contain references to functions or other unpicklable objects
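- It doesn’t store the class itself — methods and class-level attributes are not part of the pickle, and __init__ is not called on load, so the instance is rebuilt from its saved attributes only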
The copyreg module lets you define exactly how your objects should be reduced to their fundamental components and rebuilt.
Registering a Reducer Function
The core of copyreg is the pickle function, copyreg.pickle(type, function), which registers a reducer function for a class:
import copyreg
import pickle
class Point:
    """A 2-D point holding ``x`` and ``y`` coordinates."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __repr__(self):
        return "Point({}, {})".format(self.x, self.y)
# Reducer function for Point
def reduce_point(point):
    """Return the callable and the argument tuple that rebuild *point*."""
    ctor_args = (point.x, point.y)
    return Point, ctor_args
# Register reduce_point so pickle uses it for Point instances
copyreg.pickle(Point, reduce_point)
# Point objects are now pickled through reduce_point; round-trip one
p = Point(3, 4)
data = pickle.dumps(p)
unpickled = pickle.loads(data)
print(unpickled) # Point(3, 4)
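The reducer function must return a tuple that starts with (constructor, args), where constructor is a callable that can recreate the object from args. An optional third item holds the object’s state, which pickle applies after construction (through __setstate__ when the class defines it).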
Why Use Reducers Instead of __reduce__?
While you can define __reduce__ directly on a class, using copyreg offers several advantages:
- Separation of concerns: The serialization logic lives outside the class
- Version compatibility: You can change the reducer without modifying the class
- Inheritance: A registration applies to the exact class only, so subclasses are not affected unless you register them too (unlike __reduce__, which subclasses inherit)
Handling Complex Objects
For objects with state that isn’t easily captured in __init__ arguments, you can use __getstate__ and __setstate__ alongside copyreg:
import copyreg
import pickle
class DataProcessor:
    """Named processor whose cache is left out of its pickled state."""

    def __init__(self, name):
        self.name = name
        # Transient working data: omitted by __getstate__, rebuilt empty on load
        self.cache = {}

    def __getstate__(self):
        """Return the state to pickle: only the name, never the cache."""
        state = {"name": self.name}
        return state

    def __setstate__(self, state):
        """Restore the name and start over with an empty cache."""
        self.name = state["name"]
        self.cache = {}

    def __repr__(self):
        cache_size = len(self.cache)
        return f"DataProcessor({self.name}, cache_size={cache_size})"
def reduce_processor(processor):
    """Return (callable, constructor args, state) for *processor*.

    The third item is handed to ``__setstate__`` after the object has been
    rebuilt from the constructor arguments.
    """
    ctor_args = (processor.name,)
    state = processor.__getstate__()
    return DataProcessor, ctor_args, state
# Register reduce_processor so pickle uses it for DataProcessor instances
copyreg.pickle(DataProcessor, reduce_processor)
# Test it: put something in the cache, then round-trip through pickle
processor = DataProcessor("my_processor")
processor.cache["key"] = "value"
data = pickle.dumps(processor)
unpickled = pickle.loads(data)
# The cache entry is not part of the pickled state, so the copy starts empty
print(unpickled) # DataProcessor(my_processor, cache_size=0)
Pickling Functions and Classes
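By default, pickle stores functions and classes by reference — their module and qualified name — so they must be importable from module level when the data is loaded. Lambdas and classes created inside a function cannot be found that way, and registering a reducer does not change this: the reducer’s constructor is itself pickled by reference. Define the class at module level, then register a reducer to control how its instances are rebuilt: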
import copyreg
import pickle
# A class defined inline (for example inside a function) cannot be pickled:
# pickle stores classes by name and could not look it up again on load.
# Instead, define at module level and register:
class Vector:
    """Three-component vector kept as a single ``coords`` tuple."""

    def __init__(self, x, y, z):
        self.coords = x, y, z

    def __repr__(self):
        # str() of the tuple supplies the parentheses, e.g. "Vector(1, 2, 3)"
        return "Vector" + str(self.coords)
def reduce_vector(v):
    """Return the callable and arguments that rebuild *v*.

    ``coords`` is already an ``(x, y, z)`` tuple, so it doubles as the
    constructor argument tuple.
    """
    ctor_args = v.coords
    return Vector, ctor_args
# Register reduce_vector so pickle uses it for Vector instances
copyreg.pickle(Vector, reduce_vector)
v = Vector(1, 2, 3)
# Round-trip through pickle and print the rebuilt object
print(pickle.loads(pickle.dumps(v))) # Vector(1, 2, 3)
Versioning with Constructors
You can use reducers to handle version migration by creating constructors that accept both old and new argument formats:
import copyreg
import pickle
class Config:
    """Settings holder whose constructor accepts both argument formats.

    Old format: ``Config(setting)`` where *setting* is either a dict of
    settings or a bare value (stored under the key ``"value"``).
    New format: ``Config(setting, value)`` for a single key/value pair.

    Because ``value=None`` marks the old format, ``Config(key, None)`` is
    treated as ``Config(key)``; a ``None`` value can only be supplied through
    the dict form.
    """

    def __init__(self, setting, value=None):
        # Support both old format (setting only) and new (setting, value)
        if value is None:
            # Old format: setting was a dict, or a bare value
            if isinstance(setting, dict):
                self.settings = setting
            else:
                self.settings = {"value": setting}
        else:
            # New format: explicit setting and value
            self.settings = {setting: value}

    def __repr__(self):
        return f"Config({self.settings})"


def reduce_config(config):
    """Return the callable and arguments that rebuild *config*.

    A single setting is saved in the new ``(key, value)`` format. Everything
    else is saved as one dict argument, including a single setting whose value
    is ``None``: ``Config(key, None)`` would be read back as the old format
    and come out as ``{"value": key}``.
    """
    settings = config.settings
    if len(settings) == 1:
        key, value = next(iter(settings.items()))
        # Only use the two-argument form when the constructor can tell it
        # apart from the old single-argument call.
        if value is not None:
            return (Config, (key, value))
    return (Config, (settings,))
# Register reduce_config so pickle uses it for Config instances
copyreg.pickle(Config, reduce_config)
# New-style object: explicit setting and value
c1 = Config("debug", True)
data1 = pickle.dumps(c1)
print(pickle.loads(data1)) # Config({'debug': True})
# Old-style object (if you had one): a single dict argument
c2 = Config({"theme": "dark", "timeout": 30})
data2 = pickle.dumps(c2)
print(pickle.loads(data2)) # Config({'theme': 'dark', 'timeout': 30})
This approach lets you evolve your class while maintaining backward compatibility with previously pickled data.
Practical Example: Pickling File Objects
One common pain point is pickling objects that contain file handles. Here’s how to handle it:
import copyreg
import pickle
from io import StringIO
class LogBuffer:
    """A buffer that can be pickled by excluding the file handle.

    Messages are collected in an in-memory ``StringIO``. ``flush`` copies them
    to ``_file`` when a file object is attached and then empties the buffer.
    """

    def __init__(self, filename):
        self.filename = filename
        self.buffer = StringIO()
        self._file = None  # attached file object, if any

    def write(self, message):
        """Append *message* to the in-memory buffer."""
        self.buffer.write(message)

    def flush(self):
        """Write the buffered text to the attached file, then empty the buffer."""
        # Write to actual file if available
        if self._file:
            self._file.write(self.buffer.getvalue())
            self._file.flush()
        # truncate() leaves the stream position where it was; rewind first so
        # the next write starts at offset 0 instead of being padded with NULs.
        self.buffer.seek(0)
        self.buffer.truncate()

    def __getstate__(self):
        """Return a copy of the instance attributes without the file handle."""
        state = self.__dict__.copy()
        # Don't pickle the file handle
        state["_file"] = None
        return state

    def __setstate__(self, state):
        """Restore the attributes and reopen ``filename`` in append mode.

        The saved state does not record whether a file was open, so the file
        is opened whenever ``filename`` is set.
        """
        self.__dict__.update(state)
        if self.filename:
            self._file = open(self.filename, "a")

    def __repr__(self):
        return f"LogBuffer({self.filename}, buffered={len(self.buffer.getvalue())})"


def reduce_log_buffer(buffer):
    """Return (callable, constructor args, state) for *buffer*.

    A reducer registered with copyreg takes precedence over the class's own
    pickling hooks, so the state from ``__getstate__`` is passed along
    explicitly. Pickle then calls ``__setstate__`` on load and the buffered
    text survives the round trip.
    """
    return (LogBuffer, (buffer.filename,), buffer.__getstate__())
# Register reduce_log_buffer so pickle uses it for LogBuffer instances
copyreg.pickle(LogBuffer, reduce_log_buffer)
# Example usage: buffer two lines in memory
log = LogBuffer("/tmp/app.log")
log.write("Line 1\n")
log.write("Line 2\n")
# Pickle it; the registered reducer decides what goes into the payload
data = pickle.dumps(log)
print(f"Pickled size: {len(data)} bytes")
# Unpickle; the object is rebuilt from what the reducer returned
restored = pickle.loads(data)
print(restored)
See Also
- pickle-module — The pickle module for object serialization
- copy-module — For shallow and deep copy operations
- python-dataclasses — Dataclasses and their serialization defaults