Customising pickle with copyreg

· 4 min read · Updated March 14, 2026 · advanced
python serialization pickle stdlib

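The copyreg module provides a way to customize how Python’s pickle module (and the copy module) serializes and reconstructs objects. When you register a reduction function for a class with copyreg.pickle(), pickle calls that function instead of applying its default logic, so you control exactly what is stored and how the object is rebuilt. Because the stored form is "a callable plus its arguments" rather than a snapshot of internal attributes, this can also help keep old pickles loadable as a class evolves.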

Why copyreg?

By default, pickle can serialize most Python objects by storing their __dict__. However, this approach has limitations:

  • It may not work correctly for objects with complex internal state
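  • It fails outright for objects that hold unpicklable resources such as open files, sockets, locks, or lambdas
  • It stores only instance state: methods and class-level attributes are looked up on the class at load time, so the class must be importable under the same name and its attribute layout must still match the pickled data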

The copyreg module lets you define exactly how your objects should be reduced to their fundamental components and rebuilt.

Registering a Reducer Function

The core of copyreg is the copyreg.pickle() function, which takes a class and a reduction function ("reducer") for instances of that class:

import copyreg
import pickle

class Point:
    """A 2-D point holding an ``x`` and a ``y`` coordinate."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __repr__(self):
        # Rendered like the constructor call that would recreate the point.
        return "Point({}, {})".format(self.x, self.y)

# Register a reducer function
def reduce_point(point):
    """Return the ``(callable, args)`` pair that rebuilds *point*.

    Calling ``Point(*args)`` with the returned arguments yields a point
    with the same ``x`` and ``y``.
    """
    ctor_args = (point.x, point.y)
    return Point, ctor_args

# Tell pickle to call reduce_point whenever it serializes a Point instance.
copyreg.pickle(Point, reduce_point)

# Round-trip a Point: dumps() goes through reduce_point, and loads()
# rebuilds the object by calling Point(3, 4).
p = Point(3, 4)
data = pickle.dumps(p)
unpickled = pickle.loads(data)
print(unpickled)  # prints: Point(3, 4)

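The reducer function returns a tuple describing how to rebuild the object. The minimal form is (constructor, args), where constructor is a callable that pickle can find by name at load time and args is a tuple of positional arguments for it. The tuple may contain up to six items; the optional third item is the object's state, which pickle passes to __setstate__ (or merges into the instance's __dict__ if there is no __setstate__) after calling the constructor.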

Why Use Reducers Instead of __reduce__?

While you can define __reduce__ directly on a class, using copyreg offers several advantages:

  • Separation of concerns: The serialization logic lives outside the class
  • Version compatibility: You can change the reducer without modifying the class
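  • Inheritance: Registrations are keyed by the exact class, so a reducer applies only to the class it was registered for. Subclasses do not pick it up automatically; register each subclass explicitly, or define __reduce__ on the base class when you do want the behaviour inherited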

Handling Complex Objects

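For objects with state that isn’t easily captured in __init__ arguments, have the reducer return a third item: the object's state. After calling the constructor, pickle hands that state to __setstate__. A registered reducer takes precedence over the class's own __getstate__, so pickle will not call __getstate__ for you; the reducer has to call it explicitly: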

import copyreg
import pickle

class DataProcessor:
    """A named processor with a transient cache that is left out of pickles."""

    def __init__(self, name):
        self.name = name
        # Scratch storage; deliberately excluded from the pickled state.
        self.cache = {}

    def __getstate__(self):
        """Return the persistent state: only ``name``, never the cache."""
        return dict(name=self.name)

    def __setstate__(self, state):
        """Restore ``name`` from *state* and start over with an empty cache."""
        self.name = state["name"]
        self.cache = {}

    def __repr__(self):
        cache_size = len(self.cache)
        return f"DataProcessor({self.name}, cache_size={cache_size})"

def reduce_processor(processor):
    """Reduce *processor* to a ``(callable, args, state)`` triple.

    The first two items recreate the object via ``DataProcessor(name)``;
    the third is whatever ``processor.__getstate__()`` returns.
    """
    ctor_args = (processor.name,)
    state = processor.__getstate__()
    return DataProcessor, ctor_args, state

# Route pickling of DataProcessor instances through reduce_processor.
copyreg.pickle(DataProcessor, reduce_processor)

# Build a processor and put something in its cache so we can see that the
# cache does not survive the round trip.
processor = DataProcessor("my_processor")
processor.cache["key"] = "value"

# The reducer's state contains only the name, and __setstate__ starts the
# restored object with an empty cache.
data = pickle.dumps(processor)
unpickled = pickle.loads(data)
print(unpickled)  # prints: DataProcessor(my_processor, cache_size=0)

Pickling Functions and Classes

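pickle stores functions and classes by reference (their module and qualified name), not by value. Lambdas and classes created inside a function cannot be looked up by name when unpickling, so they fail to pickle. A copyreg reducer does not change this, because the constructor it returns must itself be importable by name. The fix is to define the class at module level and register its reducer there: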

import copyreg
import pickle

# Pickling fails for lambdas and for classes defined inside a function:
# pickle records them by qualified name and cannot look that name up on load.

# Instead, define at module level and register:
class Vector:
    """A 3-D vector whose components are kept together in ``coords``."""

    def __init__(self, x, y, z):
        self.coords = x, y, z

    def __repr__(self):
        # The tuple's own text form supplies the parentheses and commas.
        return "Vector{}".format(self.coords)

def reduce_vector(v):
    """Return ``(Vector, coords)`` so that ``Vector(*coords)`` rebuilds *v*."""
    ctor_args = v.coords
    return Vector, ctor_args

# Route pickling of Vector instances through reduce_vector.
copyreg.pickle(Vector, reduce_vector)

# Serialize and immediately deserialize; loads() calls Vector(1, 2, 3).
v = Vector(1, 2, 3)
print(pickle.loads(pickle.dumps(v)))  # prints: Vector(1, 2, 3)

Versioning with Constructors

You can use reducers to handle version migration by creating constructors that accept both old and new argument formats:

import copyreg
import pickle

class Config:
    """Settings container whose constructor accepts old and new call styles.

    * ``Config(mapping)`` -- legacy form; *mapping* (a ``dict``) is stored as
      ``settings`` as-is, without copying.
    * ``Config(scalar)`` -- legacy form; stored as ``{"value": scalar}``.
    * ``Config(key, value)`` -- current form; stored as ``{key: value}``.

    ``value=None`` is what marks the legacy forms, so ``Config(key, None)``
    is read as ``Config(scalar)`` and does NOT produce ``{key: None}``.
    """

    def __init__(self, setting, value=None):
        if value is not None:
            # New format: explicit setting and value.
            self.settings = {setting: value}
        elif isinstance(setting, dict):
            # Old format: the whole settings dict was passed in.
            self.settings = setting
        else:
            # Old format: a bare value.
            self.settings = {"value": setting}

    def __repr__(self):
        return f"Config({self.settings})"


def reduce_config(config):
    """Reduce *config* to ``(Config, args)`` such that ``Config(*args)``
    rebuilds an object with equal ``settings``.

    A single-entry ``settings`` is saved in the new ``(key, value)`` format.
    Everything else is saved as the whole dict, including a single entry
    whose value is ``None``: ``Config(key, None)`` would be interpreted by
    the constructor as the legacy scalar form and come back as
    ``{"value": key}``.

    Returns:
        A 2-tuple ``(Config, args)`` in the shape pickle expects from a
        reduction function.
    """
    settings = config.settings
    if len(settings) == 1:
        key, value = next(iter(settings.items()))
        # Only use the new format when the constructor can tell it apart
        # from the legacy one-argument call.
        if value is not None:
            return (Config, (key, value))
    return (Config, (settings,))

# Route pickling of Config instances through reduce_config.
copyreg.pickle(Config, reduce_config)

# New-style construction: one key/value pair, which reduce_config stores
# as the two constructor arguments ("debug", True).
c1 = Config("debug", True)
data1 = pickle.dumps(c1)
print(pickle.loads(data1))  # prints: Config({'debug': True})

# Dict-style construction (the legacy calling convention): there is more
# than one entry, so reduce_config stores the whole dict as one argument.
c2 = Config({"theme": "dark", "timeout": 30})
data2 = pickle.dumps(c2)
print(pickle.loads(data2))  # prints: Config({'theme': 'dark', 'timeout': 30})

This approach lets you evolve your class while maintaining backward compatibility with previously pickled data.

Practical Example: Pickling File Objects

One common pain point is pickling objects that contain file handles. Here’s how to handle it:

import copyreg
import pickle
from io import StringIO

class LogBuffer:
    """An in-memory log buffer that can be pickled without its file handle.

    Text passed to :meth:`write` accumulates in ``buffer`` (a ``StringIO``).
    :meth:`flush` copies it to ``_file`` when a file is open. ``_file`` is
    ``None`` after construction and is opened by :meth:`__setstate__`, i.e.
    when an instance is restored from pickled state.
    """

    def __init__(self, filename):
        self.filename = filename
        self.buffer = StringIO()
        self._file = None

    def write(self, message):
        """Append *message* to the in-memory buffer."""
        self.buffer.write(message)

    def flush(self):
        """Write the buffered text to the file, if one is open, and clear it.

        Does nothing while no file is open; the text stays buffered.
        """
        if self._file:
            self._file.write(self.buffer.getvalue())
            self._file.flush()
            # truncate() does not move the stream position. Without the
            # seek, the next write would land at the old offset and the gap
            # would be filled with NUL characters.
            self.buffer.seek(0)
            self.buffer.truncate(0)

    def close(self):
        """Flush pending text and close the file handle, if one is open."""
        if self._file:
            self.flush()
            self._file.close()
            self._file = None

    def __getstate__(self):
        """Return a copy of the instance dict with the file handle blanked."""
        state = self.__dict__.copy()
        # Don't pickle the file handle
        state["_file"] = None
        return state

    def __setstate__(self, state):
        """Restore attributes from *state* and open the log file.

        The file is opened in append mode whenever ``filename`` is truthy,
        regardless of whether the pickled object had a file open. Call
        :meth:`close` when done with the restored object.
        """
        self.__dict__.update(state)
        if self.filename:
            self._file = open(self.filename, "a")

    def __repr__(self):
        return f"LogBuffer({self.filename}, buffered={len(self.buffer.getvalue())})"


def reduce_log_buffer(buffer):
    """Reduce *buffer* to ``(LogBuffer, (filename,), state)``.

    A reducer registered through copyreg takes precedence over the class's
    own ``__getstate__``. The state therefore has to be passed explicitly
    as the third item; otherwise the buffered text is dropped and
    ``__setstate__`` (which reopens the file) never runs on load.
    """
    return (LogBuffer, (buffer.filename,), buffer.__getstate__())

# Route pickling of LogBuffer instances through reduce_log_buffer. Once
# registered, this reducer (not LogBuffer.__getstate__) decides what is saved.
copyreg.pickle(LogBuffer, reduce_log_buffer)

# Example usage: the two lines are held in memory; no file is open yet.
log = LogBuffer("/tmp/app.log")
log.write("Line 1\n")
log.write("Line 2\n")

# Pickle it: the data contains only what the registered reducer returned,
# which never includes an open file handle.
data = pickle.dumps(log)
print(f"Pickled size: {len(data)} bytes")

# Unpickle: pickle rebuilds the object by calling the constructor (and
# applying any state) that the registered reducer supplied.
restored = pickle.loads(data)
print(restored)

See Also