multiprocessing
The multiprocessing module enables Python programs to leverage multiple CPU cores by running computations in separate processes. Unlike threading, each process has its own Python interpreter and memory space, which means the Global Interpreter Lock (GIL) does not block true parallel execution. This makes it the go-to choice for CPU-bound workloads like mathematical computations, data processing, and machine learning inference.
The module mirrors the threading module’s API, so if you’re familiar with threading.Thread, you’ll find multiprocessing.Process straightforward. It also provides the Pool class for convenient task distribution and shared memory primitives for efficient data passing between processes.
Syntax
import multiprocessing
from multiprocessing import Process, Pool, Queue, Lock, Value, Array, Manager
Key Functions
multiprocessing.Process
The core class for creating and managing separate processes. You instantiate a Process with a target function and arguments, then call start() to begin execution.
from multiprocessing import Process
import os

def greet(name):
    """Print a greeting that includes the child process's PID."""
    print(f"Hello {name} from process {os.getpid()}")

if __name__ == '__main__':
    proc = Process(target=greet, args=('Alice',))
    proc.start()  # launch the child process
    proc.join()   # block until it finishes
    # Hello Alice from process 12345
Parameters:
| Parameter | Type | Default | Description |
|---|---|---|---|
group | None | None | Reserved for future use, must be None |
target | callable | None | The function to execute in the child process |
name | str | None | A descriptive name for the process |
args | tuple | () | Positional arguments passed to target |
kwargs | dict | {} | Keyword arguments passed to target |
daemon | bool | None | If True, the process will terminate when the main program exits |
Returns: A Process object.
The join() method blocks until the process completes. Always guard process creation with if __name__ == '__main__': to prevent child processes from spawning recursively on platforms that use the spawn start method (Windows, and macOS since Python 3.8).
multiprocessing.Pool
A pool of worker processes that lets you distribute tasks across multiple CPUs without manually managing individual Process objects. Use Pool.map() to apply a function to an iterable in parallel.
from multiprocessing import Pool

def square(x):
    """Return x multiplied by itself."""
    return x ** 2

if __name__ == '__main__':
    with Pool(processes=4) as pool:
        squares = pool.map(square, [1, 2, 3, 4, 5])
    print(squares)
    # [1, 4, 9, 16, 25]
Parameters:
| Parameter | Type | Default | Description |
|---|---|---|---|
processes | int | None | Number of worker processes. Defaults to cpu_count(). |
initializer | callable | None | Function called once per worker at startup |
initargs | tuple | () | Arguments passed to initializer |
maxtasksperchild | int | None | Max tasks per worker before respawning |
Returns: A Pool object.
The context manager (with statement) automatically closes the pool and waits for all workers to finish. Use pool.apply_async() for fire-and-forget tasks or pool.imap_unordered() for lazy iteration over results.
multiprocessing.Queue
A process-safe FIFO queue for passing data between processes. Any picklable object can be queued.
from multiprocessing import Process, Queue

def producer(queue):
    """Enqueue the integers 0-4, then a None sentinel to signal completion."""
    for value in range(5):
        queue.put(value)
    queue.put(None)  # tells the consumer to stop

def consumer(queue):
    """Print items from the queue until the None sentinel arrives."""
    while (item := queue.get()) is not None:
        print(f"Received: {item}")

if __name__ == '__main__':
    channel = Queue()
    prod = Process(target=producer, args=(channel,))
    cons = Process(target=consumer, args=(channel,))
    prod.start()
    cons.start()
    prod.join()
    cons.join()
    # Received: 0
    # Received: 1
    # Received: 2
    # Received: 3
    # Received: 4
Parameters:
| Parameter | Type | Default | Description |
|---|---|---|---|
maxsize | int | 0 | Maximum queue size. 0 means unlimited. |
Returns: A Queue object.
Queues are thread and process safe. The get() method blocks until data is available; use get_nowait() to raise queue.Empty immediately when no item is available instead.
multiprocessing.Lock
A simple mutex that ensures only one process can execute a critical section at a time.
from multiprocessing import Process, Lock, Value

def increment(counter, lock, n):
    """Add 1 to the shared counter n times, guarded by the lock.

    counter is a multiprocessing.Value created with lock=False, so the
    explicit Lock is what makes each read-modify-write atomic.
    """
    for _ in range(n):
        with lock:
            counter.value += 1

if __name__ == '__main__':
    # A plain module-level dict would NOT work here: each child process
    # gets its own copy of ordinary Python objects, so increments would
    # never be visible to the parent. Shared state must live in shared
    # memory (Value/Array) or a Manager, and the Lock must be passed to
    # the children explicitly.
    lock = Lock()
    counter = Value('i', 0, lock=False)  # no internal lock; we use ours
    p1 = Process(target=increment, args=(counter, lock, 1000))
    p2 = Process(target=increment, args=(counter, lock, 1000))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    print(counter.value)
    # 2000
Parameters: No constructor parameters.
Returns: A Lock object.
Use with lock: or lock.acquire() / lock.release() to guard critical sections. Without the lock, the final value would be unpredictable due to race conditions.
multiprocessing.Value
A shared memory primitive for passing a single value between processes. Changes are visible across all processes that hold a reference.
from multiprocessing import Process, Value
import ctypes

def worker(counter):
    """Atomically add 1 to the shared counter 1000 times."""
    for _ in range(1000):
        with counter.get_lock():
            counter.value += 1

if __name__ == '__main__':
    counter = Value('i', 0)  # 'i' -> C signed int, starting at 0
    pool_of_procs = [Process(target=worker, args=(counter,)) for _ in range(4)]
    for proc in pool_of_procs:
        proc.start()
    for proc in pool_of_procs:
        proc.join()
    print(counter.value)
    # 4000
Parameters:
| Parameter | Type | Default | Description |
|---|---|---|---|
typecode_or_type | str or type | — | A ctypes type like 'i' for int or 'd' for double |
initial_value | number | — | The initial value for the shared variable |
lock | bool | True | Whether to create a lock for atomic operations |
Returns: A Value object.
The typecode uses ctypes notation: 'i' (int), 'd' (double), 'b' (byte), etc. Always use get_lock() and the context manager for atomic increments.
multiprocessing.Array
A shared memory array for efficiently sharing numerical data between processes without serialization overhead.
from multiprocessing import Process, Array
import ctypes

def negate(arr):
    """Flip the sign of every element of arr in place."""
    for idx, value in enumerate(arr):
        arr[idx] = -value

if __name__ == '__main__':
    shared = Array('i', [1, 2, 3, 4, 5])  # five shared C ints
    print(shared[:])
    # [1, 2, 3, 4, 5]
    child = Process(target=negate, args=(shared,))
    child.start()
    child.join()
    print(shared[:])
    # [-1, -2, -3, -4, -5]
Parameters:
| Parameter | Type | Default | Description |
|---|---|---|---|
typecode_or_type | str or type | — | A ctypes type for array elements |
size_or_initializer | int or iterable | — | Array size (int) or initial values (iterable) |
lock | bool | True | Whether to create a lock for atomic operations |
Returns: An Array object.
Access elements with slice notation arr[:] or index arr[0]. The array is process and thread safe.
multiprocessing.Manager
Creates a server process that holds Python objects and exposes them to other processes via proxies. More flexible than shared memory but slower.
from multiprocessing import Process, Manager

def modify_data(d, l):
    """Mark the dict as processed, bump its counter, and log completion."""
    d['processed'] = True
    d['count'] = d['count'] + 1
    l.append('done')

if __name__ == '__main__':
    with Manager() as mgr:
        state = mgr.dict({'processed': False, 'count': 0})
        log = mgr.list()
        child = Process(target=modify_data, args=(state, log))
        child.start()
        child.join()
        print(dict(state))
        # {'processed': True, 'count': 1}
        print(list(log))
        # ['done']
Parameters:
| Parameter | Type | Default | Description |
|---|---|---|---|
initializer | callable | None | Function called at server startup |
initargs | tuple | () | Arguments for initializer |
ctx | Context | None | Context for the manager process |
Returns: A Manager object.
Managers support dict(), list(), set(), Value(), Array(), and various synchronization primitives like Lock() and Semaphore(). They can also be accessed remotely over a network, making them useful for inter-machine communication.
Common Patterns
Worker Pool with map
from multiprocessing import Pool

def fetch_url(url):
    """Pretend to download url and return a tagged result string."""
    # Simulate work
    return f"fetched: {url}"

if __name__ == '__main__':
    urls = ['a.com', 'b.com', 'c.com', 'd.com']
    with Pool(4) as pool:
        fetched = pool.map(fetch_url, urls)
    for line in fetched:
        print(line)
Process Pool with imap_unordered
from multiprocessing import Pool

def slow_square(x):
    """Return x squared after a short artificial delay."""
    import time
    time.sleep(0.1)
    return x ** 2

if __name__ == '__main__':
    with Pool(2) as pool:
        for value in pool.imap_unordered(slow_square, range(5)):
            print(f"Got: {value}")
    # Results arrive as they complete, not in order
Shared State with Manager
from multiprocessing import Manager, Process

def worker(shared_data, lock):
    """Increment the shared visit counter atomically.

    Individual operations on a Manager dict proxy are process-safe, but
    `+=` is a separate read followed by a write — without the lock, two
    processes could read the same value and one update would be lost.
    """
    with lock:
        shared_data['visits'] += 1

if __name__ == '__main__':
    with Manager() as m:
        data = m.dict(visits=0)
        lock = m.Lock()  # a Lock proxy shared by all workers
        processes = [Process(target=worker, args=(data, lock)) for _ in range(10)]
        for p in processes:
            p.start()
        for p in processes:
            p.join()
        print(f"Total visits: {data['visits']}")
        # Total visits: 10
Daemon Processes
from multiprocessing import Process
import time

def background_worker():
    """Print a heartbeat message every five seconds, forever."""
    while True:
        print("Working...")
        time.sleep(5)

if __name__ == '__main__':
    daemon_proc = Process(target=background_worker, daemon=True)
    daemon_proc.start()
    # Main program can continue
    # When main exits, daemon process is terminated