collections
The collections module provides specialized container datatypes that offer alternatives to Python’s general-purpose built-in containers like dict, list, set, and tuple. These data structures solve common programming problems more elegantly and efficiently.
Counter
A Counter is a dict subclass for counting hashable objects. It maps elements to their counts.
from collections import Counter
# Count elements in an iterable
fruits = ['apple', 'banana', 'apple', 'orange', 'banana', 'apple']
counts = Counter(fruits)
print(counts) # Counter({'apple': 3, 'banana': 2, 'orange': 1})
# Access counts
print(counts['apple']) # 3
print(counts['grape']) # 0 (returns 0 for missing keys)
# Most common elements
print(counts.most_common(2)) # [('apple', 3), ('banana', 2)]
Counter Methods
| Method | Description |
|---|---|
elements() | Returns iterator over elements repeating each as many times as its count |
most_common(n) | Returns n most common elements and their counts |
subtract(iterable) | Subtracts counts from elements |
update(iterable) | Adds counts from elements |
# Arithmetic with Counters
c1 = Counter(a=3, b=1)
c2 = Counter(a=1, b=2)
print(c1 + c2) # Counter({'a': 4, 'b': 3})
print(c1 - c2) # Counter({'a': 2})
defaultdict
A defaultdict calls a factory function to supply missing values, eliminating KeyError checks.
from collections import defaultdict
# Group items by length
words = ['cat', 'dog', 'elephant', 'bird', 'fish']
by_length = defaultdict(list)
for word in words:
by_length[len(word)].append(word)
print(by_length)
# defaultdict(<class 'list'>, {3: ['cat', 'dog'], 8: ['elephant'], 4: ['bird', 'fish']})
Common Factory Functions
| Factory | Use Case |
|---|---|
list | Grouping values |
set | Tracking unique items |
int | Counting (starts at 0) |
dict | Nested dictionaries |
# Counting with defaultdict
char_count = defaultdict(int)
for char in 'abracadabra':
char_count[char] += 1
print(dict(char_count)) # {'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1}
deque
A deque (double-ended queue) supports fast appends and pops from both ends.
from collections import deque
# Efficient queue operations
d = deque([1, 2, 3])
d.append(4) # Add to right: deque([1, 2, 3, 4])
d.appendleft(0) # Add to left: deque([0, 1, 2, 3, 4])
d.pop() # Remove from right: returns 4
d.popleft() # Remove from left: returns 0
deque vs list Performance
| Operation | list | deque |
|---|---|---|
| append | O(1) | O(1) |
| appendleft | O(n) | O(1) |
| pop | O(1) | O(1) |
| popleft | O(n) | O(1) |
# Fixed-size queue for sliding window
def moving_average(iterable, n=3):
it = iter(iterable)
d = deque(maxlen=n)
for x in it:
d.append(x)
if len(d) == n:
yield sum(d) / n
# Process last 100 lines of a file
def tail(filename, n=100):
with open(filename) as f:
return deque(f, maxlen=n)
namedtuple
namedtuple creates tuple subclasses with named fields, making code more readable.
from collections import namedtuple
# Define a named tuple class
Point = namedtuple('Point', ['x', 'y'])
p = Point(11, y=22)
print(p.x, p.y) # 11 22
print(p[0], p[1]) # 11 22 (still indexable)
# Unpacking like regular tuples
x, y = p
Replacing Fields
# namedtuples are immutable - use _replace()
p = Point(x=11, y=22)
p2 = p._replace(x=33)
print(p2) # Point(x=33, y=22)
Converting to/from dict
# Convert to dict
p = Point(x=11, y=22)
d = p._asdict() # {'x': 11, 'y': 22}
# Create from dict
p2 = Point(**d)
OrderedDict
An OrderedDict remembers insertion order (less critical since Python 3.7+, still useful for LRU caches).
from collections import OrderedDict
# Maintains insertion order
d = OrderedDict()
d['first'] = 1
d['second'] = 2
d['third'] = 3
for key in d:
print(key) # first, second, third
move_to_end()
# Move items to either end
d.move_to_end('second') # Move to right (end)
d.move_to_end('first', last=False) # Move to left (beginning)
# Build an LRU cache
class LRUCache:
def __init__(self, capacity):
self.cache = OrderedDict()
self.capacity = capacity
def get(self, key):
if key not in self.cache:
return -1
self.cache.move_to_end(key)
return self.cache[key]
def put(self, key, value):
if key in self.cache:
self.cache.move_to_end(key)
self.cache[key] = value
if len(self.cache) > self.capacity:
self.cache.popitem(last=False)
ChainMap
ChainMap groups multiple dictionaries into a single view without copying.
from collections import ChainMap
# Layered configuration
defaults = {'theme': 'light', 'fontsize': 12}
user_prefs = {'theme': 'dark'}
cmd_line = {'fontsize': 14}
config = ChainMap(cmd_line, user_prefs, defaults)
print(config['theme']) # 'dark' (from user_prefs)
print(config['fontsize']) # 14 (from cmd_line)
ChainMap operates left-to-right
# Changes affect the first mapping
config['debug'] = True
print(cmd_line) # {'fontsize': 14, 'debug': True}
# Add new child to front
new_config = config.new_child({'lang': 'en'})
Common Patterns
Top N words in a document
from collections import Counter
import re
def top_words(text, n=10):
words = re.findall(r'\b\w+\b', text.lower())
return Counter(words).most_common(n)
Group by key
from collections import defaultdict
def group_by(iterable, key_func):
groups = defaultdict(list)
for item in iterable:
groups[key_func(item)].append(item)
return dict(groups)
# Group users by department
users = [
{'name': 'Alice', 'dept': 'Engineering'},
{'name': 'Bob', 'dept': 'Sales'},
{'name': 'Carol', 'dept': 'Engineering'}
]
by_dept = group_by(users, lambda u: u['dept'])
Rotating deque
from collections import deque
def rotate_array(arr, k):
d = deque(arr)
d.rotate(k) # Positive: right, Negative: left
return list(d)
print(rotate_array([1, 2, 3, 4, 5], 2)) # [4, 5, 1, 2, 3]
print(rotate_array([1, 2, 3, 4, 5], -2)) # [3, 4, 5, 1, 2]
See Also
- dict::dict — Built-in dictionary type
- list::list — Built-in list type
- built-in::tuple — Built-in tuple type