struct
The struct module converts between Python values and C structs represented as bytes. This is essential when working with binary file formats, network protocols, or any situation where you need to interface with C code. The module provides functions to pack Python values into bytes and unpack bytes into Python values according to a format string.
Format Strings
Format strings define how data is laid out in memory. They specify the byte order, size, and type of each field.
| Character | Type | Python Type | Standard size (bytes) |
|---|---|---|---|
| x | pad byte | — | 1 |
| c | char | bytes of length 1 | 1 |
| b | signed char | int | 1 |
| B | unsigned char | int | 1 |
| ? | bool | bool | 1 |
| h | short | int | 2 |
| H | unsigned short | int | 2 |
| i | int | int | 4 |
| I | unsigned int | int | 4 |
| l | long | int | 4 |
| L | unsigned long | int | 4 |
| q | long long | int | 8 |
| Q | unsigned long long | int | 8 |
| f | float | float | 4 |
| d | double | float | 8 |
| s | char[] | bytes | — |
| p | char[] | bytes | — |
| P | void* | int | — |
Prefixes for byte order:
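- `<` little-endian
- `>` big-endian
- `!` network (same as big-endian)
- `@` native byte order, size, and alignment (default)
- `=` native byte order, standard size, no alignment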
Functions
struct.pack()
Packs values into a bytes object according to the format string.
struct.pack(format, *values) -> bytes
| Parameter | Type | Description |
|---|---|---|
| format | str | Format string |
| *values | various | Values to pack |
import struct
# Pack integers
packed = struct.pack('>ii', 256, 512) # Big-endian two ints
print(packed)
# b'\x00\x00\x01\x00\x00\x00\x02\x00'
# Pack a float and an int
packed = struct.pack('fi', 3.14, 100)
print(packed)
# b'\xc3\xf5H@d\x00\x00\x00' (native byte order; shown for a little-endian machine)
# Pack a string (fixed length)
packed = struct.pack('5s', b'hello')
print(packed)
# b'hello'
# Pack multiple values
packed = struct.pack('iii', 1, 2, 3)
print(packed)
# b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00' (native byte order; shown for a little-endian machine)
struct.unpack()
Unpacks bytes into values according to the format string.
struct.unpack(format, buffer) -> tuple
| Parameter | Type | Description |
|---|---|---|
| format | str | Format string |
| buffer | bytes | Bytes to unpack |
import struct
# Unpack two integers
data = b'\x00\x00\x01\x00\x00\x00\x02\x00'
a, b = struct.unpack('>ii', data)
print(a, b)
# 256 512
# Unpack a float and an int
data = struct.pack('fi', 3.14, 100)
value, number = struct.unpack('fi', data)
print(value, number)
# 3.140000104904175 100
# Unpack a mixed structure
# Format: int (id), float (score), 10-char string (name)
data = struct.pack('If10s', 42, 95.5, b'Alice ')
uid, score, name = struct.unpack('If10s', data)
print(uid, score, name)
# 42 95.5 b'Alice \x00\x00\x00\x00' ('10s' pads values shorter than 10 bytes with null bytes)
struct.pack_into()
Packs values into a pre-allocated buffer, useful for performance with large data.
struct.pack_into(format, buffer, offset, *values) -> None
| Parameter | Type | Default | Description |
|---|---|---|---|
| format | str | — | Format string |
| buffer | writable buffer | — | A writable bytes-like object (e.g. bytearray) |
| offset | int | — | Buffer offset to start writing at (required) |
| *values | various | — | Values to pack |
import struct
# Create a buffer
buffer = bytearray(12)
# Pack into buffer at offset 0
struct.pack_into('iii', buffer, 0, 10, 20, 30)
print(bytes(buffer))
# b'\n\x00\x00\x00\x14\x00\x00\x00\x1e\x00\x00\x00'
# Pack at offset 4 (skip first int)
struct.pack_into('ii', buffer, 4, 100, 200)
print(bytes(buffer))
# b'\n\x00\x00\x00d\x00\x00\x00\xc8\x00\x00\x00'
struct.unpack_from()
Unpacks from a buffer starting at a specific offset, without copying.
struct.unpack_from(format, buffer, offset=0) -> tuple
| Parameter | Type | Default | Description |
|---|---|---|---|
| format | str | — | Format string |
| buffer | bytes | — | Buffer to unpack from |
| offset | int | 0 | Offset to start unpacking from |
import struct
# Large buffer with multiple records
buffer = struct.pack('iii', 1, 2, 3) + struct.pack('iii', 4, 5, 6)
# Unpack first record
rec1 = struct.unpack_from('iii', buffer, 0)
print(rec1)
# (1, 2, 3)
# Unpack second record (starts at byte 12)
rec2 = struct.unpack_from('iii', buffer, 12)
print(rec2)
# (4, 5, 6)
struct.calcsize()
Returns the size of the struct corresponding to the format string.
struct.calcsize(format) -> int
| Parameter | Type | Default | Description |
|---|---|---|---|
| format | str | — | Format string |
import struct
# Calculate sizes
print(struct.calcsize('i')) # 4
print(struct.calcsize('ii')) # 8
print(struct.calcsize('fi')) # 8 (4 + 4)
print(struct.calcsize('di')) # 12 (8 + 4)
print(struct.calcsize('10s')) # 10
# With byte order
print(struct.calcsize('>i')) # 4 (big-endian)
print(struct.calcsize('<i')) # 4 (little-endian)
print(struct.calcsize('!i')) # 4 (network/big-endian)
The Struct Class
For repeated use, the Struct class is more efficient as it compiles the format string once.
import struct
# Create a compiled struct
header = struct.Struct('>HH') # Two unsigned shorts, big-endian
packed = header.pack(256, 512)
print(packed)
# b'\x01\x00\x02\x00'
# Unpack
unpacked = header.unpack(packed)
print(unpacked)
# (256, 512)
# Methods
print(header.size) # 4
print(header.pack_into) # bound method
print(header.unpack_from) # bound method
import struct
# Practical example: reading a simple bitmap header
# Simplified BMP header: 2 bytes (magic), 4 bytes (file size), 4 bytes (data offset)
# (a real BMP file header has 4 reserved bytes between the file size and the offset)
bmp_format = struct.Struct('<HII')
# '<': little-endian
# H: unsigned short (2 bytes)
# I: unsigned int (4 bytes), used twice: file size and data offset
# Fake BMP header data
header_data = b'BM' + b'\x8a\x00\x00\x00' + b'\x36\x00\x00\x00'
magic, filesize, data_offset = bmp_format.unpack_from(header_data, 0)
print(f"Magic: {magic}") # Magic: 19778 (0x4D42, i.e. b'BM' read as little-endian)
print(f"File size: {filesize}") # File size: 138
print(f"Data offset: {data_offset}") # Data offset: 54
Common Patterns
Working with Network Protocols
import struct
# IPv4 header: 20 bytes
# Version/IHL (1), TOS (1), Total Length (2), ID (2), Flags/Fragment (2)
# TTL (1), Protocol (1), Checksum (2), Source IP (4), Dest IP (4)
ip_header = struct.Struct('>BBHHHBBH4s4s')
# Parse IP header
data = b'\x45\x00\x00\x1c\xab\xcd\x40\x00\x40\x06\x00\x00\xc0\xa8\x01\x01\xc0\xa8\x01\x02'
version_ihl, tos, total_len, identification, flags_frag, ttl, proto, checksum, src_ip, dst_ip = ip_header.unpack(data)
print(f"Version: {version_ihl >> 4}") # 4
print(f"IHL: {version_ihl & 0x0f}") # 5
print(f"Total length: {total_len}") # 28
print(f"Protocol: {proto}") # 6 (TCP)
print(f"Source IP: {'.'.join(map(str, src_ip))}") # 192.168.1.1
Binary File Formats
import struct
# Simple fixed-width database format
# Each record: id (int), name (10 bytes), age (ubyte)
Record = struct.Struct('<I10sB')
records = [
(1, b'Alice ', 30),
(2, b'Bob ', 25),
(3, b'Charlie ', 35),
]
# Write records
with open('data.bin', 'wb') as f:
    for record in records:
        f.write(Record.pack(*record))
# Read records
with open('data.bin', 'rb') as f:
    while True:
        data = f.read(Record.size)
        if not data:
            break
        rec_id, name, age = Record.unpack(data)
        # '10s' pads short names with null bytes, so strip those as well as spaces
        name_text = name.rstrip(b'\x00 ').decode()
        print(f"ID: {rec_id}, Name: {name_text}, Age: {age}")
# ID: 1, Name: Alice, Age: 30
# ID: 2, Name: Bob, Age: 25
# ID: 3, Name: Charlie, Age: 35
Performance Comparison
import struct
import time
# Test pack vs Struct
data = (1, 2, 3, 4, 5)
# Using struct.pack each time
start = time.perf_counter()
for _ in range(100000):
    struct.pack('iiiii', *data)
print(f"struct.pack: {time.perf_counter() - start:.3f}s")
# Using compiled Struct
S = struct.Struct('iiiii')
start = time.perf_counter()
for _ in range(100000):
    S.pack(*data)
print(f"Struct.pack: {time.perf_counter() - start:.3f}s")
Error Handling
import struct
# Buffer too small
try:
    struct.unpack('iii', b'\x00\x00\x00\x00') # Only 4 bytes, 12 required
except struct.error as e:
    print(f"Unpack error: {e}")
# Type mismatch
try:
    struct.pack('i', 'not an int')
except struct.error as e:
    print(f"Pack error: {e}")
# Invalid format string
try:
    struct.calcsize('zzz')
except struct.error as e:
    print(f"Format error: {e}")