Build a File Organizer Script
Introduction
A file organizer script scans a directory, applies sorting rules you define, and moves or copies files into organized subfolders. Instead of sorting your Downloads folder by hand, you run the script and it does the work.
Python’s standard library ships with everything you need. No third-party packages. The key modules are pathlib, shutil, glob, os, re, and datetime.
Organizing Files by Extension
The simplest sorting strategy groups files by their extension. A PDF goes into a pdf/ folder, a JPG into jpg/, and so on.
from pathlib import Path
import shutil
def organize_by_extension(source_dir, preview=False):
    """Move each file in ``source_dir`` into a subfolder named after its extension.

    Only non-hidden entries directly inside ``source_dir`` for which
    ``is_file()`` is true are considered. Files without an extension are
    left where they are. The extension is lower-cased, so ``photo.JPG``
    and ``photo.jpg`` both end up in ``jpg/``.

    Args:
        source_dir: Directory to organize (``str`` or ``Path``).
        preview: When true, only print the planned moves; no folder is
            created and no file is moved.
    """
    source = Path(source_dir)

    for file in source.iterdir():
        if not file.is_file():
            continue

        # Skip hidden files on Unix
        if file.name.startswith('.'):
            continue

        # Get the extension (without the dot). Lower-casing keeps "JPG" and
        # "jpg" in one folder even on case-sensitive filesystems.
        ext = file.suffix.lstrip('.').lower()
        if not ext:
            continue

        dest_dir = source / ext
        dest_path = dest_dir / file.name

        # Never overwrite: warn and leave the file in place instead
        if dest_path.exists():
            print(f"Skipping {file.name}: {dest_path} already exists")
            continue

        if preview:
            print(f"Would move {file.name} -> {dest_dir}/")
        else:
            dest_dir.mkdir(exist_ok=True)
            shutil.move(str(file), dest_path)
organize_by_extension('./downloads')
Pass preview=True to see what would happen without actually moving anything. This safeguard lets you verify the behavior before making changes.
How it works:
- Path.iterdir() iterates over everything in a directory — files and subdirectories
- file.suffix returns the extension including the dot (.pdf, .jpg)
- Path.mkdir(exist_ok=True) creates the target folder without raising an error if it already exists
- shutil.move() moves the file to its new location
Files with Multiple Extensions
Some files have more than one extension, like .tar.gz or .config.js. Path.suffix only returns the last part:
>>> Path('archive.tar.gz').suffix
'.gz'
>>> Path('archive.tar.gz').suffixes
['.tar', '.gz']
If you need to handle compound extensions, use Path.suffixes, which returns a list of all segments.
Preserving Original Files with Copy
Sometimes you want to keep the originals. Use shutil.copy2() to copy instead of move:
def copy_by_extension(source_dir, dest_dir, preview=False):
    """Copy each file in ``source_dir`` into ``dest_dir/<extension>/``.

    The originals are left untouched. Hidden files and anything that is
    not a file are ignored. Files without an extension are copied into
    ``no_extension/``. The extension is lower-cased so that ``a.TXT`` and
    ``b.txt`` share the ``txt/`` folder.

    Args:
        source_dir: Directory whose top-level files are copied.
        dest_dir: Root of the organized copy. It is created on demand,
            including any missing parent directories.
        preview: When true, only print the planned copies; nothing is
            created or copied.
    """
    source = Path(source_dir)
    dest = Path(dest_dir)

    for file in source.iterdir():
        if not file.is_file() or file.name.startswith('.'):
            continue

        ext = file.suffix.lstrip('.').lower() or 'no_extension'
        ext_dir = dest / ext
        dest_path = ext_dir / file.name

        # Never overwrite an existing copy
        if dest_path.exists():
            print(f"Skipping {file.name}: already exists in {ext_dir}")
            continue

        if preview:
            print(f"Would copy {file.name} -> {ext_dir}/")
        else:
            # parents=True: dest_dir itself may not exist yet
            ext_dir.mkdir(parents=True, exist_ok=True)
            shutil.copy2(file, dest_path)
shutil.copy2() preserves file metadata like timestamps. shutil.copy() only preserves permissions.
Organizing Files by Pattern
You may want to sort by content patterns in filenames rather than extensions. For example, group every file containing “invoice” regardless of whether it’s a PDF, XLSX, or PNG. This is where regex matching becomes useful.
from pathlib import Path
import shutil
import re
def organize_by_pattern(source_dir, preview=False):
    """Move files into folders chosen by keywords found in their names.

    Each non-hidden file directly inside ``source_dir`` is tested against
    the patterns in declaration order and goes to the folder of the first
    pattern that matches (case-insensitively). Files matching no pattern
    stay where they are. A destination folder is only created when a file
    is actually moved into it, so preview runs and unmatched patterns
    leave the directory untouched.

    Args:
        source_dir: Directory to organize (``str`` or ``Path``).
        preview: When true, only print the planned moves.
    """
    source = Path(source_dir)

    # Build the complete file list first to avoid mid-iteration mutations
    all_files = [
        f for f in source.iterdir()
        if f.is_file() and not f.name.startswith('.')
    ]

    patterns = {
        'invoices': re.compile(r'invoice', re.IGNORECASE),
        'receipts': re.compile(r'receipt', re.IGNORECASE),
        # Case-insensitive like the others: names such as
        # "Screenshot 1.png" start with a capital S.
        'screenshots': re.compile(r'screenshot', re.IGNORECASE),
    }

    # Files already claimed by an earlier pattern
    organized = set()

    for pattern_name, regex in patterns.items():
        dest_dir = source / pattern_name

        for file in all_files:
            if file in organized:
                continue
            if not regex.search(file.name):
                continue

            dest_path = dest_dir / file.name
            if dest_path.exists():
                print(f"Skipping {file.name}: already exists in {dest_dir}")
                continue

            if preview:
                print(f"Would move {file.name} -> {dest_dir}/")
            else:
                # Created lazily so a dry run never touches the disk
                dest_dir.mkdir(exist_ok=True)
                shutil.move(str(file), dest_path)

            # Claim the file in both modes so the preview output mirrors
            # what a real run would do.
            organized.add(file)
organize_by_pattern('./downloads')
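The key detail here: collect all files into a list before applying any patterns. If files were moved while the directory was still being iterated, the listing would change mid-loop and a file matched by an early pattern would disappear before later patterns could check it. Working from a snapshot, together with the organized set, ensures each file is handled at most once.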
glob also supports wildcard patterns for simpler matching:
for file in source.glob('*.pdf'): # PDFs only
for file in source.glob('report*'): # Anything starting with "report"
for file in source.rglob('*.pdf'): # Recursive - searches subdirectories
Organizing Files by Date
Sort files by when they were last modified. This is useful for archiving — put today’s files in one folder, last week’s in another.
from pathlib import Path
import shutil
from datetime import datetime
def organize_by_date(source_dir, preview=False):
    """Move top-level files into ``YYYY-MM-DD`` folders by modification date.

    Hidden files and non-file entries are ignored. The folder name comes
    from the file's ``st_mtime`` converted with ``datetime.fromtimestamp``.
    A file is skipped with a message when the destination path already
    exists.

    Args:
        source_dir: Directory to organize (``str`` or ``Path``).
        preview: When true, only print the planned moves.
    """
    base = Path(source_dir)

    for entry in base.iterdir():
        is_hidden = entry.name.startswith('.')
        if is_hidden or not entry.is_file():
            continue

        modified = datetime.fromtimestamp(entry.stat().st_mtime)

        # Folder name formatted as YYYY-MM-DD (e.g., 2025-03-11)
        target_dir = base / f"{modified:%Y-%m-%d}"
        target = target_dir / entry.name

        if target.exists():
            print(f"Skipping {entry.name}: already exists in {target_dir}")
            continue

        if preview:
            print(f"Would move {entry.name} -> {target_dir}/")
            continue

        target_dir.mkdir(exist_ok=True)
        shutil.move(str(entry), target)
organize_by_date('./downloads')
datetime.fromtimestamp() converts the Unix timestamp from st_mtime into a readable object. strftime('%Y-%m-%d') formats it as 2025-03-11.
Using os.walk() for Nested Directories
The examples so far only look at the top-level directory. Use os.walk() to traverse nested subdirectories and organize files regardless of how deep they’re buried:
import os
from pathlib import Path
import shutil
from datetime import datetime
def organize_by_date_recursive(source_dir, preview=False):
    """Copy every file under ``source_dir`` into ``source_dir/YYYY-MM-DD/``.

    The tree is walked with ``os.walk``. Each non-hidden file is copied
    (originals are kept) into a folder directly under ``source_dir`` named
    after the file's modification date. Folders directly under
    ``source_dir`` whose name is a valid ``YYYY-MM-DD`` date are treated
    as earlier output and are not descended into. Other folders are
    processed normally, including ones that merely start with "20"
    (e.g. ``2020_taxes``).

    Args:
        source_dir: Root directory to scan (``str`` or ``Path``).
        preview: When true, only print the planned copies.
    """
    source = Path(source_dir)

    def is_date_folder(name):
        # Strict YYYY-MM-DD: fixed length plus a successful parse
        if len(name) != 10:
            return False
        try:
            datetime.strptime(name, '%Y-%m-%d')
        except ValueError:
            return False
        return True

    for root, dirs, files in os.walk(source):
        current = Path(root)

        if current == source:
            # Editing dirs in place tells os.walk (top-down) not to enter
            # the date folders produced by earlier runs.
            dirs[:] = [d for d in dirs if not is_date_folder(d)]

        for filename in files:
            # Skip hidden files
            if filename.startswith('.'):
                continue

            file = current / filename
            mtime = file.stat().st_mtime
            date = datetime.fromtimestamp(mtime)
            date_folder = date.strftime('%Y-%m-%d')

            dest_dir = source / date_folder
            dest_path = dest_dir / filename

            if dest_path.exists():
                print(f"Skipping {filename}: already exists")
                continue

            if preview:
                print(f"Would move {filename} -> {dest_dir}/")
            else:
                dest_dir.mkdir(exist_ok=True)
                shutil.copy2(file, dest_path)  # Copy instead of move
organize_by_date_recursive('./downloads')
os.walk() yields (root, directories, files) tuples, one per directory it visits. root is the current folder path, directories lists subdirectories, and files lists filenames. Set topdown=False to traverse bottom-up instead.
Grouping by Date Ranges
Instead of exact dates, group files by age: “today”, “this_week”, “this_month”, “older”:
from datetime import datetime, timedelta
def get_age_category(file_path):
    """Classify a file by how long ago it was last modified.

    Args:
        file_path: ``Path`` of the file to inspect; its ``st_mtime`` is read.

    Returns:
        ``'today'`` for less than 1 day, ``'this_week'`` for less than
        7 days, ``'this_month'`` for less than 30 days, otherwise
        ``'older'``.
    """
    stamp = file_path.stat().st_mtime
    elapsed = datetime.now() - datetime.fromtimestamp(stamp)

    # Upper bounds in ascending order; the first one that fits wins
    buckets = (
        (timedelta(days=1), 'today'),
        (timedelta(days=7), 'this_week'),
        (timedelta(days=30), 'this_month'),
    )
    for limit, label in buckets:
        if elapsed < limit:
            return label
    return 'older'
def organize_by_age(source_dir, preview=False):
    """Move top-level files into folders named after their age category.

    The category for each non-hidden file comes from
    ``get_age_category``. A file is skipped with a message when the
    destination path already exists.

    Args:
        source_dir: Directory to organize (``str`` or ``Path``).
        preview: When true, only print the planned moves.
    """
    base = Path(source_dir)

    for entry in base.iterdir():
        is_hidden = entry.name.startswith('.')
        if is_hidden or not entry.is_file():
            continue

        target_dir = base / get_age_category(entry)
        target = target_dir / entry.name

        if target.exists():
            print(f"Skipping {entry.name}: already exists")
            continue

        if preview:
            print(f"Would move {entry.name} -> {target_dir}/")
            continue

        target_dir.mkdir(exist_ok=True)
        shutil.move(str(entry), target)
organize_by_age('./downloads')
Common Gotchas
Path handling on Windows: shutil.move() works across platforms, but if you construct paths manually, use pathlib.Path and avoid hardcoded slashes. Windows uses \ while Unix uses /.
Race conditions: If multiple scripts run simultaneously, two processes might try to create the same folder or move the same file. Wrap operations in try/except blocks:
try:
    shutil.move(src, dst)
except (FileNotFoundError, FileExistsError):
    # When another process wins the race the source is already gone, which
    # shutil.move() reports as FileNotFoundError. FileExistsError is still
    # caught for callers that relied on the previous handler.
    print(f"Already moved: {src}")
File in use: On Windows, moving a file that’s open or still being written to will raise PermissionError (on Unix the move usually succeeds). Add retry logic:
import time
def safe_move(src, dst, retries=3, preview=False):
    """Move ``src`` to ``dst``, retrying when the file is temporarily locked.

    Args:
        src: Path of the file to move.
        dst: Destination path.
        retries: Maximum number of attempts. One second is slept between
            attempts that fail with ``PermissionError``.
        preview: When true, print the planned move instead of doing it.

    Returns:
        ``True`` once an attempt completes; ``False`` when ``retries``
        allows no attempt at all.

    Raises:
        PermissionError: If the final attempt still fails.
    """
    final_attempt = retries - 1

    for attempt_no in range(retries):
        try:
            if not preview:
                shutil.move(src, dst)
            else:
                print(f"Would move {src} -> {dst}")
        except PermissionError:
            # Out of attempts: let the caller see the failure
            if attempt_no >= final_attempt:
                raise
            time.sleep(1)
            continue
        return True

    return False
Summary
| Task | Module/Method |
|---|---|
| Loop through files | Path.iterdir() |
| Move or copy files | shutil.move(), shutil.copy2() |
| Match by pattern | Path.glob() / Path.rglob() or re.compile() |
| Traverse directory trees | os.walk() |
See Also
- The pathlib Module — Path objects and path manipulation
- The shutil Module — Copying, moving, and archiving files
- The glob Module — Pattern-based file searching
- The os Module — Working with the filesystem