Python内存泄漏排查
Python虽有自动内存管理,但循环引用、全局缓存、未关闭资源等问题仍会导致内存泄漏。
内存泄漏常见原因
循环引用
Python
# A reference cycle defeats reference counting: the objects are not freed when
# the last outside reference goes away, only later when the cyclic garbage
# collector runs (and not at all while that collector is disabled).
import weakref


class Node:
    """Minimal linked node used to demonstrate a reference cycle."""

    def __init__(self):
        self.next = None


def create_leak():
    """Build two nodes that reference each other, then drop them.

    Returns:
        None. The two nodes are unreachable once the function returns.
    """
    a = Node()
    b = Node()
    a.next = b
    b.next = a  # reference cycle: a -> b -> a
    # The local names disappear on return, but a and b keep each other's
    # reference count above zero until the cyclic collector reclaims them.
    return None


# Fix: use a weak back-reference (or break the cycle by hand before returning).
def create_without_cycle():
    """Build the same pair of nodes with a weak back-reference instead of a cycle.

    Returns:
        The first node. Its ``next`` is the second node, whose ``next`` is a
        ``weakref.ref`` that must be called to obtain the first node (it
        yields None once the first node has been freed).
    """
    a = Node()
    b = Node()
    a.next = b
    b.next = weakref.ref(a)  # weak reference: does not keep `a` alive
    return a
全局缓存无限增长
Python
# Module-level cache with no eviction: entries accumulate for the life of the process.
_cache = {}


def get_cached(key, factory):
    """Return the cached value for ``key``, building it with ``factory`` on a miss.

    Args:
        key: Hashable cache key.
        factory: Zero-argument callable invoked only when ``key`` is not cached.

    Returns:
        The value stored in ``_cache`` under ``key``.
    """
    try:
        return _cache[key]
    except KeyError:
        value = factory()
        _cache[key] = value
        return value
# Problem: nothing ever removes entries from _cache.
# Fix: bound the cache size, e.g. with an LRU eviction policy.
from functools import lru_cache


@lru_cache(maxsize=1000)
def get_cached_fixed(key):
    """Return ``compute(key)``, memoised in an LRU cache of at most 1000 entries."""
    # NOTE(review): `compute` is not defined in this snippet; it stands in for
    # the expensive function being cached.
    result = compute(key)
    return result
未关闭资源
Python
# Anti-pattern: the file object is never closed explicitly.
def read_file(path):
    """Return the whole text content of ``path`` (deliberately leaky example)."""
    handle = open(path)
    # handle.close() is never called here, so releasing the descriptor is
    # left to whenever the file object gets finalised.
    return handle.read()
# Fix: let a context manager close the file.
def read_file_fixed(path):
    """Return the whole text content of ``path``, closing the file afterwards."""
    with open(path) as source:
        content = source.read()
    # The file is already closed here, whether or not read() raised.
    return content
闭包持有引用
Python
# Anti-pattern: a closure that accidentally keeps a large object alive.
def create_handler():
    """Return a handler that processes the first 100 items of the loaded data.

    NOTE(review): `load_large_file` and `process` are not defined in this
    snippet; they stand in for application code.
    """
    payload = load_large_file()  # large data set

    def handler():
        # The closure captures `payload` itself, not just the slice it uses.
        head = payload[:100]
        return process(head)

    return handler
# The returned handler keeps the entire loaded object referenced for as long as it lives.
# Fix: capture only the data the handler actually needs.
def create_handler_fixed():
    """Return a handler that closes over just the first 100 items.

    NOTE(review): `load_large_file` and `process` are not defined in this
    snippet; they stand in for application code.
    """
    payload = load_large_file()
    # Slice before defining the closure so that only the slice is captured.
    # NOTE(review): this frees the rest only if slicing copies (list, str,
    # bytes); for types whose slices are views, take an explicit copy.
    needed_data = payload[:100]

    def handler():
        return process(needed_data)

    return handler
tracemalloc内存追踪
基本使用
Python
import tracemalloc
# Start tracing memory allocations
tracemalloc.start()
# Code under measurement
data = [i for i in range(100000)]
# Capture a snapshot of the currently traced allocations
snapshot = tracemalloc.take_snapshot()
# Group allocations by source line and show the ten largest
top_stats = snapshot.statistics('lineno')
for stat in top_stats[:10]:
    print(stat)
# Each entry prints as "<file>:<line>: size=..., count=..., average=...",
# for example (numbers are illustrative):
# test.py:5: size=3508 KiB, count=99744, average=36 B
比较内存变化
Python
import tracemalloc
tracemalloc.start()
# Baseline snapshot, taken before the suspicious code runs
snapshot1 = tracemalloc.take_snapshot()
# Run the code that may leak
# NOTE(review): `process_data` is not defined in this snippet; it stands in
# for the code under investigation.
process_data()
# Snapshot taken after the run
snapshot2 = tracemalloc.take_snapshot()
# Per-source-line differences between the two snapshots
diff = snapshot2.compare_to(snapshot1, 'lineno')
for stat in diff[:10]:
    print(stat)
追踪特定代码
Python
import tracemalloc
tracemalloc.start(10)  # store up to 10 frames per allocation traceback


def suspect_function():
    """Print the allocation call stack of every traced allocation group.

    Takes a snapshot of the currently traced allocations, groups them by
    traceback and prints each traceback with all of its stored frames.
    """
    # Snapshot of the currently traced allocations
    snapshot = tracemalloc.take_snapshot()
    # Show the call stack of each group
    for stat in snapshot.statistics('traceback'):
        # str(stat.traceback) renders a single frame only; format() yields
        # one or more lines for every stored frame.
        print("\n".join(stat.traceback.format()))
获取当前与峰值内存占用
Python
import tracemalloc
tracemalloc.start()
# Allocate something to measure
obj = [i for i in range(10000)]
# Snapshot taken while tracing is active (not used further in this snippet)
snapshot = tracemalloc.take_snapshot()
# get_traced_memory() returns the current and peak traced sizes, in bytes
size, peak = tracemalloc.get_traced_memory()
print(f"Current: {size / 1024:.2f} KB, Peak: {peak / 1024:.2f} KB")
# Stop tracing
tracemalloc.stop()
objgraph对象引用图
安装与基本使用
Bash
pip install objgraph
Python
import objgraph
# Print the most common object types together with their instance counts
objgraph.show_most_common_types(limit=10)
# Example output:
# dict 12345
# list 5678
# function 1234
# Print the types whose peak instance count grew since the previous call
objgraph.show_growth(limit=10)
查找引用链
Python
import objgraph
x = []
# Forward references: draw the objects that x refers to
objgraph.show_refs([x], filename='refs.png')
# Back references: draw the objects that refer to x
objgraph.show_backrefs([x], filename='backrefs.png')
# Shortest chain of referrers leading from a module down to x
objgraph.find_backref_chain(x, objgraph.is_proper_module)
查找泄漏对象
Python
import objgraph
# Collect every live object of the given type
leaked_dicts = objgraph.by_type('dict')
print(f"Found {len(leaked_dicts)} dicts")
# Render the reference chain that keeps each of the first few objects alive
for index, d in enumerate(leaked_dicts[:5]):
    # The predicate selects where a chain starts; a module is a practical
    # root because module globals are what usually keeps leaked objects alive.
    chain = objgraph.find_backref_chain(d, objgraph.is_proper_module)
    # One image per object, so later iterations do not overwrite earlier ones.
    objgraph.show_chain(chain, filename=f'leak_{index}.png')
sys.getsizeof检查
基本使用
Python
import sys
a = [i for i in range(10000)]
print(sys.getsizeof(a))  # shallow size: the list object itself, not its elements


# Recursively compute the total size
def get_total_size(obj, _seen=None):
    """Return the size in bytes of ``obj`` plus everything it contains.

    Follows dict keys and values and the items of lists, tuples, sets and
    frozensets. Every distinct object is counted once, so shared elements
    are not double-counted and self-referencing containers terminate.

    Args:
        obj: Object to measure.
        _seen: Internal set of already-counted object ids; callers should
            leave it unset.

    Returns:
        Sum of ``sys.getsizeof`` over all distinct objects reached.
    """
    if _seen is None:
        _seen = set()
    marker = id(obj)
    if marker in _seen:
        # Already counted via another path (shared object or reference cycle).
        return 0
    _seen.add(marker)
    total = sys.getsizeof(obj)
    if isinstance(obj, dict):
        for k, v in obj.items():
            total += get_total_size(k, _seen)
            total += get_total_size(v, _seen)
    elif isinstance(obj, (list, tuple, set, frozenset)):
        for item in obj:
            total += get_total_size(item, _seen)
    return total


print(get_total_size(a))  # total size including the elements
使用pympler精确测量
Bash
pip install pympler
Python
from pympler import asizeof
obj = {'key': [i for i in range(10000)]}
# Deep measurement: the object plus everything it references
print(asizeof.asizeof(obj)) # total memory footprint
# Size breakdown, one level of referents deep
print(asizeof.asized(obj, detail=1))
memory_profiler实时监控
安装与使用
Bash
pip install memory_profiler
Python
from memory_profiler import profile
# memory_profiler prints a line-by-line memory report each time the decorated
# function runs. The statements are kept exactly as measured in the report.
@profile
def my_func():
    a = [1] * (10 ** 6)
    b = [2] * (2 * 10 ** 7)
    del b
    return a
# Sample report printed at run time (line numbers and sizes are illustrative):
# Line # Mem usage Increment Occurrences Line Contents
# =============================================================
# 3 50.0 MiB 50.0 MiB 1 @profile
# 4 def my_func():
# 5 58.0 MiB 8.0 MiB 1 a = [1] * (10 ** 6)
# 6 210.0 MiB 152.0 MiB 1 b = [2] * (2 * 10 ** 7)
# 7 58.0 MiB -152.0 MiB 1 del b
# 8 58.0 MiB 0.0 MiB 1 return a
行级内存分析
Bash
python -m memory_profiler your_script.py
psutil系统内存监控
实时监控
Python
import psutil
import os
process = psutil.Process(os.getpid())
# Memory counters of the current process
mem_info = process.memory_info()
print(f"RSS: {mem_info.rss / 1024 / 1024:.2f} MB")  # resident set size (physical memory)
print(f"VMS: {mem_info.vms / 1024 / 1024:.2f} MB")  # virtual memory size
# Memory usage of this process as a percentage
print(f"Memory %: {process.memory_percent():.2f}%")
# Continuous monitoring: prints the RSS once per second and never exits on its own
import time
while True:
    print(f"Memory: {process.memory_info().rss / 1024 / 1024:.2f} MB")
    time.sleep(1)
gc模块调试
查看GC状态
Python
import gc
# Current collection counters as a (count0, count1, count2) tuple
print(gc.get_count())
# Run a full collection; the return value is the number of unreachable objects found
collected = gc.collect()
print(f"Collected {collected} objects")
# Objects the collector found unreachable but could not free
print(f"Garbage: {len(gc.garbage)}")
# Enable debug output: collection statistics and uncollectable objects
gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_UNCOLLECTABLE)
强制回收循环引用
Python
import gc
class Cycle:
    """Object with a single reference slot, used to build a two-object cycle."""

    def __init__(self):
        self.ref = None


# Link two instances to each other
a = Cycle()
b = Cycle()
a.ref, b.ref = b, a
# Drop the only external references
del a
del b
# Run the cyclic collector now instead of waiting for its next automatic run
gc.collect()
# Check whether any instance survived
cycles = [survivor for survivor in gc.get_objects() if isinstance(survivor, Cycle)]
print(f"Remaining cycles: {len(cycles)}")
实战排查流程
排查步骤
Python
import tracemalloc
import gc
import sys
def diagnose_memory_leak(target=None):
    """Report how much traced memory a callable leaves allocated.

    Starts tracemalloc, snapshots before and after calling the target,
    prints the total growth and the five source lines that changed most,
    then runs a garbage collection and reports uncollectable objects.
    Tracing is stopped even when the target raises.

    Args:
        target: Zero-argument callable to examine. When omitted, the
            module-level ``suspect_function`` is called.

    Raises:
        Whatever the target raises; the exception is propagated unchanged.
    """
    if target is None:
        target = suspect_function
    # 1. Start tracing
    tracemalloc.start()
    try:
        # 2. Baseline snapshot and traced size
        initial_snapshot = tracemalloc.take_snapshot()
        initial_mem = tracemalloc.get_traced_memory()[0]
        # 3. Run the suspicious code
        target()
        # 4. Snapshot and traced size after the run
        final_snapshot = tracemalloc.take_snapshot()
        final_mem = tracemalloc.get_traced_memory()[0]
        # 5. Overall growth
        print(f"Memory increase: {(final_mem - initial_mem) / 1024:.2f} KB")
        # 6. Source lines with the largest change
        diff = final_snapshot.compare_to(initial_snapshot, 'lineno')
        for stat in diff[:5]:
            print(stat)
        # 7. Run the collector and report what it found
        collected = gc.collect()
        print(f"GC collected: {collected}")
        # 8. Report objects the collector could not free
        if gc.garbage:
            print(f"Uncollectable objects: {len(gc.garbage)}")
    finally:
        # Never leave tracing (and its overhead) enabled after a failure.
        tracemalloc.stop()
定位泄漏位置
Python
import tracemalloc
import objgraph
def find_leak():
    """Print the ten largest allocation tracebacks, then the most common object types."""
    tracemalloc.start(25)  # store up to 25 frames per traceback
    # ... run the code under investigation here ...
    snapshot = tracemalloc.take_snapshot()
    # Locations that allocated the most memory
    heaviest = snapshot.statistics('traceback')[:10]
    for stat in heaviest:
        frames = stat.traceback.format()
        print("\n".join(frames))
    # Use objgraph to see which object types dominate
    objgraph.show_most_common_types()
内存泄漏典型案例
案例1:装饰器泄漏
Python
# Leaky decorator: results are memoised in a dict that is never trimmed.
def cache_result(func):
    """Return a wrapper that memoises ``func`` by its positional arguments.

    Args:
        func: Callable taking hashable positional arguments.

    Returns:
        A wrapper that calls ``func`` once per distinct argument tuple.
    """
    memo = {}  # unbounded: one entry per distinct argument tuple, kept forever

    def wrapper(*args):
        try:
            return memo[args]
        except KeyError:
            memo[args] = func(*args)
            return memo[args]

    return wrapper
# Fix: use lru_cache (or otherwise cap the cache size).
from functools import lru_cache


@lru_cache(maxsize=1000)
def cached_func(args):
    """Return ``compute(args)``, memoised in an LRU cache of at most 1000 entries."""
    # NOTE(review): `compute` is not defined in this snippet; it stands in for
    # the expensive function being cached.
    outcome = compute(args)
    return outcome
案例2:回调注册泄漏
Python
# Callback registry: anything registered stays referenced until it is removed.
callbacks = []


def register_callback(cb):
    """Append ``cb`` to the registry."""
    callbacks.append(cb)  # kept (and kept alive) until explicitly unregistered


def trigger():
    """Call every registered callback in registration order."""
    # Iterate over a copy so that a callback which registers or unregisters
    # during dispatch cannot make the loop skip an entry.
    for cb in list(callbacks):
        cb()


# Fix: give callers a way to remove what they registered.
def unregister_callback(cb):
    """Remove ``cb`` from the registry.

    Raises:
        ValueError: If ``cb`` is not currently registered.
    """
    callbacks.remove(cb)
注意:生产环境可使用内存监控脚本定期记录内存状态,超过阈值自动报警。
要点总结
- tracemalloc追踪内存分配来源,对比快照定位增长位置
- objgraph可视化对象引用关系,find_backref_chain查找引用链
- sys.getsizeof只测量对象自身的浅层大小(不含被引用的对象),pympler的asizeof递归计算包含被引用对象在内的总大小
- memory_profiler行级内存监控,定位具体代码行内存变化
- gc.collect手动触发回收,gc.garbage检查不可回收对象
- 常见泄漏:循环引用、无限缓存、未关闭资源、闭包持有引用
存放路径:articles/PYTHON/专家/性能优化/内存泄漏排查.md
📝 发现内容有误?点击此处直接编辑