Python内存管理与垃圾回收

Python内存管理是自动化机制，理解其原理有助于编写高效程序和排查内存问题。

Python内存模型

对象内存结构

Python

import sys

# Every Python object starts with a PyObject_HEAD.
a = 1

# PyObject_HEAD contains:
# - ob_refcnt: the reference count
# - ob_type: a pointer to the object's type

# Inspect the size of an object.
# NOTE(review): the byte counts quoted below are the article's own figures;
# they look typical of 64-bit CPython and vary by version and platform, so
# re-measure before relying on them.
print(sys.getsizeof(a))  # 28 bytes (int object)

# Different types have different sizes.
print(sys.getsizeof([]))    # 56 bytes (empty list)
print(sys.getsizeof({}))    # 64 bytes (empty dict)
print(sys.getsizeof(''))    # 49 bytes (empty string)
print(sys.getsizeof(None))  # 16 bytes

内存分配器架构

Python

# Python内存管理三层架构

# Layer 3: PyObject malloc/free
# - Python对象分配器
# - 处理PyObject_HEAD等对象特定结构

# Layer 2: PyMem malloc/free
# - Python内存分配器
# - 小对象使用内存池（arenas、pools）

# Layer 1: malloc/free
# - 系统malloc/free
# - 大对象直接调用系统分配

# 小对象（<=512字节）使用内存池（pymalloc）
# 大对象（>512字节）直接调用系统malloc

内存池机制

Python

# Arena（256KB；64位平台上较新的CPython为1MB）→ Pool（4KB）→ Block（8/16/24/32...字节，按固定步长递增的大小等级）
# 注意：具体大小随CPython版本和平台而变

# Pool管理相同大小的Block
# 8字节Pool管理8字节Block
# 16字节Pool管理16字节Block

# 优势：
# 1. 减少malloc调用次数
# 2. 减少内存碎片
# 3. 提高分配效率

# 查看GC是否启用（内存池的详细状态可用CPython特有的 sys._debugmallocstats() 查看）
import gc
print(gc.isenabled())  # GC是否启用

引用计数机制

引用计数原理

Python

import sys

a = [1, 2, 3]
print(sys.getrefcount(a))  # reference count (usually one higher than expected: the getrefcount argument itself holds a reference)

# Operations that add a reference:
b = a           # +1 (another name bound to the same list)
c = [a]         # +1 (stored as a list element)
d = {'key': a}  # +1 (stored as a dict value)

# Operations that drop a reference:
b = None        # -1 (name rebound to another object)
c.pop()         # -1 (element removed from the container)
del d['key']    # -1 (dict entry deleted)

# When the count reaches 0 the object is reclaimed immediately.

引用计数增加场景

Python

import sys

obj = object()
print(sys.getrefcount(obj))  # 2 (the name `obj` + the getrefcount argument)

# Bind additional names to the same object.
ref1 = obj
print(sys.getrefcount(obj))  # 3

ref2 = obj
print(sys.getrefcount(obj))  # 4

# Store it as a container element.
container = [obj]
print(sys.getrefcount(obj))  # 5

# Passing it as a function argument adds temporary references.
def check_ref(o):
    """Print the reference count of *o* as seen from inside the call."""
    print(sys.getrefcount(o))  # higher still: the parameter `o` is one more reference

check_ref(obj)  # the count observed inside the function is higher than outside

引用计数减少场景

Python

import sys

a = [1, 2, 3]
b = a
print(sys.getrefcount(a))  # 3

b = None  # rebinding b drops its reference to the list
print(sys.getrefcount(a))  # 2

del b     # removes the name b (bound to None by now, so the list is unaffected)
# b no longer exists

# References held by local names disappear when their scope ends.
def local_ref():
    """Create a list locally, print its reference count, and return it."""
    x = [1, 2, 3]
    print(sys.getrefcount(x))
    return x  # returning hands the caller a reference, so the list survives

result = local_ref()
# The local name x is gone once the function returns, but `result` keeps the list alive.

循环引用问题

循环引用示例

Python

# Reference counting alone cannot handle reference cycles.
class Node:
    """Minimal linked node; `next` starts out as None."""

    def __init__(self):
        self.next = None

a = Node()
b = Node()
a.next = b  # a refers to b
b.next = a  # b refers to a

# Reference cycle: a -> b -> a
# After the external names are deleted, each node still has a count of 1.
del a
del b
# Reference counting by itself can never free the two nodes; they stay
# allocated until the cyclic garbage collector runs (so this only becomes a
# lasting leak if that collector is disabled).

# The cyclic GC is responsible for detecting such cycles.

循环引用检测

Python

import gc

class LinkedNode:
    """Doubly linked node with `next` and `prev` links, both initially None."""

    def __init__(self):
        self.next = None
        self.prev = None

# Build a circular doubly linked list of 100 nodes.
nodes = [LinkedNode() for _ in range(100)]
for i in range(99):
    nodes[i].next = nodes[i + 1]
    nodes[i + 1].prev = nodes[i]
# Close the ring: the last node links forward to the first and vice versa.
nodes[99].next = nodes[0]
nodes[0].prev = nodes[99]

# Drop the only external reference; the nodes now only reference each other.
del nodes

# Trigger a collection manually.
gc.collect()
# The cyclic collector detects the unreachable ring and reclaims it.

分代垃圾回收

分代回收原理

Python

import gc

# Python's GC groups tracked objects into three generations:
# generation 0: newly created objects (scanned frequently)
# generation 1: objects that survived one collection
# generation 2: objects that survived several collections (scanned rarely)

# Generational hypothesis: young objects tend to die soon, old objects tend to survive.

# Inspect the thresholds.
print(gc.get_threshold())  # (700, 10, 10)
# 700: threshold that triggers a generation-0 collection
# 10: generation 1 is collected after 10 generation-0 collections
# 10: generation 2 is collected after 10 generation-1 collections
# NOTE(review): these are the defaults quoted by the article; newer
# interpreter releases may report different values — confirm on the target version.

# Adjust the thresholds.
gc.set_threshold(1000, 15, 15)

GC触发时机

Python

import gc

# An automatic collection starts when the generation-0 count reaches its threshold (700 by default).

# Monitor collections: print statistics for every collection.
gc.set_debug(gc.DEBUG_STATS)

# Trigger a full collection manually.
collected = gc.collect()  # returns the number of unreachable objects found
print(f"Collected {collected} objects")

# Collect up to a specific generation.
collected = gc.collect(0)  # generation 0 only
collected = gc.collect(1)  # generations 0 and 1
collected = gc.collect(2)  # all generations

GC标志位

Python

import gc

class MyClass:
    """Empty user-defined class used to demonstrate GC tracking."""


obj = MyClass()

# gc.disable() switches off *automatic* collection only. It does not stop
# objects from being tracked, and reference counting keeps working as usual.
# Call gc.enable() to turn automatic collection back on.
gc.disable()

# Check whether the collector tracks this object. Instances of user-defined
# classes are container objects, so this prints True.
print(gc.is_tracked(obj))  # True

# The Python-level gc module has no track()/untrack() functions; calling
# gc.untrack(obj) or gc.track(obj) raises AttributeError. Adding or removing
# an object from GC tracking is only possible from C extension code, via
# PyObject_GC_Track() / PyObject_GC_UnTrack().

弱引用

弱引用避免循环引用

Python

import weakref

class Node:
    """Singly linked node; `next` starts out as None."""

    def __init__(self):
        self.next = None

a = Node()
b = Node()

# Avoid a strong reference cycle by making the back link weak.
a.next = b
b.next = weakref.ref(a)  # a weak reference does not increase a's reference count

# Following the weak link.
# This truthiness test only tells an unset link (None) apart from an assigned
# weak reference; whether the referent is still alive is known only after
# calling the reference.
if b.next:
    node = b.next()  # dereference to obtain the actual object
    # If a has been deleted, b.next() returns None.

WeakRef应用场景

Python

import weakref

class Cache:
    """Object cache that holds its values through weak references.

    Entries disappear automatically once nothing else holds a strong
    reference to the cached object, so the cache never keeps objects alive
    on its own.
    """

    def __init__(self):
        self._cache = weakref.WeakValueDictionary()

    def get(self, key, factory):
        """Return the cached object for *key*, building it on a miss.

        Args:
            key: Hashable cache key.
            factory: Zero-argument callable that creates the object when no
                live entry exists. The object it returns must support weak
                references.

        Returns:
            The live cached object, or the newly created one.
        """
        # A single .get() returns a strong reference (or None) atomically.
        # Testing `key in cache` and then indexing can raise KeyError if the
        # value is collected between the two steps.
        obj = self._cache.get(key)
        if obj is None:
            obj = factory()
            self._cache[key] = obj
        return obj

# WeakValueDictionary：值是弱引用
# 外部无引用时自动清除

# WeakSet example
class Item:
    """Plain user-defined class; its instances support weak references."""


ws = weakref.WeakSet()
# A bare object() instance cannot be weakly referenced (ws.add(object())
# raises TypeError), so an instance of a user-defined class is used instead.
obj = Item()
ws.add(obj)
print(len(ws))  # 1
del obj
print(len(ws))  # 0 (the entry vanishes once the last strong reference is gone)

内存优化技巧

使用slots减少内存

Python

import sys

# 普通类
class Point:
    """Ordinary class: x and y are stored in the per-instance ``__dict__``."""

    def __init__(self, x, y):
        # Bind both coordinates in one tuple assignment (x first, then y).
        self.x, self.y = x, y

# slots优化类
class PointSlots:
    """Slotted variant: x and y live in fixed slots, no per-instance ``__dict__``."""

    __slots__ = ['x', 'y']

    def __init__(self, x, y):
        # Bind both coordinates in one tuple assignment (x first, then y).
        self.x, self.y = x, y

# Memory comparison
p1 = Point(3, 4)
p2 = PointSlots(3, 4)

# sys.getsizeof is shallow: for p1 it does not include the separate
# per-instance __dict__ that actually stores x and y, so compare
# getsizeof(p1) + getsizeof(p1.__dict__) against getsizeof(p2).
# NOTE(review): the figures originally quoted here (~56 vs ~16 bytes, "~70%
# saved") look wrong — even None is reported as 16 bytes elsewhere in this
# article, and p2 additionally carries two slots. Re-measure on the target
# interpreter.
print(sys.getsizeof(p1))       # the instance only, attribute dict excluded
print(sys.getsizeof(p2))       # the instance including its two slots; it has no __dict__
print(sys.getsizeof(p1.__dict__))  # the extra dict overhead that __slots__ avoids

避免不必要的对象创建

Python

# 低效：循环中创建对象
def process():
    """Deliberately inefficient example: allocates a new list on every iteration."""
    for value in range(1_000_000):
        scratch = list()  # a fresh list object is created each time round the loop
        scratch.append(value)

# 高效：复用对象
def process_efficient():
    """Efficient variant: one list is allocated up front and reused every iteration."""
    scratch = list()
    for value in range(1_000_000):
        scratch.clear()  # empty the existing list instead of allocating a new one
        scratch.append(value)

使用生成器减少内存

Python

# 低效：列表保存所有结果
def squares_list(n):
    """Return a list with the squares of 0..n-1; the whole list is held in memory."""
    squares = []
    for k in range(n):
        squares.append(k ** 2)
    return squares  # holds n elements at once

# 高效：生成器惰性生成
def squares_gen(n):
    """Lazily yield the squares of 0..n-1, producing one value at a time."""
    yield from (k ** 2 for k in range(n))

# Usage: consume the generator one value at a time.
# NOTE(review): `process` here stands for a per-item handler that takes one
# argument; the zero-argument process() defined in the earlier
# object-creation example would not accept `sq` — confirm or rename before
# running this snippet.
for sq in squares_gen(1000000):
    process(sq)  # only one square exists at a time, so memory use stays constant

使用array替代列表

Python

import sys
from array import array

# A list of integers
lst = [i for i in range(100000)]
print(sys.getsizeof(lst))  # about 800 KB for the list itself (getsizeof is shallow: the int objects are not counted)

# An array of integers
arr = array('i', range(100000))
print(sys.getsizeof(arr))  # about 400 KB (roughly half; array stores unboxed C values)

GC调试

GC调试模式

Python

import gc

# Turn on debug output.
gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_COLLECTABLE)

# DEBUG_STATS: print collection statistics
# DEBUG_COLLECTABLE: print information on collectable objects found
# DEBUG_UNCOLLECTABLE: print information on uncollectable objects found
# DEBUG_SAVEALL: save all unreachable objects in gc.garbage instead of freeing them

# Run a collection to see the output.
gc.collect()

# Turn debugging off again.
gc.set_debug(0)

查看GC状态

Python

import gc

# Current collection counts for each generation.
print(gc.get_count())  # (count0, count1, count2)

# All objects currently tracked by the collector.
objs = gc.get_objects()
print(len(objs))

# Walk the reference graph around an object.
obj = []
refs = gc.get_referrers(obj)  # objects that refer to obj
referred = gc.get_referents(obj)  # objects that obj refers to

不可回收对象

Python

import gc

# gc.garbage holds objects the collector found unreachable but could not free.
print(gc.garbage)

# How objects end up in gc.garbage:
# 1. Before Python 3.4: objects with a __del__ method that are part of a reference cycle.
# 2. With gc.DEBUG_SAVEALL set: every unreachable object is saved here instead of being freed.
# NOTE(review): the original text listed "frozen by gc.freeze()" as a cause.
# Per the gc module documentation, frozen objects are moved to a permanent
# generation and ignored by later collections; they are not placed in
# gc.garbage.

# Since Python 3.4 (PEP 442, safe object finalization) cycles that contain
# objects with __del__ can be collected, so this list is normally empty.

要点总结

Python内存管理：引用计数为主，分代GC处理循环引用
引用计数实时回收，为0立即释放，但无法处理循环引用
分代GC三代机制：新对象频繁扫描，老对象很少扫描
使用weakref弱引用避免循环引用，WeakValueDictionary/WeakSet在对象失去强引用后自动清理条目
__slots__去掉每个实例的__dict__，通常可节省约三分之一到一半的实例内存（具体数值需实测）；生成器惰性计算减少内存占用
大量同类型数值用array替代list，GC阈值可通过gc.set_threshold调整

存放路径：articles/PYTHON/专家/性能优化/内存管理与垃圾回收.md

📝 发现内容有误？点击此处直接编辑