Python性能基准测试

建立性能基准体系是优化工作的基础，确保优化有效且可持续。

timeit模块

基本使用

Python

import timeit

# Time a single expression given as a source string; the value returned
# is the total number of seconds for all `number` executions.
time = timeit.timeit('sum(range(1000))', number=10000)
# Divide by the run count for a per-run figure, then convert s -> ms.
print(f"Average: {time / 10000 * 1000:.3f} ms per run")

# Time a callable instead of a source string.
def test_func():
    """Return the squares of 0..999 as a list."""
    return [i**2 for i in range(1000)]

time = timeit.timeit(test_func, number=1000)
print(f"Total: {time:.3f} s for 1000 runs")

# Use `setup` to import the module that the timed statement needs.
time = timeit.timeit(
    'random.randint(1, 100)',
    setup='import random',
    number=10000
)

repeat多次测量

Python

import timeit

# Take several independent measurements and keep the best one.
times = timeit.repeat(
    'sum(range(10000))',
    number=1000,
    repeat=5  # take 5 measurements
)

# Each entry in `times` is the total for one measurement of 1000 executions.
print(f"Best: {min(times):.3f} s")
print(f"All: {times}")

命令行使用

Bash

# 测试代码片段
python -m timeit "sum(range(1000))"

# 指定循环次数和重复次数
python -m timeit -n 1000 -r 5 "sum(range(1000))"

# 输出：
# 1000 loops, best of 5: 27.2 usec per loop

Timer类高级用法

Python

import timeit

# Create a Timer object; `setup` builds the reversed list that `stmt` sorts.
timer = timeit.Timer(
    stmt='sorted(data)',
    setup='data = list(range(1000, 0, -1))'
)

# Let the Timer pick the loop count automatically.
times = timer.autorange()  # returns (number, time)
# total time / loop count gives seconds per loop; * 1000 converts to ms.
print(f"Auto: {times[1] / times[0] * 1000:.3f} ms per loop")

# Repeated measurement: 10 measurements of 1000 loops each.
results = timer.repeat(repeat=10, number=1000)
print(f"Min: {min(results):.3f} s")

pytest-benchmark

安装

Bash

pip install pytest-benchmark

基本测试

Python

# test_performance.py

def test_sum(benchmark):
    """Benchmark the built-in ``sum`` over ``range(1000)`` and check its value."""
    # The benchmark fixture calls sum(numbers) repeatedly and hands back
    # the return value of the call.
    numbers = range(1000)
    total = benchmark(sum, numbers)
    assert total == 499500


def test_list_comprehension(benchmark):
    """Benchmark building the squares of 0..999 with a list comprehension."""
    squares = benchmark(lambda: [i**2 for i in range(1000)])
    expected_length = 1000
    assert len(squares) == expected_length

运行测试

Bash

pytest test_performance.py --benchmark-only

# 输出示例：
# Name (time in ms)          Min      Max     Mean    StdDev
# ---------------------------------------------------------
# test_sum                0.0272   0.0350   0.0289   0.0020
# test_list_comprehension 0.1100   0.1250   0.1150   0.0050

参数化测试

Python

import pytest

@pytest.mark.parametrize("size", [100, 1000, 10000])
def test_sort(benchmark, size):
    """Benchmark ``sorted`` on a reversed list of ``size`` integers.

    The input is ``[size, size - 1, ..., 1]``, so the sorted result must be
    ``[1, 2, ..., size]``.
    """
    data = list(range(size, 0, -1))
    result = benchmark(sorted, data)
    # The values run from 1 to size (not 0 to size - 1), so the expected
    # list is range(1, size + 1).
    assert result == list(range(1, size + 1))

对比测试

Python

def test_append_vs_extend(benchmark):
    """Benchmark building a 1000-element list with repeated ``list.append``."""
    def build_with_append():
        items = []
        for x in range(1000):
            items.append(x)
        return items

    result = benchmark(build_with_append)
    assert result == list(range(1000))

def test_extend(benchmark):
    """Benchmark building the same list with a single ``list.extend`` call."""
    def build_with_extend():
        items = []
        items.extend(range(1000))
        return items

    result = benchmark(build_with_extend)
    assert result == list(range(1000))

# pytest-benchmark reports both tests in the same results table, so the
# two approaches can be compared side by side.

保存基准结果

Bash

# Save the benchmark results (stored as a numbered run, e.g. 0001_baseline.json)
pytest test_performance.py --benchmark-save=baseline

# Compare against the most recent saved run
# (or select a specific run by number, e.g. --benchmark-compare=0001)
pytest test_performance.py --benchmark-compare

cProfile性能分析

基本使用

Python

import cProfile

def target_function():
    """Return the sum of the squares of 0..99999, computed with a plain loop."""
    acc = 0
    for n in range(100000):
        acc = acc + n * n
    return acc

# Profile a call to the function
cProfile.run('target_function()')

# Output:
#          5 function calls in 0.015 seconds
#    Ordered by: standard name
#
#    ncalls  tottime  percall  cumtime  percall filename:lineno(function)
#         1    0.015    0.015    0.015    0.015 test.py:2(target_function)

分析脚本

Bash

python -m cProfile script.py

# 按时间排序
python -m cProfile -s cumtime script.py

# 保存结果
python -m cProfile -o output.prof script.py

使用pstats分析

Python

import cProfile
import pstats

# Generate the profile and write it to output.prof
cProfile.run('target_function()', 'output.prof')

# Load the saved profile for analysis
stats = pstats.Stats('output.prof')

# Sort by cumulative time
stats.sort_stats('cumtime')
stats.print_stats(10)  # show the top 10 entries

# Sort by call count
stats.sort_stats('ncalls')
stats.print_stats(10)

# Show the callers of a specific function
stats.print_callers('target_function')

# Show the functions called by a specific function
stats.print_callees('target_function')

性能测试最佳实践

稳定的测试环境

Python

import timeit
import gc

# Disable the garbage collector to reduce interference; the try/finally
# guarantees it is re-enabled even if the measurement raises.
# NOTE(review): timeit is documented to turn GC off by itself while timing,
# so this explicit disable is likely redundant here and mainly matters for
# hand-rolled timing loops - confirm.
gc.disable()
try:
    time = timeit.timeit('sum(range(1000))', number=10000)
finally:
    gc.enable()

# Warm up with a few untimed calls
for _ in range(3):
    test_func()  # warm-up, lets the system settle

# The actual measurement
time = timeit.timeit(test_func, number=1000)

消除干扰因素

Python

import time
import sys

# Take 10 samples and report the fastest one: the minimum is the sample
# least disturbed by other activity on the system.
# perf_counter is used rather than time.time because it has higher precision.
times = []
for _ in range(10):
    t0 = time.perf_counter()
    test_func()
    times.append(time.perf_counter() - t0)

best_ms = min(times) * 1000
print(f"Best time: {best_ms:.3f} ms")

测试不同参数范围

Python

import timeit

def test_range(start, end, step):
    """Return the total seconds for 10000 runs of ``sum(range(start, end, step))``."""
    return timeit.timeit(f'sum(range({start}, {end}, {step}))', number=10000)

# Measure a series of increasing range sizes.
results = [(n, test_range(0, n, 1)) for n in (100, 1000, 10000, 100000)]

for size, elapsed in results:
    print(f"n={size}: {elapsed:.3f} s")

建立基准体系

基准测试类

Python

import timeit
import json
from pathlib import Path

class PerformanceBenchmark:
    """Collect timeit measurements and compare them with a saved JSON baseline.

    Results are keyed by test name. The baseline is stored in
    ``baseline_<name>.json``, resolved relative to the current working
    directory.
    """

    def __init__(self, name):
        """Create an empty benchmark suite called *name*."""
        self.name = name
        self.results = {}
        self.baseline_file = Path(f"baseline_{name}.json")

    def add_test(self, test_name, stmt, setup='', number=1000):
        """Time *stmt* and record the measurements under *test_name*.

        The statement is executed ``number`` times per measurement and the
        measurement is repeated 5 times. The recorded ``min`` and ``mean``
        are totals for ``number`` executions, in seconds.
        """
        times = timeit.repeat(stmt, setup, number=number, repeat=5)
        self.results[test_name] = {
            'min': min(times),
            'mean': sum(times) / len(times),
            'times': times,
            'number': number
        }

    def save_baseline(self):
        """Write the current results to the baseline file as JSON."""
        with open(self.baseline_file, 'w', encoding='utf-8') as f:
            json.dump(self.results, f)

    def compare_baseline(self, threshold=10):
        """Print the percentage change of each test's best time vs. the baseline.

        A test is marked as passing when its best time grew by less than
        *threshold* percent. Tests missing from the baseline are skipped.
        Prints "No baseline found" and returns if no baseline file exists.
        """
        if not self.baseline_file.exists():
            print("No baseline found")
            return

        # A context manager closes the file handle deterministically.
        with open(self.baseline_file, encoding='utf-8') as f:
            baseline = json.load(f)

        for name, current in self.results.items():
            base = baseline.get(name)
            if not base:
                continue
            if base['min'] == 0:
                # A relative change against a zero baseline is undefined.
                print(f"{name}: baseline is 0, change undefined")
                continue
            change = (current['min'] - base['min']) / base['min'] * 100
            status = "✓" if change < threshold else "✗"
            print(f"{name}: {change:+.1f}% {status}")

# Usage: measure two snippets, then store the results as the baseline
bench = PerformanceBenchmark('core_functions')
bench.add_test('sum_range', 'sum(range(1000))', number=10000)
bench.add_test('list_comp', '[i**2 for i in range(1000)]', number=1000)
bench.save_baseline()

CI集成基准测试

Python

# tests/test_benchmark.py

import pytest

@pytest.fixture
def benchmark_threshold():
    """Provide the maximum allowed mean time, in seconds, for each benchmark."""
    limits = {}
    limits['sum_range'] = 0.03  # at most 30 ms
    limits['list_comp'] = 0.12  # at most 120 ms
    return limits

class TestPerformance:
    """Performance gates: each benchmark's mean time must stay under its threshold."""

    @pytest.mark.benchmark
    def test_sum_range(self, benchmark, benchmark_threshold):
        """Fail if the mean time of ``sum(range(1000))`` exceeds its limit."""
        # The return value is not needed; only the collected timing matters.
        benchmark(sum, range(1000))
        assert benchmark.stats['mean'] < benchmark_threshold['sum_range']

    @pytest.mark.benchmark
    def test_list_comprehension(self, benchmark, benchmark_threshold):
        """Fail if the mean time of the 1000-square comprehension exceeds its limit."""
        benchmark(lambda: [i**2 for i in range(1000)])
        assert benchmark.stats['mean'] < benchmark_threshold['list_comp']

性能测量精度对比

方法	精度	适用场景
time.time()	毫秒级	粗略测量
time.perf_counter()	微秒级	精确测量
timeit	与perf_counter相同（默认计时器），通过多次循环测量极短代码	代码片段
cProfile	函数级	整体分析
pytest-benchmark	与perf_counter相同（默认计时器），自动校准运行轮次	自动化测试

注意：性能测试要多次运行取最优值，避免系统调度、GC等干扰因素。

要点总结

timeit测量代码片段，repeat多次运行取最优，autorange自动确定次数
pytest-benchmark集成测试框架，支持参数化、对比、保存基准
cProfile分析函数调用，pstats查看调用链和热点函数
使用perf_counter高精度计时；手动计时时禁用GC减少测量干扰（timeit默认已在计时期间禁用GC）
建立基准体系：保存baseline、CI集成阈值检查、自动对比变化
多参数范围测试，确保性能在各场景下稳定

存放路径：articles/PYTHON/专家/性能优化/性能基准测试.md

📝 发现内容有误？点击此处直接编辑