Python I/O性能优化
I/O操作是程序性能瓶颈的主要来源,优化I/O能显著提升整体性能。
文件I/O优化
使用缓冲读取
Python
# Inefficient: one read() call per character. Each call carries Python-level
# overhead, so the loop is slow even on a buffered file object.
with open('large.txt', 'r') as f:
    while char := f.read(1):
        process(char)

# Efficient: read in fixed-size chunks.
# NOTE: in text mode the size argument counts characters, not bytes.
BUFFER_SIZE = 8192
with open('large.txt', 'r') as f:
    while chunk := f.read(BUFFER_SIZE):
        process_chunk(chunk)

# Efficient: iterate line by line (the file object buffers internally).
with open('large.txt', 'r') as f:
    for line in f:
        process_line(line)
二进制模式读取
Python
# Inefficient (and unsafe for binary data): text mode decodes the bytes to str
# and can raise UnicodeDecodeError on arbitrary binary content.
with open('large.bin', 'r') as f:
    content = f.read()

# Efficient: binary mode returns the raw bytes with no decoding step.
with open('large.bin', 'rb') as f:
    content = f.read()
批量写入
Python
# Slower: many small write() calls. The file object is buffered, so these do
# NOT each hit the disk; the cost is the per-call overhead and the temporary
# `line + '\n'` string built on every iteration.
with open('output.txt', 'w') as f:
    for line in lines:
        f.write(line + '\n')

# Faster: build the payload once and hand it over in a single write().
# Every line keeps its trailing newline, so the output matches the loop above.
# The whole payload is held in memory; prefer writelines() for huge inputs.
with open('output.txt', 'w') as f:
    f.write(''.join(f'{line}\n' for line in lines))

# Faster for very large outputs: enlarge the write buffer. The default buffer
# is commonly only a few KiB, so a value like 8192 changes little.
with open('output.txt', 'w', buffering=1024 * 1024) as f:
    f.writelines(f'{line}\n' for line in lines)
使用mmap内存映射
Python
import mmap
# Memory-map a large file for reading (length 0 maps the whole file).
# The context managers close the mapping and the file even on error.
with open('large.bin', 'rb') as f:
    with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
        # Random access: slicing copies only the requested range into bytes.
        data = mm[1000:2000]

# Writable mapping: the file must already exist, be non-empty, and be at
# least as long as the data written (a mapping cannot grow the file).
with open('output.bin', 'r+b') as f:
    with mmap.mmap(f.fileno(), 0) as mm:
        payload = b'new data'
        # Slice assignment requires the slice and the value to be the same
        # length, so size the slice from the payload.
        mm[0:len(payload)] = payload
        mm.flush()
文件描述符操作
Python
import os
# Inefficient: reopening the file for every chunk pays for open() and close()
# (and a buffer flush) on each iteration.
for chunk in chunks:
    with open('file.txt', 'a') as f:
        f.write(chunk)

# Efficient: open once and reuse the same handle for all writes.
with open('file.txt', 'a') as f:
    for chunk in chunks:
        f.write(chunk)
网络I/O优化
连接池复用
Python
import requests
from requests.adapters import HTTPAdapter
# Inefficient: every module-level requests.get() call sets up a new connection.
for url in urls:
    response = requests.get(url)

# Efficient: a Session keeps a connection pool and reuses connections.
# Using it as a context manager closes the pool even if a request raises.
with requests.Session() as session:
    adapter = HTTPAdapter(pool_connections=10, pool_maxsize=20)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    for url in urls:
        response = session.get(url)
异步HTTP请求
Python
import aiohttp
import asyncio
async def fetch_batch(urls):
    """Fetch every URL in *urls* concurrently and return the bodies as text.

    The returned list is in the same order as *urls*. Any request error
    propagates out of ``asyncio.gather``.
    """

    async def _fetch(session, url):
        # `async with` releases the connection back to the pool as soon as
        # the body has been read, instead of leaving responses open.
        async with session.get(url) as response:
            return await response.text()

    async with aiohttp.ClientSession() as session:
        # Requests and body reads all run concurrently.
        return await asyncio.gather(*(_fetch(session, url) for url in urls))
超时设置
Python
import requests
# Always pass a timeout so a stalled server cannot block the caller forever.
response = requests.get(url, timeout=(3.05, 30))  # (connect, read) in seconds

# aiohttp timeout: `total` bounds the whole request.
import aiohttp
timeout = aiohttp.ClientTimeout(total=30)


async def fetch_with_timeout(url):
    """Fetch *url* as text using the session-wide timeout defined above."""
    # `async with` is only valid inside a coroutine, hence the wrapper.
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(url) as response:
            return await response.text()
数据压缩传输
Python
import requests
import gzip
# Send a gzip-compressed request body.
# NOTE(review): assumes the server accepts gzip-encoded request bodies - confirm.
data = b'large data...'
compressed = gzip.compress(data)
headers = {'Content-Encoding': 'gzip'}
response = requests.post(url, data=compressed, headers=headers)
# Responses need no extra code on the client side.
response = requests.get(url)
# requests transparently decodes gzip-encoded response bodies
数据库I/O优化
连接池
Python
# SQLAlchemy connection pool
from sqlalchemy import create_engine, text

engine = create_engine(
    'postgresql://user:pass@localhost/db',
    pool_size=10,        # connections kept open in the pool
    max_overflow=20,     # extra connections allowed beyond pool_size
    pool_timeout=30,     # seconds to wait for a free connection
    pool_recycle=3600,   # recycle connections older than this many seconds
)

# Borrow a connection from the pool; it is returned when the block exits.
with engine.connect() as conn:
    # SQLAlchemy 2.x rejects raw SQL strings; wrap them in text().
    result = conn.execute(text('SELECT * FROM users'))
批量操作
Python
# Inefficient: one execute() call per row.
for record in records:
    conn.execute('INSERT INTO users VALUES (?, ?)', record)

# Efficient: execute() binds a single parameter set, so a sequence of rows
# must go through executemany().
# NOTE(review): Connection.executemany is the sqlite3 shortcut; with other
# DB-API drivers use the cursor form below.
conn.executemany('INSERT INTO users VALUES (?, ?)', records)

# The same batch insert through a cursor.
cursor.executemany('INSERT INTO users VALUES (?, ?)', records)
预编译语句
Python
# Unsafe and inefficient: string-built SQL is open to SQL injection and yields a different statement text for every id
cursor.execute('SELECT * FROM users WHERE id = ' + str(user_id))
# Safe and efficient: parameterized query, the statement text stays constant
cursor.execute('SELECT * FROM users WHERE id = ?', (user_id,))
# A constant statement text lets drivers that cache prepared statements reuse it across executions
事务批处理
Python
# Inefficient: one transaction (and one commit) per row.
# The table is named `items` because `table` is a reserved SQL keyword.
for record in records:
    conn.execute('INSERT INTO items VALUES (?)', record)
    conn.commit()

# Efficient: a single transaction for the whole batch.
conn.execute('BEGIN')
try:
    for record in records:
        conn.execute('INSERT INTO items VALUES (?)', record)
    conn.commit()
except Exception:
    # Undo the partial batch so the connection is not left mid-transaction.
    conn.rollback()
    raise
异步I/O综合
异步文件操作
Python
import aiofiles
async def async_read():
    """Read and return the whole of 'large.txt' as text."""
    async with aiofiles.open('large.txt', 'r') as f:
        return await f.read()


async def async_write():
    """Write the string 'content' to 'output.txt', replacing any old content."""
    async with aiofiles.open('output.txt', 'w') as f:
        await f.write('content')


async def async_stream():
    """Pass 'large.txt' to process_line() one line at a time."""
    async with aiofiles.open('large.txt', 'r') as f:
        async for line in f:
            process_line(line)
异步数据库
Python
import asyncpg
async def async_db():
    """Query all rows of ``users`` through an asyncpg pool and return them.

    The pool is used as an async context manager so it is closed even when
    acquiring a connection or running the query raises.
    """
    async with asyncpg.create_pool(
        'postgresql://user:pass@localhost/db',
        min_size=5,
        max_size=20,
    ) as pool:
        # acquire() returns the connection to the pool when the block exits.
        async with pool.acquire() as conn:
            return await conn.fetch('SELECT * FROM users')
异步Redis
Python
import redis.asyncio as redis
async def async_redis():
    """Run two SETs and one GET in a single pipeline and return the results."""
    client = redis.Redis(host='localhost', port=6379)
    try:
        # Pipeline commands are only queued here; execute() sends the batch.
        async with client.pipeline() as pipe:
            pipe.set('key1', 'value1')
            pipe.set('key2', 'value2')
            pipe.get('key3')
            results = await pipe.execute()
        return results
    finally:
        # Always release the connection, even if the pipeline fails.
        # NOTE(review): newer redis-py releases prefer `aclose()` - confirm
        # against the installed version.
        await client.close()
I/O多路复用
select/poll/epoll
Python
import selectors
import socket


def server_with_selector():
    """Run a single-threaded echo server on localhost:8080 using selectors.

    Loops forever; the selector and the listening socket are closed when the
    loop is left through an exception (for example KeyboardInterrupt).
    """
    sel = selectors.DefaultSelector()

    def accept(sock):
        # Accept a new client and watch it for readable events.
        conn, addr = sock.accept()
        conn.setblocking(False)
        sel.register(conn, selectors.EVENT_READ, read)

    def read(conn):
        data = conn.recv(1024)
        if data:
            # NOTE: send() on a non-blocking socket may write only part of
            # the data; a production server must buffer the remainder.
            conn.send(data)
        else:
            # Empty read means the peer closed the connection.
            sel.unregister(conn)
            conn.close()

    sock = socket.socket()
    sock.bind(('localhost', 8080))
    sock.listen()
    sock.setblocking(False)
    sel.register(sock, selectors.EVENT_READ, accept)
    try:
        while True:
            for key, mask in sel.select():
                # key.data holds the callback registered for this socket.
                callback = key.data
                callback(key.fileobj)
    finally:
        sel.close()
        sock.close()
缓存策略
内存缓存
Python
from functools import lru_cache
@lru_cache(maxsize=1000)
def read_file_cached(path):
    """Return the text content of *path*, memoized per path.

    The cache is keyed on the path only, so later changes to the file on
    disk are not seen until ``read_file_cached.cache_clear()`` is called.
    At most 1000 distinct paths are kept.
    """
    with open(path, 'r') as f:
        return f.read()
Redis缓存
Python
import redis
r = redis.Redis()
def get_with_cache(key, fetch_func, ttl=3600):
    """Return the value cached under *key*, fetching and storing it on a miss.

    Args:
        key: Redis key to look up.
        fetch_func: Zero-argument callable that produces the value on a miss.
        ttl: Expiry in seconds for the stored value.

    Returns:
        The cached value on a hit, otherwise the result of ``fetch_func()``.
        NOTE(review): a hit returns what Redis stored (bytes unless the
        client decodes responses) while a miss returns the raw fetch result -
        confirm callers handle both.
    """
    cached = r.get(key)
    # Compare against None: an empty cached value is still a valid hit.
    if cached is not None:
        return cached
    data = fetch_func()
    r.setex(key, ttl, data)
    return data
本地文件缓存
Python
import json
import hashlib
import os
CACHE_DIR = './cache'


def _cache_path(key):
    """Return the cache file path for *key* (MD5 hex digest as the filename)."""
    # MD5 is used only to derive a filesystem-safe name, not for security.
    return os.path.join(CACHE_DIR, hashlib.md5(key.encode()).hexdigest())


def cache_result(key, data):
    """Store JSON-serializable *data* in the cache file for *key*."""
    # Create the cache directory on first use; open() would fail without it.
    os.makedirs(CACHE_DIR, exist_ok=True)
    with open(_cache_path(key), 'w') as f:
        json.dump(data, f)


def load_cached(key):
    """Return the cached data for *key*, or None when nothing is cached."""
    # Open directly instead of checking exists() first, which avoids a race
    # with a concurrent delete.
    try:
        with open(_cache_path(key), 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        return None
性能测试对比
| 操作 | 低效方式 | 高效方式 | 提升倍数 |
|---|---|---|---|
| 文件读取 | 逐字符 | 8KB缓冲 | ~100x |
| 字符串写入 | 逐行write | join批量 | ~10x |
| HTTP请求 | 每次新建连接 | Session复用 | ~5x |
| 数据库插入 | 逐条提交 | 批量事务 | ~20x |
注意:上表的提升倍数仅为粗略量级,实际收益取决于硬件、数据规模和运行环境。I/O优化要根据实际瓶颈针对性优化,先使用cProfile等工具定位热点,并在优化前后实测对比。
要点总结
- 文件读取使用缓冲区(8KB+),大文件用mmap内存映射
- 网络请求用Session复用连接,异步并发提升吞吐
- 数据库用连接池、批量操作、事务合并减少IO次数
- 异步IO(asyncio+aiohttp+asyncpg)适合高并发场景
- 使用缓存避免重复IO,lru_cache内存缓存或Redis分布式缓存
- I/O多路复用适合单线程处理大量连接
存放路径:articles/PYTHON/专家/性能优化/I/O性能优化.md
📝 发现内容有误?点击此处直接编辑