====== 第九章:字典与集合 ======
===== 本章目标 =====
完成本章学习后,你将能够:
* 掌握字典的所有操作和方法
* 理解字典的哈希表实现原理
* 熟练使用集合操作
* 掌握字典和集合推导式
* 选择合适的数据结构解决问题
===== 字典(Dictionary) =====
==== 字典创建 ====
# 直接创建
empty = {}
empty = dict()
person = {"name": "Alice", "age": 25, "city": "NYC"}
# 从键值对创建
pairs = [("a", 1), ("b", 2)]
d = dict(pairs) # {'a': 1, 'b': 2}
# 使用关键字参数
d = dict(name="Bob", age=30) # {'name': 'Bob', 'age': 30}
# 从序列创建
d = dict.fromkeys(["a", "b", "c"], 0) # {'a': 0, 'b': 0, 'c': 0}
# 字典推导式
squares = {x: x**2 for x in range(6)} # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
==== 访问和修改 ====
d = {"name": "Alice", "age": 25}
# 访问
print(d["name"]) # Alice
# print(d["gender"]) # KeyError
# 安全访问
print(d.get("name")) # Alice
print(d.get("gender")) # None(不报错)
print(d.get("gender", "N/A")) # N/A(默认值)
# 修改/添加
d["age"] = 26 # 修改
d["gender"] = "F" # 添加
# 批量更新
d.update({"city": "NYC", "age": 27})
# 删除
value = d.pop("age") # 删除并返回值
key_value = d.popitem() # 删除并返回最后插入的项(Python 3.7+)
del d["name"] # 删除
==== 字典方法 ====
d = {"a": 1, "b": 2, "c": 3}
# 视图对象
print(d.keys()) # dict_keys(['a', 'b', 'c'])
print(d.values()) # dict_values([1, 2, 3])
print(d.items()) # dict_items([('a', 1), ('b', 2), ('c', 3)])
# 视图是动态的
d2 = d.keys()
d["d"] = 4
print(d2) # dict_keys(['a', 'b', 'c', 'd'])
# 检查成员
print("a" in d) # True(检查键)
print(1 in d) # False(不检查值)
print(1 in d.values()) # True
# 清空
d.clear() # {}
==== 字典遍历 ====
d = {"name": "Alice", "age": 25, "city": "NYC"}
# 遍历键(默认)
for key in d:
print(key, d[key])
# 遍历键值对
for key, value in d.items():
print(f"{key}: {value}")
# 同时遍历多个字典
names = {"Alice": 25, "Bob": 30}
scores = {"Alice": 90, "Bob": 85}
for name in names:
print(f"{name}: {names[name]}岁, {scores[name]}分")
# 使用zip遍历多个字典
for name, (age, score) in zip(names, zip(names.values(), scores.values())):
pass
==== 默认值:setdefault和defaultdict ====
# setdefault:键不存在时设置默认值
word_count = {}
for word in ["apple", "banana", "apple"]:
word_count[word] = word_count.get(word, 0) + 1
# 使用setdefault更简洁
for word in ["apple", "banana", "apple"]:
word_count.setdefault(word, 0)
word_count[word] += 1
# defaultdict
from collections import defaultdict
# 默认值为0
counts = defaultdict(int)
counts["apple"] += 1 # 不需要检查键是否存在
# 默认值为列表
groups = defaultdict(list)
groups["fruit"].append("apple")
groups["fruit"].append("banana")
# 自定义默认值
def default_value():
return {"count": 0, "items": []}
data = defaultdict(default_value)
==== 有序字典(OrderedDict) ====
from collections import OrderedDict
# Python 3.7+普通字典也是有序的,但OrderedDict有特殊方法
d = OrderedDict()
d["a"] = 1
d["b"] = 2
d["c"] = 3
# 移动到末尾/开头
d.move_to_end("a") # 移到末尾
d.move_to_end("b", last=False) # 移到开头
# 反转
reversed_d = reversed(d) # Python 3.8+
===== 集合(Set) =====
==== 集合创建 ====
# 直接创建(注意:空{}是字典,不是集合)
empty = set() # 空集合
nums = {1, 2, 3, 4, 5}
# 从序列创建
s = set([1, 2, 2, 3, 3, 3]) # {1, 2, 3},自动去重
s = set("hello") # {'h', 'e', 'l', 'o'}
# 集合推导式
squares = {x**2 for x in range(10)} # {0, 1, 4, 9, 16, 25, 36, 49, 64, 81}
evens = {x for x in range(20) if x % 2 == 0}
==== 集合操作 ====
a = {1, 2, 3, 4}
b = {3, 4, 5, 6}
# 并集
print(a | b) # {1, 2, 3, 4, 5, 6}
print(a.union(b))
# 交集
print(a & b) # {3, 4}
print(a.intersection(b))
# 差集
print(a - b) # {1, 2}
print(a.difference(b))
# 对称差集
print(a ^ b) # {1, 2, 5, 6}
print(a.symmetric_difference(b))
# 子集/超集
print(a.issubset(b)) # False
print(a.issuperset({1, 2})) # True
print(a.isdisjoint({5, 6})) # False(有交集吗?)
==== 修改集合 ====
s = {1, 2, 3}
# 添加
s.add(4) # {1, 2, 3, 4}
s.update([5, 6]) # {1, 2, 3, 4, 5, 6}
# 删除
s.remove(3) # 元素不存在则KeyError
s.discard(10) # 元素不存在不报错
s.pop() # 随机删除并返回一个元素
s.clear() # 清空
==== frozenset ====
不可变的集合,可以作为字典的键:
fs = frozenset([1, 2, 3])
# fs.add(4) # AttributeError
# 作为字典键
graph = {
frozenset(["A", "B"]): 5,
frozenset(["B", "C"]): 3,
}
===== 字典与集合的实现原理 =====
==== 哈希表基础 ====
# 字典和集合都基于哈希表实现
# 要求:键必须是可哈希的(hashable)
# 可哈希:不可变类型
print(hash("hello"))
print(hash(42))
print(hash((1, 2)))
# 不可哈希:可变类型
# print(hash([1, 2])) # TypeError
# 自定义对象默认是可哈希的(基于id)
class Point:
def __init__(self, x, y):
self.x = x
self.y = y
def __hash__(self):
return hash((self.x, self.y))
def __eq__(self, other):
return self.x == other.x and self.y == other.y
==== 性能特点 ====
import timeit
# 查找性能对比
list_time = timeit.timeit("9999 in lst",
setup="lst = list(range(10000))", number=1000)
dict_time = timeit.timeit("9999 in d",
setup="d = dict.fromkeys(range(10000))", number=1000)
print(f"列表查找: {list_time:.4f}s") # O(n)
print(f"字典查找: {dict_time:.4f}s") # O(1)
===== 本章练习 =====
1. **词频统计**:统计一段文本中每个单词出现的次数,输出前10
2. **集合运算**:给定两个列表,找出只在A、只在B、同时在AB的元素
3. **字典合并**:合并多个字典,处理键冲突
4. **去重保留顺序**:实现一个函数,删除列表重复元素但保留原始顺序
5. **图表示**:使用字典表示无向图,实现查找邻居和最短路径
===== 本章小结 =====
本章我们学习了:
* 字典的创建、访问、修改、遍历
* defaultdict和OrderedDict
* 集合的创建和集合运算
* frozenset的使用
* 哈希表原理和性能特点
下一章:[[python_course:chapter10|第十章:高级数据结构]]