第九章:字典与集合
本章目标
完成本章学习后,你将能够:
- 掌握字典的所有操作和方法
- 理解字典的哈希表实现原理
- 熟练使用集合操作
- 掌握字典和集合推导式
- 选择合适的数据结构解决问题
字典(Dictionary)
字典创建
# 直接创建 empty = {} empty = dict() person = {"name": "Alice", "age": 25, "city": "NYC"} # 从键值对创建 pairs = [("a", 1), ("b", 2)] d = dict(pairs) # {'a': 1, 'b': 2} # 使用关键字参数 d = dict(name="Bob", age=30) # {'name': 'Bob', 'age': 30} # 从序列创建 d = dict.fromkeys(["a", "b", "c"], 0) # {'a': 0, 'b': 0, 'c': 0} # 字典推导式 squares = {x: x**2 for x in range(6)} # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
访问和修改
d = {"name": "Alice", "age": 25} # 访问 print(d["name"]) # Alice # print(d["gender"]) # KeyError # 安全访问 print(d.get("name")) # Alice print(d.get("gender")) # None(不报错) print(d.get("gender", "N/A")) # N/A(默认值) # 修改/添加 d["age"] = 26 # 修改 d["gender"] = "F" # 添加 # 批量更新 d.update({"city": "NYC", "age": 27}) # 删除 value = d.pop("age") # 删除并返回值 key_value = d.popitem() # 删除并返回最后插入的项(Python 3.7+) del d["name"] # 删除
字典方法
d = {"a": 1, "b": 2, "c": 3} # 视图对象 print(d.keys()) # dict_keys(['a', 'b', 'c']) print(d.values()) # dict_values([1, 2, 3]) print(d.items()) # dict_items([('a', 1), ('b', 2), ('c', 3)]) # 视图是动态的 d2 = d.keys() d["d"] = 4 print(d2) # dict_keys(['a', 'b', 'c', 'd']) # 检查成员 print("a" in d) # True(检查键) print(1 in d) # False(不检查值) print(1 in d.values()) # True # 清空 d.clear() # {}
字典遍历
d = {"name": "Alice", "age": 25, "city": "NYC"} # 遍历键(默认) for key in d: print(key, d[key]) # 遍历键值对 for key, value in d.items(): print(f"{key}: {value}") # 同时遍历多个字典 names = {"Alice": 25, "Bob": 30} scores = {"Alice": 90, "Bob": 85} for name in names: print(f"{name}: {names[name]}岁, {scores[name]}分") # 使用zip遍历多个字典 for name, (age, score) in zip(names, zip(names.values(), scores.values())): pass
默认值:setdefault和defaultdict
# setdefault:键不存在时设置默认值 word_count = {} for word in ["apple", "banana", "apple"]: word_count[word] = word_count.get(word, 0) + 1 # 使用setdefault更简洁 for word in ["apple", "banana", "apple"]: word_count.setdefault(word, 0) word_count[word] += 1 # defaultdict from collections import defaultdict # 默认值为0 counts = defaultdict(int) counts["apple"] += 1 # 不需要检查键是否存在 # 默认值为列表 groups = defaultdict(list) groups["fruit"].append("apple") groups["fruit"].append("banana") # 自定义默认值 def default_value(): return {"count": 0, "items": []} data = defaultdict(default_value)
有序字典(OrderedDict)
from collections import OrderedDict # Python 3.7+普通字典也是有序的,但OrderedDict有特殊方法 d = OrderedDict() d["a"] = 1 d["b"] = 2 d["c"] = 3 # 移动到末尾/开头 d.move_to_end("a") # 移到末尾 d.move_to_end("b", last=False) # 移到开头 # 反转 reversed_d = reversed(d) # Python 3.8+
集合(Set)
集合创建
# 直接创建(注意:空{}是字典,不是集合) empty = set() # 空集合 nums = {1, 2, 3, 4, 5} # 从序列创建 s = set([1, 2, 2, 3, 3, 3]) # {1, 2, 3},自动去重 s = set("hello") # {'h', 'e', 'l', 'o'} # 集合推导式 squares = {x**2 for x in range(10)} # {0, 1, 4, 9, 16, 25, 36, 49, 64, 81} evens = {x for x in range(20) if x % 2 == 0}
集合操作
a = {1, 2, 3, 4} b = {3, 4, 5, 6} # 并集 print(a | b) # {1, 2, 3, 4, 5, 6} print(a.union(b)) # 交集 print(a & b) # {3, 4} print(a.intersection(b)) # 差集 print(a - b) # {1, 2} print(a.difference(b)) # 对称差集 print(a ^ b) # {1, 2, 5, 6} print(a.symmetric_difference(b)) # 子集/超集 print(a.issubset(b)) # False print(a.issuperset({1, 2})) # True print(a.isdisjoint({5, 6})) # False(有交集吗?)
修改集合
s = {1, 2, 3} # 添加 s.add(4) # {1, 2, 3, 4} s.update([5, 6]) # {1, 2, 3, 4, 5, 6} # 删除 s.remove(3) # 元素不存在则KeyError s.discard(10) # 元素不存在不报错 s.pop() # 随机删除并返回一个元素 s.clear() # 清空
frozenset
不可变的集合,可以作为字典的键:
fs = frozenset([1, 2, 3]) # fs.add(4) # AttributeError # 作为字典键 graph = { frozenset(["A", "B"]): 5, frozenset(["B", "C"]): 3, }
字典与集合的实现原理
哈希表基础
# 字典和集合都基于哈希表实现 # 要求:键必须是可哈希的(hashable) # 可哈希:不可变类型 print(hash("hello")) print(hash(42)) print(hash((1, 2))) # 不可哈希:可变类型 # print(hash([1, 2])) # TypeError # 自定义对象默认是可哈希的(基于id) class Point: def __init__(self, x, y): self.x = x self.y = y def __hash__(self): return hash((self.x, self.y)) def __eq__(self, other): return self.x == other.x and self.y == other.y
性能特点
import timeit # 查找性能对比 list_time = timeit.timeit("9999 in lst", setup="lst = list(range(10000))", number=1000) dict_time = timeit.timeit("9999 in d", setup="d = dict.fromkeys(range(10000))", number=1000) print(f"列表查找: {list_time:.4f}s") # O(n) print(f"字典查找: {dict_time:.4f}s") # O(1)
本章练习
1. 词频统计:统计一段文本中每个单词出现的次数,输出前10 2. 集合运算:给定两个列表,找出只在A、只在B、同时在AB的元素 3. 字典合并:合并多个字典,处理键冲突 4. 去重保留顺序:实现一个函数,删除列表重复元素但保留原始顺序 5. 图表示:使用字典表示无向图,实现查找邻居和最短路径