第九章：字典与集合

完成本章学习后，你将能够：

掌握字典的所有操作和方法
理解字典的哈希表实现原理
熟练使用集合操作
掌握字典和集合推导式
选择合适的数据结构解决问题

# 直接创建
empty = {}
empty = dict()
person = {"name": "Alice", "age": 25, "city": "NYC"}
 
# 从键值对创建
pairs = [("a", 1), ("b", 2)]
d = dict(pairs)  # {'a': 1, 'b': 2}
 
# 使用关键字参数
d = dict(name="Bob", age=30)  # {'name': 'Bob', 'age': 30}
 
# 从序列创建
d = dict.fromkeys(["a", "b", "c"], 0)  # {'a': 0, 'b': 0, 'c': 0}
 
# 字典推导式
squares = {x: x**2 for x in range(6)}  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

d = {"name": "Alice", "age": 25}
 
# 访问
print(d["name"])  # Alice
# print(d["gender"])  # KeyError
 
# 安全访问
print(d.get("name"))       # Alice
print(d.get("gender"))     # None（不报错）
print(d.get("gender", "N/A"))  # N/A（默认值）
 
# 修改/添加
d["age"] = 26        # 修改
d["gender"] = "F"    # 添加
 
# 批量更新
d.update({"city": "NYC", "age": 27})
 
# 删除
value = d.pop("age")  # 删除并返回值
key_value = d.popitem()  # 删除并返回最后插入的项（Python 3.7+）
del d["name"]         # 删除

d = {"a": 1, "b": 2, "c": 3}
 
# 视图对象
print(d.keys())     # dict_keys(['a', 'b', 'c'])
print(d.values())   # dict_values([1, 2, 3])
print(d.items())    # dict_items([('a', 1), ('b', 2), ('c', 3)])
 
# 视图是动态的
d2 = d.keys()
d["d"] = 4
print(d2)  # dict_keys(['a', 'b', 'c', 'd'])
 
# 检查成员
print("a" in d)      # True（检查键）
print(1 in d)        # False（不检查值）
print(1 in d.values())  # True
 
# 清空
d.clear()  # {}

d = {"name": "Alice", "age": 25, "city": "NYC"}
 
# 遍历键（默认）
for key in d:
    print(key, d[key])
 
# 遍历键值对
for key, value in d.items():
    print(f"{key}: {value}")
 
# 同时遍历多个字典
names = {"Alice": 25, "Bob": 30}
scores = {"Alice": 90, "Bob": 85}
 
for name in names:
    print(f"{name}: {names[name]}岁, {scores[name]}分")
 
# 使用zip遍历多个字典
for name, (age, score) in zip(names, zip(names.values(), scores.values())):
    pass

# setdefault：键不存在时设置默认值
word_count = {}
for word in ["apple", "banana", "apple"]:
    word_count[word] = word_count.get(word, 0) + 1
 
# 使用setdefault更简洁
for word in ["apple", "banana", "apple"]:
    word_count.setdefault(word, 0)
    word_count[word] += 1
 
# defaultdict
from collections import defaultdict
 
# 默认值为0
counts = defaultdict(int)
counts["apple"] += 1  # 不需要检查键是否存在
 
# 默认值为列表
groups = defaultdict(list)
groups["fruit"].append("apple")
groups["fruit"].append("banana")
 
# 自定义默认值
def default_value():
    return {"count": 0, "items": []}
 
data = defaultdict(default_value)

from collections import OrderedDict
 
# Python 3.7+普通字典也是有序的，但OrderedDict有特殊方法
d = OrderedDict()
d["a"] = 1
d["b"] = 2
d["c"] = 3
 
# 移动到末尾/开头
d.move_to_end("a")  # 移到末尾
d.move_to_end("b", last=False)  # 移到开头
 
# 反转
reversed_d = reversed(d)  # Python 3.8+

# 直接创建（注意：空{}是字典，不是集合）
empty = set()  # 空集合
nums = {1, 2, 3, 4, 5}
 
# 从序列创建
s = set([1, 2, 2, 3, 3, 3])  # {1, 2, 3}，自动去重
s = set("hello")  # {'h', 'e', 'l', 'o'}
 
# 集合推导式
squares = {x**2 for x in range(10)}  # {0, 1, 4, 9, 16, 25, 36, 49, 64, 81}
evens = {x for x in range(20) if x % 2 == 0}

a = {1, 2, 3, 4}
b = {3, 4, 5, 6}
 
# 并集
print(a | b)            # {1, 2, 3, 4, 5, 6}
print(a.union(b))
 
# 交集
print(a & b)            # {3, 4}
print(a.intersection(b))
 
# 差集
print(a - b)            # {1, 2}
print(a.difference(b))
 
# 对称差集
print(a ^ b)            # {1, 2, 5, 6}
print(a.symmetric_difference(b))
 
# 子集/超集
print(a.issubset(b))    # False
print(a.issuperset({1, 2}))  # True
print(a.isdisjoint({5, 6}))  # False（有交集吗？）

s = {1, 2, 3}
 
# 添加
s.add(4)           # {1, 2, 3, 4}
s.update([5, 6])   # {1, 2, 3, 4, 5, 6}
 
# 删除
s.remove(3)        # 元素不存在则KeyError
s.discard(10)      # 元素不存在不报错
s.pop()            # 随机删除并返回一个元素
s.clear()          # 清空

不可变的集合，可以作为字典的键：

fs = frozenset([1, 2, 3])
# fs.add(4)  # AttributeError
 
# 作为字典键
graph = {
    frozenset(["A", "B"]): 5,
    frozenset(["B", "C"]): 3,
}

# 字典和集合都基于哈希表实现
# 要求：键必须是可哈希的（hashable）
 
# 可哈希：不可变类型
print(hash("hello"))
print(hash(42))
print(hash((1, 2)))
 
# 不可哈希：可变类型
# print(hash([1, 2]))  # TypeError
 
# 自定义对象默认是可哈希的（基于id）
class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y
 
    def __hash__(self):
        return hash((self.x, self.y))
 
    def __eq__(self, other):
        return self.x == other.x and self.y == other.y

import timeit
 
# 查找性能对比
list_time = timeit.timeit("9999 in lst", 
    setup="lst = list(range(10000))", number=1000)
dict_time = timeit.timeit("9999 in d", 
    setup="d = dict.fromkeys(range(10000))", number=1000)
 
print(f"列表查找: {list_time:.4f}s")  # O(n)
print(f"字典查找: {dict_time:.4f}s")  # O(1)

1. 词频统计：统计一段文本中每个单词出现的次数，输出前10 2. 集合运算：给定两个列表，找出只在A、只在B、同时在AB的元素 3. 字典合并：合并多个字典，处理键冲突 4. 去重保留顺序：实现一个函数，删除列表重复元素但保留原始顺序 5. 图表示：使用字典表示无向图，实现查找邻居和最短路径

本章我们学习了：

字典的创建、访问、修改、遍历
defaultdict和OrderedDict
集合的创建和集合运算
frozenset的使用
哈希表原理和性能特点

下一章：第十章：高级数据结构

Detach Close

您访问的页面并不存在。如果允许，您可以使用创建该页面按钮来创建它。

第九章：字典与集合

本章目标

字典（Dictionary）

字典创建

访问和修改

字典方法

字典遍历

默认值：setdefault和defaultdict

有序字典（OrderedDict）

集合（Set）

集合创建

集合操作

修改集合

frozenset

字典与集合的实现原理

哈希表基础

性能特点

本章练习

本章小结

该主题尚不存在

张叶安的博客