完成本章学习后,你将能够:
# 单引号和双引号等价 s1 = 'hello' s2 = "hello" # 三引号用于多行字符串 s3 = '''这是一个 多行字符串''' s4 = """这也是一个 多行字符串""" # 原始字符串(不转义) path = r"C:\Users\name\file.txt" # 注意:\n不会被当作换行 regex = r"\d+\.\d+" # 正则表达式常用 # 字节串 b = b"hello" # bytes类型,不是str
s = "hello" # s[0] = "H" # TypeError: 'str' object does not support item assignment # 创建新字符串 s = "H" + s[1:] # "Hello" # 字符串方法返回新字符串 s = " hello " s2 = s.strip() # 返回"hello",s不变 print(s) # " hello "(原字符串未变) print(s2) # "hello"
Python 3字符串是Unicode序列:
# Unicode字符 s = "你好,世界!🌍" print(len(s)) # 7(字符数,不是字节数) # 获取Unicode码点 print(ord('A')) # 65 print(ord('中')) # 20013 print(chr(65)) # A print(chr(20013)) # 中 # Unicode转义 s = "\u4e2d\u6587" # "中文" s = "\U0001f600" # "😀"
# 编码:str -> bytes s = "中文" utf8_bytes = s.encode('utf-8') # b'\xe4\xb8\xad\xe6\x96\x87' gbk_bytes = s.encode('gbk') # b'\xd6\xd0\xce\xc4' print(len(utf8_bytes)) # 6(UTF-8中文字符占3字节) print(len(gbk_bytes)) # 4(GBK中文字符占2字节) # 解码:bytes -> str s1 = utf8_bytes.decode('utf-8') # "中文" s2 = gbk_bytes.decode('gbk') # "中文" # 错误处理 b = b"\xff\xfe" # 无效UTF-8序列 s = b.decode('utf-8', errors='replace') # 用�替换无效字节 s = b.decode('utf-8', errors='ignore') # 忽略无效字节
s = "Hello, World!" # 索引 print(s[0]) # H print(s[-1]) # !(最后一个字符) print(s[7]) # W # 切片 [start:end:step] print(s[0:5]) # Hello print(s[7:12]) # World print(s[:5]) # Hello(从头开始) print(s[7:]) # World!(到末尾) print(s[:]) # Hello, World!(副本) print(s[::2]) # Hlo ol!(每隔一个字符) print(s[::-1]) # !dlroW ,olleH(反转) # 高级切片 print(s[-5:]) # orld!(最后5个字符) print(s[:-1]) # Hello, World(去掉最后一个字符)
s = "Hello, World! Hello!" # find - 找不到返回-1 print(s.find("Hello")) # 0 print(s.find("Hello", 5)) # 14(从索引5开始找) print(s.find("xyz")) # -1 # index - 找不到抛出ValueError print(s.index("Hello")) # 0 # print(s.index("xyz")) # ValueError # rfind/rindex - 从右开始查找 print(s.rfind("Hello")) # 14 # count - 计数 print(s.count("Hello")) # 2 print(s.count("l")) # 5 # startswith/endswith print(s.startswith("Hello")) # True print(s.endswith("!")) # True print(s.startswith(("Hi", "Hello"))) # True,匹配任一
s = " Hello, World! " # 去除空白 print(s.strip()) # "Hello, World!" print(s.lstrip()) # "Hello, World! " print(s.rstrip()) # " Hello, World!" print(s.strip(" !")) # "Hello, World"(去除指定字符) # 大小写转换 s = "Hello World" print(s.upper()) # HELLO WORLD print(s.lower()) # hello world print(s.capitalize()) # Hello world(首字母大写) print(s.title()) # Hello World(每个单词首字母大写) print(s.swapcase()) # hELLO wORLD(大小写互换) # 替换 s = "Hello, World! World!" print(s.replace("World", "Python")) # Hello, Python! Python! print(s.replace("World", "Python", 1)) # Hello, Python! World!(只替换1次)
# 判断类型 print("hello".isalpha()) # True(全是字母) print("hello123".isalnum()) # True(字母或数字) print("123".isdigit()) # True(全是数字) print("123.45".isdecimal()) # False(有小数点) print(" ".isspace()) # True(全是空白) print("Hello".istitle()) # True(标题格式) print("HELLO".isupper()) # True(全大写) print("hello".islower()) # True(全小写) # 其他判断 print("hello".isidentifier()) # True(可作为标识符) print("123abc".isidentifier()) # False(数字开头)
# split s = "apple,banana,cherry" fruits = s.split(",") # ['apple', 'banana', 'cherry'] s = "a  b   c" # 多个空格 cols = s.split() # ['a', 'b', 'c'](默认按任意空白分割) cols = s.split(" ") # ['a', '', 'b', '', '', 'c'] # 限制分割次数 s = "a,b,c,d,e" print(s.split(",", 2)) # ['a', 'b', 'c,d,e'] # rsplit(从右边开始) print(s.rsplit(",", 2)) # ['a,b,c', 'd', 'e'] # splitlines s = "line1\nline2\r\nline3" print(s.splitlines()) # ['line1', 'line2', 'line3'] # join words = ["Hello", "World"] s = " ".join(words) # "Hello World" s = "-".join(words) # "Hello-World" s = "".join(words) # "HelloWorld" # 连接多个相同字符 s = "-" * 50 # 50个连字符
name = "Alice" age = 25 print("Name: %s, Age: %d" % (name, age)) print("Pi: %.2f" % 3.14159) # Pi: 3.14 print("Hex: %x" % 255) # Hex: ff
# 位置参数 print("Hello, {}!".format("World")) print("{0} {1}".format("Hello", "World")) print("{1} {0}".format("World", "Hello")) # Hello World # 关键字参数 print("Name: {name}, Age: {age}".format(name="Alice", age=25)) # 格式规范 print("{:.2f}".format(3.14159)) # 3.14 print("{:>10}".format("hi")) # "        hi"(右对齐) print("{:<10}".format("hi")) # "hi        "(左对齐) print("{:^10}".format("hi")) # "    hi    "(居中) print("{:0>5}".format(42)) # 00042(补零) print("{:,}".format(1234567)) # 1,234,567(千分位) print("{:.2%}".format(0.25)) # 25.00%(百分比)
name = "Alice" age = 25 # 基本用法 print(f"Hello, {name}!") print(f"Next year you'll be {age + 1}") # 表达式 print(f"Square of 5: {5 ** 2}") print(f"Name length: {len(name)}") # 格式规范 pi = 3.14159265359 print(f"Pi: {pi:.2f}") # Pi: 3.14 print(f"Pi: {pi:10.2f}") # "Pi:       3.14"(宽度10,右对齐) print(f"Pi: {pi:<10.2f}") # "Pi: 3.14      "(宽度10,左对齐) print(f"Large: {1000000:,}") # Large: 1,000,000 # 调试(Python 3.8+) print(f"{age=}") # age=25 print(f"{age + 5=}") # age + 5=30 # 日期格式化 from datetime import datetime now = datetime.now() print(f"Now: {now:%Y-%m-%d %H:%M:%S}")
import re # 基本匹配 text = "The quick brown fox jumps over 13 lazy dogs." # search - 搜索第一个匹配 match = re.search(r"fox", text) if match: print(f"找到'{match.group()}'在位置{match.start()}-{match.end()}") # findall - 查找所有匹配 numbers = re.findall(r"\d+", text) # ['13'] words = re.findall(r"\b\w+\b", text) # 常用模式 patterns = { r"\d+": "一个或多个数字", r"\w+": "一个或多个单词字符", r"\s+": "一个或多个空白字符", r"[a-z]+": "一个或多个小写字母", r"^The": "以The开头", r"dogs\.$": "以dogs.结尾", r"o.": "o后跟任意字符", } # 替换 new_text = re.sub(r"fox", "cat", text) new_text = re.sub(r"\d+", "XX", text) # 将所有数字替换为XX # 分割 parts = re.split(r"\s+", text) # 按空白分割
# 低效:字符串拼接 result = "" for i in range(1000): result += str(i) # 每次创建新字符串 # 高效:使用join parts = [str(i) for i in range(1000)] result = "".join(parts) # 等价写法:先用循环追加到列表,再join(效率与列表推导式相当) parts = [] for i in range(1000): parts.append(str(i)) result = "".join(parts) # 或使用io.StringIO from io import StringIO buffer = StringIO() for i in range(1000): buffer.write(str(i)) result = buffer.getvalue()
1. 字符串处理:实现函数将驼峰命名转换为下划线命名(camelCase → camel_case) 2. 格式化输出:使用f-string格式化一个表格,对齐各列 3. 正则提取:从HTML中提取所有URL 4. 文本统计:统计一段文本中各单词出现频率 5. 模板引擎:实现一个简单的字符串模板替换功能
本章我们深入学习了:
下一章:第八章:列表与元组