You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
hello-algo/chapter_data_structure/character_encoding/index.html

2369 lines
67 KiB

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

<!doctype html>
<html lang="zh" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="description" content="一本动画图解、能运行、可提问的数据结构与算法入门书">
<meta name="author" content="Krahets">
<link rel="canonical" href="https://www.hello-algo.com/chapter_data_structure/character_encoding/">
<link rel="prev" href="../number_encoding/">
<link rel="next" href="../summary/">
<link rel="icon" href="../../assets/images/favicon.png">
<meta name="generator" content="mkdocs-1.4.2, mkdocs-material-9.1.11">
<title>3.4.   字符编码 * - Hello 算法</title>
<link rel="stylesheet" href="../../assets/stylesheets/main.85bb2934.min.css">
<link rel="stylesheet" href="../../assets/stylesheets/palette.a6bdf11c.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Noto+Sans+SC:300,300i,400,400i,700,700i%7CFira+Code:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Noto Sans SC";--md-code-font:"Fira Code"}</style>
<link rel="stylesheet" href="../../stylesheets/extra.css">
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="white" data-md-color-accent="indigo">
<script>var palette=__md_get("__palette");if(palette&&"object"==typeof palette.color)for(var key of Object.keys(palette.color))document.body.setAttribute("data-md-color-"+key,palette.color[key])</script>
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#34" class="md-skip">
跳转至
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header md-header--shadow" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="页眉">
<a href="../.." title="Hello 算法" class="md-header__button md-logo" aria-label="Hello 算法" data-md-component="logo">
<img src="../../assets/images/logo.png" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
Hello 算法
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
3.4. &nbsp; 字符编码 *
</span>
</div>
</div>
</div>
<form class="md-header__option" data-md-component="palette">
<input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="white" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 7a5 5 0 0 1 5 5 5 5 0 0 1-5 5 5 5 0 0 1-5-5 5 5 0 0 1 5-5m0 2a3 3 0 0 0-3 3 3 3 0 0 0 3 3 3 3 0 0 0 3-3 3 3 0 0 0-3-3m0-7 2.39 3.42C13.65 5.15 12.84 5 12 5c-.84 0-1.65.15-2.39.42L12 2M3.34 7l4.16-.35A7.2 7.2 0 0 0 5.94 8.5c-.44.74-.69 1.5-.83 2.29L3.34 7m.02 10 1.76-3.77a7.131 7.131 0 0 0 2.38 4.14L3.36 17M20.65 7l-1.77 3.79a7.023 7.023 0 0 0-2.38-4.15l4.15.36m-.01 10-4.14.36c.59-.51 1.12-1.14 1.54-1.86.42-.73.69-1.5.83-2.29L20.64 17M12 22l-2.41-3.44c.74.27 1.55.44 2.41.44.82 0 1.63-.17 2.37-.44L12 22Z"/></svg>
</label>
<input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_2">
<label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m17.75 4.09-2.53 1.94.91 3.06-2.63-1.81-2.63 1.81.91-3.06-2.53-1.94L12.44 4l1.06-3 1.06 3 3.19.09m3.5 6.91-1.64 1.25.59 1.98-1.7-1.17-1.7 1.17.59-1.98L15.75 11l2.06-.05L18.5 9l.69 1.95 2.06.05m-2.28 4.95c.83-.08 1.72 1.1 1.19 1.85-.32.45-.66.87-1.08 1.27C15.17 23 8.84 23 4.94 19.07c-3.91-3.9-3.91-10.24 0-14.14.4-.4.82-.76 1.27-1.08.75-.53 1.93.36 1.85 1.19-.27 2.86.69 5.83 2.89 8.02a9.96 9.96 0 0 0 8.02 2.89m-1.64 2.02a12.08 12.08 0 0 1-7.8-3.47c-2.17-2.19-3.33-5-3.49-7.82-2.81 3.14-2.7 7.96.31 10.98 3.02 3.01 7.84 3.12 10.98.31Z"/></svg>
</label>
</form>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="搜索" placeholder="搜索" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
</label>
<nav class="md-search__options" aria-label="查找">
<a href="javascript:void(0)" class="md-search__icon md-icon" title="分享" aria-label="分享" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7 0-.24-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91 1.61 0 2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08Z"/></svg>
</a>
<button type="reset" class="md-search__icon md-icon" title="清空当前内容" aria-label="清空当前内容" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
正在初始化搜索引擎
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/krahets/hello-algo" title="前往仓库" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.4.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</div>
<div class="md-source__repository">
krahets/hello-algo
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" aria-label="导航栏" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="Hello 算法" class="md-nav__button md-logo" aria-label="Hello 算法" data-md-component="logo">
<img src="../../assets/images/logo.png" alt="logo">
</a>
Hello 算法
</label>
<div class="md-nav__source">
<a href="https://github.com/krahets/hello-algo" title="前往仓库" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.4.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</div>
<div class="md-source__repository">
krahets/hello-algo
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_preface/">0. &nbsp; &nbsp; 写在前面</a>
<label for="__nav_1">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1">
<span class="md-nav__icon md-icon"></span>
0. &nbsp; &nbsp; 写在前面
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_preface/about_the_book/" class="md-nav__link">
0.1. &nbsp; 关于本书
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_preface/suggestions/" class="md-nav__link">
0.2. &nbsp; 如何使用本书
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_preface/summary/" class="md-nav__link">
0.3. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_introduction/">1. &nbsp; &nbsp; 引言</a>
<label for="__nav_2">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
1. &nbsp; &nbsp; 引言
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_introduction/algorithms_are_everywhere/" class="md-nav__link">
1.1. &nbsp; 算法无处不在
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_introduction/what_is_dsa/" class="md-nav__link">
1.2. &nbsp; 算法是什么
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_introduction/summary/" class="md-nav__link">
1.3. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_computational_complexity/">2. &nbsp; &nbsp; 复杂度分析</a>
<label for="__nav_3">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
2. &nbsp; &nbsp; 复杂度分析
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_computational_complexity/performance_evaluation/" class="md-nav__link">
2.1. &nbsp; 算法效率评估
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_computational_complexity/time_complexity/" class="md-nav__link">
2.2. &nbsp; 时间复杂度
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_computational_complexity/space_complexity/" class="md-nav__link">
2.3. &nbsp; 空间复杂度
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_computational_complexity/summary/" class="md-nav__link">
2.4. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
<div class="md-nav__link md-nav__link--index ">
<a href="../">3. &nbsp; &nbsp; 数据结构简介</a>
<label for="__nav_4">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_4">
<span class="md-nav__icon md-icon"></span>
3. &nbsp; &nbsp; 数据结构简介
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../classification_of_data_structure/" class="md-nav__link">
3.1. &nbsp; 数据结构分类
</a>
</li>
<li class="md-nav__item">
<a href="../basic_data_types/" class="md-nav__link">
3.2. &nbsp; 基本数据类型
</a>
</li>
<li class="md-nav__item">
<a href="../number_encoding/" class="md-nav__link">
3.3. &nbsp; 数字编码 *
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
3.4. &nbsp; 字符编码 *
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
3.4. &nbsp; 字符编码 *
</a>
<nav class="md-nav md-nav--secondary" aria-label="目录">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
目录
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#341-ascii" class="md-nav__link">
3.4.1. &nbsp; ASCII 字符集
</a>
</li>
<li class="md-nav__item">
<a href="#342-gbk" class="md-nav__link">
3.4.2. &nbsp; GBK 字符集
</a>
</li>
<li class="md-nav__item">
<a href="#343-unicode" class="md-nav__link">
3.4.3. &nbsp; Unicode 字符集
</a>
</li>
<li class="md-nav__item">
<a href="#344-utf-8" class="md-nav__link">
3.4.4. &nbsp; UTF-8 编码
</a>
</li>
<li class="md-nav__item">
<a href="#345" class="md-nav__link">
3.4.5. &nbsp; 编程语言的字符编码
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../summary/" class="md-nav__link">
3.5. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_array_and_linkedlist/">4. &nbsp; &nbsp; 数组与链表</a>
<label for="__nav_5">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
4. &nbsp; &nbsp; 数组与链表
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_array_and_linkedlist/array/" class="md-nav__link">
4.1. &nbsp; 数组
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_array_and_linkedlist/linked_list/" class="md-nav__link">
4.2. &nbsp; 链表
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_array_and_linkedlist/list/" class="md-nav__link">
4.3. &nbsp; 列表
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_array_and_linkedlist/summary/" class="md-nav__link">
4.4. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_stack_and_queue/">5. &nbsp; &nbsp; 栈与队列</a>
<label for="__nav_6">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_6">
<span class="md-nav__icon md-icon"></span>
5. &nbsp; &nbsp; 栈与队列
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_stack_and_queue/stack/" class="md-nav__link">
5.1. &nbsp;
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_stack_and_queue/queue/" class="md-nav__link">
5.2. &nbsp; 队列
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_stack_and_queue/deque/" class="md-nav__link">
5.3. &nbsp; 双向队列
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_stack_and_queue/summary/" class="md-nav__link">
5.4. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_hashing/">6. &nbsp; &nbsp; 散列表</a>
<label for="__nav_7">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_7">
<span class="md-nav__icon md-icon"></span>
6. &nbsp; &nbsp; 散列表
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_hashing/hash_map/" class="md-nav__link">
6.1. &nbsp; 哈希表
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_hashing/hash_collision/" class="md-nav__link">
6.2. &nbsp; 哈希冲突处理
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_hashing/summary/" class="md-nav__link">
6.3. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_tree/">7. &nbsp; &nbsp;</a>
<label for="__nav_8">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8">
<span class="md-nav__icon md-icon"></span>
7. &nbsp; &nbsp;
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_tree/binary_tree/" class="md-nav__link">
7.1. &nbsp; 二叉树
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_tree/binary_tree_traversal/" class="md-nav__link">
7.2. &nbsp; 二叉树遍历
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_tree/array_representation_of_tree/" class="md-nav__link">
7.3. &nbsp; 二叉树数组表示
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_tree/binary_search_tree/" class="md-nav__link">
7.4. &nbsp; 二叉搜索树
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_tree/avl_tree/" class="md-nav__link">
7.5. &nbsp; AVL 树 *
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_tree/summary/" class="md-nav__link">
7.6. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_9" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_heap/">8. &nbsp; &nbsp;</a>
<label for="__nav_9">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_9_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_9">
<span class="md-nav__icon md-icon"></span>
8. &nbsp; &nbsp;
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_heap/heap/" class="md-nav__link">
8.1. &nbsp;
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_heap/build_heap/" class="md-nav__link">
8.2. &nbsp; 建堆操作
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_heap/top_k/" class="md-nav__link">
8.3. &nbsp; Top-K 问题New
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_heap/summary/" class="md-nav__link">
8.4. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_10" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_graph/">9. &nbsp; &nbsp;</a>
<label for="__nav_10">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_10_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_10">
<span class="md-nav__icon md-icon"></span>
9. &nbsp; &nbsp;
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_graph/graph/" class="md-nav__link">
9.1. &nbsp;
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_graph/graph_operations/" class="md-nav__link">
9.2. &nbsp; 图基础操作
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_graph/graph_traversal/" class="md-nav__link">
9.3. &nbsp; 图的遍历
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_graph/summary/" class="md-nav__link">
9.4. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_11" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_searching/">10. &nbsp; &nbsp; 搜索</a>
<label for="__nav_11">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_11_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_11">
<span class="md-nav__icon md-icon"></span>
10. &nbsp; &nbsp; 搜索
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_searching/binary_search/" class="md-nav__link">
10.1. &nbsp; 二分查找New
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_searching/binary_search_edge/" class="md-nav__link">
10.2. &nbsp; 二分查找边界New
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_searching/replace_linear_by_hashing/" class="md-nav__link">
10.3. &nbsp; 哈希优化策略
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_searching/searching_algorithm_revisited/" class="md-nav__link">
10.4. &nbsp; 重识搜索算法
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_searching/summary/" class="md-nav__link">
10.5. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_12" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_sorting/">11. &nbsp; &nbsp; 排序</a>
<label for="__nav_12">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_12_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_12">
<span class="md-nav__icon md-icon"></span>
11. &nbsp; &nbsp; 排序
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_sorting/sorting_algorithm/" class="md-nav__link">
11.1. &nbsp; 排序算法
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_sorting/selection_sort/" class="md-nav__link">
11.2. &nbsp; 选择排序New
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_sorting/bubble_sort/" class="md-nav__link">
11.3. &nbsp; 冒泡排序
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_sorting/insertion_sort/" class="md-nav__link">
11.4. &nbsp; 插入排序
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_sorting/quick_sort/" class="md-nav__link">
11.5. &nbsp; 快速排序
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_sorting/merge_sort/" class="md-nav__link">
11.6. &nbsp; 归并排序
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_sorting/heap_sort/" class="md-nav__link">
11.7. &nbsp; 堆排序New
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_sorting/bucket_sort/" class="md-nav__link">
11.8. &nbsp; 桶排序
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_sorting/counting_sort/" class="md-nav__link">
11.9. &nbsp; 计数排序
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_sorting/radix_sort/" class="md-nav__link">
11.10. &nbsp; 基数排序
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_sorting/summary/" class="md-nav__link">
11.11. &nbsp; 小结
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_13" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_backtracking/">12. &nbsp; &nbsp; 回溯</a>
<label for="__nav_13">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_13_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_13">
<span class="md-nav__icon md-icon"></span>
12. &nbsp; &nbsp; 回溯
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_backtracking/backtracking_algorithm/" class="md-nav__link">
12.1. &nbsp; 回溯算法
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_backtracking/permutations_problem/" class="md-nav__link">
12.2. &nbsp; 全排列问题
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_backtracking/n_queens_problem/" class="md-nav__link">
12.3. &nbsp; N 皇后问题
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_14" >
<label class="md-nav__link" for="__nav_14" id="__nav_14_label" tabindex="0">
13. &nbsp; &nbsp; 附录
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_14_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_14">
<span class="md-nav__icon md-icon"></span>
13. &nbsp; &nbsp; 附录
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter_appendix/installation/" class="md-nav__link">
13.1. &nbsp; 编程环境安装
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter_appendix/contribution/" class="md-nav__link">
13.2. &nbsp; 一起参与创作
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_15" >
<div class="md-nav__link md-nav__link--index ">
<a href="../../chapter_reference/">参考文献</a>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_15_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_15">
<span class="md-nav__icon md-icon"></span>
参考文献
</label>
<ul class="md-nav__list" data-md-scrollfix>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="目录">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
目录
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#341-ascii" class="md-nav__link">
3.4.1. &nbsp; ASCII 字符集
</a>
</li>
<li class="md-nav__item">
<a href="#342-gbk" class="md-nav__link">
3.4.2. &nbsp; GBK 字符集
</a>
</li>
<li class="md-nav__item">
<a href="#343-unicode" class="md-nav__link">
3.4.3. &nbsp; Unicode 字符集
</a>
</li>
<li class="md-nav__item">
<a href="#344-utf-8" class="md-nav__link">
3.4.4. &nbsp; UTF-8 编码
</a>
</li>
<li class="md-nav__item">
<a href="#345" class="md-nav__link">
3.4.5. &nbsp; 编程语言的字符编码
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<a href="https://github.com/krahets/hello-algo/tree/main/docs/chapter_data_structure/character_encoding.md" title="编辑此页" class="md-content__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 20H6V4h7v5h5v3.1l2-2V8l-6-6H6c-1.1 0-2 .9-2 2v16c0 1.1.9 2 2 2h4v-2m10.2-7c.1 0 .3.1.4.2l1.3 1.3c.2.2.2.6 0 .8l-1 1-2.1-2.1 1-1c.1-.1.2-.2.4-.2m0 3.9L14.1 23H12v-2.1l6.1-6.1 2.1 2.1Z"/></svg>
</a>
<h1 id="34">3.4. &nbsp; 字符编码 *<a class="headerlink" href="#34" title="Permanent link">&para;</a></h1>
<p>在计算机中,所有数据都是以二进制数的形式存储的,字符 <code>char</code> 也不例外。为了表示字符,我们需要建立一套「字符集」,规定每个字符和二进制数之间的一一对应关系。有了字符集之后,计算机就可以通过查表完成二进制数到字符的转换。</p>
<h2 id="341-ascii">3.4.1. &nbsp; ASCII 字符集<a class="headerlink" href="#341-ascii" title="Permanent link">&para;</a></h2>
<p>「ASCII 码」是最早出现的字符集,全称为“美国标准信息交换代码”。它使用 7 位二进制数(即一个字节的低 7 位)表示一个字符,最多能够表示 128 个不同的字符。这包括英文字母的大小写、数字 0-9 、一些标点符号,以及一些控制字符(如换行符和制表符)。</p>
<p><img alt="ASCII 码" src="../character_encoding.assets/ascii_table.png" /></p>
<p align="center"> Fig. ASCII 码 </p>
<p>然而,<strong>ASCII 码仅能够表示英文</strong>。随着计算机的全球化诞生了一种能够表示更多语言的字符集「EASCII」。它在 ASCII 的 7 位基础上扩展到 8 位,能够表示 256 个不同的字符。在世界范围内,陆续出现了一批适用于不同地区的 EASCII 字符集。这些字符集的前 128 个字符统一为 ASCII 码,后 128 个字符定义不同,以适应不同语言的需求。</p>
<h2 id="342-gbk">3.4.2. &nbsp; GBK 字符集<a class="headerlink" href="#342-gbk" title="Permanent link">&para;</a></h2>
<p>后来人们发现,<strong>EASCII 码仍然无法满足许多语言的字符数量要求</strong>。例如,汉字大约有近十万个,光日常使用的就有几千个。中国国家标准总局于 1980 年发布了「GB2312」字符集其收录了 6763 个汉字,基本满足了汉字的计算机处理需要。</p>
<p>然而GB2312 无法处理部分的罕见字和繁体字。之后在 GB2312 的基础上扩展得到了「GBK」字符集它共收录了 21886 个汉字。在 GBK 编码方案中ASCII 字符使用一个字节表示,汉字使用两个字节表示。</p>
<h2 id="343-unicode">3.4.3. &nbsp; Unicode 字符集<a class="headerlink" href="#343-unicode" title="Permanent link">&para;</a></h2>
<p>随着计算机的蓬勃发展,字符集与编码标准百花齐放,而这带来了许多问题。一方面,这些字符集一般只定义了特定语言的字符,无法在多语言环境下正常工作;另一方面,同一种语言也存在多种字符集标准,如果两台电脑安装的是不同的编码标准,则在信息传递时就会出现乱码。</p>
<p>那个时代的人们就在想:<strong>如果推出一个足够完整的字符集,将世界范围内的所有语言和符号都收录其中,不就可以解决跨语言环境和乱码问题了吗</strong>?在这种想法的驱动下,一个大而全的字符集 Unicode 应运而生。</p>
<p>「Unicode」的全称为“统一字符编码”理论上能容纳一百多万个字符。它致力于将全球范围内的字符纳入到统一的字符集之中提供一种通用的字符集来处理和显示各种语言文字减少因为编码标准不同而产生的乱码问题。</p>
<p>自 1991 年发布以来Unicode 不断扩充新的语言与字符。截止 2022 年 9 月Unicode 已经包含 149186 个字符,包括各种语言的字符、符号、甚至是表情符号等。在庞大的 Unicode 字符集中,常用的字符占用 2 字节,有些生僻的字符占 3 字节甚至 4 字节。</p>
<p>Unicode 是一种字符集标准,本质上是给每个字符分配一个编号(称为“码点”),<strong>但它并没有规定在计算机中如何存储这些字符码点</strong>。我们不禁会问:当多种长度的 Unicode 码点同时出现在同一个文本中时,系统如何解析字符?例如,给定一个长度为 2 字节的编码,系统如何确认它是一个 2 字节的字符还是两个 1 字节的字符?</p>
<p>对于以上问题,<strong>一种直接的解决方案是将所有字符存储为等长的编码</strong>。如下图所示“Hello”中的每个字符占用 1 字节,“算法”中的每个字符占用 2 字节。我们可以通过高位填 0 将“Hello 算法”中的所有字符都编码为 2 字节长度。这样系统就可以每隔 2 字节解析一个字符,恢复出这个短语的内容了。</p>
<p><img alt="Unicode 编码示例" src="../character_encoding.assets/unicode_hello_algo.png" /></p>
<p align="center"> Fig. Unicode 编码示例 </p>
<p>然而ASCII 码已经向我们证明,编码英文只需要 1 字节。若采用上述方案,英文文本占用空间的大小将会是 ASCII 编码下大小的 2 倍,非常浪费内存空间。因此,我们需要一种更加高效的 Unicode 编码方法。</p>
<h2 id="344-utf-8">3.4.4. &nbsp; UTF-8 编码<a class="headerlink" href="#344-utf-8" title="Permanent link">&para;</a></h2>
<p>目前UTF-8 已成为国际上使用最广泛的 Unicode 编码方法。<strong>它是一种可变长的编码</strong>,使用 1 到 4 个字节来表示一个字符根据字符的复杂性而变。ASCII 字符只需要 1 个字节,拉丁字母和希腊字母需要 2 个字节,常用的中文字符需要 3 个字节,其他的一些生僻字符需要 4 个字节。</p>
<p>UTF-8 的编码规则并不复杂,分为两种情况:</p>
<ul>
<li>对于长度为 1 字节的字符,将最高位设置为 <span class="arithmatex">\(0\)</span> 、其余 7 位设置为 Unicode 码点。值得注意的是ASCII 字符在 Unicode 字符集中占据了前 128 个码点。也就是说,<strong>UTF-8 编码可以向下兼容 ASCII 码</strong>。这意味着我们可以使用 UTF-8 来解析年代久远的 ASCII 码文本。</li>
<li>对于长度为 <span class="arithmatex">\(n\)</span> 字节的字符(其中 <span class="arithmatex">\(n &gt; 1\)</span>),将首个字节的高 <span class="arithmatex">\(n\)</span> 位都设置为 <span class="arithmatex">\(1\)</span> 、第 <span class="arithmatex">\(n + 1\)</span> 位设置为 <span class="arithmatex">\(0\)</span> ;从第二个字节开始,将每个字节的高 2 位都设置为 <span class="arithmatex">\(10\)</span> ;其余所有位用于填充字符的 Unicode 码点。</li>
</ul>
<p>下图展示了“Hello算法”对应的 UTF-8 编码。将最高 <span class="arithmatex">\(n\)</span> 位设置为 <span class="arithmatex">\(1\)</span> 比较容易理解,可以向系统指出字符的长度为 <span class="arithmatex">\(n\)</span> 。那么,为什么要将其余所有字节的高 2 位都设置为 <span class="arithmatex">\(10\)</span> 呢?实际上,这个 <span class="arithmatex">\(10\)</span> 能够起到校验符的作用,因为在 UTF-8 编码规则下,不可能有字符的最高两位是 <span class="arithmatex">\(10\)</span> 。这是因为长度为 1 字节的字符的最高一位是 <span class="arithmatex">\(0\)</span> 。假设系统从一个错误的字节开始解析文本,字节头部的 <span class="arithmatex">\(10\)</span> 能够帮助系统快速的判断出异常。</p>
<p><img alt="UTF-8 编码示例" src="../character_encoding.assets/utf-8_hello_algo.png" /></p>
<p align="center"> Fig. UTF-8 编码示例 </p>
<p>除了 UTF-8 之外,常见的编码方式还包括 UTF-16 和 UTF-32 。它们为 Unicode 字符集提供了不同的编码方法。</p>
<ul>
<li><strong>UTF-16 编码</strong>:使用 2 或 4 个字节来表示一个字符。所有的 ASCII 字符和很多常用的非英文字符,都用 2 个字节表示;少数字符需要用到 4 个字节表示。对于 2 字节的字符UTF-16 编码与 Unicode 码点相等。</li>
<li><strong>UTF-32 编码</strong>:每个字符都使用 4 个字节。这意味着 UTF-32 会比 UTF-8 和 UTF-16 更占用空间,特别是对于主要使用 ASCII 字符的文本。</li>
</ul>
<p>从存储空间的角度看,使用 UTF-8 表示英文字符非常高效,因为它仅需 1 个字节;使用 UTF-16 编码某些非英文字符(例如中文)会更加高效,因为它只需要 2 个字节,而 UTF-8 可能需要 3 个字节。从兼容性的角度看UTF-8 的通用性最佳,许多工具和库都优先支持 UTF-8 。</p>
<h2 id="345">3.4.5. &nbsp; 编程语言的字符编码<a class="headerlink" href="#345" title="Permanent link">&para;</a></h2>
<p>对于以往的大多数编程语言,程序运行中的字符串都采用 UTF-16 或 UTF-32 这类等长的编码。这是因为在等长编码下,我们可以将字符串看作数组来处理,具体来说:</p>
<ul>
<li><strong>随机访问</strong>: UTF-16 编码的字符串可以很容易地进行随机访问。UTF-8 是一种变长编码,要找到第 <span class="arithmatex">\(i\)</span> 个字符,我们需要从字符串的开始处遍历到第 <span class="arithmatex">\(i\)</span> 个字符,这需要 <span class="arithmatex">\(O(n)\)</span> 的时间。</li>
<li><strong>字符计数</strong>: 与随机访问类似,计算 UTF-16 字符串的长度也是 <span class="arithmatex">\(O(1)\)</span> 的操作。但是,计算 UTF-8 编码的字符串的长度需要遍历整个字符串。</li>
<li><strong>字符串操作</strong>: 在 UTF-16 编码的字符串中,很多字符串操作(如分割、连接、插入、删除等)都更容易进行。在 UTF-8 编码的字符串上进行这些操作通常需要额外的计算,以确保不会产生无效的 UTF-8 编码。</li>
</ul>
<p>编程语言的字符编码方案设计是一个很有趣的话题,涉及到许多因素:</p>
<ul>
<li>Java 的 <code>String</code> 类型使用 UTF-16 编码,每个字符占用 2 字节。这是因为 Java 语言设计之初,人们认为 16 位足以表示所有可能的字符。然而,这是一个不正确的判断。后来 Unicode 规范扩展到了超过 16 位,所以 Java 中的字符现在可能由一对 16 位的值(称为“代理对”)表示。</li>
<li>JavaScript 和 TypeScript 的字符串使用 UTF-16 编码的原因与 Java 类似。当 JavaScript 语言在 1995 年被 Netscape 公司首次引入时Unicode 还处于相对早期的阶段,那时候使用 16 位的编码就足够表示所有的 Unicode 字符了。</li>
<li>C# 使用 UTF-16 编码,主要因为 .NET 平台是由 Microsoft 设计的,而 Microsoft 的很多技术,包括 Windows 操作系统,都广泛地使用 UTF-16 编码。</li>
</ul>
<p>由于以上编程语言对字符数量的低估,它们不得不采取“代理对”的方式来表示超过 16 位长度的 Unicode 字符。这是一个不得已为之的无奈之举。一方面,包含代理对的字符串中,一个字符可能占用 2 字节或 4 字节,因此丧失了等长编码的优势。另一方面,处理代理对需要增加额外代码,这增加了编程的复杂性和 Debug 难度。</p>
<p>出于以上原因,部分编程语言提出了不同的编码方案:</p>
<ul>
<li>Python 3 使用一种灵活的字符串表示,存储的字符长度取决于字符串中最大的 Unicode 码点。对于全部是 ASCII 字符的字符串,每个字符占用 1 个字节;如果字符串中包含的字符超出了 ASCII 范围但全部在基本多语言平面BMP每个字符占用 2 个字节;如果字符串中有超出 BMP 的字符,那么每个字符占用 4 个字节。</li>
<li>Go 语言的 <code>string</code> 类型在内部使用 UTF-8 编码。Go 语言还提供了 <code>rune</code> 类型,它用于表示单个 Unicode 码点。</li>
<li>Rust 语言的 str 和 String 类型在内部使用 UTF-8 编码。Rust 也提供了 char 类型,用于表示单个 Unicode 码点。</li>
</ul>
<p>需要注意的是,以上讨论的都是字符串在编程语言中的存储方式,<strong>这和字符串如何在文件中存储或在网络中传输是两个不同的问题</strong>。在文件存储或网络传输中,我们一般会将字符串编码为 UTF-8 格式,以达到最优的兼容性和空间效率。</p>
<h2 id="__comments">评论</h2>
<!-- Insert generated snippet here -->
<script
src="https://giscus.app/client.js"
data-repo="krahets/hello-algo"
data-repo-id="R_kgDOIXtSqw"
data-category="Announcements"
data-category-id="DIC_kwDOIXtSq84CSZk_"
data-mapping="pathname"
data-strict="1"
data-reactions-enabled="1"
data-emit-metadata="0"
data-input-position="top"
data-theme="preferred_color_scheme"
data-lang="zh-CN"
crossorigin="anonymous"
async
>
</script>
<!-- Synchronize Giscus theme with palette -->
<script>
var giscus = document.querySelector("script[src*=giscus]")
/* Set palette on initial load */
var palette = __md_get("__palette")
if (palette && typeof palette.color === "object") {
var theme = palette.color.scheme === "slate" ? "dark" : "light"
giscus.setAttribute("data-theme", theme)
}
/* Register event handlers after documented loaded */
document.addEventListener("DOMContentLoaded", function() {
var ref = document.querySelector("[data-md-component=palette]")
ref.addEventListener("change", function() {
var palette = __md_get("__palette")
if (palette && typeof palette.color === "object") {
var theme = palette.color.scheme === "slate" ? "dark" : "light"
/* Instruct Giscus to change theme */
var frame = document.querySelector(".giscus-frame")
frame.contentWindow.postMessage(
{ giscus: { setConfig: { theme } } },
"https://giscus.app"
)
}
})
})
</script>
</article>
</div>
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var tab,labels=set.querySelector(".tabbed-labels");for(tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
</div>
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12Z"/></svg>
回到页面顶部
</button>
</main>
<footer class="md-footer">
<nav class="md-footer__inner md-grid" aria-label="页脚" >
<a href="../number_encoding/" class="md-footer__link md-footer__link--prev" aria-label="上一页: 3.3. &amp;nbsp; 数字编码 *" rel="prev">
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
</div>
<div class="md-footer__title">
<span class="md-footer__direction">
上一页
</span>
<div class="md-ellipsis">
3.3. &nbsp; 数字编码 *
</div>
</div>
</a>
<a href="../summary/" class="md-footer__link md-footer__link--next" aria-label="下一页: 3.5. &amp;nbsp; 小结" rel="next">
<div class="md-footer__title">
<span class="md-footer__direction">
下一页
</span>
<div class="md-ellipsis">
3.5. &nbsp; 小结
</div>
</div>
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11H4Z"/></svg>
</div>
</a>
</nav>
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
<div class="md-copyright__highlight">
Copyright &copy; 2023 Krahets
</div>
</div>
<div class="md-social">
<a href="https://github.com/krahets" target="_blank" rel="noopener" title="github.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.4.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</a>
<a href="https://twitter.com/krahets" target="_blank" rel="noopener" title="twitter.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 6.4.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M459.37 151.716c.325 4.548.325 9.097.325 13.645 0 138.72-105.583 298.558-298.558 298.558-59.452 0-114.68-17.219-161.137-47.106 8.447.974 16.568 1.299 25.34 1.299 49.055 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.112-72.772 6.498.974 12.995 1.624 19.818 1.624 9.421 0 18.843-1.3 27.614-3.573-48.081-9.747-84.143-51.98-84.143-102.985v-1.299c13.969 7.797 30.214 12.67 47.431 13.319-28.264-18.843-46.781-51.005-46.781-87.391 0-19.492 5.197-37.36 14.294-52.954 51.655 63.675 129.3 105.258 216.365 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.828 46.782-104.934 104.934-104.934 30.213 0 57.502 12.67 76.67 33.137 23.715-4.548 46.456-13.32 66.599-25.34-7.798 24.366-24.366 44.833-46.132 57.827 21.117-2.273 41.584-8.122 60.426-16.243-14.292 20.791-32.161 39.308-52.628 54.253z"/></svg>
</a>
<a href="https://leetcode.cn/u/jyd/" target="_blank" rel="noopener" title="leetcode.cn" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 640 512"><!--! Font Awesome Free 6.4.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M392.8 1.2c-17-4.9-34.7 5-39.6 22l-128 448c-4.9 17 5 34.7 22 39.6s34.7-5 39.6-22l128-448c4.9-17-5-34.7-22-39.6zm80.6 120.1c-12.5 12.5-12.5 32.8 0 45.3l89.3 89.4-89.4 89.4c-12.5 12.5-12.5 32.8 0 45.3s32.8 12.5 45.3 0l112-112c12.5-12.5 12.5-32.8 0-45.3l-112-112c-12.5-12.5-32.8-12.5-45.3 0zm-306.7 0c-12.5-12.5-32.8-12.5-45.3 0l-112 112c-12.5 12.5-12.5 32.8 0 45.3l112 112c12.5 12.5 32.8 12.5 45.3 0s12.5-32.8 0-45.3L77.3 256l89.4-89.4c12.5-12.5 12.5-32.8 0-45.3z"/></svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../..", "features": ["content.action.edit", "content.code.annotate", "content.code.copy", "content.tabs.link", "content.tooltips", "navigation.indexes", "navigation.sections", "navigation.top", "navigation.footer", "navigation.tracking", "search.highlight", "search.share", "search.suggest", "toc.follow"], "search": "../../assets/javascripts/workers/search.208ed371.min.js", "translations": {"clipboard.copied": "\u5df2\u590d\u5236", "clipboard.copy": "\u590d\u5236", "search.result.more.one": "\u5728\u8be5\u9875\u4e0a\u8fd8\u6709 1 \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.more.other": "\u5728\u8be5\u9875\u4e0a\u8fd8\u6709 # \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.none": "\u6ca1\u6709\u627e\u5230\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.one": "\u627e\u5230 1 \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.other": "# \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.placeholder": "\u952e\u5165\u4ee5\u5f00\u59cb\u641c\u7d22", "search.result.term.missing": "\u7f3a\u5c11", "select.version": "\u9009\u62e9\u5f53\u524d\u7248\u672c"}}</script>
<script src="../../assets/javascripts/bundle.fac441b0.min.js"></script>
<script src="../../javascripts/mathjax.js"></script>
<script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
</body>
</html>