<!doctype html>
< html lang = "zh" class = "no-js" >
<head>
  <!-- Document metadata for this page; the generator meta below identifies the build tool. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <meta name="description" content="一本动画图解、能运行、可提问的数据结构与算法入门书">
  <meta name="author" content="Krahets">
  <link rel="canonical" href="https://www.hello-algo.com/chapter_data_structure/character_encoding/">
  <link rel="prev" href="../number_encoding/">
  <link rel="next" href="../summary/">
  <link rel="icon" href="../../assets/images/favicon.png">
  <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-9.1.11">
  <title>3.4. 字符编码 * - Hello 算法</title>

  <!-- Theme stylesheets, web fonts, then site-specific overrides (order matters for the cascade). -->
  <link rel="stylesheet" href="../../assets/stylesheets/main.85bb2934.min.css">
  <link rel="stylesheet" href="../../assets/stylesheets/palette.a6bdf11c.min.css">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Noto+Sans+SC:300,300i,400,400i,700,700i%7CFira+Code:400,400i,700,700i&display=fallback">
  <style>:root{--md-text-font:"Noto Sans SC";--md-code-font:"Fira Code"}</style>
  <link rel="stylesheet" href="../../stylesheets/extra.css">

  <script>
    // Small helpers assigned as implicit globals so that later inline scripts
    // (e.g. the palette restore at the top of <body>) can call them.
    // __md_scope: URL of the site root, resolved relative to this page.
    __md_scope = new URL("../..", location);
    // __md_hash: 31-multiplier string hash over the characters of `e`.
    __md_hash = e => [...e].reduce((e, _) => (e << 5) - e + _.charCodeAt(0), 0);
    // __md_get / __md_set: JSON values in web storage, keyed by "<scope pathname>.<name>".
    __md_get = (e, _ = localStorage, t = __md_scope) => JSON.parse(_.getItem(t.pathname + "." + e));
    __md_set = (e, _, t = localStorage, a = __md_scope) => {
      try {
        t.setItem(a.pathname + "." + e, JSON.stringify(_));
      } catch (e) {
        // Storage failures are deliberately ignored (best effort).
      }
    };
  </script>
</head>
< body dir = "ltr" data-md-color-scheme = "default" data-md-color-primary = "white" data-md-color-accent = "indigo" >
<script>
  // Re-apply the stored colour palette before the rest of the body is parsed.
  // __md_get is the storage helper defined by the inline script in <head>.
  var palette = __md_get("__palette");
  if (palette && "object" == typeof palette.color)
    for (var key of Object.keys(palette.color))
      document.body.setAttribute("data-md-color-" + key, palette.color[key]);
</script>
<!-- Checkbox state holders for the drawer and the search UI; they are toggled
     through the <label for="__drawer"> / <label for="__search"> elements. -->
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<!-- Skip link: jumps to the page's <h1 id="34">. -->
<div data-md-component="skip">
  <a href="#34" class="md-skip">
    跳转至
  </a>
</div>
<div data-md-component="announce">
</div>
<!-- Page header: logo, drawer toggle, title, palette switch, search, repository link. -->
<header class="md-header md-header--shadow" data-md-component="header">
  <nav class="md-header__inner md-grid" aria-label="页眉">
    <a href="../.." title="Hello 算法" class="md-header__button md-logo" aria-label="Hello 算法" data-md-component="logo">
      <img src="../../assets/images/logo.png" alt="logo">
    </a>
    <!-- Drawer toggle (checks #__drawer). -->
    <label class="md-header__button md-icon" for="__drawer">
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
    </label>
    <div class="md-header__title" data-md-component="header-title">
      <div class="md-header__ellipsis">
        <div class="md-header__topic">
          <span class="md-ellipsis">
            Hello 算法
          </span>
        </div>
        <div class="md-header__topic" data-md-component="header-topic">
          <span class="md-ellipsis">
            3.4. 字符编码 *
          </span>
        </div>
      </div>
    </div>
    <!-- Palette switch: two radios; each label points at the *other* radio. -->
    <form class="md-header__option" data-md-component="palette">
      <input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="white" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
      <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 7a5 5 0 0 1 5 5 5 5 0 0 1-5 5 5 5 0 0 1-5-5 5 5 0 0 1 5-5m0 2a3 3 0 0 0-3 3 3 3 0 0 0 3 3 3 3 0 0 0 3-3 3 3 0 0 0-3-3m0-7 2.39 3.42C13.65 5.15 12.84 5 12 5c-.84 0-1.65.15-2.39.42L12 2M3.34 7l4.16-.35A7.2 7.2 0 0 0 5.94 8.5c-.44.74-.69 1.5-.83 2.29L3.34 7m.02 10 1.76-3.77a7.131 7.131 0 0 0 2.38 4.14L3.36 17M20.65 7l-1.77 3.79a7.023 7.023 0 0 0-2.38-4.15l4.15.36m-.01 10-4.14.36c.59-.51 1.12-1.14 1.54-1.86.42-.73.69-1.5.83-2.29L20.64 17M12 22l-2.41-3.44c.74.27 1.55.44 2.41.44.82 0 1.63-.17 2.37-.44L12 22Z"/></svg>
      </label>
      <input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_2">
      <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m17.75 4.09-2.53 1.94.91 3.06-2.63-1.81-2.63 1.81.91-3.06-2.53-1.94L12.44 4l1.06-3 1.06 3 3.19.09m3.5 6.91-1.64 1.25.59 1.98-1.7-1.17-1.7 1.17.59-1.98L15.75 11l2.06-.05L18.5 9l.69 1.95 2.06.05m-2.28 4.95c.83-.08 1.72 1.1 1.19 1.85-.32.45-.66.87-1.08 1.27C15.17 23 8.84 23 4.94 19.07c-3.91-3.9-3.91-10.24 0-14.14.4-.4.82-.76 1.27-1.08.75-.53 1.93.36 1.85 1.19-.27 2.86.69 5.83 2.89 8.02a9.96 9.96 0 0 0 8.02 2.89m-1.64 2.02a12.08 12.08 0 0 1-7.8-3.47c-2.17-2.19-3.33-5-3.49-7.82-2.81 3.14-2.7 7.96.31 10.98 3.02 3.01 7.84 3.12 10.98.31Z"/></svg>
      </label>
    </form>
    <!-- Search toggle (checks #__search). -->
    <label class="md-header__button md-icon" for="__search">
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
    </label>
    <div class="md-search" data-md-component="search" role="dialog">
      <label class="md-search__overlay" for="__search"></label>
      <div class="md-search__inner" role="search">
        <form class="md-search__form" name="search">
          <input type="text" class="md-search__input" name="query" aria-label="搜索" placeholder="搜索" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
          <label class="md-search__icon md-icon" for="__search">
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
          </label>
          <nav class="md-search__options" aria-label="查找">
            <!-- NOTE(review): href="javascript:void(0)" is kept as-is; presumably the
                 theme's search-share script manages this link — confirm before
                 converting it to a <button>. -->
            <a href="javascript:void(0)" class="md-search__icon md-icon" title="分享" aria-label="分享" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7 0-.24-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91 1.61 0 2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08Z"/></svg>
            </a>
            <button type="reset" class="md-search__icon md-icon" title="清空当前内容" aria-label="清空当前内容" tabindex="-1">
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
            </button>
          </nav>
          <div class="md-search__suggest" data-md-component="search-suggest"></div>
        </form>
        <div class="md-search__output">
          <div class="md-search__scrollwrap" data-md-scrollfix>
            <div class="md-search-result" data-md-component="search-result">
              <div class="md-search-result__meta">
                正在初始化搜索引擎
              </div>
              <ol class="md-search-result__list" role="presentation"></ol>
            </div>
          </div>
        </div>
      </div>
    </div>
    <div class="md-header__source">
      <a href="https://github.com/krahets/hello-algo" title="前往仓库" class="md-source" data-md-component="source">
        <div class="md-source__icon md-icon">
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.4.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
        </div>
        <div class="md-source__repository">
          krahets/hello-algo
        </div>
      </a>
    </div>
  </nav>
</header>
< div class = "md-container" data-md-component = "container" >
< main class = "md-main" data-md-component = "main" >
< div class = "md-main__inner md-grid" >
< div class = "md-sidebar md-sidebar--primary" data-md-component = "sidebar" data-md-type = "navigation" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--primary" aria-label = "导航栏" data-md-level = "0" >
<!-- Drawer title: logo link plus site name; the label toggles #__drawer. -->
<label class="md-nav__title" for="__drawer">
  <a href="../.." title="Hello 算法" class="md-nav__button md-logo" aria-label="Hello 算法" data-md-component="logo">
    <img src="../../assets/images/logo.png" alt="logo">
  </a>
  Hello 算法
</label>
<!-- Repository link shown inside the navigation drawer. -->
<div class="md-nav__source">
  <a href="https://github.com/krahets/hello-algo" title="前往仓库" class="md-source" data-md-component="source">
    <div class="md-source__icon md-icon">
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.4.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
    </div>
    <div class="md-source__repository">
      krahets/hello-algo
    </div>
  </a>
</div>
< ul class = "md-nav__list" data-md-scrollfix >
<!-- Chapter 0 section; the checkbox #__nav_1 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_1">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_preface/" id="__nav_1_label">0. 写在前面</a>
    <label for="__nav_1">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_1">
      <span class="md-nav__icon md-icon"></span>
      0. 写在前面
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_preface/about_the_book/" class="md-nav__link">
          0.1. 关于本书
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_preface/suggestions/" class="md-nav__link">
          0.2. 如何使用本书
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_preface/summary/" class="md-nav__link">
          0.3. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 1 section; the checkbox #__nav_2 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_2">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_introduction/" id="__nav_2_label">1. 引言</a>
    <label for="__nav_2">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_2">
      <span class="md-nav__icon md-icon"></span>
      1. 引言
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_introduction/algorithms_are_everywhere/" class="md-nav__link">
          1.1. 算法无处不在
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_introduction/what_is_dsa/" class="md-nav__link">
          1.2. 算法是什么
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_introduction/summary/" class="md-nav__link">
          1.3. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 2 section; the checkbox #__nav_3 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_3">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_computational_complexity/" id="__nav_3_label">2. 复杂度分析</a>
    <label for="__nav_3">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_3">
      <span class="md-nav__icon md-icon"></span>
      2. 复杂度分析
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_computational_complexity/performance_evaluation/" class="md-nav__link">
          2.1. 算法效率评估
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_computational_complexity/time_complexity/" class="md-nav__link">
          2.2. 时间复杂度
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_computational_complexity/space_complexity/" class="md-nav__link">
          2.3. 空间复杂度
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_computational_complexity/summary/" class="md-nav__link">
          2.4. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 3 section (the active chapter, expanded by default via `checked`).
     It also embeds the in-page table of contents for the current page. -->
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_4" checked>
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../" id="__nav_4_label">3. 数据结构简介</a>
    <label for="__nav_4">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
    <label class="md-nav__title" for="__nav_4">
      <span class="md-nav__icon md-icon"></span>
      3. 数据结构简介
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../classification_of_data_structure/" class="md-nav__link">
          3.1. 数据结构分类
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../basic_data_types/" class="md-nav__link">
          3.2. 基本数据类型
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../number_encoding/" class="md-nav__link">
          3.3. 数字编码 *
        </a>
      </li>
      <!-- Current page: the #__toc checkbox toggles the embedded table of contents. -->
      <li class="md-nav__item md-nav__item--active">
        <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
        <label class="md-nav__link md-nav__link--active" for="__toc">
          3.4. 字符编码 *
          <span class="md-nav__icon md-icon"></span>
        </label>
        <a href="./" class="md-nav__link md-nav__link--active">
          3.4. 字符编码 *
        </a>
        <nav class="md-nav md-nav--secondary" aria-label="目录">
          <label class="md-nav__title" for="__toc">
            <span class="md-nav__icon md-icon"></span>
            目录
          </label>
          <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
            <li class="md-nav__item">
              <a href="#341-ascii" class="md-nav__link">
                3.4.1. ASCII 字符集
              </a>
            </li>
            <li class="md-nav__item">
              <a href="#342-gbk" class="md-nav__link">
                3.4.2. GBK 字符集
              </a>
            </li>
            <li class="md-nav__item">
              <a href="#343-unicode" class="md-nav__link">
                3.4.3. Unicode 字符集
              </a>
            </li>
            <li class="md-nav__item">
              <a href="#344-utf-8" class="md-nav__link">
                3.4.4. UTF-8 编码
              </a>
            </li>
            <li class="md-nav__item">
              <a href="#345" class="md-nav__link">
                3.4.5. 编程语言的字符编码
              </a>
            </li>
          </ul>
        </nav>
      </li>
      <li class="md-nav__item">
        <a href="../summary/" class="md-nav__link">
          3.5. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 4 section; the checkbox #__nav_5 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_5">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_array_and_linkedlist/" id="__nav_5_label">4. 数组与链表</a>
    <label for="__nav_5">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_5">
      <span class="md-nav__icon md-icon"></span>
      4. 数组与链表
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_array_and_linkedlist/array/" class="md-nav__link">
          4.1. 数组
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_array_and_linkedlist/linked_list/" class="md-nav__link">
          4.2. 链表
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_array_and_linkedlist/list/" class="md-nav__link">
          4.3. 列表
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_array_and_linkedlist/summary/" class="md-nav__link">
          4.4. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 5 section; the checkbox #__nav_6 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_6">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_stack_and_queue/" id="__nav_6_label">5. 栈与队列</a>
    <label for="__nav_6">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_6">
      <span class="md-nav__icon md-icon"></span>
      5. 栈与队列
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_stack_and_queue/stack/" class="md-nav__link">
          5.1. 栈
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_stack_and_queue/queue/" class="md-nav__link">
          5.2. 队列
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_stack_and_queue/deque/" class="md-nav__link">
          5.3. 双向队列
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_stack_and_queue/summary/" class="md-nav__link">
          5.4. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 6 section; the checkbox #__nav_7 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_7">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_hashing/" id="__nav_7_label">6. 散列表</a>
    <label for="__nav_7">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_7">
      <span class="md-nav__icon md-icon"></span>
      6. 散列表
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_hashing/hash_map/" class="md-nav__link">
          6.1. 哈希表( New)
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_hashing/hash_collision/" class="md-nav__link">
          6.2. 哈希冲突( New)
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_hashing/hash_algorithm/" class="md-nav__link">
          6.3. 哈希算法( New)
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_hashing/summary/" class="md-nav__link">
          6.4. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 7 section; the checkbox #__nav_8 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_8">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_tree/" id="__nav_8_label">7. 树</a>
    <label for="__nav_8">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_8">
      <span class="md-nav__icon md-icon"></span>
      7. 树
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_tree/binary_tree/" class="md-nav__link">
          7.1. 二叉树
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_tree/binary_tree_traversal/" class="md-nav__link">
          7.2. 二叉树遍历
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_tree/array_representation_of_tree/" class="md-nav__link">
          7.3. 二叉树数组表示
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_tree/binary_search_tree/" class="md-nav__link">
          7.4. 二叉搜索树
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_tree/avl_tree/" class="md-nav__link">
          7.5. AVL 树 *
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_tree/summary/" class="md-nav__link">
          7.6. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 8 section; the checkbox #__nav_9 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_9">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_heap/" id="__nav_9_label">8. 堆</a>
    <label for="__nav_9">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_9_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_9">
      <span class="md-nav__icon md-icon"></span>
      8. 堆
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_heap/heap/" class="md-nav__link">
          8.1. 堆
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_heap/build_heap/" class="md-nav__link">
          8.2. 建堆操作
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_heap/top_k/" class="md-nav__link">
          8.3. Top-K 问题( New)
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_heap/summary/" class="md-nav__link">
          8.4. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 9 section; the checkbox #__nav_10 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_10">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_graph/" id="__nav_10_label">9. 图</a>
    <label for="__nav_10">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_10_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_10">
      <span class="md-nav__icon md-icon"></span>
      9. 图
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_graph/graph/" class="md-nav__link">
          9.1. 图
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_graph/graph_operations/" class="md-nav__link">
          9.2. 图基础操作
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_graph/graph_traversal/" class="md-nav__link">
          9.3. 图的遍历
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_graph/summary/" class="md-nav__link">
          9.4. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 10 section; the checkbox #__nav_11 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_11">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_searching/" id="__nav_11_label">10. 搜索</a>
    <label for="__nav_11">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_11_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_11">
      <span class="md-nav__icon md-icon"></span>
      10. 搜索
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_searching/binary_search/" class="md-nav__link">
          10.1. 二分查找( New)
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_searching/binary_search_edge/" class="md-nav__link">
          10.2. 二分查找边界( New)
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_searching/replace_linear_by_hashing/" class="md-nav__link">
          10.3. 哈希优化策略
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_searching/searching_algorithm_revisited/" class="md-nav__link">
          10.4. 重识搜索算法
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_searching/summary/" class="md-nav__link">
          10.5. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 11 section; the checkbox #__nav_12 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_12">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_sorting/" id="__nav_12_label">11. 排序</a>
    <label for="__nav_12">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_12_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_12">
      <span class="md-nav__icon md-icon"></span>
      11. 排序
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/sorting_algorithm/" class="md-nav__link">
          11.1. 排序算法
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/selection_sort/" class="md-nav__link">
          11.2. 选择排序( New)
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/bubble_sort/" class="md-nav__link">
          11.3. 冒泡排序
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/insertion_sort/" class="md-nav__link">
          11.4. 插入排序
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/quick_sort/" class="md-nav__link">
          11.5. 快速排序
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/merge_sort/" class="md-nav__link">
          11.6. 归并排序
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/heap_sort/" class="md-nav__link">
          11.7. 堆排序( New)
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/bucket_sort/" class="md-nav__link">
          11.8. 桶排序
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/counting_sort/" class="md-nav__link">
          11.9. 计数排序
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/radix_sort/" class="md-nav__link">
          11.10. 基数排序
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_sorting/summary/" class="md-nav__link">
          11.11. 小结
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 12 section; the checkbox #__nav_13 holds the expand/collapse state. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_13">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_backtracking/" id="__nav_13_label">12. 回溯</a>
    <label for="__nav_13">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_13_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_13">
      <span class="md-nav__icon md-icon"></span>
      12. 回溯
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_backtracking/backtracking_algorithm/" class="md-nav__link">
          12.1. 回溯算法
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_backtracking/permutations_problem/" class="md-nav__link">
          12.2. 全排列问题
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_backtracking/n_queens_problem/" class="md-nav__link">
          12.3. N 皇后问题
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Chapter 13 (appendix) section; it has no index page, so the toggle label
     itself carries the id referenced by the nested nav's aria-labelledby. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_14">
  <label class="md-nav__link" for="__nav_14" id="__nav_14_label" tabindex="0">
    13. 附录
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_14_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_14">
      <span class="md-nav__icon md-icon"></span>
      13. 附录
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../chapter_appendix/installation/" class="md-nav__link">
          13.1. 编程环境安装
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../chapter_appendix/contribution/" class="md-nav__link">
          13.2. 一起参与创作
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- References section: an index link with an empty child list. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
  <input class="md-nav__toggle md-toggle" type="checkbox" id="__nav_15">
  <div class="md-nav__link md-nav__link--index">
    <!-- The id gives the nested nav's aria-labelledby a real target. -->
    <a href="../../chapter_reference/" id="__nav_15_label">参考文献</a>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_15_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_15">
      <span class="md-nav__icon md-icon"></span>
      参考文献
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
    </ul>
  </nav>
</li>
< / ul >
< / nav >
< / div >
< / div >
< / div >
< div class = "md-sidebar md-sidebar--secondary" data-md-component = "sidebar" data-md-type = "toc" >
< div class = "md-sidebar__scrollwrap" >
< div class = "md-sidebar__inner" >
< nav class = "md-nav md-nav--secondary" aria-label = "目录" >
< label class = "md-nav__title" for = "__toc" >
< span class = "md-nav__icon md-icon" > < / span >
目录
< / label >
< ul class = "md-nav__list" data-md-component = "toc" data-md-scrollfix >
< li class = "md-nav__item" >
< a href = "#341-ascii" class = "md-nav__link" >
3.4.1. ASCII 字符集
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#342-gbk" class = "md-nav__link" >
3.4.2. GBK 字符集
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#343-unicode" class = "md-nav__link" >
3.4.3. Unicode 字符集
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#344-utf-8" class = "md-nav__link" >
3.4.4. UTF-8 编码
< / a >
< / li >
< li class = "md-nav__item" >
< a href = "#345" class = "md-nav__link" >
3.4.5. 编程语言的字符编码
< / a >
< / li >
< / ul >
< / nav >
< / div >
< / div >
< / div >
< div class = "md-content" data-md-component = "content" >
< article class = "md-content__inner md-typeset" >
< a href = "https://github.com/krahets/hello-algo/tree/main/docs/chapter_data_structure/character_encoding.md" title = "编辑此页" class = "md-content__button md-icon" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M10 20H6V4h7v5h5v3.1l2-2V8l-6-6H6c-1.1 0-2 .9-2 2v16c0 1.1.9 2 2 2h4v-2m10.2-7c.1 0 .3.1.4.2l1.3 1.3c.2.2.2.6 0 .8l-1 1-2.1-2.1 1-1c.1-.1.2-.2.4-.2m0 3.9L14.1 23H12v-2.1l6.1-6.1 2.1 2.1Z" / > < / svg >
< / a >
< h1 id = "34" > 3.4. 字符编码 *< a class = "headerlink" href = "#34" title = "Permanent link" > ¶ < / a > < / h1 >
< p > 在计算机中,所有数据都是以二进制数的形式存储的,字符 < code > char< / code > 也不例外。为了表示字符,我们需要建立一套「字符集」,规定每个字符和二进制数之间的一一对应关系。有了字符集之后,计算机就可以通过查表完成二进制数到字符的转换。< / p >
< h2 id = "341-ascii" > 3.4.1. ASCII 字符集< a class = "headerlink" href = "#341-ascii" title = "Permanent link" > ¶ < / a > < / h2 >
< p > 「ASCII 码」是最早出现的字符集,全称为“美国标准信息交换代码”。它使用 7 位二进制数(即一个字节的低 7 位)表示一个字符,最多能够表示 128 个不同的字符。这包括英文字母的大小写、数字 0-9 、一些标点符号,以及一些控制字符(如换行符和制表符)。< / p >
< p > < img alt = "ASCII 码" src = "../character_encoding.assets/ascii_table.png" / > < / p >
< p align = "center" > Fig. ASCII 码 < / p >
< p > 然而,< strong > ASCII 码仅能够表示英文< / strong > 。随着计算机的全球化, 诞生了一种能够表示更多语言的字符集「EASCII」。它在 ASCII 的 7 位基础上扩展到 8 位,能够表示 256 个不同的字符。在世界范围内,陆续出现了一批适用于不同地区的 EASCII 字符集。这些字符集的前 128 个字符统一为 ASCII 码,后 128 个字符定义不同,以适应不同语言的需求。< / p >
< h2 id = "342-gbk" > 3.4.2. GBK 字符集< a class = "headerlink" href = "#342-gbk" title = "Permanent link" > ¶ < / a > < / h2 >
< p > 后来人们发现,< strong > EASCII 码仍然无法满足许多语言的字符数量要求< / strong > 。例如,汉字有近十万个,光日常使用的就有几千个。中国国家标准总局于 1980 年发布了「GB2312」字符集, 其收录了 6763 个汉字,基本满足了汉字的计算机处理需要。< / p >
< p > 然而, GB2312 无法处理部分的罕见字和繁体字。之后在 GB2312 的基础上, 扩展得到了「GBK」字符集, 它共收录了 21886 个汉字。在 GBK 编码方案中, ASCII 字符使用一个字节表示,汉字使用两个字节表示。< / p >
< h2 id = "343-unicode" > 3.4.3. Unicode 字符集< a class = "headerlink" href = "#343-unicode" title = "Permanent link" > ¶ < / a > < / h2 >
< p > 随着计算机的蓬勃发展,字符集与编码标准百花齐放,而这带来了许多问题。一方面,这些字符集一般只定义了特定语言的字符,无法在多语言环境下正常工作;另一方面,同一种语言也存在多种字符集标准,如果两台电脑安装的是不同的编码标准,则在信息传递时就会出现乱码。< / p >
< p > 那个时代的人们就在想:< strong > 如果推出一个足够完整的字符集,将世界范围内的所有语言和符号都收录其中,不就可以解决跨语言环境和乱码问题了吗< / strong > ?在这种想法的驱动下,一个大而全的字符集 Unicode 应运而生。< / p >
< p > 「Unicode」的全称为“统一字符编码”, 理论上能容纳一百多万个字符。它致力于将全球范围内的字符纳入到统一的字符集之中, 提供一种通用的字符集来处理和显示各种语言文字, 减少因为编码标准不同而产生的乱码问题。< / p >
< p > 自 1991 年发布以来, Unicode 不断扩充新的语言与字符。截至 2022 年 9 月, Unicode 已经包含 149186 个字符,包括各种语言的字符、符号、甚至是表情符号等。在庞大的 Unicode 字符集中,常用的字符占用 2 字节,有些生僻的字符占 3 字节甚至 4 字节。< / p >
< p > Unicode 是一种字符集标准,本质上是给每个字符分配一个编号(称为“码点”),< strong > 但它并没有规定在计算机中如何存储这些字符码点< / strong > 。我们不禁会问:当多种长度的 Unicode 码点同时出现在同一个文本中时,系统如何解析字符?例如,给定一个长度为 2 字节的编码,系统如何确认它是一个 2 字节的字符还是两个 1 字节的字符?< / p >
< p > 对于以上问题,< strong > 一种直接的解决方案是将所有字符存储为等长的编码< / strong > 。如下图所示, “Hello”中的每个字符占用 1 字节,“算法”中的每个字符占用 2 字节。我们可以通过高位填 0 , 将“Hello 算法”中的所有字符都编码为 2 字节长度。这样系统就可以每隔 2 字节解析一个字符,恢复出这个短语的内容了。< / p >
< p > < img alt = "Unicode 编码示例" src = "../character_encoding.assets/unicode_hello_algo.png" / > < / p >
< p align = "center" > Fig. Unicode 编码示例 < / p >
< p > 然而, ASCII 码已经向我们证明,编码英文只需要 1 字节。若采用上述方案,英文文本占用空间的大小将会是 ASCII 编码下大小的 2 倍,非常浪费内存空间。因此,我们需要一种更加高效的 Unicode 编码方法。< / p >
< h2 id = "344-utf-8" > 3.4.4. UTF-8 编码< a class = "headerlink" href = "#344-utf-8" title = "Permanent link" > ¶ < / a > < / h2 >
< p > 目前, UTF-8 已成为国际上使用最广泛的 Unicode 编码方法。< strong > 它是一种可变长的编码< / strong > ,使用 1 到 4 个字节来表示一个字符, 根据字符的复杂性而变。ASCII 字符只需要 1 个字节,拉丁字母和希腊字母需要 2 个字节,常用的中文字符需要 3 个字节,其他的一些生僻字符需要 4 个字节。< / p >
< p > UTF-8 的编码规则并不复杂,分为两种情况:< / p >
< ul >
< li > 对于长度为 1 字节的字符,将最高位设置为 < span class = "arithmatex" > \(0\)< / span > 、其余 7 位设置为 Unicode 码点。值得注意的是, ASCII 字符在 Unicode 字符集中占据了前 128 个码点。也就是说,< strong > UTF-8 编码可以向下兼容 ASCII 码< / strong > 。这意味着我们可以使用 UTF-8 来解析年代久远的 ASCII 码文本。< / li >
< li > 对于长度为 < span class = "arithmatex" > \(n\)< / span > 字节的字符(其中 < span class = "arithmatex" > \(n > 1\)< / span > ),将首个字节的高 < span class = "arithmatex" > \(n\)< / span > 位都设置为 < span class = "arithmatex" > \(1\)< / span > 、第 < span class = "arithmatex" > \(n + 1\)< / span > 位设置为 < span class = "arithmatex" > \(0\)< / span > ;从第二个字节开始,将每个字节的高 2 位都设置为 < span class = "arithmatex" > \(10\)< / span > ;其余所有位用于填充字符的 Unicode 码点。< / li >
< / ul >
< p > 下图展示了“Hello 算法”对应的 UTF-8 编码。将最高 < span class = "arithmatex" > \(n\)< / span > 位设置为 < span class = "arithmatex" > \(1\)< / span > 比较容易理解,可以向系统指出字符的长度为 < span class = "arithmatex" > \(n\)< / span > 。那么,为什么要将其余所有字节的高 2 位都设置为 < span class = "arithmatex" > \(10\)< / span > 呢?实际上,这个 < span class = "arithmatex" > \(10\)< / span > 能够起到校验符的作用,因为在 UTF-8 编码规则下,不可能有字符的最高两位是 < span class = "arithmatex" > \(10\)< / span > 。这是因为长度为 1 字节的字符的最高一位是 < span class = "arithmatex" > \(0\)< / span > ,而多字节字符首个字节的最高两位是 < span class = "arithmatex" > \(11\)< / span > 。假设系统从一个错误的字节开始解析文本,字节头部的 < span class = "arithmatex" > \(10\)< / span > 能够帮助系统快速地判断出异常。< / p >
< p > < img alt = "UTF-8 编码示例" src = "../character_encoding.assets/utf-8_hello_algo.png" / > < / p >
< p align = "center" > Fig. UTF-8 编码示例 < / p >
< p > 除了 UTF-8 之外,常见的编码方式还包括 UTF-16 和 UTF-32 。它们为 Unicode 字符集提供了不同的编码方法。< / p >
< ul >
< li > < strong > UTF-16 编码< / strong > :使用 2 或 4 个字节来表示一个字符。所有的 ASCII 字符和很多常用的非英文字符,都用 2 个字节表示;少数字符需要用到 4 个字节表示。对于 2 字节的字符, UTF-16 编码与 Unicode 码点相等。< / li >
< li > < strong > UTF-32 编码< / strong > :每个字符都使用 4 个字节。这意味着 UTF-32 会比 UTF-8 和 UTF-16 更占用空间,特别是对于主要使用 ASCII 字符的文本。< / li >
< / ul >
< p > 从存储空间的角度看,使用 UTF-8 表示英文字符非常高效,因为它仅需 1 个字节;使用 UTF-16 编码某些非英文字符(例如中文)会更加高效,因为它只需要 2 个字节,而 UTF-8 可能需要 3 个字节。从兼容性的角度看, UTF-8 的通用性最佳,许多工具和库都优先支持 UTF-8 。< / p >
< h2 id = "345" > 3.4.5. 编程语言的字符编码< a class = "headerlink" href = "#345" title = "Permanent link" > ¶ < / a > < / h2 >
< p > 对于以往的大多数编程语言,程序运行中的字符串都采用 UTF-16 或 UTF-32 这类等长的编码。这是因为在等长编码下,我们可以将字符串看作数组来处理,具体来说:< / p >
< ul >
< li > < strong > 随机访问< / strong > : UTF-16 编码的字符串可以很容易地进行随机访问。UTF-8 是一种变长编码,要找到第 < span class = "arithmatex" > \(i\)< / span > 个字符,我们需要从字符串的开始处遍历到第 < span class = "arithmatex" > \(i\)< / span > 个字符,这需要 < span class = "arithmatex" > \(O(n)\)< / span > 的时间。< / li >
< li > < strong > 字符计数< / strong > : 与随机访问类似,计算 UTF-16 字符串的长度也是 < span class = "arithmatex" > \(O(1)\)< / span > 的操作。但是,计算 UTF-8 编码的字符串的长度需要遍历整个字符串。< / li >
< li > < strong > 字符串操作< / strong > : 在 UTF-16 编码的字符串中,很多字符串操作(如分割、连接、插入、删除等)都更容易进行。在 UTF-8 编码的字符串上进行这些操作通常需要额外的计算,以确保不会产生无效的 UTF-8 编码。< / li >
< / ul >
< p > 编程语言的字符编码方案设计是一个很有趣的话题,涉及到许多因素:< / p >
< ul >
< li > Java 的 < code > String< / code > 类型使用 UTF-16 编码,每个字符占用 2 字节。这是因为 Java 语言设计之初,人们认为 16 位足以表示所有可能的字符。然而,这是一个不正确的判断。后来 Unicode 规范扩展到了超过 16 位,所以 Java 中的字符现在可能由一对 16 位的值(称为“代理对”)表示。< / li >
< li > JavaScript 和 TypeScript 的字符串使用 UTF-16 编码的原因与 Java 类似。当 JavaScript 语言在 1995 年被 Netscape 公司首次引入时, Unicode 还处于相对早期的阶段,那时候使用 16 位的编码就足够表示所有的 Unicode 字符了。< / li >
< li > C# 使用 UTF-16 编码,主要因为 .NET 平台是由 Microsoft 设计的,而 Microsoft 的很多技术,包括 Windows 操作系统,都广泛地使用 UTF-16 编码。< / li >
< / ul >
< p > 由于以上编程语言对字符数量的低估,它们不得不采取“代理对”的方式来表示超过 16 位长度的 Unicode 字符。这是一个不得已为之的无奈之举。一方面,包含代理对的字符串中,一个字符可能占用 2 字节或 4 字节,因此丧失了等长编码的优势。另一方面,处理代理对需要增加额外代码,这增加了编程的复杂性和 Debug 难度。< / p >
< p > 出于以上原因,部分编程语言提出了不同的编码方案:< / p >
< ul >
< li > Python 3 使用一种灵活的字符串表示,存储的字符长度取决于字符串中最大的 Unicode 码点。对于全部是 ASCII 字符的字符串,每个字符占用 1 个字节;如果字符串中包含的字符超出了 ASCII 范围, 但全部在基本多语言平面( BMP) 内, 每个字符占用 2 个字节;如果字符串中有超出 BMP 的字符,那么每个字符占用 4 个字节。< / li >
< li > Go 语言的 < code > string< / code > 类型在内部使用 UTF-8 编码。Go 语言还提供了 < code > rune< / code > 类型,它用于表示单个 Unicode 码点。< / li >
< li > Rust 语言的 str 和 String 类型在内部使用 UTF-8 编码。Rust 也提供了 char 类型,用于表示单个 Unicode 码点。< / li >
< / ul >
< p > 需要注意的是,以上讨论的都是字符串在编程语言中的存储方式,< strong > 这和字符串如何在文件中存储或在网络中传输是两个不同的问题< / strong > 。在文件存储或网络传输中,我们一般会将字符串编码为 UTF-8 格式,以达到最优的兼容性和空间效率。< / p >
< h2 id = "__comments" > 评论< / h2 >
<!-- Insert generated snippet here -->
< script
src="https://giscus.app/client.js"
data-repo="krahets/hello-algo"
data-repo-id="R_kgDOIXtSqw"
data-category="Announcements"
data-category-id="DIC_kwDOIXtSq84CSZk_"
data-mapping="pathname"
data-strict="1"
data-reactions-enabled="1"
data-emit-metadata="0"
data-input-position="top"
data-theme="preferred_color_scheme"
data-lang="zh-CN"
crossorigin="anonymous"
async
>
< / script >
<!-- Synchronize Giscus theme with palette -->
< script >
/*
 * Keep the Giscus comment widget's theme in step with the site palette.
 *
 * The stored palette is read through __md_get("__palette") (defined in the
 * page head). A color scheme of "slate" maps to the Giscus "dark" theme and
 * any other scheme maps to "light".
 */
var giscus = document.querySelector("script[src*=giscus]")

/* Set palette on initial load (skipped when the Giscus script tag is absent) */
var palette = __md_get("__palette")
if (giscus && palette && typeof palette.color === "object") {
  var theme = palette.color.scheme === "slate" ? "dark" : "light"
  giscus.setAttribute("data-theme", theme)
}

/* Register event handlers after the document has loaded */
document.addEventListener("DOMContentLoaded", function() {
  var ref = document.querySelector("[data-md-component=palette]")

  /* Without a palette switcher on the page there is nothing to listen to */
  if (!ref) {
    return
  }

  ref.addEventListener("change", function() {
    var palette = __md_get("__palette")
    if (palette && typeof palette.color === "object") {
      var theme = palette.color.scheme === "slate" ? "dark" : "light"

      /* Instruct Giscus to change theme */
      var frame = document.querySelector(".giscus-frame")

      /*
       * The frame is not part of this page's static markup; presumably the
       * async Giscus client script creates it, so it may not exist yet.
       */
      if (frame && frame.contentWindow) {
        frame.contentWindow.postMessage(
          { giscus: { setConfig: { theme } } },
          "https://giscus.app"
        )
      }
    }
  })
})
< / script >
< / article >
< / div >
<script>
  /*
   * Re-select content tabs from the "__tabs" entry read through __md_get
   * (defined in the page head). For each .tabbed-set, the first label whose
   * trimmed text equals one of the stored tab names gets its input checked,
   * then processing moves on to the next set.
   */
  var tabs = __md_get("__tabs");
  if (Array.isArray(tabs))
    e: for (var set of document.querySelectorAll(".tabbed-set")) {
      var tab, labels = set.querySelector(".tabbed-labels");
      for (tab of tabs)
        for (var label of labels.getElementsByTagName("label"))
          if (label.innerText.trim() === tab) {
            var input = document.getElementById(label.htmlFor);
            input.checked = !0;
            /* One match per tab set is enough: jump to the next .tabbed-set */
            continue e
          }
    }
</script>
< / div >
< button type = "button" class = "md-top md-icon" data-md-component = "top" hidden >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12Z" / > < / svg >
回到页面顶部
< / button >
< / main >
< footer class = "md-footer" >
< nav class = "md-footer__inner md-grid" aria-label = "页脚" >
< a href = "../number_encoding/" class = "md-footer__link md-footer__link--prev" aria-label = "上一页: 3.3. &nbsp; 数字编码 *" rel = "prev" >
< div class = "md-footer__button md-icon" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z" / > < / svg >
< / div >
< div class = "md-footer__title" >
< span class = "md-footer__direction" >
上一页
< / span >
< div class = "md-ellipsis" >
3.3. 数字编码 *
< / div >
< / div >
< / a >
< a href = "../summary/" class = "md-footer__link md-footer__link--next" aria-label = "下一页: 3.5. &nbsp; 小结" rel = "next" >
< div class = "md-footer__title" >
< span class = "md-footer__direction" >
下一页
< / span >
< div class = "md-ellipsis" >
3.5. 小结
< / div >
< / div >
< div class = "md-footer__button md-icon" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 24 24" > < path d = "M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11H4Z" / > < / svg >
< / div >
< / a >
< / nav >
< div class = "md-footer-meta md-typeset" >
< div class = "md-footer-meta__inner md-grid" >
< div class = "md-copyright" >
< div class = "md-copyright__highlight" >
Copyright © 2023 Krahets
< / div >
< / div >
< div class = "md-social" >
< a href = "https://github.com/krahets" target = "_blank" rel = "noopener" title = "github.com" class = "md-social__link" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 496 512" > <!-- ! Font Awesome Free 6.4.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc. --> < path d = "M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z" / > < / svg >
< / a >
< a href = "https://twitter.com/krahets" target = "_blank" rel = "noopener" title = "twitter.com" class = "md-social__link" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 512 512" > <!-- ! Font Awesome Free 6.4.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc. --> < path d = "M459.37 151.716c.325 4.548.325 9.097.325 13.645 0 138.72-105.583 298.558-298.558 298.558-59.452 0-114.68-17.219-161.137-47.106 8.447.974 16.568 1.299 25.34 1.299 49.055 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.112-72.772 6.498.974 12.995 1.624 19.818 1.624 9.421 0 18.843-1.3 27.614-3.573-48.081-9.747-84.143-51.98-84.143-102.985v-1.299c13.969 7.797 30.214 12.67 47.431 13.319-28.264-18.843-46.781-51.005-46.781-87.391 0-19.492 5.197-37.36 14.294-52.954 51.655 63.675 129.3 105.258 216.365 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.828 46.782-104.934 104.934-104.934 30.213 0 57.502 12.67 76.67 33.137 23.715-4.548 46.456-13.32 66.599-25.34-7.798 24.366-24.366 44.833-46.132 57.827 21.117-2.273 41.584-8.122 60.426-16.243-14.292 20.791-32.161 39.308-52.628 54.253z" / > < / svg >
< / a >
< a href = "https://leetcode.cn/u/jyd/" target = "_blank" rel = "noopener" title = "leetcode.cn" class = "md-social__link" >
< svg xmlns = "http://www.w3.org/2000/svg" viewBox = "0 0 640 512" > <!-- ! Font Awesome Free 6.4.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc. --> < path d = "M392.8 1.2c-17-4.9-34.7 5-39.6 22l-128 448c-4.9 17 5 34.7 22 39.6s34.7-5 39.6-22l128-448c4.9-17-5-34.7-22-39.6zm80.6 120.1c-12.5 12.5-12.5 32.8 0 45.3l89.3 89.4-89.4 89.4c-12.5 12.5-12.5 32.8 0 45.3s32.8 12.5 45.3 0l112-112c12.5-12.5 12.5-32.8 0-45.3l-112-112c-12.5-12.5-32.8-12.5-45.3 0zm-306.7 0c-12.5-12.5-32.8-12.5-45.3 0l-112 112c-12.5 12.5-12.5 32.8 0 45.3l112 112c12.5 12.5 32.8 12.5 45.3 0s12.5-32.8 0-45.3L77.3 256l89.4-89.4c12.5-12.5 12.5-32.8 0-45.3z" / > < / svg >
< / a >
< / div >
< / div >
< / div >
< / footer >
< / div >
< div class = "md-dialog" data-md-component = "dialog" >
< div class = "md-dialog__inner md-typeset" > < / div >
< / div >
< script id = "__config" type = "application/json" > { "base" : "../.." , "features" : [ "content.action.edit" , "content.code.annotate" , "content.code.copy" , "content.tabs.link" , "content.tooltips" , "navigation.indexes" , "navigation.sections" , "navigation.top" , "navigation.footer" , "navigation.tracking" , "search.highlight" , "search.share" , "search.suggest" , "toc.follow" ] , "search" : "../../assets/javascripts/workers/search.208ed371.min.js" , "translations" : { "clipboard.copied" : "\u5df2\u590d\u5236" , "clipboard.copy" : "\u590d\u5236" , "search.result.more.one" : "\u5728\u8be5\u9875\u4e0a\u8fd8\u6709 1 \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c" , "search.result.more.other" : "\u5728\u8be5\u9875\u4e0a\u8fd8\u6709 # \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c" , "search.result.none" : "\u6ca1\u6709\u627e\u5230\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c" , "search.result.one" : "\u627e\u5230 1 \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c" , "search.result.other" : "# \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c" , "search.result.placeholder" : "\u952e\u5165\u4ee5\u5f00\u59cb\u641c\u7d22" , "search.result.term.missing" : "\u7f3a\u5c11" , "select.version" : "\u9009\u62e9\u5f53\u524d\u7248\u672c" } } < / script >
< script src = "../../assets/javascripts/bundle.fac441b0.min.js" > < / script >
< script src = "../../javascripts/mathjax.js" > < / script >
<!-- ES6 polyfill served from Cloudflare's cdnjs mirror: the polyfill.io domain changed ownership and was reported in 2024 to serve malicious code -->
<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
< script src = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" > < / script >
< / body >
< / html >