/* Find the k largest elements of an array using a min-heap.
 *
 * A min-heap holding at most k elements is maintained while scanning nums:
 * its top is always the smallest of the candidates kept so far, so any
 * element larger than the top replaces it.
 *
 * nums: input array (not modified).
 * k:    number of largest elements requested. Values <= 0 yield an empty
 *       heap; values larger than nums.size() yield a heap of all elements.
 * Returns a min-heap containing the min(k, nums.size()) largest elements.
 */
std::priority_queue<int, std::vector<int>, std::greater<int>> topKHeap(std::vector<int> &nums, int k) {
    std::priority_queue<int, std::vector<int>, std::greater<int>> heap;
    // Nothing to select; returning here also guarantees heap.top() below is
    // never called on an empty heap (which would be undefined behaviour).
    if (k <= 0) {
        return heap;
    }
    const std::size_t total = nums.size();
    // Clamp k to the array length so the seeding loop cannot read past the end.
    const std::size_t wanted = static_cast<std::size_t>(k);
    const std::size_t seedCount = wanted < total ? wanted : total;
    // Seed the heap with the first min(k, n) elements.
    for (std::size_t i = 0; i < seedCount; i++) {
        heap.push(nums[i]);
    }
    // From element k+1 onwards, keep the heap size fixed at k.
    for (std::size_t i = seedCount; i < total; i++) {
        // A larger element evicts the current smallest candidate.
        if (nums[i] > heap.top()) {
            heap.pop();
            heap.push(nums[i]);
        }
    }
    return heap;
}
private priority_queue { - static S &Container(priority_queue &pq) { - return pq.*&HackedQueue::c; - } - }; - return HackedQueue::Container(pq); -} - /* Find an element in a vector */ template int vecFind(const vector &vec, T ele) { int j = INT_MAX; @@ -217,6 +207,16 @@ template void printHashMap(unordered_map S &Container(priority_queue &pq) { + struct HackedQueue : private priority_queue { + static S &Container(priority_queue &pq) { + return pq.*&HackedQueue::c; + } + }; + return HackedQueue::Container(pq); +} + /* Print a Heap (PriorityQueue) */ template void printHeap(priority_queue &heap) { vector vec = Container(heap); diff --git a/codes/java/chapter_heap/top_k.java b/codes/java/chapter_heap/top_k.java new file mode 100644 index 000000000..8160c28d4 --- /dev/null +++ b/codes/java/chapter_heap/top_k.java @@ -0,0 +1,39 @@ +/** + * File: top_k.java + * Created Time: 2023-06-12 + * Author: Krahets (krahets@163.com) + */ + +package chapter_heap; + +import utils.*; +import java.util.*; + +public class top_k { + /* 基于堆查找数组中最大的 k 个元素 */ + static Queue topKHeap(int[] nums, int k) { + Queue heap = new PriorityQueue(); + // 将数组的前 k 个元素入堆 + for (int i = 0; i < k; i++) { + heap.add(nums[i]); + } + // 从第 k+1 个元素开始,保持堆的长度为 k + for (int i = k; i < nums.length; i++) { + // 若当前元素大于堆顶元素,则将堆顶元素出堆、当前元素入堆 + if (nums[i] > heap.peek()) { + heap.poll(); + heap.add(nums[i]); + } + } + return heap; + } + + public static void main(String[] args) { + int[] nums = { 1, 7, 6, 3, 2 }; + int k = 3; + + Queue res = topKHeap(nums, k); + System.out.println("最大的 " + k + " 个元素为"); + PrintUtil.printHeap(res); + } +} diff --git a/codes/python/chapter_heap/my_heap.py b/codes/python/chapter_heap/my_heap.py index f66abb78c..435f32257 100644 --- a/codes/python/chapter_heap/my_heap.py +++ b/codes/python/chapter_heap/my_heap.py @@ -14,7 +14,7 @@ class MaxHeap: """大顶堆""" def __init__(self, nums: list[int]): - """构造方法""" + """构造方法,根据输入列表建堆""" # 将列表元素原封不动添加进堆 self.max_heap = nums # 堆化除叶节点以外的其他所有节点 diff 
def top_k_heap(nums: list[int], k: int) -> list[int]:
    """Find the k largest elements of a list using a min-heap.

    A min-heap of at most k elements is maintained while scanning ``nums``;
    ``heap[0]`` is the smallest candidate kept so far, so any larger element
    replaces it.

    Args:
        nums: Input list (not modified).
        k: Number of largest elements requested. Values <= 0 yield an empty
            list; values above ``len(nums)`` yield every element.

    Returns:
        A list in heap order (smallest at index 0) holding the
        ``min(k, len(nums))`` largest elements.
    """
    heap: list[int] = []
    # Nothing to select; this also keeps heap[0] below from being read on an
    # empty heap.
    if k <= 0:
        return heap
    # Clamp k so the seeding loop cannot index past the end of the list.
    seed_count = min(k, len(nums))
    # Seed the heap with the first min(k, n) elements.
    for i in range(seed_count):
        heapq.heappush(heap, nums[i])
    # From element k+1 onwards, keep the heap size fixed at k.
    for i in range(seed_count, len(nums)):
        # A larger element evicts the current smallest candidate.
        if nums[i] > heap[0]:
            heapq.heappop(heap)
            heapq.heappush(heap, nums[i])
    return heap
a/docs/chapter_heap/top_k.assets/top_k_heap_step4.png b/docs/chapter_heap/top_k.assets/top_k_heap_step4.png new file mode 100644 index 000000000..be2218e59 Binary files /dev/null and b/docs/chapter_heap/top_k.assets/top_k_heap_step4.png differ diff --git a/docs/chapter_heap/top_k.assets/top_k_heap_step5.png b/docs/chapter_heap/top_k.assets/top_k_heap_step5.png new file mode 100644 index 000000000..3b81c9f3f Binary files /dev/null and b/docs/chapter_heap/top_k.assets/top_k_heap_step5.png differ diff --git a/docs/chapter_heap/top_k.assets/top_k_heap_step6.png b/docs/chapter_heap/top_k.assets/top_k_heap_step6.png new file mode 100644 index 000000000..4408d6b75 Binary files /dev/null and b/docs/chapter_heap/top_k.assets/top_k_heap_step6.png differ diff --git a/docs/chapter_heap/top_k.assets/top_k_heap_step7.png b/docs/chapter_heap/top_k.assets/top_k_heap_step7.png new file mode 100644 index 000000000..87796f256 Binary files /dev/null and b/docs/chapter_heap/top_k.assets/top_k_heap_step7.png differ diff --git a/docs/chapter_heap/top_k.assets/top_k_heap_step8.png b/docs/chapter_heap/top_k.assets/top_k_heap_step8.png new file mode 100644 index 000000000..25fe6f6dd Binary files /dev/null and b/docs/chapter_heap/top_k.assets/top_k_heap_step8.png differ diff --git a/docs/chapter_heap/top_k.assets/top_k_heap_step9.png b/docs/chapter_heap/top_k.assets/top_k_heap_step9.png new file mode 100644 index 000000000..95131742c Binary files /dev/null and b/docs/chapter_heap/top_k.assets/top_k_heap_step9.png differ diff --git a/docs/chapter_heap/top_k.assets/top_k_sorting.png b/docs/chapter_heap/top_k.assets/top_k_sorting.png new file mode 100644 index 000000000..464aa01a9 Binary files /dev/null and b/docs/chapter_heap/top_k.assets/top_k_sorting.png differ diff --git a/docs/chapter_heap/top_k.assets/top_k_traversal.png b/docs/chapter_heap/top_k.assets/top_k_traversal.png new file mode 100644 index 000000000..5d4eef1f0 Binary files /dev/null and 
b/docs/chapter_heap/top_k.assets/top_k_traversal.png differ diff --git a/docs/chapter_heap/top_k.md b/docs/chapter_heap/top_k.md new file mode 100644 index 000000000..e0832440d --- /dev/null +++ b/docs/chapter_heap/top_k.md @@ -0,0 +1,133 @@ +# Top-K 问题 + +!!! question + + 给定一个长度为 $n$ 的无序数组 `nums` ,请返回数组中前 $k$ 大的元素。 + +对于该问题,我们先介绍两种思路比较直接的解法,再介绍效率更高的堆解法。 + +## 方法一:遍历选择 + +我们可以进行 $k$ 轮遍历,分别在每轮中提取第 $1$ , $2$ , $\cdots$ , $k$ 大的元素,时间复杂度为 $O(nk)$ 。 + +该方法只适用于 $k \ll n$ 的情况,因为当 $k$ 与 $n$ 比较接近时,其时间复杂度趋向于 $O(n^2)$ ,非常耗时。 + +![遍历寻找最大的 $k$ 个元素](top_k.assets/top_k_traversal.png) + +!!! tip + + 当 $k = n$ 时,我们可以得到从大到小的序列,等价于「选择排序」算法。 + +## 方法二:排序 + +我们可以对数组 `nums` 进行排序,并返回最右边的 $k$ 个元素,时间复杂度为 $O(n \log n)$ 。 + +显然,该方法“超额”完成任务了,因为我们只需要找出最大的 $k$ 个元素即可,而不需要排序其他元素。 + +![排序寻找最大的 $k$ 个元素](top_k.assets/top_k_sorting.png) + +## 方法三:堆 + +我们可以基于堆更加高效地解决 Top-K 问题,流程如下: + +1. 初始化一个小顶堆,其堆顶元素最小; +2. 先将数组的前 $k$ 个元素依次入堆; +3. 从第 $k + 1$ 个元素开始,若当前元素大于堆顶元素,则将堆顶元素出堆,并将当前元素入堆; +4. 遍历完成后,堆中保存的就是最大的 $k$ 个元素。 + +=== "<1>" + ![基于堆寻找最大的 $k$ 个元素](top_k.assets/top_k_heap_step1.png) + +=== "<2>" + ![top_k_heap_step2](top_k.assets/top_k_heap_step2.png) + +=== "<3>" + ![top_k_heap_step3](top_k.assets/top_k_heap_step3.png) + +=== "<4>" + ![top_k_heap_step4](top_k.assets/top_k_heap_step4.png) + +=== "<5>" + ![top_k_heap_step5](top_k.assets/top_k_heap_step5.png) + +=== "<6>" + ![top_k_heap_step6](top_k.assets/top_k_heap_step6.png) + +=== "<7>" + ![top_k_heap_step7](top_k.assets/top_k_heap_step7.png) + +=== "<8>" + ![top_k_heap_step8](top_k.assets/top_k_heap_step8.png) + +=== "<9>" + ![top_k_heap_step9](top_k.assets/top_k_heap_step9.png) + +总共执行了 $n$ 轮入堆和出堆,堆的最大长度为 $k$ ,因此时间复杂度为 $O(n \log k)$ 。该方法的效率很高,当 $k$ 较小时,时间复杂度趋向 $O(n)$ ;当 $k$ 较大时,时间复杂度不会超过 $O(n \log n)$ 。 + +另外,该方法适用于动态数据流的使用场景。在不断加入数据时,我们可以持续维护堆内的元素,从而实现最大 $k$ 个元素的动态更新。 + +=== "Java" + + ```java title="top_k.java" + [class]{top_k}-[func]{topKHeap} + ``` + +=== "C++" + + ```cpp title="top_k.cpp" + [class]{}-[func]{topKHeap} + ``` + +=== "Python" + + 
```python title="top_k.py" + [class]{}-[func]{top_k_heap} + ``` + +=== "Go" + + ```go title="top_k.go" + [class]{maxHeap}-[func]{topKHeap} + ``` + +=== "JavaScript" + + ```javascript title="top_k.js" + [class]{}-[func]{topKHeap} + ``` + +=== "TypeScript" + + ```typescript title="top_k.ts" + [class]{}-[func]{topKHeap} + ``` + +=== "C" + + ```c title="top_k.c" + [class]{maxHeap}-[func]{topKHeap} + ``` + +=== "C#" + + ```csharp title="top_k.cs" + [class]{top_k}-[func]{topKHeap} + ``` + +=== "Swift" + + ```swift title="top_k.swift" + [class]{}-[func]{topKHeap} + ``` + +=== "Zig" + + ```zig title="top_k.zig" + [class]{}-[func]{topKHeap} + ``` + +=== "Dart" + + ```dart title="top_k.dart" + [class]{}-[func]{top_k_heap} + ``` diff --git a/docs/index.md b/docs/index.md index 35ab10b86..e2cad80f8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -82,7 +82,7 @@ hide:

作者简介

-靳宇栋 (Krahets),大厂高级算法工程师,上海交通大学硕士。力扣(LeetCode)全网阅读量最高博主,其 LeetBook《图解算法数据结构》已被订阅 22 万本。 +靳宇栋 (Krahets),大厂高级算法工程师,上海交通大学硕士。力扣(LeetCode)全网阅读量最高博主,其 LeetBook《图解算法数据结构》已被订阅 24 万本。 --- diff --git a/mkdocs.yml b/mkdocs.yml index d54c9ab8f..db74a27a5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -175,8 +175,9 @@ nav: - 8.     堆: - chapter_heap/index.md - 8.1.   堆: chapter_heap/heap.md - - 8.2.   建堆操作 *: chapter_heap/build_heap.md - - 8.3.   小结: chapter_heap/summary.md + - 8.2.   建堆操作: chapter_heap/build_heap.md + - 8.3.   Top-K 问题: chapter_heap/top_k.md + - 8.4.   小结: chapter_heap/summary.md - 9.     图: - chapter_graph/index.md - 9.1.   图: chapter_graph/graph.md