本文最后更新于 2024年10月19日 晚上
问题描述
设有 n n n 个互不相同的元素 x 1 , x 2 , ⋯ , x n x_1,x_2,\cdots,x_n x 1 , x 2 , ⋯ , x n ,每个元素 x i x_i x i 带有一个权值 w i w_i w i ,且 ∑ i = 1 n w i = 1 \sum_{i=1}^nw_i=1 ∑ i = 1 n w i = 1 。若元素 x k x_k x k 满足 ∑ x i < x k w i ≤ 1 2 \sum_{x_i < x_k}w_i \leq \frac{1}{2} ∑ x i < x k w i ≤ 2 1 且 ∑ x i > x k w i ≤ 1 2 \sum_{x_i > x_k}w_i \leq \frac{1}{2} ∑ x i > x k w i ≤ 2 1 ,则称元素 x k x_{k} x k 为 x 1 , x 2 , ⋯ , x n x_{1},x_{2},\cdots,x_{n} x 1 , x 2 , ⋯ , x n 的带权中位数。请编写一个算法,能够在最坏情况下用 O ( n ) O(n) O ( n ) 时间找出 n n n 个元素的带权中位数。
提交示例:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 #include <iostream> #include <string> #include <cmath> #include <vector> #include <algorithm> using namespace std;void WeightMedian (int length,vector<int >num, vector<double >weight,int index) { }int main () { return 0 ; }
样例输入:
1 2 3 int length = 10 ;int num = {13825 , 28995 , 18417 , 92445 , 86407 , 90546 , 46896 , 14757 , 89837 , 18252 }; double weight = {0.08720404 , 0.09585595 , 0.12070109 , 0.02521966 , 0.01221968 , 0.12648725 , 0.13076193 , 0.19128049 , 0.15673069 , 0.05353921 };
样例输出:
问题分析
如果采用暴力算法,可以先对 num 数组进行排序,再顺次检查权重和,这种方法在最坏情况下的时间复杂度为 O ( n 2 ) O(n^2) O ( n 2 ) 。若想要实现 O ( n ) O(n) O ( n ) 的复杂度,必须要使用递归的方法来设计算法,并需要在每个迭代步尽可能缩小问题规模。
算法设计
算法步骤
步1: 定义结构体 Node,num 中的值为 node.value,weight 中的值为 node.weight,将 num,weight 输入数组 nodes 中,转步2;
步2: 判断是否是单元素,单元素直接返回本身;否则,转步3;
步3: 使用 O ( n ) O(n) O ( n ) 的算法查找出本步数组的中位数 m_Median,以 m_Median 为基准将原数组划分成两半:其下标 pivot 左侧元素均小于 m_Median,右侧元素均大于 m_Median,转步4;
步4: 判断 M_Median 以左(不包含)累计权重 L_Sum 是否合规。计算本轮中的 L_Sum,若满足题目要求,则返回本轮 m_Median 的值。否则,若 L_Sum ≥ \geq ≥ 1/2,则向左递归;反之,则向右递归。转步2。
时间复杂度分析
由上述算法可知,每次递归原问题规模缩小1/2;在每次迭代中,每次做划分的时间开销为 O ( n ) O(n) O ( n ) ,每次求中位数的时间开销为 O ( n ) O(n) O ( n ) ,每次求 L_Sum 的时间开销也是 O ( n ) O(n) O ( n ) 。因此,在规模为 n n n 的问题下,迭代步的开销为 O ( n ) O(n) O ( n ) 。于是可得如下递归式:
T ( n ) = { O ( 1 ) n = 1 T ( n / 2 ) + O ( n ) n > 1 \begin{equation*}
T(n) =
\begin{cases}
O(1)&\quad n = 1\\
T(n/2)+O(n)&\quad n>1
\end{cases}
\end{equation*}
T ( n ) = { O ( 1 ) T ( n /2 ) + O ( n ) n = 1 n > 1
令 n = 2 t n = 2^t n = 2 t ,则
T ( n ) = T ( 2 t ) = T ( 2 t − 1 ) + O ( 2 t ) = ⋯ = T ( 1 ) + ∑ k = 1 t O ( 2 k ) = O ( 1 ) + O ( 2 t + 1 − 1 ) = O ( n ) \begin{align*}
T(n) &= T(2^t) = T(2^{t-1}) + O(2^t) = \cdots = T(1) + \sum_{k = 1}^{t} O(2^k) \\
&= O(1) + O(2^{t+1} - 1) = O(n)
\end{align*}
T ( n ) = T ( 2 t ) = T ( 2 t − 1 ) + O ( 2 t ) = ⋯ = T ( 1 ) + k = 1 ∑ t O ( 2 k ) = O ( 1 ) + O ( 2 t + 1 − 1 ) = O ( n )
综上,其在最坏情况下时间复杂度为 O ( n ) O(n) O ( n ) ,符合题意。
算法实现
定义结构体
1 2 3 4 5 6 7 8 9 10 11 #include <iostream> #include <vector> #include <algorithm> using namespace std;struct Node { int value; double weight; };
构建分区函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 int Partition1 (vector<Node>& nodes, int left, int right) { int p = left - 1 ; for (int index = left; index < right; index++) { if (nodes[index].value <= nodes[right].value) { p++; swap (nodes[p], nodes[index]); } } swap (nodes[p + 1 ], nodes[right]); return p + 1 ; }int Partition2 (vector<Node>& nodes, int left, int right, Node pivot) { for (int i = left; i < right; i++) { if (nodes[i].value == pivot.value) { swap (nodes[i], nodes[right]); break ; } } return Partition1 (nodes, left, right); }
查找中位数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 Node Select1 (vector<Node>& nodes, int left, int right, int order) ;Node Insert (vector<Node>& nodes, int start, int end, int k) { for (int i = start + 1 ; i <= end; i++) { Node cur_node = nodes[i]; int j = i; while (j > start && nodes[j - 1 ].value > cur_node.value) { nodes[j] = nodes[j - 1 ]; j--; } nodes[j] = cur_node; } return nodes[start + k - 1 ]; }Node findMedian (vector<Node>& nodes, int left, int right) { int length = right - left + 1 ; vector<Node> medians ((length + 4 ) / 5 ) ; int m_index = 0 ; for (int i = left; i <= right; i += 5 ) { int end = min (i + 4 , right); medians[m_index++] = Insert (nodes, i, end, (end - i) / 2 + 1 ); } return Select1 (medians, 0 , m_index - 1 , (m_index + 1 ) / 2 ); }Node Select1 (vector<Node>& nodes, int left, int right, int k) { if (left == right) return nodes[left]; Node m_Medians = findMedian (nodes, left, right); int pivot = Partition2 (nodes, left, right, m_Medians); int index = pivot - left + 1 ; if (k == index) return nodes[pivot]; else if (k < index) return Select1 (nodes, left, pivot - 1 , k); else return Select1 (nodes, pivot + 1 , right, k - index); }
查找带权中位数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 Node Select2 (vector<Node>& nodes, int left, int right, double wM_position) { if (left == right) return nodes[left]; Node m_Median = findMedian (nodes, left, right); int pivot = Partition2 (nodes, left, right, m_Median); double L_Sum = 0 ; for (int j = left; j < pivot; j++) L_Sum += nodes[j].weight; if (L_Sum < wM_position && L_Sum + nodes[pivot].weight >= wM_position) return nodes[pivot]; else if (L_Sum >= wM_position) return Select2 (nodes, left, pivot - 1 , wM_position); else return Select2 (nodes, pivot + 1 , right, wM_position - L_Sum - nodes[pivot].weight); }void WeightMedian (int length, vector<int > num, vector<double > weight, int index) { vector<Node> nodes (length) ; for (int i = 0 ; i < length; i++) { nodes[i].value = num[i]; nodes[i].weight = weight[i]; } cout << Select2 (nodes, 0 , length - 1 , 0.5 ).value << endl; }
主函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 int main () { int length = 2 ; vector<int > num = {1 , 2 }; vector<double > weight = {0.4 , 0.6 }; cout << "Length: " << length << endl; cout << "num: " ; for (int i = 0 ; i < length; i++) cout << num[i] << " " ; cout << endl; cout << "weight: " ; for (int i = 0 ; i < length; i++) cout << weight[i] << " " ; cout << endl; cout << "The weight median is: " ; WeightMedian (length, num, weight, 0 ); return 0 ; }
实例测试
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
运行结果