Programing Problems from DolphinDB

Algorithms + Data Structures = Programs

有限种类大数据排序

Description Of Topic

给定一百万个0~255之间的整数,写出复杂度为O(n)的排序算法。

Solution Of Question

  • Language: C++ 14
  • Time Complexity: O(n)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
//
// Created by dry on 4/18/19.
//
#include <iostream>
#include <cstdlib>
#include <ctime>

#define MIN_VALUE 0
#define MAX_VALVE 255
#define MAX_NUM 1000000

using namespace std;

void init(int, int);

void output(long long);

int array[MAX_NUM];
long long len[MAX_VALVE - MIN_VALUE + 1];

int main() {
init(MIN_VALUE, MAX_VALVE); // 用 0~255 的随机数初始化 array(种子为time)

for (int i = 0; i < MAX_NUM; i++) { // Time complexity: O(n)
len[array[i]]++;
}

output(10000); // 设置输出间隔 10000, 检查是否有序
return 0;
}

void init(int a, int b) {
srand((int) time(NULL));
int n = MAX_NUM;

while (n--) {
array[n] = rand() % (b - a) + a + 1;
}

n = MAX_VALVE - MIN_VALUE + 1;
while (n--) {
len[n] = 0;
}
}

void output(long long intervalNum) {
int i = 0;
int k = len[i];
for (int j = 0; j < MAX_NUM; j++) {
while (k < j) {
k += len[++i];
}
if (j % intervalNum == 0 || j + 1 == MAX_NUM) {
cout << i << endl;
}
}
}

日期与ID双射问题

Description Of Topic

假设1969.01.010表示,请开发一个函数输出任意日期的整数表示(日期小于1969.01.01的用负数表示)。反过来,给定日期的整数表示,开发一个函数求日期对应的年月日。

Solution Of Question

  • Language: C++ 14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
//
// Created by dry on 4/17/19.
//
#include <iostream>

#define YEAR 1969 // 该程序为通解, 改变 YEAR.MONTH.DAY 为任意日期, 均有效
#define MONTH 01
#define DAY 01
using namespace std;

bool IsLeapYear(int); // 判断闰年(四年一闰,百年不闰,四百年又闰)

int GetDayNumOfMonth(int, int); // 返回某年的某月的天数

int GetDayNumOfYear(int); // 返回某年的天数

long long GetID(); // 返回 Date 对应的 ID

string GetDate(); // 返回 ID 对应的 Date

void CheckDateID(long long, long long); // 验证 ID 为 a~b 之间, 所有 ID、Date 是否一一对应

int year, month, day;
int dayNum1, dayNum2; // dayNum1 = (YEAR.MONTH.DAY - YEAR.01.01 + 1)
long long id; // dayNum2 = (year.month.day - year.01.01 + 1)
string date;

int main() {
long long a, b;

for (int m = 1; m < MONTH; m++) {
dayNum1 += GetDayNumOfMonth(m, YEAR);
}
dayNum1 += DAY;

// cin >> id; // ID ---> Date
// cout << GetDate();

// getline(cin, date); // Date ---> ID
// cout << GetID();

cin >> a >> b; // Check Date & ID
CheckDateID(a, b);
return 0;
}

long long GetID() {
id = 0, dayNum2 = 0;
year = stoi(date.substr(0, date.find('.')));
month = stoi(date.substr(date.find('.') + 1, date.rfind('.') - date.find('.') - 1));
day = stoi(date.substr(date.rfind('.') + 1));

for (int m = 1; m < month; m++) {
dayNum2 += GetDayNumOfMonth(m, year);
}
dayNum2 += day;

if (year >= YEAR) {
for (int y = YEAR; y < year; y++) {
id += IsLeapYear(y) ? 366 : 365;
}
} else {
for (int y = year; y < YEAR; y++) {
id -= IsLeapYear(y) ? 366 : 365;
}
}
id += dayNum2 - dayNum1;

return id;
}

string GetDate() {
year = YEAR;
month = MONTH;
day = DAY;

if (id >= 0) {
if (id > GetDayNumOfYear(year) - dayNum1) {
id -= GetDayNumOfYear(year) - dayNum1 + 1;
year++;
month = 1;
day = 1;
}

while (id > GetDayNumOfYear(year) - day) {
id -= GetDayNumOfYear(year) - day + 1;
year++;
}

while (id > GetDayNumOfMonth(month, year) - day) {
id -= GetDayNumOfMonth(month, year) - day + 1;
month++;
day = 1;
}
day += id;

} else {
id = 0 - id;
if (id >= dayNum1) {
id -= dayNum1;
year--;
month = 12;
day = 31;
}

while (id >= GetDayNumOfYear(year)) {
id -= GetDayNumOfYear(year);
year--;
}
if (id >= day) {
id -= day;
month--;
day = GetDayNumOfMonth(month, year);
}

while (id >= GetDayNumOfMonth(month, year)) {
id -= GetDayNumOfMonth(month, year);
month--;
day = GetDayNumOfMonth(month, year);
}
day -= id;
}

date = to_string(year) + "." + to_string(month) + "." + to_string(day);
return date;
}

void CheckDateID(long long a, long long b) {
bool isOK, allOK = true;
for (long long i = max(a, b); i >= min(a, b); i--) {
id = i;
cout << i << " -> " << GetDate() << " -> " << GetID() << " ";

if (i == GetID()) {
isOK = true;
} else {
isOK = false;
allOK = false;
}
cout << isOK << endl;
}

if (allOK) {
cout << endl << "[*] " << "Result: ID is always equal to Date." << endl;
} else {
cout << endl << "[*] " << "Result: exist error, check again." << endl;
}
}

bool IsLeapYear(int mYear) {
bool isLeap = false;
isLeap = mYear % 4 == 0 ? true : isLeap;
isLeap = mYear % 100 == 0 ? false : isLeap;
isLeap = mYear % 400 == 0 ? true : isLeap;
return isLeap;
}

int GetDayNumOfMonth(int mMonth, int mYear) {
if (mMonth == 2 && IsLeapYear(mYear)) {
return 29;
} else if (mMonth == 2 && !IsLeapYear(mYear)) {
return 28;
} else if (mMonth == 4 || mMonth == 6 || mMonth == 9 || mMonth == 11) {
return 30;
} else {
return 31;
}
}

int GetDayNumOfYear(int mYear) {
return 365 + IsLeapYear(mYear);
}

海量数据求中位数

Description Of Topic

一个长度未知的巨型向量分布在n台机器上,如何快速找到(近似)中位数?

Solution Of Question

  • Language: C++ 14
  • Time Complexity: O(n Log n)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
//
// Created by dry on 4/18/19.
//
#include <iostream>
#include <vector>
#include <algorithm>

#define MIN_VALUE 0
#define MAX_VALVE 255
#define MAX_NUM 1000000

using namespace std;

void init(int, int);

int array[MAX_NUM];

int main() {
long long n = MAX_NUM;
float median = 0;
init(MIN_VALUE, MAX_VALVE); // 用 0~255 的随机数初始化高维 array(种子为time)

vector<int> max_heap, min_heap; // 设置大顶堆, 小顶堆
make_heap(max_heap.begin(), max_heap.end());
make_heap(min_heap.begin(), min_heap.end(), greater<int>());

while (n--) {
if (n % 2 == 1) { // n = 奇数, array[n] 先入大顶堆
max_heap.push_back(array[n]);
push_heap(max_heap.begin(), max_heap.end());
} else {
min_heap.push_back(array[n]); // n = 偶数, array[n] 先入小顶堆
push_heap(min_heap.begin(), min_heap.end(), greater<int>());
}

// 若大顶堆 堆顶 > 小顶堆 堆顶, 交换堆顶
if (max_heap.size() > 0 && min_heap.size() > 0 && max_heap.front() > min_heap.front()) {
int t1 = max_heap.front(), t2 = min_heap.front();
pop_heap(max_heap.begin(), max_heap.end());
max_heap.pop_back();
pop_heap(min_heap.begin(), min_heap.end(), greater<int>());
min_heap.pop_back();

max_heap.push_back(t2);
push_heap(max_heap.begin(), max_heap.end());
min_heap.push_back(t1);
push_heap(min_heap.begin(), min_heap.end(), greater<int>());
}
}
median = (max_heap.size() && min_heap.size()) ? (float) (max_heap[0] + min_heap[0]) / 2 : min_heap[0];
cout << median;

return 0;
}

void init(int a, int b) {
srand((int) time(NULL));
int n = MAX_NUM;

while (n--) {
array[n] = rand() % (b - a) + a + 1;
}
}

Reference