摘几个programming pearls 上提到的random function.
1. knuth version. 从1-n之间生成有序的m个不同的随机数( the output is a sorted list of m random integers in the range 0...n-1 in which no integer occurs more than once).
void genknuth(int m, int n) {
for (int i = 0; i < n; ++i) {
/* select m of remaining n-i */
if ((bigrand() % (n-i)) < m) {
cout << i << "\n";
--m;
}
}
}
2. shuffle the first m elements of the array. output the sorted list of the m random numbers.
void genshuf(int m, int n) {
int i, j;
int *x = new int[n];
for(i = 0; i < n; ++i)
x[i] = i;
for(i = 0; i < m; ++i) {
j = randint(i, n-1);
int t = x[i]; x[i] = x[j]; x[j] = t;
}
sort(x, x+m);
for(i = 0; i < m; ++i)
cout << x[i] << "\n";
}
3. floyd. This algorithm uses only m random numbers, even in the worst case.
void genfloyd(int m, int n) {
set<int> S;
set<int>::iterator i;
for(int j = n-m; j < n; ++j) {
int t = bigrand() % (j+1);
if(S.find(t) == S.end())
S.insert(t); // t not in S
else
S.insert(j); // t in S
}
for(i = S.begin(); i != S.end(); ++i)
cout << *i << "\n";
}
4. reservoir sampling. when we do not know the length of the data ( we see the data sequentially. )
We always select the first line, we select the second line with probability one half, the third line with probability one third, and so on. At the end of the process, each line has the same probability of being chosen (1/n, where n is the total number of lines in the article).