首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 开发语言 > C语言 >

找寻最大的K个数 (C语言实现)

2012-10-27 
寻找最大的K个数 (C语言实现)题目:100亿个整数,求最大的1万个数,并说出算法的/* 编译 gcc main.c * 产生测

寻找最大的K个数 (C语言实现)

题目:100亿个整数,求最大的1万个数,并说出算法的/* 编译 gcc main.c * 产生测试数据: dd if=/dev/urandom of=random.dat bs=1M count=1024 * 运行: ./a.out random.dat 100 */#include <fcntl.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#include <sys/time.h>static unsigned int BUF_PAGES;/* 缓冲区有多少个page */static unsigned int PAGE_SIZE;/* page的大小 */static unsigned int BUF_SIZE;/* 缓冲区的大小, BUF_SIZE = BUF_PAGES*PAGE_SIZE */static int *buffer;/* 输入缓冲区 */static int *heap;/* 最小堆 */long get_time_usecs();void init_heap(int n);void adjust(int n, int i);int main(int argc, char **argv){unsigned intK, idx, length;intfd, i, bytes, element;long start_usecs = get_time_usecs();fd = open(argv[1], O_RDONLY);if (fd < 0) {printf("can't open file %s\n",argv[1]);exit(0);}PAGE_SIZE = 4096;/* page = 4KB */BUF_PAGES = 512;BUF_SIZE = PAGE_SIZE*BUF_PAGES;/* 4KB*512 = 2M */buffer = (int *)malloc(BUF_SIZE);if (buffer == NULL) exit(0);K = atoi(argv[2]);heap = (int *)malloc(sizeof(int)*(K+1));if (heap == NULL) {free(buffer);exit(0);}bytes = read(fd, heap+1, K*sizeof(int));if (bytes < K*sizeof(int)) {printf("data size is too small\n");exit(0);}init_heap(K);idx = length = 0;for (;;) {if (idx == length) {/* 输入缓冲区用完 */bytes = read(fd, buffer, BUF_SIZE);if (bytes == 0) break;length = bytes/4;idx = 0;}//从buffer取出一个数,若比最小堆堆顶元素大,则替换之element = buffer[idx++];if (element > heap[1]) {heap[1] = element;adjust(K, 1);}}long end_usecs = get_time_usecs();printf("the top %d numbers are: \n", K);for (i = 1; i <= K; i++) {printf("%d ", heap[i]);if (i % 6 == 0) {printf("\n");}}printf("\n");free(buffer);free(heap);close(fd);double secs = (double)(end_usecs - start_usecs) / (double)1000000;printf("program tooks %.02f seconds.\n", secs);return 0;}void init_heap(int n) {inti;for (i = n/2; i > 0; i--) {adjust(n, i);}}/* 节点i的左子树和右子树已是最小堆, 此函数的 * 作用是把以节点i为根的树调整为最小堆 */void adjust(int n, int i){heap[0] = heap[i];i <<= 1;while (i <= n) {if (i < n && heap[i+1] < heap[i]) {i++;}if (heap[i] >= heap[0]) {break;}heap[i>>1] = heap[i];i <<= 1;}heap[i>>1] = heap[0];}long get_time_usecs(){struct timeval time;struct timezone tz;memset(&tz, '\0', sizeof(struct timezone));gettimeofday(&time, &tz);long usecs = time.tv_sec*1000000 + time.tv_usec;return usecs;}?

?

?

测试和运行: 见代码的注释,主要是如何产生测试数据,

linux提供了一个产生测试数据的命令,直接调用就行了

dd if=/dev/urandom of=random.dat bs=1M count=1024

这样产生的测试数据是二进制的,我们把美4个字节当成1个整数来用。

?

热点排行