c & 多线程归并 & 单线程归并

算法 2019-04-01

多线程归并排序 与 单线程归并排序的对比

@测试环境

  • CPU 1核
  • MEM 298M

@测试用例

用python脚本向文件中写入100万整数用于测试

import random
f = open("./data","w")
for i in range(1000000):
    f.write(str(random.randint(1,1000)))
    f.write("\n")
f.close()

@头文件

#include<stdio.h>
#include<pthread.h>
#include<stdlib.h>
#include<string.h>
#include<ctime>
int array_length,file_length;
int *array_master;
FILE *freader;

int read_length(char *fname){
    freader = fopen(fname,"rt");
    char line[80];
    int file_length = 0;

    while(fgets(line,80,freader ) != NULL)
        file_length += 1;
    return file_length;
}

int *read_file(char *fname){
    freader = fopen(fname,"rt");
    int bufsize = file_length;
    char line[80];
    int integer;
    int index = 0;
    int *input = (int *)malloc(bufsize*sizeof(int));
    while(fgets(line,80,freader) != NULL){
        sscanf(line,"%d",&integer);
        input[index] = integer;
        ++index;
        ++array_length;
    }
    fclose(freader);
    return input;
}

@多线程读写文件归并排序源码

#include "file.h"
void merge(int arr[],int left,int middle,int right){
    int i,j,k;
    int half1 = middle - left + 1;
    int half2 = right - middle;
    int first[half1],second[half2];
    for(i = 0; i< half1; i++)
        first[i] = arr[left + i];
    for(j = 0;j< half2;j++)
        second[j] = arr[middle + 1 + j];
    i = 0;
    j = 0;
    k = left;
    while(i < half1 && j < half2)
    {
        if(first[i] <= second[j]){
            arr[k] = first[i];
            ++i;
        }else
        {
            arr[k] = second[j];
            j++;
        }
        k++;
    }
    while(i < half1){
        arr[k] = first[i];
        i++;
        k++;
    }
    while(j < half2){
        arr[k] = second[j];
        j++;
        k++;
    }
}
void* merge_sort(void* arg){
    int *arr = array_master;
    int *argu = (int*)arg;
    int l = argu[0];
    int r = argu[1];
    if(l < r){
        pthread_t tid1;
        pthread_t tid2;

        int arg1[2];
        int arg2[2];

        int middle;
        middle = (l +(r-1))/2;
        arg1[0] = l;
        arg1[1] = middle;
        arg2[0] = middle +1;
        arg2[1] = r;

        pthread_create(&tid1,NULL,merge_sort,arg1);
        pthread_create(&tid2,NULL,merge_sort,arg2);

        pthread_join(tid1,NULL);
        pthread_join(tid2,NULL);

        merge(arr,1,middle,r);
        pthread_exit(0);

    }
    return NULL;
}

int main(int argc,char *argv[]){
    char *fname = argv[1];
    
    clock_t startTime = clock();
    file_length = read_length(fname);

    array_master = read_file(fname);

    int arg[2];
    arg[0] = 0;
    arg[1] = file_length - 1;

    pthread_t tid;
    pthread_create(&tid, NULL ,merge_sort,arg);

    pthread_join(tid,NULL);
    clock_t endTime = clock();
    printf("%f",double(endTime - startTime)/CLOCKS_PER_SEC);
    return 0;
}

编译命令 gcc main.c -lpthread
运行命令 ./a.out ./datadata 为数据文件

@单线程读写文件归并排序源码

#include<iostream>
#include "file.h"

template<typename T>
void __merge(T arr[],int l , int mid , int r)
{
    T aux[r-l +1];
    for(int i = l;i <= r; i++)
        aux[i-l] = arr[i];
    int i = l,j = mid+1;
    for (int k = l ;k <= r;k++)
    {
        if(i > mid){
            arr[k] = aux[j-l];
            j ++;
        }else if(j > r){
            arr[k] = aux[i-l];
            i ++;
        }else if(aux[i-l] < aux[j-l]){
            arr[k] = aux[i-l];
            i++;
        }else{
            arr[k] = aux[j-l];
            j++;
        }
    }
}

template<typename T>
void __mergeSort(T arr[],int l , int r)
{
    if( l >= r )
        return;
    int mid = (l + r)/2;
    __mergeSort(arr,l,mid);
    __mergeSort(arr,mid+1 , r);
    if(arr[mid] > arr[mid+1])
        __merge(arr , l,mid,r);
}
template<typename T>
void mergeSort(T arr[],int n)
{
    __mergeSort(arr , 0 , n-1);
}

int main(int argc,char *argv[])
{
    char *fname = argv[1];
    clock_t startTime = clock();
    file_length = read_length(fname);
    array_master = read_file(fname);
    mergeSort(array_master,file_length);
    clock_t endTime = clock();
    std::cout<<double(endTime -startTime)/CLOCKS_PER_SEC);
    return 0;
}

测试

构造数据 python number.py 数据如下

-rw-rw-r-- 1 xiaodo xiaodo 3.8M Mar 22 16:42 data
-rw-rw-r-- 1 xiaodo xiaodo 762 Mar 22 16:30 file.h
-rw-rw-r-- 1 xiaodo xiaodo 1.8K Mar 22 16:33 merge_sort.c
-rw-rw-r-- 1 xiaodo xiaodo 1.3K Mar 22 16:35 merge_sort.cpp
-rw-rw-r-- 1 xiaodo xiaodo 132 Mar 22 16:41 number.py
可以看到data存入1000万数据后 大小为38M

编译多线程和单线程程序
g++ -o cmerge merge_sort.c -lpthread
g++ -o cppmerge merge_sort.cpp -lpthread

执行与结果对比

./cmerge ./data
Segmentation fault (core dumped)
./cppmerge ./data
0.510115

  • 多线程开销实在太大了 我从100万调到10万还是内存不足
  • 单线程完胜,只有0.5s

本文由 小东@xiaodo 创作,采用 知识共享署名 3.0,可自由转载、引用,但需署名作者且注明文章出处。