[Bug 37294] Multithreaded code does not utilize processors on a Centrino Duo
Alex
gsasha at cs.technion.ac.il
Thu Mar 30 08:41:37 UTC 2006
Public bug report changed:
https://launchpad.net/malone/bugs/37294
Comment:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#include <sys/time.h>
#include <sys/resource.h>
struct Params {
int left;
int right;
};
struct ThreadParams {
int* numbers;
int size;
};
void q_sort(int numbers[], int size)
{
struct Params stack[64];
int stack_pos = 0;
stack[0].left = 0;
stack[0].right = size-1;
while (stack_pos >= 0)
{
int left = stack[stack_pos].left;
int right = stack[stack_pos].right;
int pivot = numbers[left];
while (left < right)
{
while ((numbers[right] >= pivot) && (left < right))
right--;
if (left != right)
{
numbers[left] = numbers[right];
left++;
}
while ((numbers[left] <= pivot) && (left < right))
left++;
if (left != right)
{
numbers[right] = numbers[left];
right--;
}
}
numbers[left] = pivot;
pivot = left;
left = stack[stack_pos].left;
right = stack[stack_pos].right;
if (pivot-left > right-pivot)
{
// size of the left subarray is larger. Push it first.
stack_pos--;
if (left < pivot) {
stack_pos++;
stack[stack_pos].left = left;
stack[stack_pos].right = pivot-1;
}
if (right > pivot) {
stack_pos++;
stack[stack_pos].left = pivot+1;
stack[stack_pos].right = right;
}
}
else
{
// size of the right subarray is larger. Push it first
stack_pos--;
if (right > pivot) {
stack_pos++;
stack[stack_pos].left = pivot+1;
stack[stack_pos].right = right;
}
if (left < pivot) {
stack_pos++;
stack[stack_pos].left = left;
stack[stack_pos].right = pivot-1;
}
}
}
}
void* q_sort_worker(void* p)
{
struct ThreadParams* params = (struct ThreadParams*)p;
q_sort(params->numbers, params->size);
return 0;
}
void qsort_pthread(int numbers[], int size)
{
struct ThreadParams params1;
struct ThreadParams params2;
pthread_t thr1, thr2;
int res1 = -1, res2 = -1;
int left = 0;
int right = size-1;
int pivot = numbers[left];
while (left < right)
{
while ((numbers[right] >= pivot) && (left < right))
right--;
if (left != right)
{
numbers[left] = numbers[right];
left++;
}
while ((numbers[left] <= pivot) && (left < right))
left++;
if (left != right)
{
numbers[right] = numbers[left];
right--;
}
}
numbers[left] = pivot;
pivot = left;
left = 0;
right = size-1;
if (left < pivot) {
params1.numbers = numbers+left;
params1.size = pivot-left-1;
res1 = pthread_create(&thr1, NULL, q_sort_worker, ¶ms1);
}
if (right > pivot) {
params2.numbers = numbers+pivot+1;
params2.size = right-(pivot+1);
res2 = pthread_create(&thr2, NULL, q_sort_worker, ¶ms2);
}
if (res1 == 0) {
pthread_join(thr1, NULL);
}
if (res2 == 0) {
pthread_join(thr2, NULL);
}
}
int main()
{
int i;
int iterations = 100000;
int cur_size = 10;
int max_size = 1000000;
int *data = (int*)malloc(max_size * sizeof(int));
int *work_data = (int*)malloc(max_size*sizeof(int));
for (i=0; i<max_size; i++)
{
work_data[i] = data[i] = rand();
}
// --- Prepare it so that the median is the first element, so that the
// parallelization will have good load balancing
q_sort(work_data, max_size);
data[0] = work_data[max_size/2];
printf("# size, serial, parallel, speedup \n");
while (cur_size < max_size)
{
struct timeval start, end;
struct timezone zone;
double seq_seconds, par_seconds;
zone.tz_minuteswest = 0;
zone.tz_dsttime = 0;
// ----------- Run the parallel version
gettimeofday(&start, &zone);
// Peform the timed operation
for (i=0; i<iterations; ++i) {
int j;
for (j=0; j<cur_size; j++)
work_data[j] = data[j];
qsort_pthread(work_data,cur_size);
}
gettimeofday(&end, &zone);
par_seconds = end.tv_sec-start.tv_sec
+1e-6*(end.tv_usec-start.tv_usec);
// ------------ Run the serial version
gettimeofday(&start, &zone);
// Peform the timed operation
for (i=0; i<iterations; ++i) {
int j;
for (j=0; j<cur_size; j++)
work_data[j] = data[j];
q_sort(work_data,cur_size);
}
gettimeofday(&end, &zone);
seq_seconds = end.tv_sec-start.tv_sec
+1e-6*(end.tv_usec-start.tv_usec);
printf("%6d %6d %2.4f %2.4f %f %f %f\n", cur_size, iterations,
seq_seconds, par_seconds,
seq_seconds/iterations, par_seconds/iterations, seq_seconds/(par_seconds+1e-20));
if (par_seconds > 10.0 && iterations > 10) {
iterations /= 2;
}
/*for (i=0; i<size; i++)
printf("%d ", data[i]);
printf("\n");
for (i=0; i<size; i++)
printf("%d ", work_data[i]);
printf("\n");*/
cur_size = (int)(cur_size * 1.3);
}
}
More information about the kernel-bugs
mailing list