Helgrind es una de las herramientas del grupo Valgrind que detecta errores de sincronización en programas C/C++ que usan las primitivas POSIX. Estas abstracciones POSIX son: hilos compartiendo un espacio de direccionamiento común, creación de hilos, unión de hilos, salida de hilos, cerrojos (mutexes), variables de condición y barreras. Helgrind detecta tres clases de errores: (1) mal uso del API POSIX, (2) potenciales interbloqueos, y (3) condiciones de carrera.
El siguiente código es un ejemplo de programa multihilo en el que se crean y destruyen varios hilos (NUM_THREADS). Cada hilo imprime un texto y el hilo principal (main) espera a que terminen los demás:
/* Build on Linux with: gcc -pthread */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define NUM_THREADS 3

/*
 * Thread entry point.
 *
 * threadid does not point at anything: it carries the thread's numeric id
 * packed into a pointer. The same number is used as the sleep time in
 * seconds, so thread 0 finishes first and the last thread finishes last.
 */
void *print_hello(void *threadid)
{
    const long id = (long)threadid;

    printf("Thread number \t %ld sleeps %ld seconds...\n", id, id);
    sleep(id);
    printf("Thread number \t %ld exiting .............\n", id);
    pthread_exit(NULL);
}

/*
 * Starts NUM_THREADS threads, giving each one its index as id, then joins
 * all of them before returning 0. Terminates with exit(-1) as soon as a
 * thread cannot be created.
 */
int main(int argc, char *argv[])
{
    pthread_t workers[NUM_THREADS];
    long ids[NUM_THREADS];
    long i = 0;

    while (i < NUM_THREADS) {
        ids[i] = i;
        printf("In main: creating thread %ld\n", ids[i]);

        /* The index travels by value inside the void* argument. */
        int status = pthread_create(&workers[i], NULL, print_hello, (void *)i);
        if (status != 0) {
            printf("ERROR; return code from pthread_create() is %d\n", status);
            exit(-1);
        }
        i++;
    }

    for (i = 0; i < NUM_THREADS; i++)
        pthread_join(workers[i], NULL);

    return 0;
}
Para compilar este código en Linux es necesario utilizar la opción -pthread del compilador (gcc). La prueba se hace con la herramienta Helgrind, indicando explícitamente la opción --tool=helgrind.
$ gcc -Wall -gstabs -pthread helgrind_threads_good.c -o helgrind_threads_good $ valgrind --tool=helgrind ./helgrind_threads_good ==8455== Helgrind, a thread error detector ==8455== Copyright (C) 2007-2011, and GNU GPL'd, by OpenWorks LLP et al. ==8455== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info ==8455== Command: ./helgrind_threads_good ==8455== In main: creating thread 0 Thread number 0 sleeps 0 seconds.. Thread number 0 exiting .......... In main: creating thread 1 Thread number 1 sleeps 1 seconds.. In main: creating thread 2 Thread number 2 sleeps 2 seconds.. Thread number 1 exiting .......... Thread number 2 exiting .......... ==8455== ==8455== For counts of detected and suppressed errors, rerun with: -v ==8455== Use --history-level=approx or =none to gain increased speed, at ==8455== the cost of reduced accuracy of conflicting-access information ==8455== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 476 from 55)
Probemos a cambiar el código para que los hilos compartan información a través de la variable global counter. Potencialmente, este cambio provoca condiciones de carrera, dado que los diferentes hilos leen y escriben sobre la misma variable sin ningún tipo de coordinación (sin utilizar un mutex para sincronizar el acceso):
/* Build on Linux with: gcc -pthread */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define NUM_THREADS 2

/* Shared by every thread and updated without any locking. */
int counter=0;

/*
 * Thread entry point.
 *
 * threadid carries the thread's numeric id packed into a pointer; the id
 * is also the number of seconds the thread sleeps before exiting.
 */
void *print_hello(void *threadid)
{
    const long id = (long)threadid;

    printf("Thread number \t %ld sleeps %ld seconds...\n", id, id);

    /*
     * Deliberately unsynchronized read-modify-write of the shared counter:
     * this is the access Helgrind reports as a possible data race.
     */
    counter++;

    sleep(id);
    printf("Thread number \t %ld exiting .............\n", id);
    pthread_exit(NULL);
}

/*
 * Starts NUM_THREADS threads, joins them all and prints the final value of
 * the shared counter. Terminates with exit(-1) if a thread cannot be created.
 */
int main(int argc, char *argv[])
{
    pthread_t workers[NUM_THREADS];
    long ids[NUM_THREADS];
    long i = 0;

    while (i < NUM_THREADS) {
        ids[i] = i;
        printf("In main: creating thread %ld\n", ids[i]);

        /* The index travels by value inside the void* argument. */
        int status = pthread_create(&workers[i], NULL, print_hello, (void *)i);
        if (status != 0) {
            printf("ERROR; return code from pthread_create() is %d\n", status);
            exit(-1);
        }
        i++;
    }

    for (i = 0; i < NUM_THREADS; i++)
        pthread_join(workers[i], NULL);

    printf("counter is %i \n", counter);
    return 0;
}
Lo cual va a ser detectado por Helgrind, cuando lo ejecutemos sobre el código anterior, como un error de tipo "data race":
$gcc -Wall -g -pthread helgrind_threads_bad.c -o helgrind_threads_bad $ valgrind -v --tool=helgrind ./helgrind_threads_bad ==5483== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 237 from 55) ==5483== ==5483== 1 errors in context 1 of 2: ==5483== ---------------------------------------------------------------- ==5483== ==5483== Possible data race during write of size 4 at 0x804A030 by thread #3 ==5483== Locks held: none ==5483== at 0x8048583: print_hello (helgrind_threads_bad.c:15) ==5483== by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so) ==5483== by 0x405AD4B: start_thread (pthread_create.c:308) ==5483== by 0x415DB8D: clone (clone.S:130) ==5483== ==5483== This conflicts with a previous write of size 4 by thread #2 ==5483== Locks held: none ==5483== at 0x8048583: print_hello (helgrind_threads_bad.c:15) ==5483== by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so) ==5483== by 0x405AD4B: start_thread (pthread_create.c:308) ==5483== by 0x415DB8D: clone (clone.S:130) ==5483== ==5483== ==5483== 1 errors in context 2 of 2: ==5483== ---------------------------------------------------------------- ==5483== ==5483== Possible data race during read of size 4 at 0x804A030 by thread #3 ==5483== Locks held: none ==5483== at 0x804857B: print_hello (helgrind_threads_bad.c:15) ==5483== by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so) ==5483== by 0x405AD4B: start_thread (pthread_create.c:308) ==5483== by 0x415DB8D: clone (clone.S:130) ==5483== ==5483== This conflicts with a previous write of size 4 by thread #2 ==5483== Locks held: none ==5483== at 0x8048583: print_hello (helgrind_threads_bad.c:15) ==5483== by 0x402DD35: ??? 
(in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so) ==5483== by 0x405AD4B: start_thread (pthread_create.c:308) ==5483== by 0x415DB8D: clone (clone.S:130) --5483-- used_suppression: 57 helgrind-glibc2X-005 --5483-- used_suppression: 180 helgrind-glibc2X-004 ==5483== ==5483== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 237 from 55)
Si se quiere mantener el comportamiento del código, una solución es usar cerrojos (mutex) para arreglar el problema. Así, una versión corregida del código previo es:
/* Build on Linux with: gcc -pthread */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define NUM_THREADS 2

/* Guards every access the worker threads make to counter. */
pthread_mutex_t mutex_counter;
int counter=0;

/*
 * Thread entry point.
 *
 * threadid carries the thread's numeric id packed into a pointer; the id
 * is also the number of seconds the thread sleeps before exiting.
 */
void *print_hello(void *threadid)
{
    const long id = (long)threadid;

    printf("Thread number \t %ld sleeps %ld seconds...\n", id, id);

    /* Only the increment is inside the critical section; the sleep is not. */
    pthread_mutex_lock(&mutex_counter);
    counter++;
    pthread_mutex_unlock(&mutex_counter);

    sleep(id);
    printf("Thread number \t %ld exiting .............\n", id);
    pthread_exit(NULL);
}

/*
 * Initializes the mutex, starts NUM_THREADS threads, joins them all,
 * destroys the mutex and prints the final value of the shared counter.
 * Terminates with exit(-1) if a thread cannot be created.
 */
int main(int argc, char *argv[])
{
    pthread_t workers[NUM_THREADS];
    long ids[NUM_THREADS];
    long i = 0;

    pthread_mutex_init(&mutex_counter, NULL);

    while (i < NUM_THREADS) {
        ids[i] = i;
        printf("In main: creating thread %ld\n", ids[i]);

        /* The index travels by value inside the void* argument. */
        int status = pthread_create(&workers[i], NULL, print_hello, (void *)i);
        if (status != 0) {
            printf("ERROR; return code from pthread_create() is %d\n", status);
            exit(-1);
        }
        i++;
    }

    for (i = 0; i < NUM_THREADS; i++)
        pthread_join(workers[i], NULL);

    /* Every worker has been joined, so nobody can still hold the mutex. */
    pthread_mutex_destroy(&mutex_counter);

    printf("counter is %i \n", counter);
    return 0;
}
Lo que elimina el error:
$gcc -Wall -g -pthread helgrind_threads_bad_solved.c -o helgrind_threads_bad_solved $ valgrind -v --tool=helgrind ./helgrind_threads_bad_solved c70 (pthread_mutex_destroy) redirected to 0x402ded0 (pthread_mutex_destroy) counter is 2 ==7861== ==7861== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 248 from 61) --7861-- --7861-- used_suppression: 60 helgrind-glibc2X-005 --7861-- used_suppression: 184 helgrind-glibc2X-004 --7861-- used_suppression: 4 helgrind-glibc2X-101 ==7861== ==7861== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 248 from 61)
El primer ejemplo que veremos tiene que ver con las condiciones de carrera. La condición de carrera se da entre el hilo principal (main) y el único hilo que crea la aplicación, ya que ambos incrementan el mismo contador sin sincronizarse.
/* Build on Linux with: gcc -pthread */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Adds one to *counter and returns the value read back afterwards.
 *
 * No locking is done here. The function is called from both main and the
 * worker thread on the same integer, which is the possible data race
 * Helgrind reports for this program.
 */
int increment_counter(int *counter)
{
    *counter += 1;
    return *counter;
}

/*
 * Thread entry point. ctr is the address of the int counter owned by main.
 * Sleeps one second, bumps the counter and prints the value it got back.
 */
void *counter_thread(void *ctr)
{
    int *shared = (int *)ctr;

    printf("In thread: running...\n");
    sleep(1);

    /* Unsynchronized update, may overlap with the one performed by main. */
    int seen = increment_counter(shared);
    printf("[_THREAD_1]Counter is %d \n", seen);

    printf("In thread: exiting .............\n");
    pthread_exit(NULL);
}

/*
 * Creates one worker thread that shares a stack-allocated counter with
 * main, increments the counter from main as well, joins the worker and
 * prints the value main obtained. Terminates with exit(-1) if the thread
 * cannot be created.
 */
int main(int argc, char *argv[])
{
    int int_counter = 0;
    pthread_t worker;

    printf("(log) In main: creating thread %i\n", 1);

    int status = pthread_create(&worker, NULL, counter_thread, &int_counter);
    if (status != 0) {
        printf("ERROR; return code from pthread_create() is %d\n", status);
        exit(-1);
    }

    /* Same delay as the worker, so both increments happen close together. */
    sleep(1);
    int res_counter = increment_counter(&int_counter);

    pthread_join(worker, NULL);
    printf("[_MAIN___] Counter is %i \n", res_counter);
    return 0;
}
Este problema es detectado por Helgrind que devuelve la siguiente salida:
$ $gcc -Wall -g -pthread helgrind_threads_race.c -o helgrind_threads_race $ valgrind -v --tool=helgrind ./helgrind_threads_race ==8297== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 1 from 1) ==8297== ==8297== 1 errors in context 1 of 2: ==8297== ---------------------------------------------------------------- ==8297== ==8297== Possible data race during write of size 4 at 0xBEE21570 by thread #2 ==8297== Locks held: none ==8297== at 0x8048592: increment_counter (helgrind_threads_race.c:12) ==8297== by 0x80485C3: counter_thread (helgrind_threads_race.c:20) ==8297== by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so) ==8297== by 0x405AD4B: start_thread (pthread_create.c:308) ==8297== by 0x415DB8D: clone (clone.S:130) ==8297== ==8297== This conflicts with a previous write of size 4 by thread #1 ==8297== Locks held: none ==8297== at 0x8048592: increment_counter (helgrind_threads_race.c:12) ==8297== by 0x8048682: main (helgrind_threads_race.c:37) ==8297== ==8297== ==8297== 1 errors in context 2 of 2: ==8297== ---------------------------------------------------------------- ==8297== ==8297== Possible data race during read of size 4 at 0xBEE21570 by thread #2 ==8297== Locks held: none ==8297== at 0x804858A: increment_counter (helgrind_threads_race.c:12) ==8297== by 0x80485C3: counter_thread (helgrind_threads_race.c:20) ==8297== by 0x402DD35: ??? (in /usr/lib/valgrind/vgpreload_helgrind-x86-linux.so) ==8297== by 0x405AD4B: start_thread (pthread_create.c:308) ==8297== by 0x415DB8D: clone (clone.S:130) ==8297== ==8297== This conflicts with a previous write of size 4 by thread #1 ==8297== Locks held: none ==8297== at 0x8048592: increment_counter (helgrind_threads_race.c:12) ==8297== by 0x8048682: main (helgrind_threads_race.c:37) ==8297== --8297-- --8297-- used_suppression: 1 helgrind-glibc2X-005 ==8297== ==8297== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 1 from 1)
Para solucionar el problema, al igual que en el caso anterior, se puede usar un mutex. La siguiente pieza de código introduce los cambios requeridos para solventar el problema.
/* Build on Linux with: gcc -pthread */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* A counter bundled with the mutex that protects it. */
struct struct_counter{
    int i;                    /* current count */
    pthread_mutex_t mutex_i;  /* held around every update of i */
};

/*
 * Adds one to counter->i while holding counter->mutex_i.
 *
 * Returns the value of the counter right after the increment. The result
 * is captured inside the critical section, so the first caller gets 1 and
 * the second gets 2 regardless of how the two threads interleave. (A
 * post-increment here would hand back the value *before* the update and
 * make "Counter is" print 0 and 1 instead.)
 */
int increment_counter(struct struct_counter* counter)
{
    int to_return = 0;

    pthread_mutex_lock(&counter->mutex_i);
    to_return = ++counter->i;
    pthread_mutex_unlock(&counter->mutex_i);

    return to_return;
}

/*
 * Thread entry point. ctr is the address of the struct struct_counter
 * owned by main. Sleeps one second, bumps the counter under its mutex and
 * prints the value returned by increment_counter.
 */
void *counter_thread(void *ctr)
{
    printf("In thread: running...\n");
    sleep(1);
    printf("[_THREAD_1]Counter is %d \n",
           increment_counter((struct struct_counter*)ctr));
    printf("In thread: exiting .............\n");
    pthread_exit(NULL);
}

/*
 * Creates one worker thread that shares a mutex-protected counter with
 * main, increments the counter from main as well, joins the worker,
 * destroys the mutex and prints the value main obtained. Terminates with
 * exit(-1) if the thread cannot be created.
 */
int main(int argc, char *argv[])
{
    struct struct_counter int_counter;
    pthread_t threads[1];
    int rc = 0;

    /* Counter and mutex must be ready before the worker can touch them. */
    int_counter.i = 0;
    pthread_mutex_init(&int_counter.mutex_i, NULL);

    printf("(log) In main: creating thread %i\n", 1);
    /* Any object pointer converts to void* implicitly; no cast is needed. */
    rc = pthread_create(&threads[0], NULL, counter_thread, &int_counter);
    if (rc){
        printf("ERROR; return code from pthread_create() is %d\n", rc);
        exit(-1);
    }

    sleep(1);
    int res_counter = increment_counter(&int_counter);

    pthread_join(threads[0], NULL);
    /* The worker has been joined, so nobody can still hold the mutex. */
    pthread_mutex_destroy(&int_counter.mutex_i);

    printf("[_MAIN___] Counter is %i \n", res_counter);
    return 0;
}