OpenMP
From October 12, 2019
OpenMP is an API that allows to add concurrency to our programs wether they are written using C or C++, is composed by a set of compiler’s directives for memory distributed systems.
Fork-Join
This model divides a heavy task in k threads by making subtasks and collecting results at the end.
In order to mark a section of the code to be executed in parallel the following directive is used:
#pragma omp parallel
{
}
Now with the example of getting the square off given values.
#include <chrono>
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"
#define NUM_VALUES 2048000
static int validate(float* input, float* output)
{
for (int i = 0; i < NUM_VALUES; i++)
{
if ( output[i] != (input[i] * input[i]) )
{
fprintf(stdout, "Error: Element %d did not match expected output.\n", i);
fprintf(stdout, " Got %1.4f, EXPECTED %1.4f\n", output[i], input[i] * input[i]);
fflush(stdout);
return 0;
}
}
return 1;
}
void square(float* input, float* output)
{
for (int i = 0; i < NUM_VALUES; i++)
{
output[i] = input[i] * input[i];
}
}
int main(int argc, const char * argv[])
{
float* test_in = (float*) malloc(sizeof(float) * NUM_VALUES);
float* test_out = (float*) malloc(sizeof(float) * NUM_VALUES);
for (int i = 0; i < NUM_VALUES; i++)
{
test_in[i] = i;
}
auto t1 = std::chrono::high_resolution_clock::now();
#pragma omp parallel
{
square(test_in, test_out);
}
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>( t2 - t1 ).count();
std::cout << duration << " milliseconds\n";
//std::cout << "Greetings from thread " << omp_get_thread_num() << std::endl;
if ( validate(test_in, test_out)) {
fprintf(stdout, "All values were properly squared.\n");
}
return 0;
}