From October 18, 2019

In this case lower-up decomposition, the matrix of factors ca be understood as the product of the lower triangular matrix and an upper triangular matrix. The permutation matrix is sometimes included in the product as well.

L

void createL(float* output, float* l)
{
    for(int i = 0; i < WIDTH; i++)
    {
        for(int j = 0; j < WIDTH; j++)
        {
            if(i == j)
            {
                l[INDX_POS(i,j)] = 1;
            }
            else if (i < j)
            {
                l[INDX_POS(i,j)] = 0;
            }
            else if(i > j)
            {
                l[INDX_POS(i,j)] = output[INDX_POS(i,j)];
            }
        }
    }
}

U

void createU(float* output, float* u)
{
    for(int i = 0; i < WIDTH; i++)
    {
        for(int j = 0; j < WIDTH; j++)
        {
            if(j >= i)
            {
                u[INDX_POS(i,j)] = output[INDX_POS(i,j)];
            }
        }
    }
}

Decomposition

void LUdecomposition(float* input, float* l, float* u)
{
    for(int i = 0; i < NUM_VALUES - 1; i++)
    {
        int row = 0;
        
        for(row = i + 1; row < NUM_VALUES; row++)
        {
            float factor = input[INDX_POS(row,i)] / input[INDX_POS(i,i)];
            
            for(int col = i + 1; col < NUM_VALUES; col++)
            {
                input[INDX_POS(row,col)] = input[INDX_POS(row,col)] - factor * input[INDX_POS(i,col)];
            }            

            input[INDX_POS(row,i)] = factor;
        }
    }
    
    createL(input, l);
    createU(input, u);
}

Main

#include <chrono>
#include <stdio.h>
#include <stdlib.h>
 
#define WIDTH 4
#define NUM_VALUES 16
#define INDX_POS(i,j) ((WIDTH * i) + (j))
 
void print(char* n, float* m)
{
    fprintf(stdout, "\n%s\n", n);
    
    for(int i = 0; i < NUM_VALUES; i++)
    {
        fprintf(stdout, "%.3f ", m[i]);

        if (i % (WIDTH) == WIDTH - 1) { fprintf(stdout, "\n"); }
    }
}
 
int validate(float* input, float* output)
{
    for (int i = 0; i < WIDTH; i++)
    {
        if ( input[i] != output[i] )
        {
            fprintf(stdout, "Error: Element %d did not match expected output.\n", i);
            
            fflush(stdout);
            return 0;
        }
    }
    return 1;
}
 
void createL(float* output, float* l)
{
    for(int i = 0; i < WIDTH; i++)
    {
        for(int j = 0; j < WIDTH; j++)
        {
            if(i == j)
            {
                l[INDX_POS(i,j)] = 1;
            }
            else if (i < j)
            {
                l[INDX_POS(i,j)] = 0;
            }
            else if(i > j)
            {
                l[INDX_POS(i,j)] = output[INDX_POS(i,j)];
            }
        }
    }
}
 
void createU(float* output, float* u)
{
    for(int i = 0; i < WIDTH; i++)
    {
        for(int j = 0; j < WIDTH; j++)
        {
            if(j >= i)
            {
                u[INDX_POS(i,j)] = output[INDX_POS(i,j)];
            }
        }
    }
}
 
void LUdecomposition(float* input, float* l, float* u)
{
    for(int i = 0; i < NUM_VALUES - 1; i++)
    {
        int row = 0;
        
        for(row = i + 1; row < NUM_VALUES; row++)
        {
            float factor = input[INDX_POS(row,i)] / input[INDX_POS(i,i)];
            
            for(int col = i + 1; col < NUM_VALUES; col++)
            {
                input[INDX_POS(row,col)] = input[INDX_POS(row,col)] - factor * input[INDX_POS(i,col)];
            }            

            input[INDX_POS(row,i)] = factor;
        }
    }
    
    createL(input, l);
    createU(input, u);
}
 
int main(int argc, const char * argv[])
{
    float* test_in = (float*) malloc(sizeof(float) * NUM_VALUES);
    float* test_out = (float*) malloc(sizeof(float) * NUM_VALUES);
    float* l = (float*) malloc(sizeof(float) * NUM_VALUES);
    float* u = (float*) malloc(sizeof(float) * NUM_VALUES);
    
    for (int i = 0; i < NUM_VALUES; i++)
    {
        l[i] = 0;
        u[i] = 0;
        test_out[i] = 0;
        test_in[i] = i + 1;
    }
    
    // i: [index / NUM_VALUES/2] -- j: [index % NUM_VALUES/2]
    //   A         L       U
    // 4   3    1     0   4   3
    // 6   3    1.5   1   0   -1.5
    
    //   A         L       U
    // 3   1    1     0   3   1
    // 4   2    1.3   1   0   0.6
    
    //test_in[0] = 4; // 0 0
    //test_in[1] = 3; // 0 1
    //test_in[2] = 6; // 1 0
    //test_in[3] = 3; // 1 1
    
    //     A            L            U
    // 1   2   3    1   0   0    1   2   3
    // 4   5   6    4   1   0    0  -3  -6
    // 7   8   9    7   2   1    0   0   0
    
    test_in[0] = 80.428;
    test_in[1] = -12.818;
    test_in[2] = -81.284;
    test_in[3] = -95.437;
    test_in[4] = -79.099;
    test_in[5] = 14.754;
    test_in[6] = 80.749;
    test_in[7] = 46.408;
    test_in[8] = -28.549;
    test_in[9] = 76.515;
    test_in[10] = -14.687;
    test_in[11] = -75.622;
    test_in[12] = -88.490;
    test_in[13] = -82.585;
    test_in[14] = 85.373;
    test_in[15] = -61.574;
    
    print("INPUT", test_in);
    
    auto t1 = std::chrono::high_resolution_clock::now();

    LUdecomposition(test_in, l, u);

    auto t2 = std::chrono::high_resolution_clock::now();

    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>( t2 - t1 ).count();
    
    print("L", l);
    print("U", u);
    
    double sum = 0;
    for(int i = 0; i < WIDTH; i++)
    {
        for(int j = 0; j < WIDTH; j++)
        {
            for(int k = 0; k < WIDTH; k++)
            {
                //test_out[i][j] += l[i][k] * u[k][j];
                sum = sum + l[INDX_POS(i,k)] * u[INDX_POS(k,j)];
            }

            test_out[INDX_POS(i,j)] = sum;
            sum = 0;
        }
    }
    
    print("OUTPUT", test_out);
    
    if ( validate(test_in, test_out))
    {
        fprintf(stdout, "\nAll values were properly calculated.\n");
    }

    fprintf(stdout, "\n%llu milliseconds\n", duration);

    return 0;
}