Covariance Matrix (C++)

Linear Algebra

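This C++ program computes the covariance matrix for a dataset with multiple variables. The covariance matrix is symmetric and contains the pairwise covariances between all variables. The implementation computes the population covariance, i.e. it normalizes by the number of observations $N$ rather than by $N - 1$.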

Covariance Matrix

For a dataset with $n$ variables, the covariance matrix $Σ$ is an $n \times n$ matrix where:

$$Σ_{ij} = \operatorname{Cov}(X_i, X_j)$$

The matrix is symmetric: $Σ_{ij} = Σ_{ji}$.

Implementation

#include <iostream>
#include <vector>
#include <iomanip> // For formatting output
#include <stdexcept>

// Computes the arithmetic mean of the values in `vec`.
//
// The sum is accumulated in T and divided by the element count converted to
// T, so for an integral T the result is an integer division of the sum.
//
// Parameters:
//   vec - values to average; must contain at least one element.
// Returns:
//   The sum of all elements divided by the number of elements, as a T.
// Throws:
//   std::invalid_argument if `vec` is empty (the mean is undefined).
template <typename T>
T calculateMean(const std::vector<T>& vec) {
    if (vec.empty()) {
        throw std::invalid_argument("Cannot compute the mean of an empty vector.");
    }

    T sum = 0;
    for (const T& value : vec) {
        sum += value;
    }

    // Convert the count to T before dividing. Dividing a signed integral sum
    // directly by the unsigned size_t would first convert a negative sum to a
    // huge unsigned value and produce a wrong result.
    return sum / static_cast<T>(vec.size());
}

// Computes the population covariance between two equally sized samples.
//
// The result is the average (divided by the number of elements, not by
// number-of-elements minus one) of the products of each pair's deviations
// from its own mean, with all arithmetic carried out in T.
//
// Parameters:
//   vec1, vec2 - paired observations; must have the same, non-zero length.
// Returns:
//   sum((vec1[i] - mean1) * (vec2[i] - mean2)) / count, as a T.
// Throws:
//   std::invalid_argument if the sizes differ or if the vectors are empty.
template <typename T>
T calculateCovariance(const std::vector<T>& vec1, const std::vector<T>& vec2) {
    if (vec1.size() != vec2.size()) {
        throw std::invalid_argument("Vectors must be of the same size.");
    }
    if (vec1.empty()) {
        // Without this guard the final division would be 0 / 0.
        throw std::invalid_argument("Vectors must not be empty.");
    }

    const T mean1 = calculateMean(vec1);
    const T mean2 = calculateMean(vec2);

    T covariance = 0;
    for (size_t i = 0; i < vec1.size(); ++i) {
        covariance += (vec1[i] - mean1) * (vec2[i] - mean2);
    }

    // Convert the count to T first: a signed integral accumulator divided by
    // the unsigned size_t would be converted to unsigned and give a wrong
    // value whenever the accumulated sum is negative.
    return covariance / static_cast<T>(vec1.size());
}

// Computes the covariance matrix of a data set.
//
// Parameters:
//   data - one inner vector per variable; every inner vector holds that
//          variable's observations and all must have the same, non-zero
//          length.
// Returns:
//   A square matrix with data.size() rows and columns whose entry [i][j] is
//   calculateCovariance(data[i], data[j]). The matrix is symmetric.
// Throws:
//   std::invalid_argument if `data` is empty, if the inner vectors differ in
//   length, or if the inner vectors contain no observations.
template <typename T>
std::vector<std::vector<T>> calculateCovarianceMatrix(const std::vector<std::vector<T>>& data) {
    size_t numVectors = data.size();
    if (numVectors == 0) {
        throw std::invalid_argument("Data set must not be empty.");
    }

    size_t vectorSize = data[0].size();
    for (const auto& vec : data) {
        if (vec.size() != vectorSize) {
            throw std::invalid_argument("All vectors must have the same size.");
        }
    }
    if (vectorSize == 0) {
        // Variables without observations have no defined covariance; reject
        // them here instead of dividing by zero further down.
        throw std::invalid_argument("Vectors must not be empty.");
    }

    // Initialize the covariance matrix
    std::vector<std::vector<T>> covarianceMatrix(numVectors, std::vector<T>(numVectors, 0));

    // Compute pairwise covariances. Only pairs with j >= i are evaluated;
    // each value is written to both [i][j] and [j][i].
    for (size_t i = 0; i < numVectors; ++i) {
        for (size_t j = i; j < numVectors; ++j) {
            T covariance = calculateCovariance(data[i], data[j]);
            covarianceMatrix[i][j] = covariance;
            covarianceMatrix[j][i] = covariance; // Symmetric matrix
        }
    }

    return covarianceMatrix;
}

// Demo entry point: builds a small three-variable data set, computes its
// covariance matrix and prints it one row per line with each value
// right-aligned in a 10-character column.
//
// Returns 0 on success. If any std::exception is thrown, its message is
// written to std::cerr and the program returns 1.
int main() {
    try {
        // Example data set: Each inner vector represents a variable (column in a dataset)
        std::vector<std::vector<double>> data = {
            {2.0, 4.0, 6.0, 8.0}, // Variable 1
            {1.0, 3.0, 5.0, 7.0}, // Variable 2
            {10.0, 20.0, 30.0, 40.0} // Variable 3
        };

        // Calculate the covariance matrix
        std::vector<std::vector<double>> covarianceMatrix = calculateCovarianceMatrix(data);

        // Display the covariance matrix
        std::cout << "Covariance Matrix:\n";
        for (const auto& row : covarianceMatrix) {
            for (double value : row) {
                std::cout << std::setw(10) << value << " ";
            }
            std::cout << "\n";
        }
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        // Signal the failure to the caller instead of exiting with success.
        return 1;
    }

    return 0;
}

Key Features

Matrix Representation: Uses nested vectors to represent the covariance matrix
Symmetric Matrix: Only computes upper triangle, then mirrors to lower triangle
Input Validation: Checks that all vectors have the same size
Formatted Output: Uses std::setw() for aligned matrix display

Applications

Principal Component Analysis (PCA)
Multivariate statistics
Portfolio optimization
Machine learning feature analysis