Source code for modulo_vki.core._k_matrix

import os
from tqdm import tqdm
import numpy as np
import math


[docs] def CorrelationMatrix(N_T, N_PARTITIONS=1, MEMORY_SAVING=False, FOLDER_OUT='./', SAVE_K=False, D=None,weights = np.array([])): """ This method computes the temporal correlation matrix, given a data matrix as input. It's possible to use memory saving then splitting the computing in different tranches if computationally heavy. If D has been computed using MODULO then the dimension dim_col and N_PARTITIONS is automatically loaded :param N_T: int. Number of temporal snapshots :param D: np.array. Data matrix :param SAVE_K: bool. If SAVE_K=True, the matrix K is saved on disk. If the MEMORY_SAVING feature is active, this is done by default. :param MEMORY_SAVING: bool. If MEMORY_SAVING = True, the computation of the correlation matrix is done by steps. It requires the data matrix to be partitioned, following algorithm in MODULO._data_processing. :param FOLDER_OUT: str. Folder in which the temporal correlation matrix will be stored :param N_PARTITIONS: int. Number of partitions to be read in computing the correlation matrix. If _data_processing is used to partition the data matrix, this is inherited from the main class :param weights: weight vector [w_i,....,w_{N_s}] where w_i = area_cell_i/area_grid. Only needed if grid is non-uniform & MEMORY_SAVING== True :return: K (: np.array) if the memory saving is not active. None type otherwise. """ if not MEMORY_SAVING: print("\n Computing Temporal correlation matrix K ...") K = np.dot(D.T, D) print("\n Done.") else: SAVE_K = True print("\n Using Memory Saving feature...") K = np.zeros((N_T, N_T)) dim_col = math.floor(N_T / N_PARTITIONS) if N_T % N_PARTITIONS != 0: tot_blocks_col = N_PARTITIONS + 1 else: tot_blocks_col = N_PARTITIONS for k in tqdm(range(tot_blocks_col)): di = np.load(FOLDER_OUT + f"/data_partitions/di_{k + 1}.npz")['di'] if weights.size != 0: di = np.transpose(np.transpose(di) * np.sqrt(weights)) ind_start = k * dim_col ind_end = ind_start + dim_col if (k == tot_blocks_col - 1) and (N_T - dim_col * N_PARTITIONS > 0): dim_col = N_T - dim_col * N_PARTITIONS ind_end = ind_start + dim_col K[ind_start:ind_end, ind_start:ind_end] = np.dot(di.transpose(), di) block = k + 2 while block <= tot_blocks_col: dj = np.load(FOLDER_OUT + f"/data_partitions/di_{block}.npz")['di'] if weights.size != 0: dj = np.transpose(np.transpose(dj) * np.sqrt(weights)) ind_start_out = (block - 1) * dim_col ind_end_out = ind_start_out + dim_col if (block == tot_blocks_col) and (N_T - dim_col * N_PARTITIONS > 0): dim_col = N_T - dim_col * N_PARTITIONS ind_end_out = ind_start_out + dim_col dj = dj[:, :dim_col] K[ind_start:ind_end, ind_start_out:ind_end_out] = np.dot(di.T, dj) K[ind_start_out:ind_end_out, ind_start:ind_end] = K[ind_start:ind_end, ind_start_out:ind_end_out].T block += 1 dim_col = math.floor(N_T / N_PARTITIONS) if SAVE_K: os.makedirs(FOLDER_OUT + '/correlation_matrix', exist_ok=True) np.savez(FOLDER_OUT + "/correlation_matrix/k_matrix", K=K) return K if not MEMORY_SAVING else None