Lavaan in R with very small values-CodePudding

I am trying to run a lavaan structural equation model to identify change between two time points.

I have the following data set:

structure(list(COG_T1 = c(0.0010333, 0.00105981, 0.00113736, 
0.001108715, 0.00104864, 0.00110772, 0.00109096, 0.00109855, 
0.00104169, 0.00112465, 0.001096525, 0.000985059, 0.001098955, 
0.001069465, 0.00105376, 0.00106878, 0.00110388, 0.00108702, 
0.001162835, 0.001070955, 0.0010971, 0.00111695, 0.001060525, 
0.00108797, 0.00103262, 0.001117605, 0.001061707, 0.001156365, 
0.00104431, 0.00109114, 0.001053765, 0.001045395, 0.00106441, 
0.00108481, 0.0011145, 0.001095115, 0.001099075, 0.001057, 0.001096125, 
0.00109696, 0.001064795, 0.00108024, 0.00102137, 0.001103185, 
0.00111948, 0.001110965, 0.00103784, 0.00104419, 0.00101302, 
0.00108785, 0.001098765, 0.001052415, 0.0010976, 0.001064385, 
0.001129705, 0.001076575, 0.001049785, 0.00103181, 0.001078155, 
0.001129015, 0.001024814, 0.00109171, 0.001007862, 0.001099885, 
0.00109162, 0.001060665, 0.00106572, 0.00106803, 0.00113409, 
0.001052505, 0.001138575, 0.00108723, 0.001046765, 0.001090765
), COG_T2 = c(0.00106309, 0.00106822, 0.001052205, 0.001106125, 
0.001060125, 0.00106945, 0.001092695, 0.00107696, 0.001063425, 
0.001116725, 0.00105891, 0.001054205, 0.00109295, 0.00109387, 
0.00101807, 0.001066195, 0.001120295, 0.001111565, 0.001088595, 
0.00102183, 0.0010934, 0.00111935, 0.00105371, 0.00108314, 0.0011006, 
0.001079585, 0.001127775, 0.001140825, 0.00106203, 0.001118035, 
0.00103535, 0.00099512, 0.001078955, 0.00108867, 0.0010789, 0.001030445, 
0.00106243, 0.001028545, 0.00108679, 0.00105624, 0.001110145, 
0.00107318, 0.00106523, 0.001103515, 0.00112404, 0.001064455, 
0.001040425, 0.001059305, 0.00106362, 0.001079395, 0.00107183, 
0.0010652, 0.00106983, 0.00111722, 0.00114111, 0.001059649, 0.001029902, 
0.001062825, 0.001102155, 0.001122135, 0.00103623, 0.00108648, 
0.001081035, 0.001110075, 0.001039397, 0.001057715, 0.0010338, 
0.001071455, 0.001072065, 0.001032233, 0.00111996, 0.00106407, 
0.0010693, 0.001104395)), class = "data.frame", row.names = c(NA, 
-74L))

plotted:

library(lavaan)
library(tidyverse)

id <- factor(1:length(DF$COG_T1)) 
plotdattemp <- data.frame(DF$COG_T1, DF$COG_T2, id) #create dataframe with raw scores

plotdattemp %>%
    reshape2::melt(by = "id") %>% 
    ggplot(aes(x= variable, y= value))  
    geom_point()  
    geom_line(aes(group = id))

Model fit:

#Fit the Univariate Latent Change Score model
ULCS_DF<-'

COG_T2 ~ 1*COG_T1     # Fixed regression of COG_T2 on COG_T1
dCOG1 =~ 1*COG_T2     # Fixed regression of dCOG1 on COG_T2
COG_T2 ~ 0*1          # This line constrains the intercept of COG_T2 to 0
COG_T2 ~~ 0*COG_T2    # This fixes the variance of the COG_T2 to 0 

dCOG1 ~ 1             # This estimates the intercept of the change scores 
COG_T1 ~  1           # This estimates the intercept of COG_T1 
dCOG1 ~~  dCOG1       # This estimates the variance of the change scores 
COG_T1 ~~   COG_T1    # This estimates the variance of COG_T1 
dCOG1~COG_T1          # This estimates the self-feedback parameter

'

fitULCS <- lavaan(ULCS_DF, data=DF, estimator='mlr', fixed.x=FALSE, missing='fiml')
summary(ULCS_DF, fit.measures=TRUE, standardized=TRUE, rsquare=TRUE)

But I am getting the following warnings:

Warning messages:
1: In lav_mvnorm_missing_h1_estimate_moments(Y = X[[g]], wt = WT[[g]],  :
  lavaan WARNING:
    The smallest eigenvalue of the EM estimated variance-covariance
    matrix (Sigma) is smaller than 1e-05; this may cause numerical
    instabilities; interpret the results with caution.

Since the values are too small:

How can I work around this? Should I be scaling the data around 0?

CodePudding user response：

To be honest I don't think this will make your final result vary as much as to be consider not useful. Moreover, if you take a look at the warning, which is pretty accurate given the package you are using, it's only talking about 1 engine value over N2 that you are using.

Also, if you take a look at this post:

https://stats.stackexchange.com/questions/219302/singularity-issues-in-gaussian-mixture-model

You will find a good explanation on the second comment of the winning answer. There you understand why the variance/covariance cannot be 0. In you particular case, the value is close to 0 and that is why you are getting a warning.

If you just want to get rid of the warning, one possibility would be scaling and the other normalizing. That is totally up to you. when normalizing I would suggest a wide range, not 0...1 because you can face the same problem again.

Here are some code examples where I loaded your data in a variable called a:

> summary(a)
     COG_T1              COG_T2         
 Min.   :0.0009851   Min.   :0.0009951  
 1st Qu.:0.0010538   1st Qu.:0.0010594  
 Median :0.0010859   Median :0.0010716  
 Mean   :0.0010794   Mean   :0.0010759  
 3rd Qu.:0.0010990   3rd Qu.:0.0010989  
 Max.   :0.0011628   Max.   :0.0011411  
> summary(scale(a))
     COG_T1            COG_T2       
 Min.   :-2.6717   Min.   :-2.6149  
 1st Qu.:-0.7255   1st Qu.:-0.5352  
 Median : 0.1854   Median :-0.1387  
 Mean   : 0.0000   Mean   : 0.0000  
 3rd Qu.: 0.5573   3rd Qu.: 0.7439  
 Max.   : 2.3644   Max.   : 2.1092  
> summary((a-min(a))/(max(a)-min(a)))
     COG_T1           COG_T2       
 Min.   :0.0000   Min.   :0.05659  
 1st Qu.:0.3865   1st Qu.:0.41812  
 Median :0.5673   Median :0.48704  
 Mean   :0.5305   Mean   :0.51115  
 3rd Qu.:0.6412   3rd Qu.:0.64046  
 Max.   :1.0000   Max.   :0.87780  
> summary((a-min(a))/(max(a)-min(a)) 1)
     COG_T1          COG_T2     
 Min.   :1.000   Min.   :1.057  
 1st Qu.:1.386   1st Qu.:1.418  
 Median :1.567   Median :1.487  
 Mean   :1.531   Mean   :1.511  
 3rd Qu.:1.641   3rd Qu.:1.640  
 Max.   :2.000   Max.   :1.878  
>

Edit: As the package got installed fast, here are the results using both normalize and scaling:

> fitULCS <- lavaan(ULCS_DF, data=scale(df), estimator='mlr', fixed.x=FALSE, missing='fiml')
> 
> summary(ULCS_DF, fit.measures=TRUE, standardized=TRUE, rsquare=TRUE)
   Length     Class      Mode 
        1 character character 
> 
> ULCS_DF
[1] "\n\nCOG_T2 ~ 1*COG_T1     # Fixed regression of COG_T2 on COG_T1\ndCOG1 =~ 1*COG_T2     # Fixed regression of dCOG1 on COG_T2\nCOG_T2 ~ 0*1          # This line constrains the intercept of COG_T2 to 0\nCOG_T2 ~~ 0*COG_T2    # This fixes the variance of the COG_T2 to 0 \n\ndCOG1 ~ 1             # This estimates the intercept of the change scores \nCOG_T1 ~  1           # This estimates the intercept of COG_T1 \ndCOG1 ~~  dCOG1       # This estimates the variance of the change scores \nCOG_T1 ~~   COG_T1    # This estimates the variance of COG_T1 \ndCOG1~COG_T1          # This estimates the self-feedback parameter\n\n"
> 
> normalize = function(a,x){
    
    ((a-min(a))/(max(a)-min(a))) x
    
  }
> 
> fitULCS <- lavaan(ULCS_DF, data=normalize(df,1), estimator='mlr', fixed.x=FALSE, missing='fiml')
> 
> summary(ULCS_DF, fit.measures=TRUE, standardized=TRUE, rsquare=TRUE)
   Length     Class      Mode 
        1 character character 
> 
> ULCS_DF
[1] "\n\nCOG_T2 ~ 1*COG_T1     # Fixed regression of COG_T2 on COG_T1\ndCOG1 =~ 1*COG_T2     # Fixed regression of dCOG1 on COG_T2\nCOG_T2 ~ 0*1          # This line constrains the intercept of COG_T2 to 0\nCOG_T2 ~~ 0*COG_T2    # This fixes the variance of the COG_T2 to 0 \n\ndCOG1 ~ 1             # This estimates the intercept of the change scores \nCOG_T1 ~  1           # This estimates the intercept of COG_T1 \ndCOG1 ~~  dCOG1       # This estimates the variance of the change scores \nCOG_T1 ~~   COG_T1    # This estimates the variance of COG_T1 \ndCOG1~COG_T1          # This estimates the self-feedback parameter\n\n"
>

I only got the warning when running it alone, with no pre processing.