Intro

Dataset Creation


# Fix the RNG seed so the simulated data (and the captured output below) reproduce
set.seed(1)

# Simulate a mixed-type dataset: k = 2 categorical and d = 2 continuous
# variables, m = 2 levels per categorical variable, 9:1 class imbalance
df1 <- generate_dataset(n = 1000, d = 2, k = 2, m = 2, imbalance_ratio = 9)
#> Class 0 Probability Matrix - Variable 1 
#>  0.4 0.6 
#> Class 0 Probability Matrix - Variable 2 
#>  0.55 0.45 
#> Class 1 Probability Matrix - Variable 1 
#>  0.6 0.4 
#> Class 1 Probability Matrix - Variable 2 
#>  0.45 0.55

# First rows: Cat_* are categorical codes, Cont_* continuous, Class the label
head(df1)
#>   Cat_1 Cat_2    Cont_1    Cont_2 Class
#> 1     2     1 3.3878249 0.7327539     0
#> 2     2     2 1.2620063 1.7249162     0
#> 3     2     2 1.1761498 0.4765681     0
#> 4     2     1 2.8701652 3.4538120     0
#> 5     1     1 0.7027487 1.9035630     0
#> 6     2     2 2.7650393 0.4951356     0

# Convert the categorical codes and the class label to factors
df1 <- df1 %>%
      mutate(across(starts_with("Cat_"), as.factor)) %>%
      mutate(Class = as.factor(Class))

# Summary confirms the 900/100 class imbalance
summary(df1)
#>  Cat_1   Cat_2       Cont_1            Cont_2        Class  
#>  1:413   1:516   Min.   :-4.6165   Min.   :-4.5754   0:900  
#>  2:587   2:484   1st Qu.: 0.3354   1st Qu.: 0.3036   1:100  
#>                  Median : 1.3729   Median : 1.2975          
#>                  Mean   : 1.2358   Mean   : 1.2017          
#>                  3rd Qu.: 2.2624   3rd Qu.: 2.2296          
#>                  Max.   : 5.4844   Max.   : 6.1666

# Scatter of the two continuous features, colored by class
# (title suggests the two class means are separated)
ggplot(df1, aes(x = Cont_1, y = Cont_2, color = factor(Class))) +
  geom_point(alpha = 0.6) +
  scale_color_manual(values = c("0" = "blue", "1" = "red")) +
  labs(title = "Separated Means",
       x = "X1", y = "X2") +
  theme_minimal() +
  theme(legend.position = "none")


# Second simulated dataset: radial-contamination variant of the generator
df2 <- generate_dataset_new(n = 1000, d = 2, k = 2, m = 2, imbalance_ratio = 9)

# Scatter of the two continuous features, colored by class,
# built stepwise and then printed
p_radial <- ggplot(df2, aes(x = Cont_1, y = Cont_2, color = factor(Class)))
p_radial <- p_radial + geom_point(alpha = 0.6)
p_radial <- p_radial + scale_color_manual(values = c("0" = "blue", "1" = "red"))
p_radial <- p_radial + labs(title = "Radial Contamination", x = "X1", y = "X2")
p_radial <- p_radial + theme_minimal() + theme(legend.position = "none")
p_radial

REF Comparison


# Simulation parameters: d continuous variables, k categorical variables,
# m levels per categorical variable, n observations.
# (Rewritten from `d=3; k=3; m=6; n=500;` — idiomatic `<-` assignment,
# one statement per line, no semicolons.)
d <- 3
k <- 3
m <- 6
n <- 500

df <- generate_dataset(n = 500, d = 3, k = 3, m = 6, imbalance_ratio = 9)
#> Class 0 Probability Matrix - Variable 1 
#>  0.4 0.12 0.12 0.12 0.12 0.12 
#> Class 0 Probability Matrix - Variable 2 
#>  0.475 0.105 0.105 0.105 0.105 0.105 
#> Class 0 Probability Matrix - Variable 3 
#>  0.55 0.09 0.09 0.09 0.09 0.09 
#> Class 1 Probability Matrix - Variable 1 
#>  0.12 0.12 0.12 0.12 0.12 0.4 
#> Class 1 Probability Matrix - Variable 2 
#>  0.105 0.105 0.105 0.105 0.105 0.475 
#> Class 1 Probability Matrix - Variable 3 
#>  0.09 0.09 0.09 0.09 0.09 0.55

# Convert categorical columns and the class label to factors
df <- df %>%
      mutate(across(starts_with("Cat_"), as.factor)) %>%
      mutate(Class = as.factor(Class))

# Split features: columns 1..k are categorical, (k+1)..(d+k) continuous
Xcat <- df[,1:k]
Xcont <- df[,(k+1):(d+k)]

# Column-name bookkeeping reused throughout the analyses below
numerical_vars <- colnames(df)[grepl("Cont_", colnames(df))]
categorical_vars <- colnames(df)[grepl("Cat_", colnames(df))]
categorical_vars <- c(categorical_vars, "Class")
categorical_vars_no_class <- setdiff(categorical_vars, "Class")

# Binning 1st numerical variable under the two available schemes
bins_eq <- generate_binned_cont(Xcont, bins = 20, bin_method = "equalwidth")[, 1]
bins_ww <- generate_binned_cont(Xcont, bins = 20, bin_method = "weighted")[, 1]

# Bin-occupancy plot for the equal-width scheme
ggplot(data.frame(bin = bins_eq), aes(x = factor(bin))) +
  geom_bar(fill = "steelblue", alpha = 0.7, color = "white") +
  labs(title = "Equalwidth binning", x = "Bin number (1–20)",
    y = "Count") + theme_minimal()

# Bin-occupancy plot for the weighted scheme
ggplot(data.frame(bin = bins_ww), aes(x = factor(bin))) +
  geom_bar(fill = "steelblue", alpha = 0.7, color = "white") +
  labs(title = "Weighted binning", x = "Bin number (1–20)",
    y = "Count") + theme_minimal()

Reference element - numerical


# Candidate numerical reference elements: three location estimates of Xcont

# Mean (non-robust)
colMeans(Xcont) 
#>   Cont_1   Cont_2   Cont_3 
#> 1.242398 1.220158 1.216711

# Coordinatewise median (robust per coordinate)
matrixStats::colMedians(as.matrix(Xcont)) 
#>   Cont_1   Cont_2   Cont_3 
#> 1.464475 1.384288 1.362885

# FastMCD robust location; @center is the MCD location estimate
CovMcd(Xcont)@center 
#>   Cont_1   Cont_2   Cont_3 
#> 1.546480 1.543070 1.514196

Reference element - categorical


# Candidate categorical reference elements

# Shared Mode: the single most frequent combination of the categorical columns
# (count combinations, sort descending, keep the top row, drop the count)
df %>% dplyr::count(dplyr::across(all_of(categorical_vars_no_class)), 
        sort = TRUE) %>% dplyr::slice(1) %>% dplyr::select(-n) 
#>   Cat_1 Cat_2 Cat_3
#> 1     1     1     1

# MCA: embed the categorical data, locate a robust centroid in the embedding,
# and report the observation closest to it.
# Drop constant columns first (MCA needs at least two observed levels) ...
Xcat_clean <- Xcat[, sapply(Xcat, function(x) length(unique(x)) > 1), drop = FALSE]
# ... and duplicated columns as well
Xcat_clean <- Xcat_clean[, !duplicated(as.list(Xcat_clean)), drop = FALSE]
mca_result <- FactoMineR::MCA(Xcat_clean, graph = FALSE)
mca_coord <- mca_result$ind$coord
# Coordinatewise median of the MCA coordinates serves as the centroid
centroid <- matrixStats::colMedians(mca_coord)
# Observation with the smallest squared Euclidean distance to the centroid
idx <- which.min(apply(mca_coord, 1, function(row) sum((row - centroid)^2)))
df[idx, categorical_vars_no_class]
#>    Cat_1 Cat_2 Cat_3
#> 58     4     2     1

Reference element using Gower’s distance


# Mixed reference element: modal categorical combination + MCD robust location
reference_cat <- df %>% dplyr::count(dplyr::across(all_of(categorical_vars_no_class)), 
        sort = TRUE) %>% dplyr::slice(1) %>% dplyr::select(-n) 

reference_num <- CovMcd(Xcont)@center 

# Assemble the reference observation as a one-row tibble with the same
# column names as df (Class excluded)
reference <- tibble(
      !!!setNames(reference_cat[1,], categorical_vars_no_class),
      !!!setNames(reference_num, numerical_vars))

reference
#> # A tibble: 1 × 6
#>   Cat_1 Cat_2 Cat_3 Cont_1 Cont_2 Cont_3
#>   <fct> <fct> <fct>  <dbl>  <dbl>  <dbl>
#> 1 1     1     1       1.56   1.54   1.51
    
# Align factor levels so rbind() below does not drop or reorder levels
for (col in categorical_vars_no_class) {
  reference[[col]] <- factor(reference[[col]], levels = levels(df[[col]]))
  df[[col]] <- factor(df[[col]], levels = levels(df[[col]]))}
    
# Append the reference as the last row of the feature data
df_with_ref <- rbind(df[, !names(df) %in% "Class"], reference)  

nrow(df_with_ref)
#> [1] 501

# Pairwise Gower distances over the mixed data (kmed::distmix).
# (The redundant `gower_mat <- NULL` reset before this assignment was removed.)
gower_mat <- as.matrix(distmix(df_with_ref, method = "gower", idnum = (k+1):(d+k), idcat = 1:k))

gower_mat[(1:5),(1:5)]
#>            [,1]       [,2]      [,3]      [,4]      [,5]
#> [1,] 0.00000000 0.08436171 0.4568067 0.5915004 0.2726134
#> [2,] 0.08436171 0.00000000 0.4027517 0.6023634 0.2055320
#> [3,] 0.45680665 0.40275175 0.0000000 0.4050554 0.5305531
#> [4,] 0.59150038 0.60236340 0.4050554 0.0000000 0.5724279
#> [5,] 0.27261344 0.20553199 0.5305531 0.5724279 0.0000000

# Distance from every observation to the reference (the last row/column)
dist_to_ref <- as.numeric(gower_mat[nrow(gower_mat), 1:(nrow(gower_mat)-1)])

# Transform distances into scores via the project's Farness function;
# these scores are compared against a 0.9 threshold further below
transf_dist <- as.numeric(Farness(dist_to_ref))

unique(round(dist_to_ref, 3))
#>   [1] 0.224 0.213 0.233 0.389 0.382 0.437 0.522 0.393 0.392 0.039 0.356 0.043
#>  [13] 0.184 0.203 0.381 0.570 0.045 0.191 0.204 0.216 0.237 0.559 0.540 0.369
#>  [25] 0.051 0.202 0.251 0.206 0.207 0.248 0.245 0.370 0.071 0.193 0.205 0.352
#>  [37] 0.229 0.398 0.220 0.074 0.037 0.380 0.058 0.404 0.567 0.364 0.241 0.361
#>  [49] 0.077 0.415 0.542 0.067 0.253 0.189 0.358 0.194 0.528 0.390 0.532 0.228
#>  [61] 0.259 0.236 0.420 0.408 0.199 0.256 0.399 0.384 0.395 0.533 0.234 0.616
#>  [73] 0.371 0.223 0.421 0.593 0.367 0.422 0.553 0.377 0.036 0.198 0.405 0.410
#>  [85] 0.195 0.372 0.379 0.530 0.425 0.186 0.544 0.030 0.232 0.235 0.448 0.378
#>  [97] 0.432 0.536 0.353 0.359 0.078 0.563 0.374 0.383 0.547 0.386 0.252 0.373
#> [109] 0.564 0.250 0.286 0.376 0.394 0.401 0.054 0.230 0.428 0.391 0.385 0.409
#> [121] 0.365 0.212 0.443 0.197 0.066 0.591 0.345 0.178 0.187 0.050 0.217 0.549
#> [133] 0.360 0.219 0.035 0.215 0.025 0.267 0.210 0.357 0.366 0.431 0.041 0.214
#> [145] 0.059 0.242 0.560 0.222 0.586 0.218 0.551 0.192 0.362 0.584 0.200 0.403
#> [157] 0.093 0.363 0.063 0.550 0.231 0.402 0.029 0.531 0.026 0.344 0.368 0.575
#> [169] 0.239 0.243 0.227 0.038 0.585 0.240 0.180 0.085 0.534 0.062 0.024 0.057
#> [181] 0.188 0.350 0.070 0.355 0.434 0.211 0.539 0.416 0.396 0.274 0.049 0.264
#> [193] 0.388 0.554 0.034 0.592 0.244 0.565 0.400 0.342 0.221 0.076 0.181 0.255
#> [205] 0.069 0.174 0.055 0.065 0.419 0.552 0.450 0.407 0.576 0.375 0.173 0.011
#> [217] 0.546 0.075 0.084 0.346 0.027 0.028 0.548 0.427 0.479 0.658 0.719 0.684
#> [229] 0.436 0.538 0.621 0.442 0.606 0.655 0.652 0.635 0.701 0.699 0.651 0.747
#> [241] 0.466 0.706 0.663 0.685 0.667 0.622 0.670 0.626 0.518 0.751 0.512 0.619
#> [253] 0.458 0.681 0.615 0.666 0.704 0.633 0.683 0.647 0.662 0.505 0.452 0.624
#> [265] 0.648 0.677 0.714 0.599

# The contribution from categorical variables jumps in discrete steps, 
# and the numeric contributions are often scaled small (since they’re divided
# by range and then averaged). The jumps arise from the similarity calculation 
# for categorical variables. Each band corresponds to a different number of 
# categories matching the reference across the three categorical features: 
# matching categories contribute zero to the Gower distance, so the first 
# region (lowest distances) holds observations that match the reference in 
# all three features, the next band those with two matches, the third band 
# one match, and the rightmost band the observations matching in none.

# Raw Gower distance vs its Farness transform
plot_data <- data.frame(dist_to_ref  = dist_to_ref, transf_dist = transf_dist)

ggplot(plot_data, aes(x = dist_to_ref, y = transf_dist)) +
  geom_point(color = "steelblue", alpha = 0.7, size = 2) +
  labs(title = "Scatter plot",
       x = "Distance to reference", y = "Transformed distance (Farness)") +
  theme_minimal()


# Flag observations whose transformed distance exceeds the threshold
threshold <- 0.9
# Ground truth: the minority class (Class == 1) are the planted outliers.
# (Replaces the two-step rep(0, n) + [which(...)] <- 1 construction with the
# equivalent vectorized form; the same `df$Class == 1` comparison is used.)
outlier_labels <- as.integer(df$Class == 1)
predicted_outliers <- ifelse(transf_dist > threshold, 1, 0)

which(predicted_outliers == 1)
#>  [1] 453 454 463 464 466 468 470 471 473 476 482 485 487 489 496 497 499

# Confusion-matrix counts (positive class = outlier)
TP <- sum(outlier_labels == 1 & predicted_outliers == 1)
TN <- sum(outlier_labels == 0 & predicted_outliers == 0)
FP <- sum(outlier_labels == 0 & predicted_outliers == 1)
FN <- sum(outlier_labels == 1 & predicted_outliers == 0)

cat("TP:", TP, "; TN:", TN, "; FP:", FP, "; FN:", FN,  "\n")
#> TP: 17 ; TN: 450 ; FP: 0 ; FN: 33

# Evaluation Metrics (epsilon guards against division by zero)
epsilon <- 1e-8
recall_1 <- TP / (TP + FN + epsilon)
recall_0 <- TN / (TN + FP + epsilon)
precision_1 <- TP / (TP + FP + epsilon)
precision_0 <- TN / (TN + FN + epsilon)

cat("Re(1):", recall_1, "; Re(0):", recall_0, "; Pr(1):", precision_1, "; Pr(0):", precision_0, "\n")
#> Re(1): 0.34 ; Re(0): 1 ; Pr(1): 1 ; Pr(0): 0.931677

f1_score <- (2 * recall_1 * precision_1) / (recall_1 + precision_1 + epsilon)

# F1 Score in REF + Gower
f1_score 
#> [1] 0.5074627

Two-Stage Quantile Distance using Gower’s distance


# Two-Stage Quantile Distance: no reference element; each observation is
# scored by the 0.9-quantile of its Gower distances to all observations.
# (The redundant `gower_mat <- NULL` reset before this assignment was removed.)
gower_mat <- as.matrix(distmix(df[,1:(d+k)], method = "gower", idnum = (k+1):(d+k), idcat = 1:k))

gower_mat[(1:5),(1:5)]
#>            [,1]       [,2]      [,3]      [,4]      [,5]
#> [1,] 0.00000000 0.08436171 0.4568067 0.5915004 0.2726134
#> [2,] 0.08436171 0.00000000 0.4027517 0.6023634 0.2055320
#> [3,] 0.45680665 0.40275175 0.0000000 0.4050554 0.5305531
#> [4,] 0.59150038 0.60236340 0.4050554 0.0000000 0.5724279
#> [5,] 0.27261344 0.20553199 0.5305531 0.5724279 0.0000000

# Row-wise 0.9-quantile of the distance matrix
vec <- apply(gower_mat, 1, quantile, probs = 0.9)

transf_dist <- as.numeric(Farness(vec))

# Same thresholding and evaluation protocol as the REF + Gower section
threshold <- 0.9
# Ground truth: the minority class (Class == 1) are the planted outliers
outlier_labels <- as.integer(df$Class == 1)
predicted_outliers <- ifelse(transf_dist > threshold, 1, 0)

which(predicted_outliers == 1)
#>  [1]   6  29  90  94  96 156 168 184 205 225 336 349 395 451 452 453 454 456 457
#> [20] 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 478
#> [39] 479 480 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 499
#> [58] 500

# Confusion-matrix counts (positive class = outlier)
TP <- sum(outlier_labels == 1 & predicted_outliers == 1)
TN <- sum(outlier_labels == 0 & predicted_outliers == 0)
FP <- sum(outlier_labels == 0 & predicted_outliers == 1)
FN <- sum(outlier_labels == 1 & predicted_outliers == 0)

cat("TP:", TP, "; TN:", TN, "; FP:", FP, "; FN:", FN,  "\n")
#> TP: 45 ; TN: 437 ; FP: 13 ; FN: 5

# Evaluation Metrics (epsilon guards against division by zero)
epsilon <- 1e-8
recall_1 <- TP / (TP + FN + epsilon)
recall_0 <- TN / (TN + FP + epsilon)
precision_1 <- TP / (TP + FP + epsilon)
precision_0 <- TN / (TN + FN + epsilon)

cat("Re(1):", recall_1, "; Re(0):", recall_0, "; Pr(1):", precision_1, "; Pr(0):", precision_0, "\n")
#> Re(1): 0.9 ; Re(0): 0.9711111 ; Pr(1): 0.7758621 ; Pr(0): 0.9886878

f1_score <- (2 * recall_1 * precision_1) / (recall_1 + precision_1 + epsilon)

# F1 Score in TSQD + Gower
f1_score 
#> [1] 0.8333333

Ahmad Distance Computation


# Ahmad-style mixed distance (project function Ahmad.aux): continuous block
# normalized ("Norm"), continuous variables discretized into 20 weighted bins
# for the categorical part; js = FALSE, only.categ = FALSE.
# NOTE(review): exact semantics of type/js come from the project's Ahmad.aux —
# confirm against its definition.
aux_cond <- Ahmad.aux(x.cont = df[, numerical_vars], x.cat = df[, categorical_vars_no_class],
        type = "Norm", bins = 20, only.categ = FALSE, js = FALSE, bin_method = "weighted")

# Top-left corner of the resulting combined distance matrix
aux_cond$dist[(1:5),(1:5)]
#>          1         2        3        4         5
#> 1 0.000000 1.3239268 1.662908 1.317760 1.4235122
#> 2 1.323927 0.0000000 1.206093 1.669250 0.8747004
#> 3 1.662908 1.2060925 0.000000 1.336262 0.9317920
#> 4 1.317760 1.6692504 1.336262 0.000000 1.4344361
#> 5 1.423512 0.8747004 0.931792 1.434436 0.0000000

# Variant with a Mahalanobis-type component, method_mah = "rb".
# NOTE(review): "rb" presumably selects a robust Mahalanobis estimator —
# verify against AhmadMah.aux.
aux_rb <- AhmadMah.aux(x.cont = df[, numerical_vars], x.cat = df[, categorical_vars_no_class],
        type = "Norm", bins = 20, only.categ = FALSE, js = FALSE, bin_method = "weighted", method_mah = "rb")

aux_rb$dist[(1:5),(1:5)]
#>          [,1]     [,2]     [,3]     [,4]     [,5]
#> [1,] 0.000000 2.553087 3.345772 2.601286 2.863939
#> [2,] 2.553087 0.000000 2.125060 3.412934 1.310440
#> [3,] 3.345772 2.125060 0.000000 2.567532 1.290024
#> [4,] 2.601286 3.412934 2.567532 0.000000 2.827411
#> [5,] 2.863939 1.310440 1.290024 2.827411 0.000000

Farness Embedding


# Numerical distance matrix - aux_cond$cont

# Categorical distance matrix - aux_cond$cat

# Transform the upper-triangular pairwise distances of each block separately
upper_cont <- aux_cond$cont[upper.tri(aux_cond$cont)]
new_cont <- Farness_NP(upper_cont)
upper_cat <- aux_cond$cat[upper.tri(aux_cond$cat)]
new_cat <- Farness_NP(upper_cat)
  
# Robust joint location/scatter of the (continuous, categorical) score pairs
XY <- cbind(new_cont, new_cat)
mcd <- CovMcd(XY)
robust_mean <- mcd@center  
robust_mean
#>           X           X 
#> -0.01842408  0.01450700
robust_cov  <- mcd@cov 
robust_cov  
#>           X         X
#> X 0.9041671 0.4318279
#> X 0.4318279 0.9446106

# Standardize the combined score using the robust moments of the sum:
# Var(X + Y) = Var(X) + Var(Y) + 2 Cov(X, Y).
# (Fixed: reuse `sums` instead of recomputing new_cont + new_cat, which had
# left `sums` unused.)
sums <- new_cont + new_cat
sums_std <- (sums - sum(robust_mean)) / 
  sqrt(robust_cov[1,1] + robust_cov[2,2] + 2 * robust_cov[1,2])
# Note: this rebinds n (previously 500 from the parameter block) to nrow(df),
# which is the same value here
n <- nrow(df)
# Rebuild a symmetric n x n matrix from the standardized upper triangle
S <- matrix(0, n, n)
S[upper.tri(S)] <- sums_std
S[lower.tri(S)] <- t(S)[lower.tri(S)]

S[1:5,1:5]
#>             [,1]        [,2]       [,3]        [,4]       [,5]
#> [1,]  0.00000000 -0.00540924  0.7015131 -0.06965326 -0.2374443
#> [2,] -0.00540924  0.00000000  0.4054226  0.77099641 -0.7782189
#> [3,]  0.70151308  0.40542261  0.0000000  0.19950242 -0.2992298
#> [4,] -0.06965326  0.77099641  0.1995024  0.00000000  0.3505706
#> [5,] -0.23744434 -0.77821890 -0.2992298  0.35057063  0.0000000

# Per-observation score: 0.9-quantile of its row of standardized distances
scores <- apply(S, 1, quantile, probs = 0.9)

# Final outlyingness scores via the project's Farness transform
final_scores <- Farness(scores)
final_scores
#>   [1] 6.369758e-01 4.119090e-01 1.325534e-01 1.889720e-01 2.956840e-02
#>   [6] 6.381277e-01 7.143737e-01 8.494824e-01 1.651260e-02 3.536760e-01
#>  [11] 2.299649e-01 1.488937e-01 6.218252e-03 3.015043e-01 6.547641e-01
#>  [16] 1.273752e-01 2.271977e-01 4.786162e-01 9.560509e-01 5.742226e-01
#>  [21] 4.828428e-01 2.244053e-01 3.641441e-01 3.270864e-01 3.018717e-01
#>  [26] 2.234267e-01 6.767401e-01 5.297044e-01 6.647153e-01 6.313561e-01
#>  [31] 4.702629e-01 7.055533e-01 7.410210e-01 6.126785e-01 6.432880e-01
#>  [36] 1.629379e-01 4.370591e-02 2.984982e-01 5.361725e-01 8.298171e-01
#>  [41] 7.487553e-01 2.818171e-01 5.006381e-02 1.553411e-02 7.291627e-02
#>  [46] 3.620664e-01 3.958172e-01 2.516979e-01 5.590788e-01 2.511129e-01
#>  [51] 3.448541e-01 2.865932e-01 7.818390e-01 8.259872e-01 1.344183e-01
#>  [56] 4.849833e-01 3.052245e-01 5.137513e-01 2.598031e-01 6.482970e-01
#>  [61] 2.087110e-02 4.663306e-01 2.912114e-02 2.084181e-01 5.132469e-01
#>  [66] 8.074075e-02 2.721405e-02 5.732095e-01 3.592341e-01 8.056576e-01
#>  [71] 5.141579e-01 3.699146e-01 4.125124e-01 3.044475e-01 3.731981e-01
#>  [76] 7.546299e-01 7.405278e-01 6.854579e-02 7.780470e-01 7.589554e-02
#>  [81] 7.756357e-01 6.939149e-02 9.382646e-03 5.164441e-01 7.373153e-01
#>  [86] 1.358435e-01 3.211296e-01 1.161427e-01 6.543899e-03 9.783366e-01
#>  [91] 6.113196e-01 4.123285e-01 5.016567e-01 6.566962e-01 5.438351e-01
#>  [96] 8.565420e-01 5.560384e-01 8.303353e-01 5.395717e-01 7.988648e-02
#> [101] 1.795891e-01 2.944749e-01 8.894465e-01 1.075013e-01 3.922969e-01
#> [106] 7.226973e-02 4.956242e-01 3.104840e-01 1.084932e-01 4.303329e-01
#> [111] 7.123021e-01 6.750143e-02 3.386561e-01 4.070278e-01 9.793970e-01
#> [116] 2.198750e-01 1.276059e-01 5.982495e-01 7.139399e-01 7.063950e-01
#> [121] 2.252354e-01 6.416994e-01 3.967614e-01 5.034115e-01 7.293179e-01
#> [126] 3.635287e-01 3.523776e-02 9.131566e-01 5.389539e-01 7.557470e-01
#> [131] 5.006430e-01 4.748117e-01 2.605015e-01 4.252117e-01 5.104025e-01
#> [136] 1.208033e-01 7.585656e-02 4.469351e-01 8.000777e-02 1.162750e-01
#> [141] 1.356874e-01 3.379156e-01 2.666645e-01 7.803860e-02 8.564139e-01
#> [146] 6.130442e-01 6.619171e-01 4.673016e-01 5.564516e-01 8.974883e-01
#> [151] 3.901505e-01 1.062467e-01 2.924375e-01 5.624779e-01 6.996568e-01
#> [156] 7.766238e-01 8.594705e-01 4.509150e-01 3.191634e-01 6.917868e-01
#> [161] 3.216086e-01 4.229757e-01 6.646739e-02 3.733675e-01 9.304611e-02
#> [166] 9.040421e-02 7.198236e-01 7.754725e-01 8.292059e-01 3.320380e-02
#> [171] 7.935078e-01 5.293065e-01 2.045456e-01 7.191250e-01 4.623472e-02
#> [176] 5.903019e-03 7.818167e-01 9.502713e-01 4.060814e-01 2.903636e-01
#> [181] 7.050240e-01 6.896401e-01 2.002280e-01 4.486354e-01 5.618199e-01
#> [186] 1.468660e-03 8.128046e-01 8.067122e-01 3.510491e-01 3.459193e-02
#> [191] 7.218922e-01 3.767276e-01 3.192651e-01 9.460530e-01 5.010405e-01
#> [196] 6.604427e-01 7.089317e-01 2.106706e-01 5.403360e-01 4.013092e-01
#> [201] 2.989627e-01 1.240055e-01 7.710331e-02 8.622627e-01 7.872082e-01
#> [206] 3.667917e-01 2.798596e-01 6.039133e-01 6.814409e-01 6.397877e-01
#> [211] 3.377575e-01 7.047463e-01 3.202743e-01 2.145371e-01 3.334817e-01
#> [216] 7.186989e-01 1.821458e-01 8.252875e-01 7.777809e-01 3.264673e-01
#> [221] 7.240626e-01 4.744284e-01 6.889586e-01 5.477230e-02 7.511109e-01
#> [226] 3.496625e-02 1.008605e-01 5.471337e-04 5.434090e-01 5.095173e-01
#> [231] 8.005203e-01 6.289372e-01 7.611033e-01 5.667411e-01 4.351540e-02
#> [236] 1.353902e-02 5.850516e-01 6.838438e-01 1.964308e-01 6.168852e-01
#> [241] 6.240477e-01 3.108208e-01 8.127437e-01 8.174497e-02 9.386451e-01
#> [246] 2.355540e-01 2.131777e-01 7.974289e-01 8.903903e-01 2.734527e-01
#> [251] 2.693995e-01 5.287146e-02 1.623817e-01 3.814624e-01 5.541644e-01
#> [256] 2.419472e-01 7.593824e-01 2.364633e-01 6.841991e-01 5.951785e-01
#> [261] 5.122062e-01 7.018313e-01 5.923530e-02 5.692973e-02 5.786954e-01
#> [266] 3.796186e-01 3.999887e-02 3.562192e-01 6.155743e-01 2.364875e-01
#> [271] 6.331659e-01 7.169508e-01 4.803791e-01 4.165553e-01 5.866215e-01
#> [276] 2.407379e-01 6.314995e-01 6.364534e-01 8.183705e-02 7.053418e-01
#> [281] 7.857908e-01 4.526960e-01 5.524198e-01 2.713118e-01 8.092322e-01
#> [286] 4.453622e-02 7.357509e-01 4.586200e-02 4.639242e-01 4.405020e-01
#> [291] 7.122707e-01 3.844322e-01 7.202286e-01 7.377262e-01 1.362973e-01
#> [296] 2.577239e-02 2.039100e-01 6.537367e-01 5.757452e-01 3.864655e-01
#> [301] 6.556312e-01 9.280448e-01 7.231620e-01 7.649510e-02 7.525962e-01
#> [306] 5.542651e-01 1.528054e-02 9.351787e-01 1.891390e-03 7.938440e-01
#> [311] 4.475273e-01 3.859883e-01 1.454262e-01 6.922620e-01 4.428968e-01
#> [316] 6.295268e-02 5.430263e-01 2.456809e-02 1.715679e-05 5.848230e-01
#> [321] 5.287603e-01 3.519802e-01 7.356968e-01 8.542728e-01 5.545230e-01
#> [326] 1.711211e-02 4.144309e-01 3.050136e-02 3.589634e-01 6.440208e-01
#> [331] 4.484842e-01 5.946442e-01 8.481559e-01 6.333348e-01 3.976965e-02
#> [336] 7.437874e-01 2.286629e-03 1.813947e-02 2.314014e-01 7.036220e-01
#> [341] 5.607053e-02 1.967423e-01 3.665559e-01 2.749579e-01 6.128811e-01
#> [346] 8.952244e-01 5.566815e-01 6.365456e-02 3.693868e-01 5.304886e-02
#> [351] 4.553183e-01 1.984471e-01 5.072614e-01 7.612842e-01 5.065274e-01
#> [356] 1.627993e-01 3.544722e-01 2.033665e-01 8.279904e-01 1.549579e-01
#> [361] 6.076003e-01 6.548617e-02 2.525019e-01 4.842540e-01 7.669538e-01
#> [366] 4.290056e-01 1.346805e-01 5.067183e-01 6.519573e-01 2.604802e-01
#> [371] 5.741894e-01 7.358380e-01 9.172774e-01 4.028479e-02 4.957611e-01
#> [376] 8.637460e-01 8.765795e-01 5.813622e-01 2.004861e-02 6.419175e-02
#> [381] 2.396379e-01 2.239955e-01 7.591780e-02 5.663380e-01 2.832897e-01
#> [386] 7.407755e-01 2.048542e-02 5.290604e-01 3.170950e-01 1.270165e-01
#> [391] 6.107486e-01 3.094804e-01 7.894840e-01 7.649650e-01 9.279567e-01
#> [396] 3.930064e-01 5.958103e-02 6.361833e-01 7.316335e-01 7.832605e-02
#> [401] 1.416399e-02 6.842289e-02 8.223080e-02 1.714922e-01 1.836215e-01
#> [406] 3.625497e-01 2.470110e-01 5.783308e-01 5.059663e-01 1.615694e-01
#> [411] 4.741622e-01 6.516043e-01 5.478485e-01 7.944194e-01 9.382833e-01
#> [416] 5.170594e-01 2.986591e-01 2.893781e-01 2.189790e-01 7.568088e-01
#> [421] 7.044278e-01 5.266463e-01 9.581733e-01 3.943012e-01 3.683441e-01
#> [426] 1.510536e-01 5.206964e-02 7.996138e-01 7.641532e-01 1.021919e-03
#> [431] 2.149568e-01 8.499773e-01 4.993570e-01 1.270302e-02 4.094585e-01
#> [436] 4.154965e-02 2.418469e-01 8.442189e-01 5.521189e-01 3.885479e-01
#> [441] 8.981606e-01 3.217021e-01 5.401805e-01 3.075803e-01 2.523967e-01
#> [446] 2.754219e-01 4.419963e-01 6.113514e-01 3.170788e-01 6.823112e-01
#> [451] 9.959681e-01 9.883386e-01 9.999707e-01 9.997448e-01 9.499098e-01
#> [456] 9.997528e-01 9.893326e-01 9.229448e-01 9.338517e-01 9.991961e-01
#> [461] 9.989836e-01 9.986840e-01 9.996869e-01 9.996646e-01 9.995224e-01
#> [466] 9.998095e-01 9.881994e-01 9.998875e-01 9.993525e-01 9.997927e-01
#> [471] 9.994918e-01 9.908886e-01 9.870116e-01 9.832785e-01 9.998029e-01
#> [476] 9.995176e-01 7.727779e-01 9.923872e-01 9.999747e-01 9.826179e-01
#> [481] 8.610434e-01 9.941502e-01 9.966887e-01 9.929836e-01 9.923305e-01
#> [486] 9.995981e-01 9.998049e-01 9.863690e-01 9.997027e-01 9.953120e-01
#> [491] 9.822830e-01 9.991206e-01 9.789516e-01 9.862962e-01 9.933184e-01
#> [496] 9.998376e-01 9.998220e-01 6.589553e-01 9.995412e-01 9.863190e-01

# Flag observations whose final score exceeds 0.9 as predicted outliers
predicted_outliers <- ifelse(final_scores > 0.9, 1, 0)

which(predicted_outliers == 1)
#>  [1]  19  90 115 128 178 194 245 302 308 373 395 415 423 451 452 453 454 455 456
#> [20] 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475
#> [39] 476 478 479 480 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496
#> [58] 497 499 500