install.packages("RSDA", dependencies=TRUE)
devtools::install_github("PROMiDAT/RSDA")
ex3 <- read.sym.table(file = 'tsym1.csv', header = TRUE, sep = ';', dec = '.', row.names = 1)
ex3
#> # A tibble: 7 × 7
#> F1 F2 F3 F4 F5 F6 F7
#> <dbl> <symblc_n> <symbl> <dbl> <symblc_> <symblc_n> <symblc_n>
#> 1 2.8 [1.00 : 2.00] <hist> 6 {a,d} [0.00 : 90.00] [9.00 : 24.00]
#> 2 1.4 [3.00 : 9.00] <hist> 8 {b,c,d} [-90.00 : 98.00] [-9.00 : 9.00]
#> 3 3.2 [-1.00 : 4.00] <hist> -7 {a,b} [65.00 : 90.00] [65.00 : 70.00]
#> 4 -2.1 [0.00 : 2.00] <hist> 0 {a,b,c,d} [45.00 : 89.00] [25.00 : 67.00]
#> 5 -3 [-4.00 : -2.00] <hist> -9.5 {b} [20.00 : 40.00] [9.00 : 40.00]
#> 6 0.1 [10.00 : 21.00] <hist> -1 {a,d} [5.00 : 8.00] [5.00 : 8.00]
#> 7 9 [4.00 : 21.00] <hist> 0.5 {a} [3.14 : 6.76] [4.00 : 6.00]
## How to save a Symbolic Table in a CSV file with RSDA?
write.sym.table(ex3, file = 'tsymtemp.csv', sep = ';', dec = '.',
                row.names = TRUE, col.names = TRUE)
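As a quick round-trip check (a sketch, not part of the original example; ex3.check is just an illustrative name), the file we just wrote can be read back with the same read.sym.table call used above:

ex3.check <- read.sym.table(file = 'tsymtemp.csv', header = TRUE, sep = ';',
                            dec = '.', row.names = 1)
# The re-imported table should have the same shape as ex3: 7 concepts and 7 symbolic variables
dim(ex3.check)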
data(example3)
example3
#> # A tibble: 7 × 7
#> F1 F2 F3 F4 F5 F6
#> <dbl> <symblc_n> <symblc_m> <dbl> <symblc_> <symblc_n>
#> 1 2.8 [1.00 : 2.00] M1:0.10 M2:0.70 M3:0.20 6 {e,g,i,k} [0.00 : 90.00]
#> 2 1.4 [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10 8 {a,b,c,d} [-90.00 : 98.00]
#> 3 3.2 [-1.00 : 4.00] M1:0.20 M2:0.20 M3:0.60 -7 {2,b,1,c} [65.00 : 90.00]
#> 4 -2.1 [0.00 : 2.00] M1:0.90 M2:0.00 M3:0.10 0 {a,3,4,c} [45.00 : 89.00]
#> 5 -3 [-4.00 : -2.00] M1:0.60 M2:0.00 M3:0.40 -9.5 {e,g,i,k} [20.00 : 40.00]
#> 6 0.1 [10.00 : 21.00] M1:0.00 M2:0.70 M3:0.30 -1 {e,1,i} [5.00 : 8.00]
#> 7 9 [4.00 : 21.00] M1:0.20 M2:0.20 M3:0.60 0.5 {e,a,2} [3.14 : 6.76]
#> # … with 1 more variable: F7 <symblc_n>
example3[2,]
#> # A tibble: 1 × 7
#> F1 F2 F3 F4 F5 F6
#> <dbl> <symblc_n> <symblc_m> <dbl> <symblc_s> <symblc_n>
#> 1 1.4 [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10 8 {a,b,c,d} [-90.00 : 98.00]
#> # … with 1 more variable: F7 <symblc_n>
example3[,3]
#> # A tibble: 7 × 1
#> F3
#> <symblc_m>
#> 1 M1:0.10 M2:0.70 M3:0.20
#> 2 M1:0.60 M2:0.30 M3:0.10
#> 3 M1:0.20 M2:0.20 M3:0.60
#> 4 M1:0.90 M2:0.00 M3:0.10
#> 5 M1:0.60 M2:0.00 M3:0.40
#> 6 M1:0.00 M2:0.70 M3:0.30
#> 7 M1:0.20 M2:0.20 M3:0.60
example3[2:3,5]
#> # A tibble: 2 × 1
#> F5
#> <symblc_s>
#> 1 {a,b,c,d}
#> 2 {2,b,1,c}
example3$F1
#> [1] 2.8 1.4 3.2 -2.1 -3.0 0.1 9.0
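Individual columns keep their own vector classes, so base R introspection distinguishes a plain numeric column from a symbolic one (a small illustrative check; the `<symblc_*>` abbreviations printed above correspond to these classes):

class(example3$F1)  # an ordinary numeric vector
class(example3$F2)  # a symbolic interval vector, shown as <symblc_n> in the tibble header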
data(ex1_db2so)
ex1_db2so
#> state sex county group age
#> 1 Florida M 2 6 3
#> 2 California F 4 3 4
#> 3 Texas M 12 3 4
#> 4 Florida F 2 3 4
#> 5 Texas M 4 6 4
#> 6 Texas F 2 3 3
#> 7 Florida M 6 3 4
#> 8 Florida F 2 6 4
#> 9 California M 2 3 6
#> 10 California F 21 3 4
#> 11 California M 2 3 4
#> 12 California M 2 6 7
#> 13 Texas F 23 3 4
#> 14 Florida M 2 3 4
#> 15 Florida F 12 7 4
#> 16 Texas M 2 3 8
#> 17 California F 3 7 9
#> 18 California M 2 3 11
#> 19 California M 1 3 11
The classic.to.sym function allows us to convert a traditional table into a symbolic one. To do this we must indicate the following parameters:

- x = a data.frame
- concept = variables to be used as the concept
- variables = variables to be used, compatible with tidyselect options
- default.numeric = function to be used by default for numerical values (sym.interval)
- default.categorical = function to be used by default for categorical values (sym.modal)

result <- classic.to.sym(x = ex1_db2so,
                         concept = c(state, sex),
                         variables = c(county, group, age))
result
#> # A tibble: 6 × 3
#> county group age
#> <symblc_n> <symblc_n> <symblc_n>
#> 1 [3.00 : 21.00] [3.00 : 7.00] [4.00 : 9.00]
#> 2 [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3 [2.00 : 12.00] [3.00 : 7.00] [4.00 : 4.00]
#> 4 [2.00 : 6.00] [3.00 : 6.00] [3.00 : 4.00]
#> 5 [2.00 : 23.00] [3.00 : 3.00] [3.00 : 4.00]
#> 6 [2.00 : 12.00] [3.00 : 6.00] [4.00 : 8.00]
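To see where these intervals come from, here is a hand check in base R (not RSDA code): each cell is simply the min-max range of the classic rows that share the same (state, sex) concept. The concept below is chosen only for illustration; its ranges should match the corresponding row of result.

# Classic rows belonging to one concept, e.g. the Florida / F group
sub <- subset(ex1_db2so, state == "Florida" & sex == "F")
range(sub$county)  # lower and upper bound of the county interval for that concept
range(sub$group)
range(sub$age)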
We can add new variables indicating the type we want them to be.
result <- classic.to.sym(x = ex1_db2so,
                         concept = c("state", "sex"),
                         variables = c(county, group, age),
                         age_hist = sym.histogram(age, breaks = pretty(ex1_db2so$age, 5)))
result
#> # A tibble: 6 × 4
#> age_hist county group age
#> <symblc_h> <symblc_n> <symblc_n> <symblc_n>
#> 1 <hist> [3.00 : 21.00] [3.00 : 7.00] [4.00 : 9.00]
#> 2 <hist> [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3 <hist> [2.00 : 12.00] [3.00 : 7.00] [4.00 : 4.00]
#> 4 <hist> [2.00 : 6.00] [3.00 : 6.00] [3.00 : 4.00]
#> 5 <hist> [2.00 : 23.00] [3.00 : 3.00] [3.00 : 4.00]
#> 6 <hist> [2.00 : 12.00] [3.00 : 6.00] [4.00 : 8.00]
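The bins behind age_hist are exactly the break points returned by pretty(); re-running that call on its own shows them (this repeats the expression already used inside sym.histogram above):

pretty(ex1_db2so$age, 5)  # break points that define the age_hist bins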
data(USCrime)
head(USCrime)
#> state fold population householdsize racepctblack racePctWhite racePctAsian
#> 1 8 1 0.19 0.33 0.02 0.90 0.12
#> 2 53 1 0.00 0.16 0.12 0.74 0.45
#> 3 24 1 0.00 0.42 0.49 0.56 0.17
#> 4 34 1 0.04 0.77 1.00 0.08 0.12
#> 5 42 1 0.01 0.55 0.02 0.95 0.09
#> 6 6 1 0.02 0.28 0.06 0.54 1.00
#> racePctHisp agePct12t21 agePct12t29 agePct16t24 agePct65up numbUrban pctUrban
#> 1 0.17 0.34 0.47 0.29 0.32 0.20 1.0
#> 2 0.07 0.26 0.59 0.35 0.27 0.02 1.0
#> 3 0.04 0.39 0.47 0.28 0.32 0.00 0.0
#> 4 0.10 0.51 0.50 0.34 0.21 0.06 1.0
#> 5 0.05 0.38 0.38 0.23 0.36 0.02 0.9
#> 6 0.25 0.31 0.48 0.27 0.37 0.04 1.0
#> medIncome pctWWage pctWFarmSelf pctWInvInc pctWSocSec pctWPubAsst pctWRetire
#> 1 0.37 0.72 0.34 0.60 0.29 0.15 0.43
#> 2 0.31 0.72 0.11 0.45 0.25 0.29 0.39
#> 3 0.30 0.58 0.19 0.39 0.38 0.40 0.84
#> 4 0.58 0.89 0.21 0.43 0.36 0.20 0.82
#> 5 0.50 0.72 0.16 0.68 0.44 0.11 0.71
#> 6 0.52 0.68 0.20 0.61 0.28 0.15 0.25
#> medFamInc perCapInc whitePerCap blackPerCap indianPerCap AsianPerCap
#> 1 0.39 0.40 0.39 0.32 0.27 0.27
#> 2 0.29 0.37 0.38 0.33 0.16 0.30
#> 3 0.28 0.27 0.29 0.27 0.07 0.29
#> 4 0.51 0.36 0.40 0.39 0.16 0.25
#> 5 0.46 0.43 0.41 0.28 0.00 0.74
#> 6 0.62 0.72 0.76 0.77 0.28 0.52
#> OtherPerCap HispPerCap NumUnderPov PctPopUnderPov PctLess9thGrade
#> 1 0.36 0.41 0.08 0.19 0.10
#> 2 0.22 0.35 0.01 0.24 0.14
#> 3 0.28 0.39 0.01 0.27 0.27
#> 4 0.36 0.44 0.01 0.10 0.09
#> 5 0.51 0.48 0.00 0.06 0.25
#> 6 0.48 0.60 0.01 0.12 0.13
#> PctNotHSGrad PctBSorMore PctUnemployed PctEmploy PctEmplManu PctEmplProfServ
#> 1 0.18 0.48 0.27 0.68 0.23 0.41
#> 2 0.24 0.30 0.27 0.73 0.57 0.15
#> 3 0.43 0.19 0.36 0.58 0.32 0.29
#> 4 0.25 0.31 0.33 0.71 0.36 0.45
#> 5 0.30 0.33 0.12 0.65 0.67 0.38
#> 6 0.12 0.80 0.10 0.65 0.19 0.77
#> PctOccupManu PctOccupMgmtProf MalePctDivorce MalePctNevMarr FemalePctDiv
#> 1 0.25 0.52 0.68 0.40 0.75
#> 2 0.42 0.36 1.00 0.63 0.91
#> 3 0.49 0.32 0.63 0.41 0.71
#> 4 0.37 0.39 0.34 0.45 0.49
#> 5 0.42 0.46 0.22 0.27 0.20
#> 6 0.06 0.91 0.49 0.57 0.61
#> TotalPctDiv PersPerFam PctFam2Par PctKids2Par PctYoungKids2Par PctTeen2Par
#> 1 0.75 0.35 0.55 0.59 0.61 0.56
#> 2 1.00 0.29 0.43 0.47 0.60 0.39
#> 3 0.70 0.45 0.42 0.44 0.43 0.43
#> 4 0.44 0.75 0.65 0.54 0.83 0.65
#> 5 0.21 0.51 0.91 0.91 0.89 0.85
#> 6 0.58 0.44 0.62 0.69 0.87 0.53
#> PctWorkMomYoungKids PctWorkMom NumIlleg PctIlleg NumImmig PctImmigRecent
#> 1 0.74 0.76 0.04 0.14 0.03 0.24
#> 2 0.46 0.53 0.00 0.24 0.01 0.52
#> 3 0.71 0.67 0.01 0.46 0.00 0.07
#> 4 0.85 0.86 0.03 0.33 0.02 0.11
#> 5 0.40 0.60 0.00 0.06 0.00 0.03
#> 6 0.30 0.43 0.00 0.11 0.04 0.30
#> PctImmigRec5 PctImmigRec8 PctImmigRec10 PctRecentImmig PctRecImmig5
#> 1 0.27 0.37 0.39 0.07 0.07
#> 2 0.62 0.64 0.63 0.25 0.27
#> 3 0.06 0.15 0.19 0.02 0.02
#> 4 0.20 0.30 0.31 0.05 0.08
#> 5 0.07 0.20 0.27 0.01 0.02
#> 6 0.35 0.43 0.47 0.50 0.50
#> PctRecImmig8 PctRecImmig10 PctSpeakEnglOnly PctNotSpeakEnglWell
#> 1 0.08 0.08 0.89 0.06
#> 2 0.25 0.23 0.84 0.10
#> 3 0.04 0.05 0.88 0.04
#> 4 0.11 0.11 0.81 0.08
#> 5 0.04 0.05 0.88 0.05
#> 6 0.56 0.57 0.45 0.28
#> PctLargHouseFam PctLargHouseOccup PersPerOccupHous PersPerOwnOccHous
#> 1 0.14 0.13 0.33 0.39
#> 2 0.16 0.10 0.17 0.29
#> 3 0.20 0.20 0.46 0.52
#> 4 0.56 0.62 0.85 0.77
#> 5 0.16 0.19 0.59 0.60
#> 6 0.25 0.19 0.29 0.53
#> PersPerRentOccHous PctPersOwnOccup PctPersDenseHous PctHousLess3BR MedNumBR
#> 1 0.28 0.55 0.09 0.51 0.5
#> 2 0.17 0.26 0.20 0.82 0.0
#> 3 0.43 0.42 0.15 0.51 0.5
#> 4 1.00 0.94 0.12 0.01 0.5
#> 5 0.37 0.89 0.02 0.19 0.5
#> 6 0.18 0.39 0.26 0.73 0.0
#> HousVacant PctHousOccup PctHousOwnOcc PctVacantBoarded PctVacMore6Mos
#> 1 0.21 0.71 0.52 0.05 0.26
#> 2 0.02 0.79 0.24 0.02 0.25
#> 3 0.01 0.86 0.41 0.29 0.30
#> 4 0.01 0.97 0.96 0.60 0.47
#> 5 0.01 0.89 0.87 0.04 0.55
#> 6 0.02 0.84 0.30 0.16 0.28
#> MedYrHousBuilt PctHousNoPhone PctWOFullPlumb OwnOccLowQuart OwnOccMedVal
#> 1 0.65 0.14 0.06 0.22 0.19
#> 2 0.65 0.16 0.00 0.21 0.20
#> 3 0.52 0.47 0.45 0.18 0.17
#> 4 0.52 0.11 0.11 0.24 0.21
#> 5 0.73 0.05 0.14 0.31 0.31
#> 6 0.25 0.02 0.05 0.94 1.00
#> OwnOccHiQuart RentLowQ RentMedian RentHighQ MedRent MedRentPctHousInc
#> 1 0.18 0.36 0.35 0.38 0.34 0.38
#> 2 0.21 0.42 0.38 0.40 0.37 0.29
#> 3 0.16 0.27 0.29 0.27 0.31 0.48
#> 4 0.19 0.75 0.70 0.77 0.89 0.63
#> 5 0.30 0.40 0.36 0.38 0.38 0.22
#> 6 1.00 0.67 0.63 0.68 0.62 0.47
#> MedOwnCostPctInc MedOwnCostPctIncNoMtg NumInShelters NumStreet PctForeignBorn
#> 1 0.46 0.25 0.04 0 0.12
#> 2 0.32 0.18 0.00 0 0.21
#> 3 0.39 0.28 0.00 0 0.14
#> 4 0.51 0.47 0.00 0 0.19
#> 5 0.51 0.21 0.00 0 0.11
#> 6 0.59 0.11 0.00 0 0.70
#> PctBornSameState PctSameHouse85 PctSameCity85 PctSameState85 LandArea PopDens
#> 1 0.42 0.50 0.51 0.64 0.12 0.26
#> 2 0.50 0.34 0.60 0.52 0.02 0.12
#> 3 0.49 0.54 0.67 0.56 0.01 0.21
#> 4 0.30 0.73 0.64 0.65 0.02 0.39
#> 5 0.72 0.64 0.61 0.53 0.04 0.09
#> 6 0.42 0.49 0.73 0.64 0.01 0.58
#> PctUsePubTrans LemasPctOfficDrugUn ViolentCrimesPerPop
#> 1 0.20 0.32 0.20
#> 2 0.45 0.00 0.67
#> 3 0.02 0.00 0.43
#> 4 0.28 0.00 0.12
#> 5 0.02 0.00 0.03
#> 6 0.10 0.00 0.14
result <- classic.to.sym(x = USCrime,
                         concept = state,
                         variables = c(NumInShelters,
                                       NumImmig,
                                       ViolentCrimesPerPop),
                         ViolentCrimesPerPop_hist = sym.histogram(ViolentCrimesPerPop,
                                                                  breaks = pretty(USCrime$ViolentCrimesPerPop, 5)))
result
#> # A tibble: 46 × 4
#> ViolentCrimesPerPop_hist NumInShelters NumImmig ViolentCrimesPerPop
#> <symblc_h> <symblc_n> <symblc_n> <symblc_n>
#> 1 <hist> [0.00 : 0.32] [0.00 : 0.04] [0.01 : 1.00]
#> 2 <hist> [0.01 : 0.18] [0.01 : 0.09] [0.05 : 0.36]
#> 3 <hist> [0.00 : 1.00] [0.00 : 0.57] [0.05 : 0.57]
#> 4 <hist> [0.00 : 0.08] [0.00 : 0.02] [0.02 : 1.00]
#> 5 <hist> [0.00 : 1.00] [0.00 : 1.00] [0.01 : 1.00]
#> 6 <hist> [0.00 : 0.68] [0.00 : 0.23] [0.07 : 0.75]
#> 7 <hist> [0.00 : 0.79] [0.00 : 0.14] [0.00 : 0.94]
#> 8 <hist> [0.01 : 0.01] [0.01 : 0.01] [0.37 : 0.37]
#> 9 <hist> [1.00 : 1.00] [0.39 : 0.39] [1.00 : 1.00]
#> 10 <hist> [0.00 : 0.52] [0.00 : 1.00] [0.06 : 1.00]
#> # … with 36 more rows
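The 46 rows of result correspond to the 46 distinct values of the concept variable, which we can confirm directly on the classic table:

length(unique(USCrime$state))  # 46, one symbolic concept per state code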
data("ex_mcfa1")
head(ex_mcfa1)
#> suspect age hair eyes region
#> 1 1 42 h_red e_brown Bronx
#> 2 2 20 h_black e_green Bronx
#> 3 3 64 h_brown e_brown Brooklyn
#> 4 4 55 h_blonde e_brown Bronx
#> 5 5 4 h_brown e_green Manhattan
#> 6 6 61 h_blonde e_green Bronx
sym.table <- classic.to.sym(x = ex_mcfa1,
                            concept = suspect,
                            variables = c(hair,
                                          eyes,
                                          region),
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 3
#> hair eyes region
#> <symblc_s> <symblc_s> <symblc_s>
#> 1 {h_red} {e_brown,e_black} {Bronx}
#> 2 {h_black,h_blonde} {e_green,e_black} {Bronx,Manhattan}
#> 3 {h_brown,h_white} {e_brown,e_green} {Brooklyn,Queens}
#> 4 {h_blonde} {e_brown,e_black} {Bronx,Manhattan}
#> 5 {h_brown,h_red} {e_green} {Manhattan,Bronx}
#> 6 {h_blonde,h_white} {e_green,e_blue} {Bronx,Queens}
#> 7 {h_white,h_red} {e_black,e_blue} {Queens,Bronx}
#> 8 {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#> 9 {h_blonde,h_white} {e_black,e_brown} {Brooklyn,Bronx}
#> 10 {h_brown,h_black} {e_brown,e_green} {Manhattan,Bronx}
#> # … with 90 more rows
We can also change the function that is applied by default to the categorical variables.

sym.table <- classic.to.sym(x = ex_mcfa1,
                            concept = suspect,
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 4
#> age hair eyes region
#> <symblc_n> <symblc_s> <symblc_s> <symblc_s>
#> 1 [22.00 : 42.00] {h_red} {e_brown,e_black} {Bronx}
#> 2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black} {Bronx,Manhattan}
#> 3 [29.00 : 64.00] {h_brown,h_white} {e_brown,e_green} {Brooklyn,Queens}
#> 4 [14.00 : 55.00] {h_blonde} {e_brown,e_black} {Bronx,Manhattan}
#> 5 [4.00 : 47.00] {h_brown,h_red} {e_green} {Manhattan,Bronx}
#> 6 [32.00 : 61.00] {h_blonde,h_white} {e_green,e_blue} {Bronx,Queens}
#> 7 [49.00 : 61.00] {h_white,h_red} {e_black,e_blue} {Queens,Bronx}
#> 8 [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#> 9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown} {Brooklyn,Bronx}
#> 10 [50.00 : 68.00] {h_brown,h_black} {e_brown,e_green} {Manhattan,Bronx}
#> # … with 90 more rows
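When variables is omitted, every non-concept column is used and the numeric ones fall back to default.numeric, which (as listed earlier) defaults to sym.interval; below is a sketch of the same call with that default spelled out explicitly.

# Equivalent call with the numeric default made explicit (sym.interval is the documented default)
sym.table <- classic.to.sym(x = ex_mcfa1,
                            concept = suspect,
                            default.numeric = sym.interval,
                            default.categorical = sym.set)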
hani3101 <- SDS.to.RSDA(file.path = "hani3101.sds")
#> Preprocessing file
#> Converting data to JSON format
#> Processing variable 1: R3101
#> Processing variable 2: RNINO12
#> Processing variable 3: RNINO3
#> Processing variable 4: RNINO4
#> Processing variable 5: RNINO34
#> Processing variable 6: RSOI
hani3101
#> # A tibble: 32 × 6
#> R3101 RNINO12
#> <symblc_m> <symblc_m>
#> 1 X2:0.21 X4:0.18 X3:0.15 X5:... X1:0.17 X2:0.83 X3:0.00
#> 2 X2:0.30 X4:0.14 X3:0.19 X5:... X1:0.00 X2:0.25 X3:0.75
#> 3 X2:0.16 X4:0.12 X3:0.20 X5:... X1:0.67 X2:0.33 X3:0.00
#> 4 X2:0.13 X4:0.15 X3:0.22 X5:... X1:0.17 X2:0.83 X3:0.00
#> 5 X2:0.14 X4:0.14 X3:0.18 X5:... X1:0.42 X2:0.58 X3:0.00
#> 6 X2:0.26 X4:0.06 X3:0.23 X5:... X1:0.00 X2:0.67 X3:0.33
#> 7 X2:0.28 X4:0.14 X3:0.10 X5:... X1:0.00 X2:1.00 X3:0.00
#> 8 X2:0.25 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#> 9 X2:0.20 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#> 10 X2:0.21 X4:0.16 X3:0.31 X5:... X1:0.08 X2:0.92 X3:0.00
#> # … with 22 more rows, and 4 more variables: RNINO3 <symblc_m>,
#> # RNINO4 <symblc_m>, RNINO34 <symblc_m>, RSOI <symblc_m>
# We can save the table to a CSV file in RSDA format as follows:
write.sym.table(hani3101,
                file = 'hani3101.csv',
                sep = ';',
                dec = '.',
                row.names = TRUE,
                col.names = TRUE)
abalone <- SODAS.to.RSDA("abalone.xml")
#> Processing variable 1: LENGTH
#> Processing variable 2: DIAMETER
#> Processing variable 3: HEIGHT
#> Processing variable 4: WHOLE_WEIGHT
#> Processing variable 5: SHUCKED_WEIGHT
#> Processing variable 6: VISCERA_WEIGHT
#> Processing variable 7: SHELL_WEIGHT
abalone
#> # A tibble: 24 × 7
#> LENGTH DIAMETER HEIGHT WHOLE_WEIGHT SHUCKED_WEIGHT
#> <symblc_n> <symblc_n> <symblc_n> <symblc_n> <symblc_n>
#> 1 [0.28 : 0.66] [0.20 : 0.48] [0.07 : 0.18] [0.08 : 1.37] [0.03 : 0.64]
#> 2 [0.30 : 0.74] [0.22 : 0.58] [0.02 : 1.13] [0.15 : 2.25] [0.06 : 1.16]
#> 3 [0.34 : 0.78] [0.26 : 0.63] [0.06 : 0.23] [0.20 : 2.66] [0.07 : 1.49]
#> 4 [0.39 : 0.82] [0.30 : 0.65] [0.10 : 0.25] [0.26 : 2.51] [0.11 : 1.23]
#> 5 [0.40 : 0.74] [0.32 : 0.60] [0.10 : 0.24] [0.35 : 2.20] [0.12 : 0.84]
#> 6 [0.45 : 0.80] [0.38 : 0.63] [0.14 : 0.22] [0.64 : 2.53] [0.16 : 0.93]
#> 7 [0.49 : 0.72] [0.36 : 0.58] [0.12 : 0.21] [0.68 : 2.12] [0.16 : 0.82]
#> 8 [0.55 : 0.70] [0.46 : 0.58] [0.18 : 0.22] [1.21 : 1.81] [0.32 : 0.71]
#> 9 [0.08 : 0.24] [0.06 : 0.18] [0.01 : 0.06] [0.00 : 0.07] [0.00 : 0.03]
#> 10 [0.13 : 0.58] [0.10 : 0.45] [0.00 : 0.15] [0.01 : 0.89] [0.00 : 0.50]
#> # … with 14 more rows, and 2 more variables: VISCERA_WEIGHT <symblc_n>,
#> # SHELL_WEIGHT <symblc_n>
write.sym.table(abalone,
                file = 'abalone.csv',
                sep = ';',
                dec = '.',
                row.names = TRUE,
                col.names = TRUE)
data(example3)
mean(example3$F1)
#> [1] 1.628571
mean(example3[,1])
#> [1] 1.628571
mean(example3$F2)
#> [1] 5
mean(example3[,2])
#> [1] 5
mean(example3$F2,method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]
mean(example3[,2],method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]
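To see where these two means come from, here is a hand-coded base R check (not the package's internal code): the default method averages the interval midpoints of F2, while method = "interval" averages the lower and upper bounds separately, using the bounds printed in example3 above.

low  <- c(1, 3, -1, 0, -4, 10, 4)    # lower bounds of F2, copied from example3
high <- c(2, 9, 4, 2, -2, 21, 21)    # upper bounds of F2
mean((low + high) / 2)               # 5, matching mean(example3$F2)
c(mean(low), mean(high))             # 1.857143 8.142857, matching the 'interval' mean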
median(example3$F1)
#> [1] 1.4
median(example3[,1])
#> [1] 1.4
median(example3$F2)
#> [1] 1.5
median(example3[,2])
#> [1] 1.5
median(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]
median(example3[,6], method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]
var(example3[,1])
#> [1] 15.98238
var(example3[,2])
#> [1] 90.66667
var(example3$F6)
#> [1] 1872.358
var(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [2,408.97 : 1,670.51]
var(example3$F6, method = 'billard')
#> [1] 1355.143
sd(example3$F1)
#> [1] 3.997797
sd(example3$F2)
#> [1] 6.733003
sd(example3$F6)
#> [1] 30.59704
sd(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [49.08 : 40.87]
sd(example3$F6, method = 'billard')
#> [1] 36.81226
cor(example3$F1, example3$F4)
#> [1] 0.2864553
cor(example3[,1], example3[,4])
#> [,1]
#> [1,] 0.2864553
cor(example3$F2, example3$F6, method = 'centers')
#> [1] -0.6693648
cor(example3$F2, example3$F6, method = 'billard')
#> [1] -0.6020041
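The 'centers' correlation is the classical Pearson correlation of the interval midpoints, which can be checked by hand with the F2 and F6 bounds from example3 (again a base R sketch, not the package's internal code):

low2  <- c(1, 3, -1, 0, -4, 10, 4)        # F2 bounds from example3
high2 <- c(2, 9, 4, 2, -2, 21, 21)
low6  <- c(0, -90, 65, 45, 20, 5, 3.14)   # F6 bounds from example3
high6 <- c(90, 98, 90, 89, 40, 8, 6.76)
cor((low2 + high2) / 2, (low6 + high6) / 2)  # reproduces the 'centers' value above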
library(ggpolypath)
#> Loading required package: ggplot2
data(oils)
oils <- RSDA:::to.v3(RSDA:::to.v2(oils))
sym.radar.plot(oils[2:3,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE
sym.radar.plot(oils[2:5,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE
res <- interval.histogram.plot(oils[,2],
                               n.bins = 4,
                               col = c(2, 3, 4, 5))
res
#> $frequency
#> [1] 25 49 1 25
#>
#> $histogram
#> [,1]
#> [1,] 0.7
#> [2,] 1.9
#> [3,] 3.1
#> [4,] 4.3
res <- interval.histogram.plot(oils[,3],
                               n.bins = 3,
                               main = "Histogram",
                               col = c(2, 3, 4))
res
#> $frequency
#> [1] 50 25 25
#>
#> $histogram
#> [,1]
#> [1,] 0.7
#> [2,] 1.9
#> [3,] 3.1
data("oils")
DM <- sym.dist.interval(sym.data = oils[,1:4],
                        method = "Gowda.Diday")
model <- hclust(DM)
plot(model, hang = -1)

DM <- sym.dist.interval(sym.data = oils[,1:4],
                        method = "Ichino")
model <- hclust(DM)
plot(model, hang = -1)

DM <- sym.dist.interval(sym.data = oils[,c(1,2,4)],
                        gamma = 0.5,
                        method = "Hausdorff",
                        normalize = FALSE,
                        SpanNormalize = TRUE,
                        euclidea = TRUE,
                        q = 2)
model <- hclust(DM)
plot(model, hang = -1)
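Once a hierarchy has been built, cluster memberships can be extracted with the standard cutree() from the stats package, since hclust returns an ordinary base R object; the choice of three groups below is only illustrative.

clusters <- cutree(model, k = 3)  # cut the dendrogram into 3 groups (illustrative choice)
clusters
table(clusters)                   # number of oils in each group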
data(int_prost_train)
data(int_prost_test)
res.cm <- sym.lm(formula = lpsa~., sym.data = int_prost_train, method = 'cm')
res.cm
#>
#> Call:
#> stats::lm(formula = formula, data = centers)
#>
#> Coefficients:
#> (Intercept) lcavol lweight age lbph svi
#> 0.411537 0.579327 0.614128 -0.018659 0.143918 0.730937
#> lcp gleason pgg45
#> -0.205536 -0.030924 0.009507
pred.cm <- sym.predict(model = res.cm, new.sym.data = int_prost_test)
RMSE.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7229999
RMSE.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7192467
R2.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.501419
R2.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.5058389
deter.coefficient(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.4962964
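The same error measures can also be computed on the training data as a quick check against overfitting; this reuses only the calls shown above (pred.train is an illustrative name).

pred.train <- sym.predict(model = res.cm, new.sym.data = int_prost_train)
RMSE.L(int_prost_train$lpsa, pred.train$Fitted)
RMSE.U(int_prost_train$lpsa, pred.train$Fitted)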
data(int_prost_train)
data(int_prost_test)
res.cm.lasso <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 1,
                        nfolds = 10,
                        grouped = TRUE)
pred.cm.lasso <- sym.predict(res.cm.lasso,
                             response = 9,
                             int_prost_test,
                             method = 'cm')
plot(res.cm.lasso)
plot(res.cm.lasso$glmnet.fit, "lambda", label=TRUE)
RMSE.L(int_prost_test$lpsa,pred.cm.lasso)
#> [1] 0.6945169
RMSE.U(int_prost_test$lpsa,pred.cm.lasso)
#> [1] 0.6914335
R2.L(int_prost_test$lpsa,pred.cm.lasso)
#> [1] 0.5409863
R2.U(int_prost_test$lpsa,pred.cm.lasso)
#> [1] 0.544571
deter.coefficient(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.4857015
data(int_prost_train)
data(int_prost_test)
res.cm.ridge <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 0,
                        nfolds = 10,
                        grouped = TRUE)
pred.cm.ridge <- sym.predict(res.cm.ridge,
                             response = 9,
                             int_prost_test,
                             method = 'cm')
plot(res.cm.ridge)
plot(res.cm.ridge$glmnet.fit, "lambda", label=TRUE)
RMSE.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.703543
RMSE.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.7004145
R2.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5286114
R2.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5322683
deter.coefficient(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.4808652
data("oils")
res <- sym.pca(oils, 'centers')
plot(res, choix = "ind")
plot(res, choix = "var")

res <- sym.pca(oils, 'tops')
plot(res, choix = "ind")

res <- sym.pca(oils, 'principal.curves')
plot(res, choix = "ind")

res <- sym.pca(oils, 'optimized.distance')
plot(res, choix = "ind")
plot(res, choix = "var")

res <- sym.pca(oils, 'optimized.variance')
plot(res, choix = "ind")
plot(res, choix = "var")
data("ex_mcfa1")
ex_mcfa1
#> suspect age hair eyes region
#> 1 1 42 h_red e_brown Bronx
#> 2 2 20 h_black e_green Bronx
#> 3 3 64 h_brown e_brown Brooklyn
#> 4 4 55 h_blonde e_brown Bronx
#> 5 5 4 h_brown e_green Manhattan
#> 6 6 61 h_blonde e_green Bronx
#> 7 7 61 h_white e_black Queens
#> 8 8 32 h_blonde e_brown Manhattan
#> 9 9 39 h_blonde e_black Brooklyn
#> 10 10 50 h_brown e_brown Manhattan
#> 11 11 41 h_red e_blue Manhattan
#> 12 12 35 h_blonde e_green Brooklyn
#> 13 13 56 h_blonde e_brown Bronx
#> 14 14 52 h_red e_brown Queens
#> 15 15 55 h_red e_green Brooklyn
#> 16 16 25 h_brown e_brown Queens
#> 17 17 52 h_blonde e_brown Brooklyn
#> 18 18 28 h_red e_brown Manhattan
#> 19 19 21 h_white e_blue Manhattan
#> 20 20 66 h_black e_black Brooklyn
#> 21 21 67 h_blonde e_brown Queens
#> 22 22 13 h_white e_blue Brooklyn
#> 23 23 39 h_brown e_green Manhattan
#> 24 24 47 h_black e_green Brooklyn
#> 25 25 54 h_blonde e_brown Bronx
#> 26 26 75 h_brown e_blue Brooklyn
#> 27 27 3 h_white e_green Manhattan
#> 28 28 40 h_white e_green Manhattan
#> 29 29 58 h_red e_blue Queens
#> 30 30 41 h_brown e_green Bronx
#> 31 31 25 h_white e_black Brooklyn
#> 32 32 75 h_blonde e_blue Manhattan
#> 33 33 58 h_white e_brown Bronx
#> 34 34 61 h_white e_brown Manhattan
#> 35 35 52 h_white e_blue Bronx
#> 36 36 19 h_red e_black Queens
#> 37 37 58 h_red e_black Bronx
#> 38 38 46 h_black e_green Manhattan
#> 39 39 74 h_brown e_black Manhattan
#> 40 40 26 h_blonde e_brown Brooklyn
#> 41 41 63 h_blonde e_blue Queens
#> 42 42 40 h_brown e_black Queens
#> 43 43 65 h_black e_brown Brooklyn
#> 44 44 51 h_blonde e_brown Brooklyn
#> 45 45 15 h_white e_black Brooklyn
#> 46 46 32 h_blonde e_brown Bronx
#> 47 47 68 h_white e_black Manhattan
#> 48 48 51 h_white e_black Queens
#> 49 49 14 h_red e_green Queens
#> 50 50 72 h_white e_brown Brooklyn
#> 51 51 7 h_red e_blue Brooklyn
#> 52 52 22 h_red e_brown Bronx
#> 53 53 52 h_red e_brown Brooklyn
#> 54 54 62 h_brown e_green Bronx
#> 55 55 41 h_black e_brown Queens
#> 56 56 32 h_black e_black Manhattan
#> 57 57 58 h_brown e_brown Queens
#> 58 58 25 h_black e_brown Queens
#> 59 59 70 h_blonde e_green Brooklyn
#> 60 60 64 h_brown e_blue Queens
#> 61 61 25 h_white e_blue Bronx
#> 62 62 42 h_black e_black Brooklyn
#> 63 63 56 h_red e_black Brooklyn
#> 64 64 41 h_blonde e_black Brooklyn
#> 65 65 8 h_white e_black Manhattan
#> 66 66 7 h_black e_green Brooklyn
#> 67 67 42 h_white e_brown Queens
#> 68 68 10 h_white e_blue Manhattan
#> 69 69 60 h_brown e_black Bronx
#> 70 70 52 h_blonde e_brown Brooklyn
#> 71 71 39 h_brown e_blue Manhattan
#> 72 72 69 h_brown e_green Queens
#> 73 73 67 h_blonde e_green Manhattan
#> 74 74 46 h_red e_black Brooklyn
#> 75 75 72 h_black e_black Queens
#> 76 76 66 h_red e_blue Queens
#> 77 77 4 h_black e_blue Manhattan
#> 78 78 62 h_black e_green Brooklyn
#> 79 79 10 h_blonde e_blue Bronx
#> 80 80 16 h_blonde e_black Manhattan
#> 81 81 59 h_blonde e_brown Bronx
#> 82 82 63 h_blonde e_blue Manhattan
#> 83 83 54 h_red e_blue Queens
#> 84 84 14 h_brown e_blue Brooklyn
#> 85 85 48 h_black e_green Manhattan
#> 86 86 59 h_blonde e_black Bronx
#> 87 87 73 h_blonde e_black Bronx
#> 88 88 51 h_brown e_brown Bronx
#> 89 89 14 h_white e_black Bronx
#> 90 90 58 h_blonde e_black Queens
#> 91 91 56 h_red e_green Manhattan
#> 92 92 26 h_red e_blue Brooklyn
#> 93 93 59 h_brown e_black Manhattan
#> 94 94 27 h_white e_green Manhattan
#> 95 95 38 h_black e_green Manhattan
#> 96 96 5 h_blonde e_green Bronx
#> 97 97 14 h_black e_blue Queens
#> 98 98 13 h_black e_brown Manhattan
#> 99 99 54 h_white e_blue Brooklyn
#> 100 100 66 h_white e_green Manhattan
#> 101 1 22 h_red e_black Bronx
#> 102 2 57 h_blonde e_black Manhattan
#> 103 3 29 h_white e_green Queens
#> 104 4 14 h_blonde e_black Manhattan
#> 105 5 47 h_red e_green Bronx
#> 106 6 32 h_white e_blue Queens
#> 107 7 49 h_red e_blue Bronx
#> 108 8 8 h_white e_black Brooklyn
#> 109 9 67 h_white e_brown Bronx
#> 110 10 68 h_black e_green Bronx
#> 111 11 15 h_black e_brown Manhattan
#> 112 12 46 h_white e_brown Bronx
#> 113 13 68 h_white e_black Manhattan
#> 114 14 55 h_blonde e_blue Manhattan
#> 115 15 7 h_white e_green Bronx
#> 116 16 10 h_black e_brown Brooklyn
#> 117 17 49 h_red e_blue Manhattan
#> 118 18 12 h_brown e_blue Brooklyn
#> 119 19 41 h_white e_blue Bronx
#> 120 20 10 h_brown e_blue Bronx
#> 121 21 12 h_white e_green Manhattan
#> 122 22 53 h_white e_blue Manhattan
#> 123 23 5 h_black e_black Manhattan
#> 124 24 46 h_brown e_black Queens
#> 125 25 14 h_brown e_black Queens
#> 126 26 55 h_white e_green Brooklyn
#> 127 27 53 h_red e_brown Manhattan
#> 128 28 31 h_black e_brown Manhattan
#> 129 29 31 h_blonde e_brown Queens
#> 130 30 55 h_brown e_black Brooklyn
sym.table <- classic.to.sym(x = ex_mcfa1,
                            concept = suspect,
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 4
#> age hair eyes region
#> <symblc_n> <symblc_s> <symblc_s> <symblc_s>
#> 1 [22.00 : 42.00] {h_red} {e_brown,e_black} {Bronx}
#> 2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black} {Bronx,Manhattan}
#> 3 [29.00 : 64.00] {h_brown,h_white} {e_brown,e_green} {Brooklyn,Queens}
#> 4 [14.00 : 55.00] {h_blonde} {e_brown,e_black} {Bronx,Manhattan}
#> 5 [4.00 : 47.00] {h_brown,h_red} {e_green} {Manhattan,Bronx}
#> 6 [32.00 : 61.00] {h_blonde,h_white} {e_green,e_blue} {Bronx,Queens}
#> 7 [49.00 : 61.00] {h_white,h_red} {e_black,e_blue} {Queens,Bronx}
#> 8 [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#> 9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown} {Brooklyn,Bronx}
#> 10 [50.00 : 68.00] {h_brown,h_black} {e_brown,e_green} {Manhattan,Bronx}
#> # … with 90 more rows
res <- sym.mcfa(sym.table, c(2,3))
mcfa.scatterplot(res[,2], res[,3], sym.data = sym.table, pos.var = c(2,3))

res <- sym.mcfa(sym.table, c(2,3,4))
mcfa.scatterplot(res[,2], res[,3], sym.data = sym.table, pos.var = c(2,3,4))
data <- oils
data
#> # A tibble: 8 × 4
#> GRA FRE IOD SAP
#> * <symblc_n> <symblc_n> <symblc_n> <symblc_n>
#> 1 [0.93 : 0.94] [-27.00 : -18.00] [170.00 : 204.00] [118.00 : 196.00]
#> 2 [0.93 : 0.94] [-5.00 : -4.00] [192.00 : 208.00] [188.00 : 197.00]
#> 3 [0.92 : 0.92] [-6.00 : -1.00] [99.00 : 113.00] [189.00 : 198.00]
#> 4 [0.92 : 0.93] [-6.00 : -4.00] [104.00 : 116.00] [187.00 : 193.00]
#> 5 [0.92 : 0.92] [-25.00 : -15.00] [80.00 : 82.00] [189.00 : 193.00]
#> 6 [0.91 : 0.92] [0.00 : 6.00] [79.00 : 90.00] [187.00 : 196.00]
#> 7 [0.86 : 0.87] [30.00 : 38.00] [40.00 : 48.00] [190.00 : 199.00]
#> 8 [0.86 : 0.86] [22.00 : 32.00] [53.00 : 77.00] [190.00 : 202.00]
coords <- sym.umap(data)
coords
#> V1 V2
#> 1 -2.115430 -5.932068954
#> 2 -1.954026 -6.093473703
#> 3 -1.802698 -6.244936180
#> 4 -1.911227 -6.137141003
#> 5 -2.055794 -5.991380366
#> 6 -2.155173 -5.891868069
#> 7 -2.018848 -6.028642365
#> 8 -1.840133 -6.207315883
#> 9 -7.043801 -11.914521092
#> 10 -6.810658 -11.929515381
#> 11 -7.057139 -11.829152600
#> 12 -7.104821 -11.803920375
#> 13 -6.899722 -12.114140843
#> 14 -6.806376 -12.250424660
#> 15 -7.026197 -12.151660743
#> 16 -7.124126 -12.073180632
#> 17 -8.408478 -12.090026527
#> 18 -8.313943 -12.592761074
#> 19 -8.204598 -12.468866768
#> 20 -8.056253 -12.573297470
#> 21 -8.328437 -12.229818158
#> 22 -8.249429 -12.294366841
#> 23 -8.490012 -12.168744756
#> 24 -8.245589 -12.453569166
#> 25 -7.839864 -12.249897454
#> 26 -7.931728 -12.027331945
#> 27 -7.775770 -12.510457438
#> 28 -7.900560 -12.090919420
#> 29 -7.636157 -12.392316066
#> 30 -7.543132 -12.331554460
#> 31 -7.500538 -12.343282751
#> 32 -7.752418 -12.220507897
#> 33 -5.235231 1.767590170
#> 34 -4.972326 1.622102703
#> 35 -5.689030 1.882306605
#> 36 -5.836856 2.005416012
#> 37 -5.006563 1.334982998
#> 38 -4.980298 1.090985551
#> 39 -5.336948 1.290530224
#> 40 -5.356591 1.346105714
#> 41 -6.103772 0.189964672
#> 42 -6.099633 0.149992699
#> 43 -6.685271 -0.006012316
#> 44 -6.625425 0.101780131
#> 45 -5.864915 -0.076967025
#> 46 -5.820806 0.035541375
#> 47 -6.060941 -0.155246194
#> 48 -6.128711 -0.204963764
#> 49 -4.664122 1.812260227
#> 50 -4.360073 1.685271920
#> 51 -4.848387 1.716976975
#> 52 -4.392929 1.655983220
#> 53 -4.617946 1.422627612
#> 54 -4.518084 1.313231479
#> 55 -4.847020 1.522515869
#> 56 -4.310571 1.403768546
#> 57 -5.526307 0.625535169
#> 58 -4.881434 0.549848950
#> 59 -5.553716 0.711274734
#> 60 -4.804018 0.445893500
#> 61 -5.167961 0.542516307
#> 62 -4.936037 0.640029722
#> 63 -5.281246 0.461077016
#> 64 -4.889400 0.528984955
#> 65 -2.103998 21.635644516
#> 66 -2.156375 21.342605553
#> 67 -2.990322 19.774950870
#> 68 -2.998939 19.783198202
#> 69 -2.123680 21.395502588
#> 70 -2.038553 21.524883540
#> 71 -3.007471 19.791290630
#> 72 -3.030844 19.810974883
#> 73 -1.998866 21.605077506
#> 74 -2.287929 21.329298323
#> 75 -2.705131 19.487057256
#> 76 -2.745367 19.524048302
#> 77 -2.156892 21.462006209
#> 78 -1.875270 21.682769745
#> 79 -2.995182 19.777431035
#> 80 -2.772396 19.547678008
#> 81 -6.504910 2.156184623
#> 82 -6.336243 2.356432263
#> 83 -6.819049 2.022376084
#> 84 -6.802843 2.065996465
#> 85 -6.299863 2.181479097
#> 86 -6.120538 2.188138154
#> 87 -6.849061 2.004239979
#> 88 -6.816935 2.101729542
#> 89 -6.993410 0.540555205
#> 90 -7.099367 0.423528160
#> 91 -7.204375 0.687581959
#> 92 -7.288208 0.819067773
#> 93 -7.036797 0.375369604
#> 94 -6.991133 0.378727570
#> 95 -7.069539 0.776693065
#> 96 -7.276018 0.769030333
#> 97 16.031371 -1.852578461
#> 98 15.712515 -1.622235040
#> 99 15.520256 -1.512869213
#> 100 15.530231 -1.637371831
#> 101 15.895129 -1.814920926
#> 102 15.969025 -1.838762759
#> 103 15.471363 -1.989783215
#> 104 15.622516 -1.731147437
#> 105 16.315830 -0.603047599
#> 106 16.071306 -0.623885520
#> 107 15.743349 -0.825479754
#> 108 15.629294 -0.698731776
#> 109 16.299508 -0.475051499
#> 110 16.040455 -0.733048306
#> 111 15.963306 -0.706051623
#>
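Since coords prints as a two-column table of embedding coordinates, a quick base graphics scatter plot is enough to visualize the UMAP layout (a sketch assuming coords can be passed to plot() as printed above; no RSDA-specific plotting is involved):

plot(coords, pch = 19,
     xlab = "V1", ylab = "V2",
     main = "sym.umap embedding of the oils data")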