library(Rfuzzycoco)
options(width = 200)
We will use the mtcars dataset, with three input variables and one output variable.
The params() function provides defaults for some parameters. Parameters left without a value (NA)
may also be set automatically by the fuzzycoco C++ library.
# Inputs and output are taken from mtcars; single-bracket indexing keeps both
# as data frames (three input columns, one output column).
x <- mtcars[c("mpg", "hp", "wt")]
y <- mtcars["qsec"]

# Build the parameter list. Anything not given here keeps the default chosen
# by params().
pms <- params(
  # structural parameters
  nb_rules = 2, nb_max_var_per_rule = 3,
  # coevolution population sizes
  rules.pop_size = 100, mfs.pop_size = 100,
  # input vars: 3 fuzzy sets, and 8 bits to discretize the values
  # (a stray doubled comma used to pass an empty positional argument here)
  ivars.nb_sets = 3, ivars.nb_bits_pos = 8,
  # output vars: 3 fuzzy sets, and 8 bits to discretize the values
  ovars.nb_sets = 3, ovars.nb_bits_pos = 8,
  # we just use RMSE (root mean square error)
  metricsw.sensitivity = 0, metricsw.specificity = 0, metricsw.rmse = 1,
  # threshold for the qsec output variable
  output_vars_defuzz_thresholds = 17
)
# the full list of generated parameters
str(pms)
#> List of 6
#> $ global_params :List of 7
#> ..$ nb_rules : num 2
#> ..$ nb_max_var_per_rule : num 3
#> ..$ max_generations : num 100
#> ..$ max_fitness : num 1
#> ..$ nb_cooperators : num 2
#> ..$ influence_rules_initial_population: logi FALSE
#> ..$ influence_evolving_ratio : num 0.8
#> $ input_vars_params :List of 4
#> ..$ nb_sets : num 3
#> ..$ nb_bits_vars: int NA
#> ..$ nb_bits_sets: int NA
#> ..$ nb_bits_pos : num 8
#> $ output_vars_params:List of 4
#> ..$ nb_sets : num 3
#> ..$ nb_bits_vars: int NA
#> ..$ nb_bits_sets: int NA
#> ..$ nb_bits_pos : num 8
#> $ rules_params :List of 5
#> ..$ pop_size : num 100
#> ..$ elite_size : num 5
#> ..$ cx_prob : num 0.5
#> ..$ mut_flip_genome: num 0.5
#> ..$ mut_flip_bit : num 0.025
#> $ mfs_params :List of 5
#> ..$ pop_size : num 100
#> ..$ elite_size : num 5
#> ..$ cx_prob : num 0.5
#> ..$ mut_flip_genome: num 0.5
#> ..$ mut_flip_bit : num 0.025
#> $ fitness_params :List of 3
#> ..$ output_vars_defuzz_thresholds: num 17
#> ..$ metrics_weights :List of 16
#> .. ..$ sensitivity : num 0
#> .. ..$ specificity : num 0
#> .. ..$ accuracy : num 0
#> .. ..$ ppv : num 0
#> .. ..$ rmse : num 1
#> .. ..$ rrse : num 0
#> .. ..$ rae : num 0
#> .. ..$ mse : num 0
#> .. ..$ distanceThreshold : num 0
#> .. ..$ distanceMinThreshold: num 0
#> .. ..$ nb_vars : num 0
#> .. ..$ overLearn : num 0
#> .. ..$ true_positives : num 0
#> .. ..$ false_positives : num 0
#> .. ..$ true_negatives : num 0
#> .. ..$ false_negatives : num 0
#> ..$ features_weights : list()
# NOTE(review): `model` was never defined in this script, so the fit_xy() call
# below failed with "object 'model' not found". It is built here from `pms`;
# the regression mode and the seed value are assumptions -- confirm them
# against the setup that produced the outputs shown below.
model <- fuzzycoco("regression", pms, seed = 123)
# N.B: progress = FALSE because we are in a vignette
fit <- fit_xy(model, x, y, progress = FALSE)
# we get a "fuzzycoco_fit" object
class(fit)
#> [1] "fuzzycoco_fit"
# that has many components
names(fit)
#> [1] "fit" "fuzzy_system" "params" "engine" "seed" "mode" "infos"
# among which: the model fitting information
str(fit$fit)
#> List of 3
#> $ fitness : num 0.473
#> $ metrics :List of 16
#> ..$ sensitivity : num 0.87
#> ..$ specificity : num 0.444
#> ..$ accuracy : num 0.75
#> ..$ ppv : num 0.8
#> ..$ rmse : num 1.08
#> ..$ rrse : num 0.0598
#> ..$ rae : num 0.0495
#> ..$ mse : num 1.16
#> ..$ distanceThreshold : num 0.465
#> ..$ distanceMinThreshold: num 0
#> ..$ nb_vars : num 2
#> ..$ overLearn : num 0
#> ..$ true_positives : num 20
#> ..$ false_positives : num 5
#> ..$ true_negatives : num 4
#> ..$ false_negatives : num 3
#> $ generations: int 100
# Inspect the best fuzzy system found: its variables, rules and default rules
str(fit[["fuzzy_system"]])
#> List of 3
#> $ variables :List of 2
#> ..$ input :List of 3
#> .. ..$ mpg:List of 3
#> .. .. ..$ mpg.1: num 21
#> .. .. ..$ mpg.2: num 22.6
#> .. .. ..$ mpg.3: num 25.7
#> .. ..$ hp :List of 3
#> .. .. ..$ hp.1: num 65.3
#> .. .. ..$ hp.2: num 280
#> .. .. ..$ hp.3: num 332
#> .. ..$ wt :List of 3
#> .. .. ..$ wt.1: num 3.38
#> .. .. ..$ wt.2: num 4.92
#> .. .. ..$ wt.3: num 5.39
#> ..$ output:List of 1
#> .. ..$ qsec:List of 3
#> .. .. ..$ qsec.1: num 15.6
#> .. .. ..$ qsec.2: num 18.8
#> .. .. ..$ qsec.3: num 22.9
#> $ rules :List of 2
#> ..$ rule1:List of 2
#> .. ..$ antecedents:List of 1
#> .. .. ..$ mpg:List of 1
#> .. .. .. ..$ mpg.2: num 22.6
#> .. ..$ consequents:List of 1
#> .. .. ..$ qsec:List of 1
#> .. .. .. ..$ qsec.3: num 22.9
#> ..$ rule2:List of 2
#> .. ..$ antecedents:List of 1
#> .. .. ..$ hp:List of 1
#> .. .. .. ..$ hp.1: num 65.3
#> .. ..$ consequents:List of 1
#> .. .. ..$ qsec:List of 1
#> .. .. .. ..$ qsec.2: num 18.8
#> $ default_rules:List of 1
#> ..$ qsec: chr "qsec.1"
There is also a formula-based interface:
# Put inputs and output back into a single data frame for the formula
# interface (this is the call cbind() makes for data frames).
df <- data.frame(x, y, check.names = FALSE)
# N.B: a different engine is used here, and `max_generations` and `seed`
# replace the values carried by the model parameters
fit <- fit(
  model,
  qsec ~ .,
  df,
  engine = "rcpp",
  seed = 456,
  max_generations = 20
)
str(fit[["fit"]])
#> List of 3
#> $ fitness : num 0.434
#> $ metrics :List of 16
#> ..$ sensitivity : num 0.739
#> ..$ specificity : num 0.556
#> ..$ accuracy : num 0.688
#> ..$ ppv : num 0.81
#> ..$ rmse : num 1.21
#> ..$ rrse : num 0.0691
#> ..$ rae : num 0.0584
#> ..$ mse : num 1.45
#> ..$ distanceThreshold : num 0.421
#> ..$ distanceMinThreshold: num 0
#> ..$ nb_vars : num 3
#> ..$ overLearn : num 0
#> ..$ true_positives : num 17
#> ..$ false_positives : num 4
#> ..$ true_negatives : num 5
#> ..$ false_negatives : num 6
#> $ generations: int 20
Once you have a fitted Fuzzy System, you can evaluate it on complete data (input + output) to assess its performance on that data.
# Scoring the model on the very data it was fitted on should reproduce the
# fitness and metrics reported by the fit.
res <- evaluate(fit, df)
str(res)
#> List of 2
#> $ fitness: num 0.434
#> $ metrics:List of 16
#> ..$ sensitivity : num 0.739
#> ..$ specificity : num 0.556
#> ..$ accuracy : num 0.688
#> ..$ ppv : num 0.81
#> ..$ rmse : num 1.21
#> ..$ rrse : num 0.0691
#> ..$ rae : num 0.0584
#> ..$ mse : num 1.45
#> ..$ distanceThreshold : num 0.421
#> ..$ distanceMinThreshold: num 0
#> ..$ nb_vars : num 3
#> ..$ overLearn : num 0
#> ..$ true_positives : num 17
#> ..$ false_positives : num 4
#> ..$ true_negatives : num 5
#> ..$ false_negatives : num 6
# Now replace the output column with random draws centred on 17 and score
# again. No set.seed() call is visible in this script, so the numbers printed
# below may differ from run to run.
df2 <- df
df2[["qsec"]] <- rnorm(n = nrow(df), mean = 17)
res2 <- evaluate(fit, df2)
str(res2)
#> List of 2
#> $ fitness: num 0.249
#> $ metrics:List of 16
#> ..$ sensitivity : num 0.615
#> ..$ specificity : num 0.316
#> ..$ accuracy : num 0.438
#> ..$ ppv : num 0.381
#> ..$ rmse : num 2
#> ..$ rrse : num 0.113
#> ..$ rae : num 0.0953
#> ..$ mse : num 4.02
#> ..$ distanceThreshold : num 0.418
#> ..$ distanceMinThreshold: num 0
#> ..$ nb_vars : num 3
#> ..$ overLearn : num 0
#> ..$ true_positives : num 8
#> ..$ false_positives : num 13
#> ..$ true_negatives : num 6
#> ..$ false_negatives : num 5