Getting started • Rfuzzycoco

library(Rfuzzycoco)

data and parameters

We will use the mtcars dataset, with three input variables (mpg, hp and wt) and one output variable (qsec). The params() function has some defaults. Parameters with no value (NA) may also be automatically set by the fuzzycoco C++ library.

# three input columns and one output column, both kept as data frames
x <- mtcars[c("mpg", "hp", "wt")]
y <- mtcars["qsec"]

# every argument is passed by name; parameters not set here keep their params() defaults
pms <- params(
  nb_rules = 2, nb_max_var_per_rule = 3,        # structural parameters
  rules.pop_size = 100, mfs.pop_size = 100,     # coevolution population sizes
  ivars.nb_sets = 3, ivars.nb_bits_pos = 8,     # input vars: 3 fuzzy sets, and 8 bits to discretize the values
  ovars.nb_sets = 3, ovars.nb_bits_pos = 8,     # output vars: 3 fuzzy sets, and 8 bits to discretize the values
  metricsw.sensitivity = 0, metricsw.specificity = 0, metricsw.rmse = 1, # we just use RMSE (root mean square error)
  output_vars_defuzz_thresholds = 17            # threshold for the qsec output variable
)

# the full list of generated parameters
str(pms)
#> List of 6
#>  $ global_params     :List of 7
#>   ..$ nb_rules                          : num 2
#>   ..$ nb_max_var_per_rule               : num 3
#>   ..$ max_generations                   : num 100
#>   ..$ max_fitness                       : num 1
#>   ..$ nb_cooperators                    : num 2
#>   ..$ influence_rules_initial_population: logi FALSE
#>   ..$ influence_evolving_ratio          : num 0.8
#>  $ input_vars_params :List of 4
#>   ..$ nb_sets     : num 3
#>   ..$ nb_bits_vars: int NA
#>   ..$ nb_bits_sets: int NA
#>   ..$ nb_bits_pos : num 8
#>  $ output_vars_params:List of 4
#>   ..$ nb_sets     : num 3
#>   ..$ nb_bits_vars: int NA
#>   ..$ nb_bits_sets: int NA
#>   ..$ nb_bits_pos : num 8
#>  $ rules_params      :List of 5
#>   ..$ pop_size       : num 100
#>   ..$ elite_size     : num 5
#>   ..$ cx_prob        : num 0.5
#>   ..$ mut_flip_genome: num 0.5
#>   ..$ mut_flip_bit   : num 0.025
#>  $ mfs_params        :List of 5
#>   ..$ pop_size       : num 100
#>   ..$ elite_size     : num 5
#>   ..$ cx_prob        : num 0.5
#>   ..$ mut_flip_genome: num 0.5
#>   ..$ mut_flip_bit   : num 0.025
#>  $ fitness_params    :List of 3
#>   ..$ output_vars_defuzz_thresholds: num 17
#>   ..$ metrics_weights              :List of 16
#>   .. ..$ sensitivity         : num 0
#>   .. ..$ specificity         : num 0
#>   .. ..$ accuracy            : num 0
#>   .. ..$ ppv                 : num 0
#>   .. ..$ rmse                : num 1
#>   .. ..$ rrse                : num 0
#>   .. ..$ rae                 : num 0
#>   .. ..$ mse                 : num 0
#>   .. ..$ distanceThreshold   : num 0
#>   .. ..$ distanceMinThreshold: num 0
#>   .. ..$ nb_vars             : num 0
#>   .. ..$ overLearn           : num 0
#>   .. ..$ true_positives      : num 0
#>   .. ..$ false_positives     : num 0
#>   .. ..$ true_negatives      : num 0
#>   .. ..$ false_negatives     : num 0
#>   ..$ features_weights             : list()

using the base R S3 interface

model

Let’s create a regression model, with our params, and set a seed for reproducibility.

# build a regression model from the parameter list `pms`; seed = 123 is set for reproducibility
model <- fuzzycoco("regression", pms, seed = 123)
# show the class of the object returned by fuzzycoco()
class(model)
#> [1] "fuzzycoco_model"

fitting the model

fit_xy(): using dataframes

# fit the model on the input data frame `x` and the output data frame `y`
# N.B: progress = FALSE because we are in a vignette
fit <- fit_xy(model, x, y, progress = FALSE)

# we get a "fuzzycoco_fit" object
class(fit)
#> [1] "fuzzycoco_fit"

# that has many components
names(fit)
#> [1] "fit"          "fuzzy_system" "params"       "engine"       "seed"         "mode"         "infos"

# among which: the model fitting information (fitness, metrics, generations)
str(fit$fit)
#> List of 3
#>  $ fitness    : num 0.402
#>  $ metrics    :List of 16
#>   ..$ sensitivity         : num 0.522
#>   ..$ specificity         : num 0.667
#>   ..$ accuracy            : num 0.562
#>   ..$ ppv                 : num 0.8
#>   ..$ rmse                : num 1.32
#>   ..$ rrse                : num 0.074
#>   ..$ rae                 : num 0.0592
#>   ..$ mse                 : num 1.73
#>   ..$ distanceThreshold   : num 0.301
#>   ..$ distanceMinThreshold: num 0
#>   ..$ nb_vars             : num 1
#>   ..$ overLearn           : num 0
#>   ..$ true_positives      : num 12
#>   ..$ false_positives     : num 3
#>   ..$ true_negatives      : num 6
#>   ..$ false_negatives     : num 11
#>  $ generations: int 100

# the (best) fitted Fuzzy System: its variables, rules and default rules
str(fit$fuzzy_system)
#> List of 3
#>  $ variables    :List of 2
#>   ..$ input :List of 3
#>   .. ..$ mpg:List of 3
#>   .. .. ..$ mpg.1: num 16.2
#>   .. .. ..$ mpg.2: num 23.5
#>   .. .. ..$ mpg.3: num 27.4
#>   .. ..$ hp :List of 3
#>   .. .. ..$ hp.1: num 104
#>   .. .. ..$ hp.2: num 114
#>   .. .. ..$ hp.3: num 171
#>   .. ..$ wt :List of 3
#>   .. .. ..$ wt.1: num 2.66
#>   .. .. ..$ wt.2: num 4.76
#>   .. .. ..$ wt.3: num 5.41
#>   ..$ output:List of 1
#>   .. ..$ qsec:List of 3
#>   .. .. ..$ qsec.1: num 16.8
#>   .. .. ..$ qsec.2: num 19.5
#>   .. .. ..$ qsec.3: num 22
#>  $ rules        :List of 1
#>   ..$ rule1:List of 2
#>   .. ..$ antecedents:List of 1
#>   .. .. ..$ hp:List of 1
#>   .. .. .. ..$ hp.1: num 104
#>   .. ..$ consequents:List of 1
#>   .. .. ..$ qsec:List of 1
#>   .. .. .. ..$ qsec.2: num 19.5
#>  $ default_rules:List of 1
#>   ..$ qsec: chr "qsec.1"

fit(): using a formula

There is also a formula-based interface:

# combine inputs and output into a single data frame for the formula interface
df <- cbind(x, y)
# N.B: we use a different engine, and also override `max_generations` and `seed` from the model parameters
# (this rebinds `fit`, so the snippets that follow use this second fit)
fit <- fit(model, qsec ~ ., df, engine = "rcpp", seed = 456, max_generations = 20)

# the fitting information for this run
str(fit$fit)
#> List of 3
#>  $ fitness    : num 0.388
#>  $ metrics    :List of 16
#>   ..$ sensitivity         : num 0.87
#>   ..$ specificity         : num 0.556
#>   ..$ accuracy            : num 0.781
#>   ..$ ppv                 : num 0.833
#>   ..$ rmse                : num 1.36
#>   ..$ rrse                : num 0.0742
#>   ..$ rae                 : num 0.0577
#>   ..$ mse                 : num 1.86
#>   ..$ distanceThreshold   : num 0.478
#>   ..$ distanceMinThreshold: num 0
#>   ..$ nb_vars             : num 2
#>   ..$ overLearn           : num 0
#>   ..$ true_positives      : num 20
#>   ..$ false_positives     : num 4
#>   ..$ true_negatives      : num 5
#>   ..$ false_negatives     : num 3
#>  $ generations: int 20

evaluating the model

Once you have a fitted Fuzzy System, you can evaluate it on complete data (inputs + outputs) to assess its performance on that data.

# evaluating on the same data that was used for fitting should reproduce the fitness and metrics reported in fit$fit
res <- evaluate(fit, df)
str(res)
#> List of 2
#>  $ fitness: num 0.388
#>  $ metrics:List of 16
#>   ..$ sensitivity         : num 0.87
#>   ..$ specificity         : num 0.556
#>   ..$ accuracy            : num 0.781
#>   ..$ ppv                 : num 0.833
#>   ..$ rmse                : num 1.36
#>   ..$ rrse                : num 0.0742
#>   ..$ rae                 : num 0.0577
#>   ..$ mse                 : num 1.86
#>   ..$ distanceThreshold   : num 0.478
#>   ..$ distanceMinThreshold: num 0
#>   ..$ nb_vars             : num 2
#>   ..$ overLearn           : num 0
#>   ..$ true_positives      : num 20
#>   ..$ false_positives     : num 4
#>   ..$ true_negatives      : num 5
#>   ..$ false_negatives     : num 3

# let's modify the data: replace qsec with random draws from a normal distribution with mean 17 (default sd)
# NOTE(review): no seed is set before rnorm() in the visible code, so the metrics below presumably change from run to run
df2 <- df
df2$qsec <- rnorm(nrow(df2), 17)
res2 <- evaluate(fit, df2)
str(res2)
#> List of 2
#>  $ fitness: num 0.27
#>  $ metrics:List of 16
#>   ..$ sensitivity         : num 0.769
#>   ..$ specificity         : num 0.263
#>   ..$ accuracy            : num 0.469
#>   ..$ ppv                 : num 0.417
#>   ..$ rmse                : num 1.89
#>   ..$ rrse                : num 0.109
#>   ..$ rae                 : num 0.0932
#>   ..$ mse                 : num 3.56
#>   ..$ distanceThreshold   : num 0.456
#>   ..$ distanceMinThreshold: num 0
#>   ..$ nb_vars             : num 2
#>   ..$ overLearn           : num 0
#>   ..$ true_positives      : num 10
#>   ..$ false_positives     : num 14
#>   ..$ true_negatives      : num 5
#>   ..$ false_negatives     : num 3

predicting data

You can use the Fuzzy System (from the fitted model) to predict the output variables based on some input data.

# predict the output variable from the inputs with the fitted Fuzzy System
y2 <- predict(fit, x)

# recompute the RMSE by hand from the per-row prediction errors
pred_err <- y2[[1]] - y[[1]]
rmse2 <- sqrt(mean(pred_err^2))
# and check that it agrees with the value reported by the fit
all.equal(rmse2, fit$fit$metrics$rmse)
#> [1] TRUE