Skip to content

R Examples

Complete R examples demonstrating rfastlowess capabilities with base R and visualization.

Batch Smoothing

Process complete datasets with confidence intervals, diagnostics, and cross-validation.

#!/usr/bin/env Rscript
# =============================================================================
# rfastlowess Batch Smoothing Example
#
# This example demonstrates batch LOWESS smoothing features:
# - Basic smoothing with different parameters
# - Robustness iterations for outlier handling
# - Confidence and prediction intervals
# - Diagnostics and cross-validation
# - S3 methods for print and plot
#
# The Lowess class is the primary interface for
# processing complete datasets that fit in memory.
# =============================================================================

library(rfastlowess)

# Generate a synthetic noisy series with injected outliers.
#
# The x values are `n_points` evenly spaced numbers on [0, 50]. The
# ground-truth signal is a linear trend plus a sine wave; Gaussian noise
# (sd = 1.5) is added to every point, and a random subset of the points
# additionally receives an offset of magnitude 10-20 with a random sign.
#
# Arguments:
#   n_points          Number of observations to generate (default 1000).
#   seed              Value passed to set.seed() so that repeated calls return
#                     the same data (default 42). Pass NULL to skip the call
#                     to set.seed() and draw from the current RNG state.
#   outlier_fraction  Share of the points, between 0 and 1, that receive an
#                     outlier offset (default 0.1).
#
# Returns a list with three numeric vectors of equal length:
#   x       the evenly spaced abscissa,
#   y       the noisy observations including outliers,
#   y_true  the noise-free signal.
generate_sample_data <- function(n_points = 1000, seed = 42,
                                 outlier_fraction = 0.1) {
    stopifnot(
        is.numeric(n_points), length(n_points) == 1, n_points >= 0,
        is.numeric(outlier_fraction), length(outlier_fraction) == 1,
        outlier_fraction >= 0, outlier_fraction <= 1
    )

    # Reseeding is optional so callers can keep control of the session RNG
    if (!is.null(seed)) {
        set.seed(seed)
    }
    x <- seq(0, 50, length.out = n_points)

    # Trend plus seasonality
    y_true <- 0.5 * x + 5 * sin(x * 0.5)

    # Gaussian noise
    y <- y_true + rnorm(n_points, mean = 0, sd = 1.5)

    # Pick the contaminated positions, then draw magnitude (10-20) and sign.
    # The order of the random draws is kept fixed so that the default
    # arguments keep producing the same data as before.
    n_outliers <- round(n_points * outlier_fraction)
    outlier_indices <- sample(seq_len(n_points), n_outliers)
    dataset_outliers <- runif(n_outliers, 10, 20) *
        sample(c(-1, 1), n_outliers, replace = TRUE)
    y[outlier_indices] <- y[outlier_indices] + dataset_outliers

    list(x = x, y = y, y_true = y_true)
}

main <- function() {
    # Title banner
    rule <- strrep("=", 80)
    cat(rule, "\n")
    cat("rfastlowess Batch Smoothing Example\n")
    cat(rule, "\n\n")

    # Step 1: synthetic series (trend + seasonality + outliers)
    sample_data <- generate_sample_data(1000)
    x <- sample_data$x
    y <- sample_data$y
    cat(sprintf("Generated %d data points with outliers.\n", length(x)))

    # Step 2: baseline fit with robustness iterations switched off.
    # The narrow span (0.05) lets the curve follow the sine-wave seasonality.
    cat("Running basic smoothing...\n")
    plain_model <- Lowess(fraction = 0.05, iterations = 0L)
    print(plain_model)
    plain_fit <- plain_model$fit(x, y)
    print(plain_fit)

    # Step 3: same span, now with three bisquare robustness iterations
    cat("Running robust smoothing (3 iterations)...\n")
    resistant_model <- Lowess(
        robustness_method = "bisquare",
        iterations = 3L,
        return_robustness_weights = TRUE,
        fraction = 0.05
    )
    print(resistant_model)
    resistant_fit <- resistant_model$fit(x, y)
    print(resistant_fit)

    # Step 4: request confidence/prediction intervals and fit diagnostics
    cat("Computing confidence and prediction intervals...\n")
    interval_model <- Lowess(
        return_diagnostics = TRUE,
        confidence_intervals = 0.95,
        prediction_intervals = 0.95,
        fraction = 0.05
    )
    interval_fit <- interval_model$fit(x, y)

    # Step 5: 5-fold cross-validation over a set of candidate fractions
    cat("Running cross-validation to find optimal fraction...\n")
    cv_model <- Lowess(
        cv_method = "kfold",
        cv_k = 5L,
        cv_fractions = c(0.05, 0.1, 0.2, 0.4)
    )
    cv_fit <- cv_model$fit(x, y)

    chosen_fraction <- cv_fit$fraction_used
    if (!is.null(chosen_fraction)) {
        cat(sprintf("Optimal fraction found: %.2f\n", chosen_fraction))
    }

    # Report the diagnostics of the intervals fit, when they were returned
    fit_stats <- interval_fit$diagnostics
    if (!is.null(fit_stats)) {
        # A statistic missing from the diagnostics is reported as NA
        value_or_na <- function(value) if (is.null(value)) NA else value

        cat("\nFit Statistics (Intervals Model):\n")
        r_squared <- value_or_na(fit_stats$r_squared)
        rmse <- value_or_na(fit_stats$rmse)
        mae <- value_or_na(fit_stats$mae)

        cat(sprintf(" - R^2:   %.4f\n", r_squared))
        cat(sprintf(" - RMSE: %.4f\n", rmse))
        cat(sprintf(" - MAE:  %.4f\n", mae))
    }

    # Step 6: boundary policies compared on an exact straight line
    cat("\nDemonstrating boundary policy effects on linear data...\n")
    line_x <- seq(0, 10, length.out = 50)
    line_y <- 2 * line_x + 1

    # Run every fit before anything is reported
    policies <- c("extend", "reflect", "zero")
    policy_fits <- lapply(policies, function(policy) {
        Lowess(fraction = 0.6, boundary_policy = policy)$fit(line_x, line_y)
    })

    # One report line per policy, in the same order as `policies`
    report_formats <- c(
        " - Extend (Default): first=%.2f, last=%.2f\n",
        " - Reflect:          first=%.2f, last=%.2f\n",
        " - Zero:             first=%.2f, last=%.2f\n"
    )

    cat("Boundary policy comparison:\n")
    for (k in seq_along(policy_fits)) {
        smoothed <- policy_fits[[k]]$y
        cat(sprintf(
            report_formats[k],
            smoothed[1], smoothed[length(smoothed)]
        ))
    }

    cat("\n=== Batch Smoothing Example Complete ===\n")
}

# Entry point: call main() only when no function call frame is active,
# i.e. when this code is evaluated at the top level of the script.
if (sys.nframe() == 0L) {
    main()
}

Download batch_smoothing.R


Streaming Smoothing

Process large datasets in memory-efficient chunks with overlap merging.

#!/usr/bin/env Rscript
# =============================================================================
# rfastlowess Streaming Smoothing Examples
#
# This example demonstrates streaming LOWESS smoothing for large datasets:
# - Basic chunked processing
# - Different chunk sizes and overlap strategies
# - Processing very large datasets
# - Parallel vs sequential execution
#
# The streaming adapter (StreamingLowess class) is designed for:
# - Large datasets (>100K points) that don't fit in memory
# - Batch processing pipelines
# - File-based data processing
# - ETL (Extract, Transform, Load) workflows
# =============================================================================

library(rfastlowess)

main <- function() {
    # Title banner
    rule <- strrep("=", 80)
    cat(rule, "\n")
    cat("rfastlowess Streaming Smoothing Examples\n")
    cat(rule, "\n\n")

    # Run the four demonstrations in order
    example_1_basic_streaming()
    example_2_chunk_comparison()
    example_3_large_dataset()
    example_4_parallel_comparison()
}

# =============================================================================
# Example 1: Basic Streaming Processing
# Demonstrates the fundamental streaming workflow
# =============================================================================
example_1_basic_streaming <- function() {
    cat("Example 1: Basic Streaming Processing\n")
    cat(strrep("-", 80), "\n")

    # Deterministic test signal: the line y = 2x + 1 plus a sinusoidal
    # perturbation of amplitude 2 (no random noise is involved).
    n <- 100
    x <- as.numeric(0:(n - 1))
    y <- 2.0 * x + 1.0 + sin(x * 0.3) * 2.0

    # Keep the chunk geometry in one place so that the summary printed below
    # cannot drift from the values actually passed to the model.
    chunk_size <- 30L
    overlap <- 10L

    model <- StreamingLowess(
        fraction = 0.5,
        chunk_size = chunk_size,
        overlap = overlap,
        iterations = 2L,
        weight_function = "tricube",
        robustness_method = "bisquare"
    )
    print(model)

    # Pass the whole series to the streaming model, then call finalize() and
    # append its x/y output to the result (presumably the points still held
    # back for overlap handling - confirm against the package documentation).
    result <- model$process_chunk(x, y)
    final <- model$finalize()
    result$x <- c(result$x, final$x)
    result$y <- c(result$y, final$y)

    cat(sprintf("Dataset: %d points\n", n))
    cat(sprintf("Chunk size: %d, Overlap: %d\n", chunk_size, overlap))
    cat(sprintf("Output points: %d\n", length(result$y)))
    cat(sprintf("All points processed: %s\n", length(result$y) == n))

    # head() shows at most five values; result$y[1:5] would pad the output
    # with NA if fewer than five smoothed values were returned.
    cat(
        "First 5 smoothed values:",
        paste(round(head(result$y, 5), 4), collapse = ", "), "\n\n"
    )
}

# =============================================================================
# Example 2: Chunk Size Comparison
# Shows how different chunk sizes affect processing
# =============================================================================
example_2_chunk_comparison <- function() {
    cat("Example 2: Chunk Size Comparison\n")
    cat(strrep("-", 80), "\n")

    # Noise-free straight line y = 2x + 1 over 500 points (x = 0, ..., 499)
    n <- 500
    x <- seq_len(n) - 1
    y <- 2.0 * x + 1.0

    # Parallel vectors: position k describes the k-th configuration
    sizes <- c(50L, 100L, 200L)
    overlaps <- c(10L, 20L, 40L)
    labels <- c("Small chunks", "Medium chunks", "Large chunks")

    for (k in seq_along(sizes)) {
        # The timed region covers model construction, processing, finalize
        # and the concatenation of both outputs
        t0 <- Sys.time()
        streamer <- StreamingLowess(
            iterations = 2L,
            fraction = 0.5,
            chunk_size = sizes[k],
            overlap = overlaps[k]
        )
        processed <- streamer$process_chunk(x, y)
        flushed <- streamer$finalize()
        smoothed <- c(processed$y, flushed$y)
        elapsed <- as.numeric(difftime(Sys.time(), t0, units = "secs"))

        cat(sprintf(
            "%s (size: %d, overlap: %d)\n",
            labels[k], sizes[k], overlaps[k]
        ))
        cat(sprintf("  Output points: %d\n", length(smoothed)))
        cat(sprintf("  Time: %.4fs\n", elapsed))
    }
    cat("\n")
}

# =============================================================================
# Example 3: Large Dataset Processing
# Demonstrates processing a very large dataset
# =============================================================================
example_3_large_dataset <- function() {
    cat("Example 3: Large Dataset Processing\n")
    cat(strrep("-", 80), "\n")

    point_count <- 50000 # 50K points
    cat(sprintf(
        "Processing %d data points in streaming mode...\n", point_count
    ))

    # Linear ramp with a slow sine wave of amplitude 10 on top
    # (x = 0, ..., point_count - 1)
    x <- seq_len(point_count) - 1
    y <- 2.0 * x + 1.0 + sin(x * 0.01) * 10.0

    # Time construction, processing, finalize and output concatenation
    t0 <- Sys.time()
    streamer <- StreamingLowess(
        parallel = TRUE, # parallel execution requested
        iterations = 2L,
        fraction = 0.3,
        chunk_size = 5000L,
        overlap = 500L
    )
    print(streamer)
    processed <- streamer$process_chunk(x, y)
    flushed <- streamer$finalize()
    smoothed <- c(processed$y, flushed$y)
    elapsed <- as.numeric(difftime(Sys.time(), t0, units = "secs"))

    cat(sprintf("Processed %d points in %.4fs\n", length(smoothed), elapsed))
    cat("Memory efficiency: Constant (chunk size = 5000 points)\n\n")
}

# =============================================================================
# Example 4: Parallel vs Sequential Comparison
# Compares execution time with and without parallelism
# =============================================================================
example_4_parallel_comparison <- function() {
    cat("Example 4: Parallel vs Sequential Comparison\n")
    cat(strrep("-", 80), "\n")

    # Deterministic test signal: sum of a fast sine and a slow cosine
    n <- 10000
    x <- as.numeric(0:(n - 1))
    y <- sin(x * 0.1) + cos(x * 0.01)

    # Time one complete streaming pass (model construction, processing,
    # finalize, output concatenation). Both runs share every setting except
    # `parallel`, so the two timings are directly comparable.
    # Returns a list with the elapsed seconds and the number of output points.
    run_timed <- function(parallel) {
        start_time <- Sys.time()
        model <- StreamingLowess(
            fraction = 0.5,
            chunk_size = 1000L,
            overlap = 100L,
            iterations = 3L,
            parallel = parallel
        )
        result <- model$process_chunk(x, y)
        final <- model$finalize()
        smoothed <- c(result$y, final$y)
        list(
            seconds = as.numeric(Sys.time() - start_time, units = "secs"),
            n_points = length(smoothed)
        )
    }

    # Parallel run first, then sequential (same order as the report below)
    parallel_run <- run_timed(TRUE)
    sequential_run <- run_timed(FALSE)

    cat(sprintf(
        "Parallel:   %.4fs (%d points)\n",
        parallel_run$seconds, parallel_run$n_points
    ))
    cat(sprintf(
        "Sequential: %.4fs (%d points)\n",
        sequential_run$seconds, sequential_run$n_points
    ))

    # The speedup divides by the parallel time, so that is the value that
    # has to be non-zero; a zero denominator would print Inf or NaN.
    if (parallel_run$seconds > 0) {
        cat(sprintf(
            "Speedup: %.2fx\n",
            sequential_run$seconds / parallel_run$seconds
        ))
    }
    cat("\n")
}

# Entry point: call main() only when no function call frame is active,
# i.e. when this code is evaluated at the top level of the script.
if (sys.nframe() == 0L) {
    main()
}

Download streaming_smoothing.R


Online Smoothing

Real-time smoothing with sliding window for streaming data applications.

#!/usr/bin/env Rscript
# =============================================================================
# rfastlowess Online Smoothing Examples
#
# This example demonstrates online LOWESS smoothing for real-time data:
# - Basic incremental processing with streaming data
# - Real-time sensor data smoothing
# - Different window sizes and their effects
# - Memory-bounded processing
#
# The online adapter (OnlineLowess class) is designed for:
# - Real-time data streams
# - Memory-constrained environments
# - Sensor data processing
# - Incremental updates without reprocessing entire dataset
# =============================================================================

library(rfastlowess)

main <- function() {
    # Title banner
    rule <- strrep("=", 80)
    cat(rule, "\n")
    cat("rfastlowess Online Smoothing Examples\n")
    cat(rule, "\n\n")

    # Run the four demonstrations in order
    example_1_basic_online()
    example_2_sensor_simulation()
    example_3_window_comparison()
    example_4_memory_bounded()
}

# =============================================================================
# Example 1: Basic Online Processing
# Demonstrates incremental data processing
# =============================================================================
example_1_basic_online <- function() {
    cat("Example 1: Basic Online Processing\n")
    cat(strrep("-", 80), "\n")

    # Ten observations lying close to the line y = 2x + 1 (x = 1, ..., 10)
    x <- as.numeric(1:10)
    y <- c(3.1, 5.0, 7.2, 8.9, 11.1, 13.0, 15.2, 16.8, 19.1, 21.0)

    smoother <- OnlineLowess(
        robustness_method = "bisquare",
        weight_function = "tricube",
        iterations = 2L,
        min_points = 3L,
        window_capacity = 5L,
        fraction = 0.5
    )
    print(smoother)

    # All points are handed to the model in a single add_points() call
    smoothed <- smoother$add_points(x, y)$y

    cat("Processing data points with sliding window...\n")
    cat("Window capacity: 5\n")
    cat(sprintf("Output points: %d\n", length(smoothed)))
    cat("Smoothed values:", paste(round(smoothed, 4), collapse = ", "), "\n\n")
}

# =============================================================================
# Example 2: Real-Time Sensor Data Simulation
# Simulates processing temperature sensor readings
# =============================================================================
example_2_sensor_simulation <- function() {
    cat("Example 2: Real-Time Sensor Data Simulation\n")
    cat(strrep("-", 80), "\n")
    cat("Simulating temperature sensor readings with noise...\n\n")

    # One reading per hour for a day: 20.0 degC baseline, a sinusoidal daily
    # cycle of amplitude 5, and a deterministic pseudo-noise term built from
    # a modular sequence (values between -1.5 and 1.5)
    hours <- 24
    x <- seq_len(hours) - 1
    pseudo_noise <- (x * 7) %% 11 * 0.3 - 1.5
    y <- 20.0 + 5.0 * sin(x * pi / 12.0) + pseudo_noise

    smoother <- OnlineLowess(
        iterations = 2L,
        min_points = 3L,
        window_capacity = 12L, # half of the 24 hourly readings
        fraction = 0.4
    )
    print(smoother)
    smoothed <- smoother$add_points(x, y)$y

    cat(sprintf("%6s %12s %12s\n", "Hour", "Raw Temp", "Smoothed"))
    cat(strrep("-", 35), "\n")

    # Print at most the first ten rows; sprintf() is vectorised over the rows
    max_rows <- 10
    shown <- seq_len(min(max_rows, length(smoothed)))
    cat(
        sprintf(
            "%6.0f %10.2f degC %10.2f degC\n",
            x[shown], y[shown], smoothed[shown]
        ),
        sep = ""
    )

    hidden_rows <- length(smoothed) - max_rows
    if (hidden_rows > 0) {
        cat(sprintf("  ... (%d more rows)\n", hidden_rows))
    }
    cat("\n")
}

# =============================================================================
# Example 3: Window Size Comparison
# Shows how different window sizes affect smoothing behavior
# =============================================================================
example_3_window_comparison <- function() {
    cat("Example 3: Window Size Comparison\n")
    cat(strrep("-", 80), "\n")

    # Linear trend with a sine wave of amplitude 3 on top (x = 1, ..., 50)
    x <- as.numeric(seq_len(50))
    y <- 2.0 * x + sin(x * 0.5) * 3.0

    for (capacity in c(5L, 10L, 20L)) {
        smoother <- OnlineLowess(
            iterations = 2L,
            min_points = 3L,
            window_capacity = capacity,
            fraction = 0.5
        )
        smoothed <- smoother$add_points(x, y)$y

        cat(sprintf("Window capacity: %d\n", capacity))
        cat(sprintf("  Output points: %d\n", length(smoothed)))

        # With at least five outputs only the last five are shown; shorter
        # outputs are shown in full under a different label
        has_five <- length(smoothed) >= 5
        label <- if (has_five) "  Last 5 smoothed:" else "  Smoothed values:"
        shown <- if (has_five) tail(smoothed, 5) else smoothed
        cat(label, paste(round(shown, 4), collapse = ", "), "\n")
    }
    cat("\n")
}

# =============================================================================
# Example 4: Memory-Bounded Processing
# Demonstrates efficient processing for resource-constrained systems
# =============================================================================
example_4_memory_bounded <- function() {
    cat("Example 4: Memory-Bounded Processing\n")
    cat(strrep("-", 80), "\n")

    # Length of the simulated data stream
    stream_length <- 10000
    cat(sprintf(
        "Processing %d points with minimal memory footprint...\n",
        stream_length
    ))

    # Linear trend, a sine wave of amplitude 5, and a deterministic
    # period-7 sawtooth standing in for noise
    x <- seq_len(stream_length) - 1
    sawtooth <- (x %% 7 - 3.0) * 0.5
    y <- 2.0 * x + sin(x * 0.1) * 5.0 + sawtooth

    # Time model construction plus the single add_points() call
    t0 <- Sys.time()
    smoother <- OnlineLowess(
        parallel = FALSE, # sequential execution
        iterations = 1L,
        min_points = 3L,
        window_capacity = 20L, # small sliding window
        fraction = 0.3
    )
    smoothed <- smoother$add_points(x, y)$y
    elapsed <- as.numeric(difftime(Sys.time(), t0, units = "secs"))

    n_out <- length(smoothed)
    cat(sprintf("\nProcessed %d points in %.4fs\n", n_out, elapsed))
    if (n_out > 0) {
        cat(sprintf("Final smoothed value: %.2f\n", smoothed[n_out]))
    }
    cat("Memory usage: Constant (window size = 20 points)\n\n")
}

# Entry point: call main() only when no function call frame is active,
# i.e. when this code is evaluated at the top level of the script.
if (sys.nframe() == 0L) {
    main()
}

Download online_smoothing.R


Running the Examples

# Install the package first, using one of the two options below.
# Option 1 - from within an R session:
#   install.packages("rfastlowess")

# Option 2 - from a source checkout, in a shell:
#   R CMD INSTALL bindings/r

# Run the examples (the relative paths presumably assume the repository
# root as the working directory - confirm)
Rscript examples/r/batch_smoothing.R
Rscript examples/r/streaming_smoothing.R
Rscript examples/r/online_smoothing.R

Quick Start

library(rfastlowess)

# Generate sample data: 100 evenly spaced x values on [0, 10] and a sine
# curve with Gaussian noise (sd = 0.3); the fixed seed makes the noise
# reproducible
set.seed(42)
x <- seq(0, 10, length.out = 100)
y <- sin(x) + rnorm(100, sd = 0.3)

# Basic smoothing: build a model with fraction = 0.3, fit it to the data,
# and print both the model and the fit result
model <- Lowess(fraction = 0.3)
print(model)
result <- model$fit(x, y)
print(result)

# With confidence intervals: same fraction, with confidence_intervals = 0.95
# (presumably the confidence level - confirm) and diagnostics requested.
# `model` and `result` are overwritten by this second fit.
model <- Lowess(
    fraction = 0.3,
    confidence_intervals = 0.95,
    return_diagnostics = TRUE
)
result <- model$fit(x, y)

# Visualization: plot the second fit result with a custom title
plot(result, main = "Quick Start Example")