R Examples
Complete R examples demonstrating rfastlowess capabilities with base R and visualization.
Batch Smoothing
Process complete datasets with confidence intervals, diagnostics, and cross-validation.
#!/usr/bin/env Rscript
# =============================================================================
# rfastlowess Batch Smoothing Example
#
# This example demonstrates batch LOWESS smoothing features:
# - Basic smoothing with different parameters
# - Robustness iterations for outlier handling
# - Confidence and prediction intervals
# - Diagnostics and cross-validation
# - S3 methods for print and plot
#
# The Lowess class is the primary interface for
# processing complete datasets that fit in memory.
# =============================================================================
library(rfastlowess)
#' Generate a noisy trend-plus-seasonality series contaminated with outliers.
#'
#' The clean signal is `0.5 * x + 5 * sin(0.5 * x)` on `x` in [0, 50].
#' Gaussian noise (sd = 1.5) is added everywhere, then a random subset of
#' points is shifted by a magnitude drawn from [10, 20] with a random sign.
#'
#' @param n_points Number of observations to generate.
#' @param seed Value passed to `set.seed()` before any random draw. Use
#'   `NULL` to leave the caller's RNG state untouched. Defaults to 42.
#' @param outlier_fraction Share of points (in [0, 1]) that receive an
#'   outlier shift. Defaults to 0.1.
#' @return A list with elements `x`, `y` (noisy, with outliers) and `y_true`
#'   (the noise-free signal).
generate_sample_data <- function(n_points = 1000, seed = 42,
                                 outlier_fraction = 0.1) {
  stopifnot(
    is.numeric(outlier_fraction),
    length(outlier_fraction) == 1L,
    outlier_fraction >= 0,
    outlier_fraction <= 1
  )
  if (!is.null(seed)) {
    set.seed(seed)
  }

  x <- seq(0, 50, length.out = n_points)

  # Trend plus seasonality
  y_true <- 0.5 * x + 5 * sin(x * 0.5)

  # Gaussian noise
  y <- y_true + rnorm(n_points, mean = 0, sd = 1.5)

  # Outliers: the draw order (indices, magnitudes, signs) is kept fixed so
  # a given seed always reproduces the same series
  n_outliers <- round(n_points * outlier_fraction)
  outlier_indices <- sample(seq_len(n_points), n_outliers)
  dataset_outliers <- runif(n_outliers, 10, 20) *
    sample(c(-1, 1), n_outliers, replace = TRUE)
  y[outlier_indices] <- y[outlier_indices] + dataset_outliers

  list(x = x, y = y, y_true = y_true)
}
# Batch walkthrough: a non-robust fit, a robust fit, a fit with intervals
# and diagnostics, cross-validated fraction selection, and a comparison of
# boundary policies on noise-free linear data. Progress is written to stdout.
main <- function() {
  banner <- strrep("=", 80)
  cat(banner, "\n")
  cat("rfastlowess Batch Smoothing Example\n")
  cat(banner, "\n\n")

  # Step 1: synthetic series
  sample_set <- generate_sample_data(1000)
  x <- sample_set$x
  y <- sample_set$y
  cat(sprintf("Generated %d data points with outliers.\n", length(x)))

  # Step 2: non-robust fit (iterations = 0L). The narrow 0.05 fraction is
  # chosen so the fit can follow the sine-wave seasonality.
  cat("Running basic smoothing...\n")
  plain_model <- Lowess(iterations = 0L, fraction = 0.05)
  print(plain_model)
  plain_fit <- plain_model$fit(x, y)
  print(plain_fit)

  # Step 3: robust fit with three robustness iterations (bisquare method),
  # also asking for the robustness weights back
  cat("Running robust smoothing (3 iterations)...\n")
  sturdy_model <- Lowess(
    fraction = 0.05,
    iterations = 3L,
    robustness_method = "bisquare",
    return_robustness_weights = TRUE
  )
  print(sturdy_model)
  sturdy_fit <- sturdy_model$fit(x, y)
  print(sturdy_fit)

  # Step 4: 95% confidence and prediction intervals plus diagnostics
  cat("Computing confidence and prediction intervals...\n")
  interval_model <- Lowess(
    fraction = 0.05,
    confidence_intervals = 0.95,
    prediction_intervals = 0.95,
    return_diagnostics = TRUE
  )
  interval_fit <- interval_model$fit(x, y)

  # Step 5: 5-fold cross-validation over candidate fractions
  cat("Running cross-validation to find optimal fraction...\n")
  candidate_fractions <- c(0.05, 0.1, 0.2, 0.4)
  cv_model <- Lowess(
    cv_fractions = candidate_fractions,
    cv_method = "kfold",
    cv_k = 5L
  )
  cv_fit <- cv_model$fit(x, y)
  chosen_fraction <- cv_fit$fraction_used
  if (!is.null(chosen_fraction)) {
    cat(sprintf("Optimal fraction found: %.2f\n", chosen_fraction))
  }

  # Fit statistics of the intervals model; a statistic missing from the
  # diagnostics object is reported as NA
  fit_stats <- interval_fit$diagnostics
  if (!is.null(fit_stats)) {
    cat("\nFit Statistics (Intervals Model):\n")
    or_na <- function(value) if (is.null(value)) NA else value
    r2 <- or_na(fit_stats$r_squared)
    rmse <- or_na(fit_stats$rmse)
    mae <- or_na(fit_stats$mae)
    cat(sprintf(" - R^2: %.4f\n", r2))
    cat(sprintf(" - RMSE: %.4f\n", rmse))
    cat(sprintf(" - MAE: %.4f\n", mae))
  }

  # Step 6: boundary policies compared on an exact line (y = 2x + 1)
  cat("\nDemonstrating boundary policy effects on linear data...\n")
  line_x <- seq(0, 10, length.out = 50)
  line_y <- 2 * line_x + 1
  policy_formats <- c(
    extend = " - Extend (Default): first=%.2f, last=%.2f\n",
    reflect = " - Reflect: first=%.2f, last=%.2f\n",
    zero = " - Zero: first=%.2f, last=%.2f\n"
  )
  # All three fits run before anything is printed
  policy_fits <- lapply(names(policy_formats), function(policy) {
    Lowess(fraction = 0.6, boundary_policy = policy)$fit(line_x, line_y)
  })
  cat("Boundary policy comparison:\n")
  for (i in seq_along(policy_fits)) {
    smoothed <- policy_fits[[i]]$y
    cat(sprintf(policy_formats[[i]], smoothed[1], smoothed[length(smoothed)]))
  }

  cat("\n=== Batch Smoothing Example Complete ===\n")
}
# Entry point: call main() only when no function frame is active, i.e. the
# file is being run directly.
if (sys.nframe() == 0L) {
  main()
}
Streaming Smoothing
Process large datasets in memory-efficient chunks with overlap merging.
#!/usr/bin/env Rscript
# =============================================================================
# rfastlowess Streaming Smoothing Examples
#
# This example demonstrates streaming LOWESS smoothing for large datasets:
# - Basic chunked processing
# - Different chunk sizes and overlap strategies
# - Processing very large datasets
# - Parallel vs sequential execution
#
# The streaming adapter (the StreamingLowess class used below) is designed for:
# - Large datasets (>100K points) that don't fit in memory
# - Batch processing pipelines
# - File-based data processing
# - ETL (Extract, Transform, Load) workflows
# =============================================================================
library(rfastlowess)
# Print the title banner, then run the four streaming examples in order.
main <- function() {
  banner <- strrep("=", 80)
  cat(banner, "\n")
  cat("rfastlowess Streaming Smoothing Examples\n")
  cat(banner, "\n\n")

  example_1_basic_streaming()
  example_2_chunk_comparison()
  example_3_large_dataset()
  example_4_parallel_comparison()
}
# =============================================================================
# Example 1: Basic Streaming Processing
# Demonstrates the fundamental streaming workflow
# =============================================================================
# Example 1: push a 100-point series through StreamingLowess with a single
# process_chunk() call, append whatever finalize() returns, and report how
# many smoothed values came back.
example_1_basic_streaming <- function() {
  cat("Example 1: Basic Streaming Processing\n")
  cat(strrep("-", 80), "\n")

  # Test data: the line y = 2x + 1 plus a deterministic sine ripple
  # (no random noise is involved, so the output is reproducible)
  n <- 100
  x <- as.numeric(0:(n - 1))
  y <- 2.0 * x + 1.0 + sin(x * 0.3) * 2.0

  # Process with streaming adapter
  model <- StreamingLowess(
    fraction = 0.5,
    chunk_size = 30L,
    overlap = 10L,
    iterations = 2L,
    weight_function = "tricube",
    robustness_method = "bisquare"
  )
  print(model)

  result <- model$process_chunk(x, y)
  final <- model$finalize()
  # Merge the values returned by finalize() into the main result
  result$x <- c(result$x, final$x)
  result$y <- c(result$y, final$y)

  cat(sprintf("Dataset: %d points\n", n))
  cat("Chunk size: 30, Overlap: 10\n")
  cat(sprintf("Output points: %d\n", length(result$y)))
  cat(sprintf("All points processed: %s\n", length(result$y) == n))
  # head() rather than [1:5]: indexing past the end would print NA entries
  # when fewer than five smoothed values are available
  cat(
    "First 5 smoothed values:",
    paste(round(head(result$y, 5), 4), collapse = ", "), "\n\n"
  )
}
# =============================================================================
# Example 2: Chunk Size Comparison
# Shows how different chunk sizes affect processing
# =============================================================================
# Example 2: run the same 500-point linear series through StreamingLowess
# with three chunk-size/overlap settings and print the output count and the
# wall-clock time of each run.
example_2_chunk_comparison <- function() {
  cat("Example 2: Chunk Size Comparison\n")
  cat(strrep("-", 80), "\n")

  # Exact line y = 2x + 1 on x = 0, 1, ..., 499
  point_count <- 500
  x <- as.numeric(seq_len(point_count) - 1L)
  y <- 2.0 * x + 1.0

  # One entry per configuration: chunk size, overlap, display label
  setups <- Map(
    function(size, shared, label) {
      list(chunk_size = size, overlap = shared, description = label)
    },
    c(50L, 100L, 200L),
    c(10L, 20L, 40L),
    c("Small chunks", "Medium chunks", "Large chunks")
  )

  for (setup in setups) {
    # The timer covers model construction, processing and finalization
    started <- Sys.time()
    smoother <- StreamingLowess(
      fraction = 0.5,
      chunk_size = setup$chunk_size,
      overlap = setup$overlap,
      iterations = 2L
    )
    streamed <- smoother$process_chunk(x, y)
    flushed <- smoother$finalize()
    smoothed <- c(streamed$y, flushed$y)
    elapsed <- as.numeric(difftime(Sys.time(), started, units = "secs"))

    cat(sprintf(
      "%s (size: %d, overlap: %d)\n",
      setup$description, setup$chunk_size, setup$overlap
    ))
    cat(sprintf(" Output points: %d\n", length(smoothed)))
    cat(sprintf(" Time: %.4fs\n", elapsed))
  }
  cat("\n")
}
# =============================================================================
# Example 3: Large Dataset Processing
# Demonstrates processing a very large dataset
# =============================================================================
# Example 3: stream a 50,000-point series through StreamingLowess with
# parallel = TRUE and report the output count and elapsed wall-clock time.
example_3_large_dataset <- function() {
  cat("Example 3: Large Dataset Processing\n")
  cat(strrep("-", 80), "\n")

  n <- 50000
  cat(sprintf("Processing %d data points in streaming mode...\n", n))

  # Line y = 2x + 1 with a slow sine component of amplitude 10
  x <- as.numeric(seq_len(n) - 1L)
  y <- 2.0 * x + 1.0 + sin(x * 0.01) * 10.0

  # The timer covers model construction, printing, processing, finalization
  started <- Sys.time()
  smoother <- StreamingLowess(
    fraction = 0.3,
    chunk_size = 5000L,
    overlap = 500L,
    iterations = 2L,
    parallel = TRUE
  )
  print(smoother)
  streamed <- smoother$process_chunk(x, y)
  flushed <- smoother$finalize()
  smoothed <- c(streamed$y, flushed$y)
  elapsed <- as.numeric(difftime(Sys.time(), started, units = "secs"))

  cat(sprintf("Processed %d points in %.4fs\n", length(smoothed), elapsed))
  cat("Memory efficiency: Constant (chunk size = 5000 points)\n\n")
}
# =============================================================================
# Example 4: Parallel vs Sequential Comparison
# Compares execution time with and without parallelism
# =============================================================================
# Example 4: time the same streaming job with parallel = TRUE and then
# parallel = FALSE, print both timings, and print the speedup ratio
# (sequential time divided by parallel time) when it is well defined.
example_4_parallel_comparison <- function() {
  cat("Example 4: Parallel vs Sequential Comparison\n")
  cat(strrep("-", 80), "\n")

  n <- 10000
  x <- as.numeric(0:(n - 1))
  y <- sin(x * 0.1) + cos(x * 0.01)

  # Private helper: run one full streaming pass (construct, process,
  # finalize) with the given `parallel` flag. Returns the elapsed seconds
  # and the number of smoothed values produced.
  timed_run <- function(parallel) {
    start_time <- Sys.time()
    model <- StreamingLowess(
      fraction = 0.5,
      chunk_size = 1000L,
      overlap = 100L,
      iterations = 3L,
      parallel = parallel
    )
    result <- model$process_chunk(x, y)
    final <- model$finalize()
    smoothed <- c(result$y, final$y)
    elapsed <- as.numeric(Sys.time() - start_time, units = "secs")
    list(seconds = elapsed, n_points = length(smoothed))
  }

  # Parallel first, then sequential
  parallel_run <- timed_run(TRUE)
  sequential_run <- timed_run(FALSE)

  cat(sprintf(
    "Parallel: %.4fs (%d points)\n",
    parallel_run$seconds, parallel_run$n_points
  ))
  cat(sprintf(
    "Sequential: %.4fs (%d points)\n",
    sequential_run$seconds, sequential_run$n_points
  ))

  # Guard the denominator: the ratio divides by the parallel time, so that
  # is the value that must be positive (a zero reading would give Inf/NaN)
  if (parallel_run$seconds > 0) {
    cat(sprintf(
      "Speedup: %.2fx\n",
      sequential_run$seconds / parallel_run$seconds
    ))
  }
  cat("\n")
}
# Entry point: call main() only when no function frame is active, i.e. the
# file is being run directly.
if (sys.nframe() == 0L) {
  main()
}
Download streaming_smoothing.R
Online Smoothing
Real-time smoothing with sliding window for streaming data applications.
#!/usr/bin/env Rscript
# =============================================================================
# rfastlowess Online Smoothing Examples
#
# This example demonstrates online LOWESS smoothing for real-time data:
# - Basic incremental processing with streaming data
# - Real-time sensor data smoothing
# - Different window sizes and their effects
# - Memory-bounded processing
#
# The online adapter (the OnlineLowess class used below) is designed for:
# - Real-time data streams
# - Memory-constrained environments
# - Sensor data processing
# - Incremental updates without reprocessing entire dataset
# =============================================================================
library(rfastlowess)
# Print the title banner, then run the four online-smoothing examples in
# order.
main <- function() {
  banner <- strrep("=", 80)
  cat(banner, "\n")
  cat("rfastlowess Online Smoothing Examples\n")
  cat(banner, "\n\n")

  example_1_basic_online()
  example_2_sensor_simulation()
  example_3_window_comparison()
  example_4_memory_bounded()
}
# =============================================================================
# Example 1: Basic Online Processing
# Demonstrates incremental data processing
# =============================================================================
# Example 1: hand ten (x, y) observations to OnlineLowess in a single
# add_points() call and print the smoothed values it returns.
example_1_basic_online <- function() {
  cat("Example 1: Basic Online Processing\n")
  cat(strrep("-", 80), "\n")

  # Ten readings that follow y = 2x + 1 with small perturbations
  x <- as.numeric(1:10)
  y <- c(3.1, 5.0, 7.2, 8.9, 11.1, 13.0, 15.2, 16.8, 19.1, 21.0)

  smoother <- OnlineLowess(
    fraction = 0.5,
    window_capacity = 5L,
    min_points = 3L,
    iterations = 2L,
    weight_function = "tricube",
    robustness_method = "bisquare"
  )
  print(smoother)

  # All points are submitted at once
  smoothed <- smoother$add_points(x, y)$y

  cat("Processing data points with sliding window...\n")
  cat("Window capacity: 5\n")
  cat(sprintf("Output points: %d\n", length(smoothed)))
  rendered <- paste(round(smoothed, 4), collapse = ", ")
  cat("Smoothed values:", rendered, "\n\n")
}
# =============================================================================
# Example 2: Real-Time Sensor Data Simulation
# Simulates processing temperature sensor readings
# =============================================================================
# Example 2: smooth 24 hourly pseudo-temperature readings with OnlineLowess
# and print a table of the first ten raw/smoothed pairs.
example_2_sensor_simulation <- function() {
  cat("Example 2: Real-Time Sensor Data Simulation\n")
  cat(strrep("-", 80), "\n")
  cat("Simulating temperature sensor readings with noise...\n\n")

  # 24 hourly readings: 20 degC base, a sinusoidal daily cycle of amplitude
  # 5, and a deterministic pseudo-noise term built from a modular sequence
  n <- 24
  hours <- 0:(n - 1)
  x <- as.numeric(hours)
  base_temp <- 20.0
  daily_cycle <- 5.0 * sin(x * pi / 12.0)
  noise <- ((hours * 7) %% 11) * 0.3 - 1.5
  y <- base_temp + daily_cycle + noise

  smoother <- OnlineLowess(
    fraction = 0.4,
    window_capacity = 12L, # twelve readings = half a day
    min_points = 3L,
    iterations = 2L
  )
  print(smoother)
  smoothed <- smoother$add_points(x, y)$y

  cat(sprintf("%6s %12s %12s\n", "Hour", "Raw Temp", "Smoothed"))
  cat(strrep("-", 35), "\n")

  # At most the first ten rows are shown
  for (row in head(seq_along(smoothed), 10)) {
    cat(sprintf(
      "%6.0f %10.2f degC %10.2f degC\n",
      x[row], y[row], smoothed[row]
    ))
  }
  hidden_rows <- length(smoothed) - 10
  if (hidden_rows > 0) {
    cat(sprintf(" ... (%d more rows)\n", hidden_rows))
  }
  cat("\n")
}
# =============================================================================
# Example 3: Window Size Comparison
# Shows how different window sizes affect smoothing behavior
# =============================================================================
# Example 3: smooth the same 50-point series with OnlineLowess window
# capacities of 5, 10 and 20 and print the tail of each result.
example_3_window_comparison <- function() {
  cat("Example 3: Window Size Comparison\n")
  cat(strrep("-", 80), "\n")

  # Line y = 2x with a sine component of amplitude 3
  x <- as.numeric(1:50)
  y <- 2.0 * x + sin(x * 0.5) * 3.0

  for (capacity in c(5L, 10L, 20L)) {
    smoother <- OnlineLowess(
      fraction = 0.5,
      window_capacity = capacity,
      min_points = 3L,
      iterations = 2L
    )
    smoothed <- smoother$add_points(x, y)$y

    cat(sprintf("Window capacity: %d\n", capacity))
    cat(sprintf(" Output points: %d\n", length(smoothed)))

    # Short results are printed in full, longer ones as their last five
    if (length(smoothed) < 5) {
      cat(
        " Smoothed values:",
        paste(round(smoothed, 4), collapse = ", "), "\n"
      )
    } else {
      closing <- tail(smoothed, 5)
      cat(
        " Last 5 smoothed:",
        paste(round(closing, 4), collapse = ", "), "\n"
      )
    }
  }
  cat("\n")
}
# =============================================================================
# Example 4: Memory-Bounded Processing
# Demonstrates efficient processing for resource-constrained systems
# =============================================================================
# Example 4: feed a 10,000-point series to OnlineLowess with a 20-point
# window and parallel = FALSE, then report the elapsed time and the last
# smoothed value.
example_4_memory_bounded <- function() {
  cat("Example 4: Memory-Bounded Processing\n")
  cat(strrep("-", 80), "\n")

  total_points <- 10000
  cat(sprintf(
    "Processing %d points with minimal memory footprint...\n",
    total_points
  ))

  # Line y = 2x, a sine component of amplitude 5, and a period-7 sawtooth
  positions <- 0:(total_points - 1)
  x <- as.numeric(positions)
  sawtooth <- (positions %% 7 - 3.0) * 0.5
  y <- 2.0 * x + sin(x * 0.1) * 5.0 + sawtooth

  # The timer covers model construction and the add_points() call
  started <- Sys.time()
  smoother <- OnlineLowess(
    fraction = 0.3,
    window_capacity = 20L, # small window, as advertised in the summary line
    min_points = 3L,
    iterations = 1L,
    parallel = FALSE # sequential execution
  )
  smoothed <- smoother$add_points(x, y)$y
  elapsed <- as.numeric(difftime(Sys.time(), started, units = "secs"))

  cat(sprintf("\nProcessed %d points in %.4fs\n", length(smoothed), elapsed))
  if (length(smoothed) > 0) {
    cat(sprintf("Final smoothed value: %.2f\n", tail(smoothed, 1)))
  }
  cat("Memory usage: Constant (window size = 20 points)\n\n")
}
# Entry point: call main() only when no function frame is active, i.e. the
# file is being run directly.
if (sys.nframe() == 0L) {
  main()
}
Running the Examples
# Prerequisite: install the rfastlowess package before running the examples.
# Option 1, from an R session:
# install.packages("rfastlowess")
# Option 2, from a source checkout (shell command):
# R CMD INSTALL bindings/r
# Run each example script with Rscript.
# NOTE(review): the paths below look relative to the repository root - confirm.
Rscript examples/r/batch_smoothing.R
Rscript examples/r/streaming_smoothing.R
Rscript examples/r/online_smoothing.R
Quick Start
library(rfastlowess)

# Reproducible toy series: a sine wave over [0, 10] with Gaussian noise
# (sd = 0.3)
set.seed(42)
x <- seq(0, 10, length.out = 100)
y <- sin(x) + rnorm(length(x), sd = 0.3)

# Plain smoothing: build the model, show it, fit it, show the fit
model <- Lowess(fraction = 0.3)
print(model)
result <- model$fit(x, y)
print(result)

# Same fraction, now also requesting 95% confidence intervals and
# diagnostics; `model` and `result` are deliberately overwritten
model <- Lowess(
  fraction = 0.3,
  confidence_intervals = 0.95,
  return_diagnostics = TRUE
)
result <- model$fit(x, y)

# Plot the fitted result via its plot method
plot(result, main = "Quick Start Example")