Python Examples
Complete Python examples demonstrating fastlowess capabilities with NumPy and matplotlib.
Batch Smoothing
Process complete datasets with confidence intervals, diagnostics, and cross-validation.
#!/usr/bin/env python3
"""
fastlowess Batch Smoothing Example
This example demonstrates batch LOWESS smoothing features:
- Basic smoothing with different parameters
- Robustness iterations for outlier handling
- Confidence and prediction intervals
- Diagnostics and cross-validation
The Lowess class is the primary interface for
processing complete datasets that fit in memory.
"""
import os
import matplotlib.pyplot as plt
import numpy as np
from fastlowess import Lowess
# Anchor the output directory to the folder containing this script rather than
# the current working directory, so plots always land in "<script dir>/plots".
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PLOTS_DIR = os.path.join(SCRIPT_DIR, "plots")
def generate_sample_data(n_points=1000):
    """Build a reproducible noisy test signal for the batch example.

    The clean signal is a linear trend plus a sinusoid sampled on ``n_points``
    evenly spaced abscissae in [0, 50]. Gaussian noise (sigma 1.5) is added to
    every point, and a random 10% of the points additionally receive an offset
    of magnitude 10-20 with a random sign.

    Args:
        n_points: Number of samples to generate.

    Returns:
        A tuple ``(x, y, y_true)``: the abscissae, the contaminated
        observations, and the clean signal.
    """
    # Seeding NumPy's global generator makes every run produce the same data.
    np.random.seed(42)

    x = np.linspace(0, 50, n_points)
    trend = 0.5 * x
    seasonal = 5 * np.sin(x * 0.5)
    y_true = trend + seasonal

    noise = np.random.normal(0, 1.5, n_points)
    y = y_true + noise

    # Contaminate 10% of the samples; the draws below must stay in this order
    # (positions, magnitudes, signs) to reproduce the same random stream.
    outlier_count = int(n_points * 0.1)
    positions = np.random.choice(n_points, size=outlier_count, replace=False)
    magnitudes = np.random.uniform(10, 20, outlier_count)
    signs = np.random.choice([-1, 1], outlier_count)
    y[positions] += magnitudes * signs

    return x, y, y_true
def run_batch_models(x, y):
    """Fit the four batch configurations showcased by this example.

    Args:
        x: Abscissae of the observations.
        y: Observed values to smooth.

    Returns:
        A tuple ``(basic, robust, intervals, cv)`` of fit results:
        a non-robust fit (``iterations=0``), a 3-iteration bisquare fit that
        also returns robustness weights, a fit requesting 0.95 confidence and
        prediction intervals plus diagnostics, and a 5-fold cross-validated
        fit over several candidate fractions.
    """
    narrow_span = 0.05

    print("Running basic smoothing...")
    basic_model = Lowess(iterations=0, fraction=narrow_span)
    basic = basic_model.fit(x, y)

    print("Running robust smoothing (3 iterations)...")
    robust_options = {
        "fraction": narrow_span,
        "iterations": 3,
        "robustness_method": "bisquare",
        "return_robustness_weights": True,
    }
    robust = Lowess(**robust_options).fit(x, y)

    print("Computing confidence and prediction intervals...")
    interval_options = {
        "fraction": narrow_span,
        "confidence_intervals": 0.95,
        "prediction_intervals": 0.95,
        "return_diagnostics": True,
    }
    intervals = Lowess(**interval_options).fit(x, y)

    print("Running cross-validation to find optimal fraction...")
    candidate_fractions = [0.05, 0.1, 0.2, 0.4]
    cv_model = Lowess(cv_fractions=candidate_fractions, cv_method="kfold", cv_k=5)
    cv = cv_model.fit(x, y)

    return basic, robust, intervals, cv
def plot_batch_results(x, y, y_true, smoothing_results):
    """Draw the main comparison figure and the robustness-weight figure.

    Args:
        x: Abscissae shared by all series.
        y: Noisy observations.
        y_true: Clean reference signal.
        smoothing_results: 3-tuple ``(basic, robust, intervals)`` of fit
            results; ``.y`` is read from the first two,
            ``.robustness_weights`` from the second, and
            ``.confidence_lower`` / ``.confidence_upper`` from the third.

    Returns:
        ``(fig1, fig2)``: the comparison figure and the weights figure.
    """
    basic, robust, intervals = smoothing_results

    # --- Figure 1: raw data, true signal, both fits and the confidence band.
    comparison_fig = plt.figure(figsize=(12, 8))
    plt.scatter(x, y, alpha=0.3, color="gray", s=10, label="Noisy Data (w/ Outliers)")
    plt.plot(x, y_true, "k--", alpha=0.8, label="True Signal")

    fitted_curves = (
        (basic, "r-", 2, "Basic LOWESS (Non-robust)"),
        (robust, "g-", 2.5, "Robust LOWESS (3 iters)"),
    )
    for fit, line_format, width, legend_text in fitted_curves:
        plt.plot(x, fit.y, line_format, linewidth=width, label=legend_text)

    band_style = {"color": "blue", "alpha": 0.2, "label": "95% Confidence Interval"}
    plt.fill_between(
        x, intervals.confidence_lower, intervals.confidence_upper, **band_style
    )

    plt.title("fastlowess: Robust Batch Smoothing with Intervals")
    plt.xlabel("X Axis")
    plt.ylabel("Y Axis")
    plt.legend()
    plt.grid(True, alpha=0.3)

    # --- Figure 2: per-point robustness weights, coloured by their own value.
    weights_fig = plt.figure(figsize=(12, 3))
    plt.scatter(
        x,
        robust.robustness_weights,
        c=robust.robustness_weights,
        cmap="viridis",
        s=10,
    )
    plt.title("Robustness Weights (Low weight = Outlier suspected)")
    plt.colorbar(label="Weight")
    plt.grid(True, alpha=0.3)
    # Pad slightly beyond [0, 1] so points at the extremes are not clipped.
    plt.ylim(-0.1, 1.1)

    return comparison_fig, weights_fig
def plot_boundary_policy_demo():
    """Compare the three boundary policies on noise-free linear data.

    Fits ``y = 2x + 1`` on 50 points in [0, 10] with ``fraction=0.6`` under
    the "extend", "reflect" and "zero" boundary policies and overlays the
    fits on the true line.

    Returns:
        The matplotlib figure containing the comparison.
    """
    print("\nDemonstrating boundary policy effects on linear data...")
    xl = np.linspace(0, 10, 50)
    yl = 2 * xl + 1

    # (policy name, line format, legend label) for each variant to compare.
    variants = (
        ("extend", "r-", "Extend (Default) - constant padding"),
        ("reflect", "g-", "Reflect - mirrored padding"),
        ("zero", "b-", "Zero - zero padding"),
    )
    fits = [
        Lowess(fraction=0.6, boundary_policy=policy).fit(xl, yl)
        for policy, _, _ in variants
    ]

    figure = plt.figure(figsize=(10, 5))
    plt.plot(xl, yl, "k--", label="True Linear Trend")
    for fit, (_, line_format, legend_text) in zip(fits, variants):
        plt.plot(xl, fit.y, line_format, label=legend_text)
    plt.title("Effect of Boundary Policies on Linear Data (q=0.6)")
    plt.legend()
    plt.grid(True, alpha=0.2)
    return figure
def save_example_plots(fig1, fig2, fig3):
    """Write the three batch figures as PNG files under ``PLOTS_DIR``.

    Args:
        fig1: Main comparison figure, saved as ``batch_main.png``.
        fig2: Robustness-weight figure, saved as ``batch_weights.png``.
        fig3: Boundary-policy figure, saved as ``batch_boundary.png``.
    """
    # Create the output directory on first use; a pre-existing one is fine.
    os.makedirs(PLOTS_DIR, exist_ok=True)
    print(f"\nSaving plots to {PLOTS_DIR}/...")

    outputs = (
        (fig1, "batch_main.png"),
        (fig2, "batch_weights.png"),
        (fig3, "batch_boundary.png"),
    )
    for figure, filename in outputs:
        figure.savefig(os.path.join(PLOTS_DIR, filename))

    print("Done!")
def main():
    """Drive the batch example: generate data, fit, report, plot and save."""
    print("=== fastlowess Batch Smoothing Example ===")

    x, y, y_true = generate_sample_data(1000)
    print(f"Generated {len(x)} data points with outliers.")

    basic, robust, intervals, cv = run_batch_models(x, y)
    print(f"Optimal fraction found: {cv.fraction_used}")

    stats = intervals.diagnostics
    print("\nFit Statistics (Intervals Model):")
    if stats is not None:
        # (printed label, attribute on the diagnostics object)
        reported = (("R²", "r_squared"), ("RMSE", "rmse"), ("MAE", "mae"))
        for label, attribute in reported:
            print(f" - {label}: {getattr(stats, attribute):.4f}")
    else:
        print(" - Diagnostics unavailable")

    main_fig, weights_fig = plot_batch_results(x, y, y_true, (basic, robust, intervals))
    boundary_fig = plot_boundary_policy_demo()
    save_example_plots(main_fig, weights_fig, boundary_fig)


# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Streaming Smoothing
Process large datasets in memory-efficient chunks with overlap merging.
#!/usr/bin/env python3
"""
fastlowess Streaming Smoothing Example
This example demonstrates streaming LOWESS smoothing for large datasets:
- Basic chunked processing
- Handling datasets that don't fit in memory
- Parallel execution for extreme speed
"""
import time
import os
import matplotlib.pyplot as plt
import numpy as np
from fastlowess import Lowess, StreamingLowess
# Anchor the output directory to the folder containing this script rather than
# the current working directory, so plots always land in "<script dir>/plots".
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PLOTS_DIR = os.path.join(SCRIPT_DIR, "plots")
def main():
    """Smooth one large noisy cosine in batch and streaming mode and compare.

    Generates 100,000 points, runs both smoothers, prints the difference
    between their outputs, saves a comparison plot, and reports both timings.
    """
    print("=== fastlowess Streaming Mode Example ===")

    n_points = 100_000
    print(f"Generating large dataset: {n_points} points...")
    x = np.linspace(0, 100, n_points)
    clean_signal = np.cos(x * 0.1)
    y = clean_signal + np.random.normal(0, 0.5, n_points)

    res_batch, batch_time = run_batch_smoothing(x, y)
    full_stream_y, stream_time = run_streaming_smoothing(x, y, n_points)

    compare_results(res_batch.y, full_stream_y)
    save_streaming_plot(x, y, res_batch.y, full_stream_y, n_points)

    for label, seconds in (("Batch", batch_time), ("Streaming", stream_time)):
        print(f"{label} took: {seconds:.4f} seconds")
def run_batch_smoothing(x, y):
    """Time a single in-memory LOWESS fit used as the reference result.

    Args:
        x: Abscissae of the observations.
        y: Observed values to smooth.

    Returns:
        A tuple ``(result, elapsed)``: the object returned by
        ``Lowess(fraction=0.01).fit(x, y)`` and the elapsed time in seconds.
    """
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments as time.time() can.
    start = time.perf_counter()
    print("Running Batch LOWESS (Parallel)...")
    res_batch = Lowess(fraction=0.01).fit(x, y)
    return res_batch, time.perf_counter() - start
def _append_smoothed(collected, result):
    """Append ``result.y`` to *collected* when the result carries any output."""
    if hasattr(result, "y") and len(result.y) > 0:
        collected.append(result.y)


def run_streaming_smoothing(x, y, n_points):
    """Smooth the data chunk by chunk and time the whole pass.

    Args:
        x: Abscissae of the observations.
        y: Observed values to smooth.
        n_points: Number of leading samples of ``x``/``y`` to feed through
            the streaming model.

    Returns:
        A tuple ``(full_stream_y, elapsed)``: the concatenated smoothed values
        emitted by ``process_chunk`` and ``finalize`` (an empty array when
        nothing was emitted) and the elapsed time in seconds.
    """
    # perf_counter() is monotonic, unlike time.time(), so the reported
    # duration is unaffected by system clock adjustments.
    start = time.perf_counter()
    print("Running Streaming LOWESS (Chunked)...")

    # NOTE(review): slices of 10_000 samples are fed to a model configured
    # with chunk_size=2000 -- confirm this mismatch is intentional.
    chunk_size = 10_000
    model = StreamingLowess(fraction=0.01, chunk_size=2000, overlap=200, parallel=True)

    stream_y = []
    for i in range(0, n_points, chunk_size):
        cx = x[i : i + chunk_size]
        cy = y[i : i + chunk_size]
        _append_smoothed(stream_y, model.process_chunk(cx, cy))

    # finalize() may emit output of its own, so collect that as well.
    _append_smoothed(stream_y, model.finalize())

    # np.concatenate raises ValueError on an empty list (e.g. n_points == 0).
    full_stream_y = np.concatenate(stream_y) if stream_y else np.empty(0)
    print(f"Stream output length: {len(full_stream_y)}")
    return full_stream_y, time.perf_counter() - start
def compare_results(batch_y, full_stream_y):
    """Print the mean squared difference between batch and streaming output.

    When the two arrays differ in length, a warning is printed and only the
    leading samples common to both are compared.

    Args:
        batch_y: Smoothed values from the batch fit.
        full_stream_y: Concatenated smoothed values from the streaming run.
    """
    n_batch = len(batch_y)
    n_stream = len(full_stream_y)

    if n_batch != n_stream:
        print(f"Warning: Length mismatch. Batch={n_batch}, Stream={n_stream}")
        shared = min(n_batch, n_stream)
        residual = batch_y[:shared] - full_stream_y[:shared]
        mse = np.mean(residual**2)
        print(f"Mean Squared Difference (First {shared} points): {mse:.2e}")
        return

    residual = batch_y - full_stream_y
    mse = np.mean(residual**2)
    print(f"Mean Squared Difference (Batch vs Stream): {mse:.2e}")
def save_streaming_plot(x, y, batch_y, full_stream_y, n_points):
    """Plot batch vs streaming output on x in [40, 60] and save it as a PNG.

    Args:
        x: Abscissae of the observations.
        y: Raw observations; roughly 1% of them are drawn as a scatter.
        batch_y: Smoothed values from the batch fit.
        full_stream_y: Smoothed values from the streaming run; it is truncated
            to the length it shares with ``x`` before plotting.
        n_points: Size of the random display mask and the count shown in the
            plot title.
    """
    zoom_range = (40, 60)
    zoom_start, zoom_end = zoom_range
    in_zoom = (x >= zoom_start) & (x <= zoom_end)
    shared_len = min(len(x), len(full_stream_y))

    plt.figure(figsize=(12, 8))

    # Thin the raw scatter to about 1% of the points to keep the plot legible.
    sampled = np.random.choice([False, True], size=n_points, p=[0.99, 0.01])
    shown = sampled & in_zoom
    plt.scatter(
        x[shown],
        y[shown],
        alpha=0.3,
        color="gray",
        s=10,
        label="Raw Data (sampled)",
    )

    plt.plot(x[in_zoom], batch_y[in_zoom], "r-", linewidth=3, label="Batch Result")

    # The streaming output may be shorter than x, so trim both before masking.
    stream_in_zoom = in_zoom[:shared_len]
    plt.plot(
        x[:shared_len][stream_in_zoom],
        full_stream_y[:shared_len][stream_in_zoom],
        "b--",
        linewidth=2,
        label="Streaming Result",
    )

    plt.title(f"fastlowess: Streaming Smoothing on {n_points} points")
    plt.xlabel("X Axis")
    plt.ylabel("Y Axis")
    plt.legend()
    plt.grid(True, alpha=0.2)
    plt.xlim(zoom_range)
    plt.ylim(-2.5, 2.5)
    plt.tight_layout()

    os.makedirs(PLOTS_DIR, exist_ok=True)
    target = os.path.join(PLOTS_DIR, "streaming_smoothing.png")
    plt.savefig(target)
    print(f"\nPlot saved to {PLOTS_DIR}/streaming_smoothing.png")
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Download streaming_smoothing.py
Online Smoothing
Real-time smoothing with sliding window for streaming data applications.
#!/usr/bin/env python3
"""
fastlowess Online Smoothing Example
This example demonstrates online LOWESS smoothing for real-time data:
- Basic incremental processing with streaming data
- Real-time sensor data smoothing
- Different update modes (Full vs Incremental)
- Memory-bounded processing with sliding window
The OnlineLowess class is designed for:
- Real-time data streams
- Sensors and monitoring
- Low-latency applications
"""
import os
import matplotlib.pyplot as plt
import numpy as np
from fastlowess import OnlineLowess
# Anchor the output directory to the folder containing this script rather than
# the current working directory, so plots always land in "<script dir>/plots".
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PLOTS_DIR = os.path.join(SCRIPT_DIR, "plots")
def _simulate_sensor_stream(n_points):
    """Return ``(x, y, y_true)`` for a synthetic sensor trace with two glitches.

    The clean signal is an offset of 20 plus two sinusoids of different
    frequency; Gaussian noise (sigma 1.2) is added, then two short runs of
    samples are shifted to imitate sensor glitches.
    """
    x = np.arange(n_points, dtype=float)
    y_true = 20.0 + 5.0 * np.sin(x * 0.1) + 2.0 * np.sin(x * 0.02)
    y = y_true + np.random.normal(0, 1.2, n_points)
    # Sudden spikes (sensor glitches): one upward burst, one downward burst.
    y[200:205] += 15.0
    y[600:610] -= 10.0
    return x, y, y_true


def _smooth_online(x, y, update_mode):
    """Run the whole stream through a fresh OnlineLowess in *update_mode*."""
    print(f"Processing with '{update_mode}' update mode...")
    model = OnlineLowess(
        fraction=0.3, window_capacity=50, iterations=3, update_mode=update_mode
    )
    return model.add_points(x, y)


def _plot_overview(x, y, y_true, res_full, res_inc, zoom):
    """Plot the full stream with both online fits and shade the zoom span."""
    figure = plt.figure(figsize=(12, 7))
    # Original data
    plt.scatter(x, y, s=5, alpha=0.3, color="gray", label="Raw Sensor Stream")
    plt.plot(x, y_true, "k--", alpha=0.6, label="True Signal")
    # Online results
    plt.plot(x, res_full.y, "r-", linewidth=2, label="Online LOWESS (Full)")
    plt.plot(
        x,
        res_inc.y,
        "b-",
        linewidth=1.5,
        alpha=0.7,
        label="Online LOWESS (Incremental)",
    )
    # Mark the region that the detailed figure magnifies.
    plt.axvspan(zoom[0], zoom[1], color="yellow", alpha=0.1, label="Zoom Area")
    plt.title("fastlowess: Real-time Online Smoothing (Sliding Window)")
    plt.xlabel("Time / Sequence Index")
    plt.ylabel("Sensor Value")
    plt.legend()
    plt.grid(True, alpha=0.3)
    return figure


def _plot_zoom(x, y, y_true, res_full, res_inc, zoom):
    """Plot only the samples whose x lies inside the *zoom* interval."""
    figure = plt.figure(figsize=(12, 4))
    mask = (x >= zoom[0]) & (x <= zoom[1])
    plt.scatter(x[mask], y[mask], s=20, alpha=0.4, color="gray")
    plt.plot(x[mask], y_true[mask], "k--")
    plt.plot(x[mask], res_full.y[mask], "r-", linewidth=3, label="Full Update")
    plt.plot(x[mask], res_inc.y[mask], "b-", linewidth=2, label="Incremental")
    plt.title(f"Detailed View (Time {zoom[0]}-{zoom[1]})")
    plt.legend()
    plt.grid(True, alpha=0.2)
    plt.tight_layout()
    return figure


def main():
    """Run the online smoothing example and save the generated plots.

    Simulates 1000 sensor readings, smooths them with OnlineLowess in both
    "full" and "incremental" update modes, and writes an overview plot and a
    zoomed plot to ``PLOTS_DIR``.
    """
    print("=== fastlowess Online Smoothing Example ===")

    # 1. Simulate a real-time signal
    n_points = 1000
    x, y, y_true = _simulate_sensor_stream(n_points)
    print(f"Simulating {n_points} real-time data points...")

    # 2. Sequential online processing in both update modes
    res_full = _smooth_online(x, y, "full")
    res_inc = _smooth_online(x, y, "incremental")

    # 3. Plotting
    os.makedirs(PLOTS_DIR, exist_ok=True)
    zoom = (400, 500)
    fig1 = _plot_overview(x, y, y_true, res_full, res_inc, zoom)
    fig2 = _plot_zoom(x, y, y_true, res_full, res_inc, zoom)

    print(f"\nSaving plots to {PLOTS_DIR}/...")
    fig1.savefig(os.path.join(PLOTS_DIR, "online_main.png"))
    fig2.savefig(os.path.join(PLOTS_DIR, "online_zoom.png"))
    print("Done!")


# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Running the Examples
# Install dependencies
pip install fastlowess matplotlib numpy
# Run examples
cd examples/python
python batch_smoothing.py
python streaming_smoothing.py
python online_smoothing.py
Output
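The batch smoothing example generates visualization plots in examples/python/plots/:
- batch_main.png - Main smoothing comparison
- batch_weights.png - Robustness weights visualization
- batch_boundary.png - Boundary policy comparison

The streaming and online examples save streaming_smoothing.png, online_main.png, and online_zoom.png to the same directory.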