Skip to content

Python Examples

Complete Python examples demonstrating fastlowess capabilities with NumPy and matplotlib.

Batch Smoothing

Process complete datasets with confidence intervals, diagnostics, and cross-validation.

#!/usr/bin/env python3
"""
fastlowess Batch Smoothing Example

This example demonstrates batch LOWESS smoothing features:
- Basic smoothing with different parameters
- Robustness iterations for outlier handling
- Confidence and prediction intervals
- Diagnostics and cross-validation

The Lowess class is the primary interface for
processing complete datasets that fit in memory.
"""

import os

import matplotlib.pyplot as plt
import numpy as np

from fastlowess import Lowess

# Resolve output paths relative to this script's own location (not the current
# working directory), so plots land in a "plots" folder next to the script.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PLOTS_DIR = os.path.join(SCRIPT_DIR, "plots")


def generate_sample_data(n_points=1000, seed=42, outlier_fraction=0.1):
    """
    Generate complex sample data with a trend, seasonality, and outliers.

    Args:
        n_points: Number of evenly spaced samples on the interval [0, 50].
        seed: Seed for the private random generator. The default (42)
            reproduces the data this example has always generated.
        outlier_fraction: Share of the points (0 to 1) that receive a large
            positive or negative offset.

    Returns:
        Tuple ``(x, y, y_true)``: the sample positions, the noisy signal with
        outliers, and the noise-free signal.
    """
    # A private RandomState seeded with `seed` produces the same draws as
    # np.random.seed(seed) followed by the global np.random functions, but it
    # leaves NumPy's global random state untouched for the caller.
    rng = np.random.RandomState(seed)
    x = np.linspace(0, 50, n_points)

    # Trend + Seasonality
    y_true = 0.5 * x + 5 * np.sin(x * 0.5)

    # Gaussian noise
    y = y_true + rng.normal(0, 1.5, n_points)

    # Shift a random subset of points by 10-20 units in a random direction.
    # The draw order (indices, magnitudes, signs) is part of reproducibility.
    n_outliers = int(n_points * outlier_fraction)
    outlier_indices = rng.choice(n_points, size=n_outliers, replace=False)
    magnitudes = rng.uniform(10, 20, n_outliers)
    signs = rng.choice([-1, 1], n_outliers)
    y[outlier_indices] += magnitudes * signs

    return x, y, y_true


def run_batch_models(x, y):
    """Fit four batch LOWESS variants on the same data.

    The fits run in this order: a non-robust fit (0 iterations), a robust
    bisquare fit (3 iterations) that also returns robustness weights, a fit
    with 95% confidence/prediction intervals plus diagnostics, and a 5-fold
    cross-validated fit over several candidate fractions.

    Returns:
        Tuple ``(basic, robust, intervals, cv)`` of the four fit results.
    """
    print("Running basic smoothing...")
    basic_model = Lowess(iterations=0, fraction=0.05)
    basic_fit = basic_model.fit(x, y)

    print("Running robust smoothing (3 iterations)...")
    robust_options = {
        "fraction": 0.05,
        "iterations": 3,
        "robustness_method": "bisquare",
        "return_robustness_weights": True,
    }
    robust_fit = Lowess(**robust_options).fit(x, y)

    print("Computing confidence and prediction intervals...")
    interval_options = {
        "fraction": 0.05,
        "confidence_intervals": 0.95,
        "prediction_intervals": 0.95,
        "return_diagnostics": True,
    }
    interval_fit = Lowess(**interval_options).fit(x, y)

    print("Running cross-validation to find optimal fraction...")
    cv_model = Lowess(
        cv_fractions=[0.05, 0.1, 0.2, 0.4],
        cv_method="kfold",
        cv_k=5,
    )
    cv_fit = cv_model.fit(x, y)

    return basic_fit, robust_fit, interval_fit, cv_fit


def plot_batch_results(x, y, y_true, smoothing_results):
    """Draw the smoothing comparison figure and the robustness-weight figure.

    Args:
        x: Sample positions.
        y: Noisy observations.
        y_true: Noise-free reference signal.
        smoothing_results: 3-tuple ``(basic, robust, intervals)`` of fit
            results; ``.y``, ``.robustness_weights`` and the
            ``.confidence_lower`` / ``.confidence_upper`` bounds are read.

    Returns:
        Tuple ``(fig1, fig2)``: the main comparison figure and the
        robustness-weight figure.
    """
    basic, robust, intervals = smoothing_results

    # Figure 1: raw data, true signal, both fits and the confidence band
    main_fig = plt.figure(figsize=(12, 8))
    main_ax = main_fig.gca()
    main_ax.scatter(
        x, y, alpha=0.3, color="gray", s=10, label="Noisy Data (w/ Outliers)"
    )
    main_ax.plot(x, y_true, "k--", alpha=0.8, label="True Signal")
    main_ax.plot(x, basic.y, "r-", linewidth=2, label="Basic LOWESS (Non-robust)")
    main_ax.plot(x, robust.y, "g-", linewidth=2.5, label="Robust LOWESS (3 iters)")
    main_ax.fill_between(
        x,
        intervals.confidence_lower,
        intervals.confidence_upper,
        color="blue",
        alpha=0.2,
        label="95% Confidence Interval",
    )
    main_ax.set_title("fastlowess: Robust Batch Smoothing with Intervals")
    main_ax.set_xlabel("X Axis")
    main_ax.set_ylabel("Y Axis")
    main_ax.legend()
    main_ax.grid(True, alpha=0.3)

    # Figure 2: per-point robustness weights, coloured by their own value
    weights = robust.robustness_weights
    weights_fig = plt.figure(figsize=(12, 3))
    weights_ax = weights_fig.gca()
    points = weights_ax.scatter(x, weights, c=weights, cmap="viridis", s=10)
    weights_ax.set_title("Robustness Weights (Low weight = Outlier suspected)")
    weights_fig.colorbar(points, label="Weight")
    weights_ax.grid(True, alpha=0.3)
    weights_ax.set_ylim(-0.1, 1.1)

    return main_fig, weights_fig


def plot_boundary_policy_demo():
    """Compare the "extend", "reflect" and "zero" boundary policies.

    Fits noise-free linear data (y = 2x + 1, 50 points on [0, 10]) once per
    policy with fraction 0.6 and overlays the fits on the true line.

    Returns:
        The comparison figure.
    """
    print("\nDemonstrating boundary policy effects on linear data...")
    line_x = np.linspace(0, 10, 50)
    line_y = 2 * line_x + 1

    # (policy, line style, legend label), in fit and draw order
    variants = (
        ("extend", "r-", "Extend (Default) - constant padding"),
        ("reflect", "g-", "Reflect - mirrored padding"),
        ("zero", "b-", "Zero - zero padding"),
    )
    fits = [
        Lowess(fraction=0.6, boundary_policy=policy).fit(line_x, line_y)
        for policy, _, _ in variants
    ]

    figure = plt.figure(figsize=(10, 5))
    plt.plot(line_x, line_y, "k--", label="True Linear Trend")
    for fit, (_, line_style, label) in zip(fits, variants):
        plt.plot(line_x, fit.y, line_style, label=label)
    plt.title("Effect of Boundary Policies on Linear Data (q=0.6)")
    plt.legend()
    plt.grid(True, alpha=0.2)
    return figure


def save_example_plots(fig1, fig2, fig3):
    """Write the three batch example figures as PNG files into PLOTS_DIR.

    The directory is created if it does not exist yet.
    """
    os.makedirs(PLOTS_DIR, exist_ok=True)
    print(f"\nSaving plots to {PLOTS_DIR}/...")

    file_names = ("batch_main.png", "batch_weights.png", "batch_boundary.png")
    for figure, file_name in zip((fig1, fig2, fig3), file_names):
        figure.savefig(os.path.join(PLOTS_DIR, file_name))

    print("Done!")


def main():
    """Generate data, fit the batch models, report statistics and save plots."""
    print("=== fastlowess Batch Smoothing Example ===")
    x, y, y_true = generate_sample_data(1000)
    print(f"Generated {len(x)} data points with outliers.")

    basic, robust, intervals, cross_validated = run_batch_models(x, y)
    print(f"Optimal fraction found: {cross_validated.fraction_used}")

    # Diagnostics were requested on the intervals model only
    diagnostics = intervals.diagnostics
    print("\nFit Statistics (Intervals Model):")
    if diagnostics is not None:
        print(f" - R²:   {diagnostics.r_squared:.4f}")
        print(f" - RMSE: {diagnostics.rmse:.4f}")
        print(f" - MAE:  {diagnostics.mae:.4f}")
    else:
        print(" - Diagnostics unavailable")

    plot_inputs = (basic, robust, intervals)
    main_fig, weights_fig = plot_batch_results(x, y, y_true, plot_inputs)
    boundary_fig = plot_boundary_policy_demo()
    save_example_plots(main_fig, weights_fig, boundary_fig)


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Download batch_smoothing.py


Streaming Smoothing

Process large datasets in memory-efficient chunks with overlap merging.

#!/usr/bin/env python3
"""
fastlowess Streaming Smoothing Example

This example demonstrates streaming LOWESS smoothing for large datasets:
- Basic chunked processing
- Handling datasets that don't fit in memory
- Parallel execution for extreme speed
"""

import time
import os

import matplotlib.pyplot as plt
import numpy as np

from fastlowess import Lowess, StreamingLowess

# Resolve output paths relative to this script's own location (not the current
# working directory), so plots land in a "plots" folder next to the script.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PLOTS_DIR = os.path.join(SCRIPT_DIR, "plots")


def main():
    """Smooth one large noisy cosine in batch and streaming mode and compare.

    Prints the numerical difference between both outputs, saves a comparison
    plot, and finally reports the time each mode took.
    """
    print("=== fastlowess Streaming Mode Example ===")
    n_points = 100_000
    print(f"Generating large dataset: {n_points} points...")
    x = np.linspace(0, 100, n_points)
    noise = np.random.normal(0, 0.5, n_points)
    y = np.cos(x * 0.1) + noise

    batch_result, batch_seconds = run_batch_smoothing(x, y)
    stream_y, stream_seconds = run_streaming_smoothing(x, y, n_points)
    batch_y = batch_result.y

    compare_results(batch_y, stream_y)
    save_streaming_plot(x, y, batch_y, stream_y, n_points)

    print(f"Batch took: {batch_seconds:.4f} seconds")
    print(f"Streaming took: {stream_seconds:.4f} seconds")


def run_batch_smoothing(x, y):
    """Run the baseline batch smoothing pass and return the result and timing.

    Args:
        x: Sample positions.
        y: Observations to smooth.

    Returns:
        Tuple ``(result, elapsed_seconds)`` where ``result`` is the fit
        returned by ``Lowess(fraction=0.01).fit(x, y)``.
    """
    # perf_counter() is monotonic and high-resolution, so the measured
    # duration cannot be distorted by system clock adjustments the way a
    # difference of time.time() values can.
    start = time.perf_counter()
    print("Running Batch LOWESS (Parallel)...")
    res_batch = Lowess(fraction=0.01).fit(x, y)
    return res_batch, time.perf_counter() - start


def run_streaming_smoothing(x, y, n_points):
    """Run the streaming smoother in chunks and return output and timing.

    Args:
        x: Sample positions.
        y: Observations to smooth.
        n_points: Number of leading points to feed; slices of 10_000 points
            are taken from ``x`` and ``y`` up to this index.

    Returns:
        Tuple ``(smoothed, elapsed_seconds)``. ``smoothed`` concatenates every
        non-empty ``.y`` returned by ``process_chunk`` and ``finalize``; it is
        an empty array when the model produced no output at all.
    """
    # perf_counter() is monotonic, unlike time.time(), so the elapsed value
    # is not affected by system clock adjustments.
    start = time.perf_counter()
    print("Running Streaming LOWESS (Chunked)...")

    # NOTE(review): slices of 10_000 points are fed to a model configured with
    # chunk_size=2000 - confirm the two sizes are meant to differ.
    chunk_size = 10_000
    model = StreamingLowess(fraction=0.01, chunk_size=2000, overlap=200, parallel=True)
    stream_y = []

    def collect(result):
        # Keep only results that actually carry smoothed values.
        smoothed = getattr(result, "y", None)
        if smoothed is not None and len(smoothed) > 0:
            stream_y.append(smoothed)

    for i in range(0, n_points, chunk_size):
        cx = x[i : i + chunk_size]
        cy = y[i : i + chunk_size]
        collect(model.process_chunk(cx, cy))

    # finalize() may return output that was still pending after the last chunk
    collect(model.finalize())

    # np.concatenate raises ValueError on an empty list, so fall back to an
    # empty array when nothing was emitted.
    full_stream_y = np.concatenate(stream_y) if stream_y else np.empty(0)
    print(f"Stream output length: {len(full_stream_y)}")
    return full_stream_y, time.perf_counter() - start


def compare_results(batch_y, full_stream_y):
    """Print the mean squared difference between batch and streaming output.

    When the two arrays differ in length, a warning is printed and only the
    leading points both arrays have in common are compared.
    """
    n_batch = len(batch_y)
    n_stream = len(full_stream_y)

    if n_batch != n_stream:
        print(
            f"Warning: Length mismatch. Batch={n_batch}, Stream={n_stream}"
        )
        # Compare only the shared prefix of both outputs
        shared = min(n_batch, n_stream)
        residual = batch_y[:shared] - full_stream_y[:shared]
        mse = np.mean(residual**2)
        print(f"Mean Squared Difference (First {shared} points): {mse:.2e}")
        return

    residual = batch_y - full_stream_y
    mse = np.mean(residual**2)
    print(f"Mean Squared Difference (Batch vs Stream): {mse:.2e}")


def save_streaming_plot(x, y, batch_y, full_stream_y, n_points):
    """Save a plot of batch vs. streaming output zoomed to x in [40, 60].

    Raw data is thinned to a random ~1% sample before plotting. The streaming
    curve is limited to the points that exist in both ``x`` and
    ``full_stream_y``. The image is written to PLOTS_DIR as
    ``streaming_smoothing.png``.
    """
    zoom_range = (40, 60)
    lower, upper = zoom_range
    in_zoom = (x >= lower) & (x <= upper)
    usable = min(len(x), len(full_stream_y))

    figure = plt.figure(figsize=(12, 8))
    axes = figure.gca()

    # Randomly keep about 1% of the raw points to keep the scatter readable
    keep = np.random.choice([False, True], size=n_points, p=[0.99, 0.01])
    shown = keep & in_zoom
    axes.scatter(
        x[shown],
        y[shown],
        alpha=0.3,
        color="gray",
        s=10,
        label="Raw Data (sampled)",
    )
    axes.plot(x[in_zoom], batch_y[in_zoom], "r-", linewidth=3, label="Batch Result")

    # Truncate to the common length before applying the zoom mask
    stream_in_zoom = in_zoom[:usable]
    axes.plot(
        x[:usable][stream_in_zoom],
        full_stream_y[:usable][stream_in_zoom],
        "b--",
        linewidth=2,
        label="Streaming Result",
    )

    axes.set_title(f"fastlowess: Streaming Smoothing on {n_points} points")
    axes.set_xlabel("X Axis")
    axes.set_ylabel("Y Axis")
    axes.legend()
    axes.grid(True, alpha=0.2)
    axes.set_xlim(zoom_range)
    axes.set_ylim(-2.5, 2.5)
    figure.tight_layout()

    os.makedirs(PLOTS_DIR, exist_ok=True)
    figure.savefig(os.path.join(PLOTS_DIR, "streaming_smoothing.png"))
    print(f"\nPlot saved to {PLOTS_DIR}/streaming_smoothing.png")


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Download streaming_smoothing.py


Online Smoothing

Real-time smoothing with a sliding window for streaming data applications.

#!/usr/bin/env python3
"""
fastlowess Online Smoothing Example

This example demonstrates online LOWESS smoothing for real-time data:
- Basic incremental processing with streaming data
- Real-time sensor data smoothing
- Different update modes (Full vs Incremental)
- Memory-bounded processing with sliding window

The OnlineLowess class is designed for:
- Real-time data streams
- Sensors and monitoring
- Low-latency applications
"""

import os

import matplotlib.pyplot as plt
import numpy as np

from fastlowess import OnlineLowess

# Resolve output paths relative to this script's own location (not the current
# working directory), so plots land in a "plots" folder next to the script.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PLOTS_DIR = os.path.join(SCRIPT_DIR, "plots")


def _simulate_sensor_stream(n_points):
    """Return (x, y, y_true) for a noisy two-sine signal with sensor glitches."""
    x = np.arange(n_points, dtype=float)
    # Sum of a faster and a slower sine wave around a baseline of 20
    y_true = 20.0 + 5.0 * np.sin(x * 0.1) + 2.0 * np.sin(x * 0.02)
    y = y_true + np.random.normal(0, 1.2, n_points)

    # Add some sudden spikes (sensor glitches)
    y[200:205] += 15.0
    y[600:610] -= 10.0
    return x, y, y_true


def _run_online_model(x, y, update_mode):
    """Smooth the whole stream with an OnlineLowess in the given update mode."""
    print(f"Processing with '{update_mode}' update mode...")
    model = OnlineLowess(
        fraction=0.3, window_capacity=50, iterations=3, update_mode=update_mode
    )
    return model.add_points(x, y)


def _plot_overview(x, y, y_true, res_full, res_inc):
    """Plot raw data, true signal and both online fits over the full stream."""
    fig = plt.figure(figsize=(12, 7))

    # Original Data
    plt.scatter(x, y, s=5, alpha=0.3, color="gray", label="Raw Sensor Stream")
    plt.plot(x, y_true, "k--", alpha=0.6, label="True Signal")

    # Online Results
    plt.plot(x, res_full.y, "r-", linewidth=2, label="Online LOWESS (Full)")
    plt.plot(
        x,
        res_inc.y,
        "b-",
        linewidth=1.5,
        alpha=0.7,
        label="Online LOWESS (Incremental)",
    )

    # Highlight the range that the zoom figure shows in detail
    plt.axvspan(400, 500, color="yellow", alpha=0.1, label="Zoom Area")

    plt.title("fastlowess: Real-time Online Smoothing (Sliding Window)")
    plt.xlabel("Time / Sequence Index")
    plt.ylabel("Sensor Value")
    plt.legend()
    plt.grid(True, alpha=0.3)
    return fig


def _plot_zoom(x, y, y_true, res_full, res_inc):
    """Plot the same series restricted to the 400-500 range."""
    fig = plt.figure(figsize=(12, 4))
    mask = (x >= 400) & (x <= 500)
    plt.scatter(x[mask], y[mask], s=20, alpha=0.4, color="gray")
    plt.plot(x[mask], y_true[mask], "k--")
    plt.plot(x[mask], res_full.y[mask], "r-", linewidth=3, label="Full Update")
    plt.plot(x[mask], res_inc.y[mask], "b-", linewidth=2, label="Incremental")
    plt.title("Detailed View (Time 400-500)")
    plt.legend()
    plt.grid(True, alpha=0.2)

    # Applies to the current figure only, i.e. this zoom figure
    plt.tight_layout()
    return fig


def _save_figures(fig_main, fig_zoom):
    """Write both figures as PNG files into PLOTS_DIR, creating it if needed."""
    os.makedirs(PLOTS_DIR, exist_ok=True)
    print(f"\nSaving plots to {PLOTS_DIR}/...")
    fig_main.savefig(os.path.join(PLOTS_DIR, "online_main.png"))
    fig_zoom.savefig(os.path.join(PLOTS_DIR, "online_zoom.png"))
    print("Done!")


def main():
    """Run the online smoothing example and save the generated plots."""
    print("=== fastlowess Online Smoothing Example ===")

    # 1. Simulate a real-time signal
    n_points = 1000
    x, y, y_true = _simulate_sensor_stream(n_points)
    print(f"Simulating {n_points} real-time data points...")

    # 2. Sequential online processing with both update modes; each mode gets
    #    its own model and sees the same data.
    res_full = _run_online_model(x, y, "full")
    res_inc = _run_online_model(x, y, "incremental")

    # 3. Plot the full stream and a zoomed detail view, then save both
    fig_main = _plot_overview(x, y, y_true, res_full, res_inc)
    fig_zoom = _plot_zoom(x, y, y_true, res_full, res_inc)
    _save_figures(fig_main, fig_zoom)


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Download online_smoothing.py


Running the Examples

# Install dependencies
pip install fastlowess matplotlib numpy

# Run examples
cd examples/python
python batch_smoothing.py
python streaming_smoothing.py
python online_smoothing.py

Output

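Each example writes its plots to examples/python/plots/ (a plots folder next to the scripts).

The batch smoothing example generates:

  • batch_main.png - Main smoothing comparison
  • batch_weights.png - Robustness weights visualization
  • batch_boundary.png - Boundary policy comparison

The streaming smoothing example generates:

  • streaming_smoothing.png - Zoomed comparison of batch and streaming results

The online smoothing example generates:

  • online_main.png - Full vs. incremental online smoothing over the whole stream
  • online_zoom.png - Detailed view of the 400-500 range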