Use Case: Regression (Numeric Prediction)

Predict numeric values like prices, quantities, durations, or scores.

When to Use This

Price prediction (real estate, products, services)
Demand forecasting
Customer lifetime value (LTV) prediction
Risk scoring (insurance, credit)
Duration/time estimation
Any continuous numeric target

Complete Implementation

from featrixsphere.api import FeatrixSphere

# Shared client; the remaining steps all go through this object.
featrix = FeatrixSphere()

# 1. Create Foundational Model
#    The identifier, listing-date and address columns are handed to
#    ignore_columns rather than used as model inputs.
columns_to_skip = ["property_id", "listing_date", "address"]
fm = featrix.create_foundational_model(
    name="price_prediction_model",
    data_file="properties.csv",
    ignore_columns=columns_to_skip,
)
fm.wait_for_training()

# 2. Create regressor for price
#    The regressor is created from the foundational model and targets the
#    "price" column.
price_target = "price"
predictor = fm.create_regressor(
    target_column=price_target,
    name="property_price_predictor",
)
predictor.wait_for_training()

# 3. Make predictions
#    One listing, written as keyword arguments to dict(); the resulting
#    mapping has the same keys, values and order as the literal form.
property_features = dict(
    square_feet=2500,
    bedrooms=4,
    bathrooms=2.5,
    year_built=2010,
    lot_size=0.25,
    neighborhood="downtown",
    garage_spaces=2,
    pool=True,
)

# The numeric estimate is read from the result's `prediction` attribute.
result = predictor.predict(property_features)
print(f"Predicted price: ${result.prediction:,.2f}")

# 4. Batch predictions
import pandas as pd

properties_df = pd.read_csv("new_listings.csv")
results = predictor.batch_predict(properties_df, show_progress=True)

# Pair each prediction with its listing ID by iterating the ID column
# directly. Row-wise access (properties_df.iloc[i]["property_id"]) builds a
# Series per row and coerces the whole row to one dtype, which can turn
# integer IDs into floats when the other columns are numeric.
# A distinct loop name also keeps the single-listing `result` from being
# overwritten.
# NOTE(review): assumes batch_predict returns one result per input row, in
# input order -- confirm against the client documentation.
for property_id, listing_result in zip(properties_df["property_id"], results):
    print(f"Property {property_id}: ${listing_result.prediction:,.2f}")

# 5. Deploy to production
#    Creates a named API endpoint from the trained regressor.
endpoint_name = "price_api_v1"
endpoint = predictor.create_api_endpoint(
    name=endpoint_name,
    description="Property price prediction endpoint",
)

# 6. Publish
#    Publishes the foundational model (not the regressor) under the given org.
fm.publish(
    org_id="my_org",
    name="price_model_v1",
)

Use Case: Customer Lifetime Value

# Train on historical customer data
# (customer_id and signup_date are passed to ignore_columns).
bookkeeping_columns = ["customer_id", "signup_date"]
fm = featrix.create_foundational_model(
    name="ltv_model",
    data_file="customers_with_ltv.csv",
    ignore_columns=bookkeeping_columns,
)
fm.wait_for_training()

# Regressor targeting the lifetime_value column.
predictor = fm.create_regressor(target_column="lifetime_value", name="ltv_predictor")
predictor.wait_for_training()

# Predict LTV for new customers
new_customer = dict(
    acquisition_channel="organic_search",
    first_purchase_amount=150.0,
    first_purchase_category="electronics",
    device_type="mobile",
    email_subscribed=True,
)

result = predictor.predict(new_customer)
print(f"Predicted LTV: ${result.prediction:,.2f}")

# Use LTV for customer acquisition decisions
def should_acquire(customer, max_acquisition_cost, *, min_ltv_multiple=3):
    """Return whether a prospect's predicted LTV justifies the acquisition cost.

    Args:
        customer: Feature dict for the prospect, passed to
            ``predictor.predict()``.
        max_acquisition_cost: The most you are willing to spend to acquire
            this customer.
        min_ltv_multiple: How many times the acquisition cost the predicted
            LTV must exceed. Defaults to 3, i.e. an LTV-to-cost ratio above
            3:1.

    Returns:
        True when the predicted LTV is strictly greater than
        ``max_acquisition_cost * min_ltv_multiple``; otherwise False.
    """
    # Relies on the module-level `predictor` trained on lifetime_value.
    ltv = predictor.predict(customer).prediction
    return ltv > max_acquisition_cost * min_ltv_multiple

Use Case: Demand Forecasting

# Foundational model over the sales history (no columns are ignored here).
fm = featrix.create_foundational_model(name="demand_model", data_file="historical_sales.csv")
fm.wait_for_training()

# Regressor targeting the units_sold column.
predictor = fm.create_regressor(target_column="units_sold", name="demand_predictor")
predictor.wait_for_training()

# Predict demand for upcoming period
forecast_inputs = dict(
    product_category="electronics",
    price_point=299.99,
    is_promotion=True,
    season="holiday",
    day_of_week="saturday",
    competitor_price=329.99,
)

# Printed with no decimals (":.0f").
result = predictor.predict(forecast_inputs)
print(f"Predicted units: {result.prediction:.0f}")

# Forecast for multiple scenarios
# Each (price, promotion flag) pair becomes an override dict that is layered
# on top of the baseline forecast_inputs.
price_promo_pairs = [
    (249.99, True),
    (299.99, True),
    (299.99, False),
    (349.99, False),
]
scenarios = [
    {"price_point": price_point, "is_promotion": is_promotion}
    for price_point, is_promotion in price_promo_pairs
]

for scenario in scenarios:
    # Copy the baseline first so forecast_inputs itself is never modified.
    inputs = dict(forecast_inputs)
    inputs.update(scenario)
    result = predictor.predict(inputs)
    print(f"Price ${scenario['price_point']}, Promo={scenario['is_promotion']}: {result.prediction:.0f} units")

Use Case: Insurance Risk Scoring

# Foundational model trained on past claims; the two ID columns are ignored.
id_columns = ["policy_id", "customer_id"]
fm = featrix.create_foundational_model(
    name="insurance_risk_model",
    data_file="claims_history.csv",
    ignore_columns=id_columns,
)
fm.wait_for_training()

# Regressor targeting the annual_claim_amount column.
predictor = fm.create_regressor(target_column="annual_claim_amount", name="risk_predictor")
predictor.wait_for_training()

# Score a new applicant
applicant = dict(
    age=35,
    gender="F",
    bmi=24.5,
    smoker=False,
    region="northeast",
    children=2,
    occupation_risk="low",
)

result = predictor.predict(applicant)
expected_claims = result.prediction

# Calculate premium with margin
premium_multiplier = 1.3  # 30% margin
base_premium = expected_claims * premium_multiplier
print(f"Expected annual claims: ${expected_claims:,.2f}")
print(f"Recommended premium: ${base_premium:,.2f}")

Use Case: Time/Duration Estimation

from datetime import datetime, timedelta

# Foundational model over the delivery history (no columns are ignored here).
fm = featrix.create_foundational_model(name="delivery_time_model", data_file="deliveries.csv")
fm.wait_for_training()

# Regressor targeting the delivery_hours column.
predictor = fm.create_regressor(target_column="delivery_hours", name="eta_predictor")
predictor.wait_for_training()

# Predict delivery time
delivery = dict(
    origin_city="chicago",
    destination_city="denver",
    package_weight=5.2,
    shipping_class="standard",
    day_of_week="monday",
    weather_conditions="clear",
)

result = predictor.predict(delivery)
eta_hours = result.prediction
print(f"Estimated delivery: {eta_hours:.1f} hours")

# Convert to human-readable
# datetime.now() is the naive local time, so the arrival is shown in the
# local timezone of the machine running this script.
time_in_transit = timedelta(hours=eta_hours)
estimated_arrival = datetime.now() + time_in_transit
arrival_label = estimated_arrival.strftime('%Y-%m-%d %H:%M')
print(f"Expected by: {arrival_label}")

Understanding Regression Results

# `features` is a placeholder for an input dict like the ones passed to
# predict() in the use cases above (column name -> value).
result = predictor.predict(features)

# The prediction
result.prediction          # Numeric value (e.g., 450000.0)

# Tracking
result.prediction_uuid     # UUID identifying this prediction; pass it to prediction_feedback()

# Input warnings
result.guardrails          # Mapping of column -> warning for unusual input values; appears empty ({}) when nothing is flagged

Guardrails and Warnings

# A deliberately implausible listing, used to trigger input warnings.
implausible_listing = {
    "square_feet": 50000,    # Unusually large
    "bedrooms": 100,         # Unrealistic
    "year_built": 3000,      # Future date
}
result = predictor.predict(implausible_listing)

# guardrails maps each flagged column to its warning; it is falsy when no
# input was flagged, in which case nothing is printed.
flagged_inputs = result.guardrails
if flagged_inputs:
    print("Input warnings:")
    for column, warning in flagged_inputs.items():
        print(f"  {column}: {warning}")
    print(f"Prediction may be unreliable: ${result.prediction:,.2f}")

Sending Feedback

Track actual values to improve future models:

# After sale closes
actual_price = 465000

# The UUID from the earlier prediction links the observed price to it.
closed_sale_prediction_id = result.prediction_uuid
featrix.prediction_feedback(
    prediction_uuid=closed_sale_prediction_id,
    ground_truth=actual_price,
)

Feature Importance

Understand what drives predictions:

# Request per-feature importances along with the prediction.
result = predictor.predict(features, feature_importance=True)

print(f"Predicted price: ${result.prediction:,.2f}")
print("Top factors:")

# Rank features by the magnitude of their importance, largest first, and
# show the first five; the sign is kept for display ("+" or "-").
ranked_factors = list(result.feature_importance.items())
ranked_factors.sort(key=lambda pair: abs(pair[1]), reverse=True)
for feature, importance in ranked_factors[:5]:
    print(f"  {feature}: {importance:+.3f}")

Example output:

Predicted price: $485,000.00
Top factors:
  square_feet: +0.450
  neighborhood: +0.280
  bedrooms: +0.150
  year_built: +0.120
  lot_size: +0.080

Grid Search for Optimization

Find optimal parameter combinations:

# Explore price and marketing spend combinations
prices = [19.99, 24.99, 29.99, 34.99, 39.99]
ad_spends = [100, 250, 500, 1000, 2000]

# Create a prediction grid
# grid_shape is derived from the two axis lists so the grid has exactly one
# cell per (price, ad_spend) pair filled in below; a larger fixed shape
# would leave cells that are never populated.
grid = predictor.predict_grid(
    degrees_of_freedom=2,
    grid_shape=(len(prices), len(ad_spends)),
)
grid.set_axis_labels(["Price", "Ad Spend"])

# Queue one prediction per cell: axis 0 indexes prices, axis 1 indexes ad_spends.
for i, price in enumerate(prices):
    for j, spend in enumerate(ad_spends):
        grid.predict(
            {"price": price, "ad_spend": spend, "category": "electronics"},
            grid_position=(i, j),
        )

grid.process_batch(show_progress=True)

# Find optimal combination
# NOTE(review): presumably a grid position indexing into prices/ad_spends --
# confirm the return type before using it as an index.
optimal = grid.get_optimal_position()
print(f"Optimal position: {optimal}")

# Visualize
grid.plot_heatmap()

Production API

Python

# Call the deployed endpoint with a partial listing, authenticating with the
# endpoint's own API key.
listing = {
    "square_feet": 2500,
    "bedrooms": 4,
    "neighborhood": "downtown",
}
result = endpoint.predict(listing, api_key=endpoint.api_key)
print(f"Price: ${result.prediction:,.2f}")

HTTP

# POST a JSON listing to the price_api_v1 endpoint, sending the API key in
# the X-API-Key header.
curl --request POST "https://sphere-api.featrix.com/endpoint/price_api_v1/predict" \
  --header "X-API-Key: your-api-key" \
  --header "Content-Type: application/json" \
  --data '{"square_feet": 2500, "bedrooms": 4, "neighborhood": "downtown"}'

Response

{
  "prediction": 485000.0,
  "prediction_uuid": "550e8400-e29b-41d4-a716-446655440000",
  "guardrails": {}
}

Best Practices

1. Exclude IDs and timestamps: list them in ignore_columns; they don't carry predictive information.
2. Include all relevant features: the model learns relationships automatically.
3. Monitor prediction distribution: watch for drift in predicted values.
4. Send feedback with actual values: real outcomes reported through prediction_feedback() improve accuracy.
5. Use guardrails: check result.guardrails and pay attention to warnings about unusual inputs.
6. Test edge cases: verify behavior at extreme values.