Skip to content

Use Case: Regression (Numeric Prediction)

Predict numeric values like prices, quantities, durations, or scores.

When to Use This

  • Price prediction (real estate, products, services)
  • Demand forecasting
  • Customer lifetime value (LTV) prediction
  • Risk scoring (insurance, credit)
  • Duration/time estimation
  • Any continuous numeric target

Complete Implementation

from featrixsphere.api import FeatrixSphere

# Client object; every model in the examples on this page is created through it.
featrix = FeatrixSphere()

# 1. Create Foundational Model
# The ID, date, and address columns are passed as ignore_columns, in line with
# the "Exclude IDs and timestamps" best practice listed at the end of this page.
fm = featrix.create_foundational_model(
    name="price_prediction_model",
    data_file="properties.csv",
    ignore_columns=["property_id", "listing_date", "address"]
)
# NOTE(review): presumably blocks until training finishes — confirm in the SDK reference.
fm.wait_for_training()

# 2. Create regressor for price
# The regressor is created from the foundational model, with "price" as its target column.
predictor = fm.create_regressor(
    target_column="price",
    name="property_price_predictor"
)
predictor.wait_for_training()

# 3. Make predictions
# A single record is passed as a plain dict keyed by column name.
# The target column ("price") is not part of the input.
property_features = {
    "square_feet": 2500,
    "bedrooms": 4,
    "bathrooms": 2.5,
    "year_built": 2010,
    "lot_size": 0.25,
    "neighborhood": "downtown",
    "garage_spaces": 2,
    "pool": True
}

result = predictor.predict(property_features)
# result.prediction is the numeric prediction; ":,.2f" renders it with
# thousands separators and two decimals.
print(f"Predicted price: ${result.prediction:,.2f}")

# 4. Batch predictions
import pandas as pd

properties_df = pd.read_csv("new_listings.csv")
results = predictor.batch_predict(properties_df, show_progress=True)

# Pair each property ID with its prediction by iterating the ID column
# directly. A row-wise lookup through .iloc[i] builds a Series for every row,
# which is slower and can upcast an integer ID to float when all columns in
# the row are numeric.
# NOTE(review): assumes batch_predict returns one result per input row, in
# input order — confirm against the SDK reference.
for property_id, listing_result in zip(properties_df["property_id"], results):
    print(f"Property {property_id}: ${listing_result.prediction:,.2f}")

# 5. Deploy to production
# The returned endpoint object is reused in the "Production API" section,
# which calls endpoint.predict(...) and reads endpoint.api_key. The endpoint
# name also appears in the HTTP URL path (/endpoint/price_api_v1/predict).
endpoint = predictor.create_api_endpoint(
    name="price_api_v1",
    description="Property price prediction endpoint"
)

# 6. Publish
# NOTE(review): "my_org" looks like a placeholder organization ID — replace with your own.
fm.publish(org_id="my_org", name="price_model_v1")

Use Case: Customer Lifetime Value

# Train on historical customer data
# Reuses the `featrix` client from the Complete Implementation example.
# `fm` and `predictor` are rebound here, replacing the price-model objects.
fm = featrix.create_foundational_model(
    name="ltv_model",
    data_file="customers_with_ltv.csv",
    ignore_columns=["customer_id", "signup_date"]
)
fm.wait_for_training()

# "lifetime_value" is the target column the regressor is created for.
predictor = fm.create_regressor(
    target_column="lifetime_value",
    name="ltv_predictor"
)
predictor.wait_for_training()

# Predict LTV for new customers
# The record holds acquisition-time attributes only; the target column is not supplied.
new_customer = {
    "acquisition_channel": "organic_search",
    "first_purchase_amount": 150.0,
    "first_purchase_category": "electronics",
    "device_type": "mobile",
    "email_subscribed": True
}

result = predictor.predict(new_customer)
print(f"Predicted LTV: ${result.prediction:,.2f}")

# Use LTV for customer acquisition decisions
def should_acquire(customer, max_acquisition_cost, roi_multiple=3):
    """Return True when the predicted LTV clears the required return on spend.

    Args:
        customer: Feature dict for the prospect, in the same form passed to
            ``predictor.predict`` elsewhere in this example.
        max_acquisition_cost: The most you are willing to spend to acquire
            this customer.
        roi_multiple: Required ratio of predicted LTV to acquisition cost.
            Defaults to 3, the threshold this example previously hard-coded.

    Returns:
        True if the predicted LTV is strictly greater than
        ``max_acquisition_cost * roi_multiple``; otherwise False.
    """
    ltv = predictor.predict(customer).prediction
    return ltv > max_acquisition_cost * roi_multiple

Use Case: Demand Forecasting

# Reuses the `featrix` client created earlier; `fm` and `predictor` are rebound
# to the demand model. No ignore_columns argument is passed in this example.
fm = featrix.create_foundational_model(
    name="demand_model",
    data_file="historical_sales.csv"
)
fm.wait_for_training()

# "units_sold" is the target column the regressor is created for.
predictor = fm.create_regressor(
    target_column="units_sold",
    name="demand_predictor"
)
predictor.wait_for_training()

# Predict demand for upcoming period
# This record also serves as the baseline that the scenario comparison below overrides.
forecast_inputs = {
    "product_category": "electronics",
    "price_point": 299.99,
    "is_promotion": True,
    "season": "holiday",
    "day_of_week": "saturday",
    "competitor_price": 329.99
}

result = predictor.predict(forecast_inputs)
# ":.0f" rounds the prediction to a whole number of units for display only.
print(f"Predicted units: {result.prediction:.0f}")

# What-if comparison: each scenario overrides only the price and promotion
# flag; every other field is taken from the baseline forecast_inputs record.
scenarios = [
    {"price_point": 249.99, "is_promotion": True},
    {"price_point": 299.99, "is_promotion": True},
    {"price_point": 299.99, "is_promotion": False},
    {"price_point": 349.99, "is_promotion": False}
]

for overrides in scenarios:
    # Copy the baseline first so forecast_inputs itself is never mutated.
    what_if = dict(forecast_inputs)
    what_if.update(overrides)
    result = predictor.predict(what_if)
    price = overrides["price_point"]
    promo = overrides["is_promotion"]
    print(f"Price ${price}, Promo={promo}: {result.prediction:.0f} units")

Use Case: Insurance Risk Scoring

# Reuses the `featrix` client created earlier; `fm` and `predictor` are rebound
# to the insurance model. Both ID columns are passed as ignore_columns.
fm = featrix.create_foundational_model(
    name="insurance_risk_model",
    data_file="claims_history.csv",
    ignore_columns=["policy_id", "customer_id"]
)
fm.wait_for_training()

# "annual_claim_amount" is the target column the regressor is created for.
predictor = fm.create_regressor(
    target_column="annual_claim_amount",
    name="risk_predictor"
)
predictor.wait_for_training()

# Score a new applicant
applicant = {
    "age": 35,
    "gender": "F",
    "bmi": 24.5,
    "smoker": False,
    "region": "northeast",
    "children": 2,
    "occupation_risk": "low"
}

result = predictor.predict(applicant)
# The prediction for the annual_claim_amount target, kept under a descriptive name.
expected_claims = result.prediction

# Calculate premium with margin
# The premium is the predicted claims scaled by a fixed 1.3 multiplier.
base_premium = expected_claims * 1.3  # 30% margin
print(f"Expected annual claims: ${expected_claims:,.2f}")
print(f"Recommended premium: ${base_premium:,.2f}")

Use Case: Time/Duration Estimation

# Reuses the `featrix` client created earlier; `fm` and `predictor` are rebound
# to the delivery-time model. No ignore_columns argument is passed here.
fm = featrix.create_foundational_model(
    name="delivery_time_model",
    data_file="deliveries.csv"
)
fm.wait_for_training()

# "delivery_hours" is the target column, so the prediction is used as hours below.
predictor = fm.create_regressor(
    target_column="delivery_hours",
    name="eta_predictor"
)
predictor.wait_for_training()

# Predict delivery time
delivery = {
    "origin_city": "chicago",
    "destination_city": "denver",
    "package_weight": 5.2,
    "shipping_class": "standard",
    "day_of_week": "monday",
    "weather_conditions": "clear"
}

result = predictor.predict(delivery)
eta_hours = result.prediction
print(f"Estimated delivery: {eta_hours:.1f} hours")

# Convert to human-readable
# timedelta accepts fractional hours. datetime.now() returns a naive timestamp
# in the machine's local time, so the printed arrival carries no time zone.
from datetime import datetime, timedelta
estimated_arrival = datetime.now() + timedelta(hours=eta_hours)
print(f"Expected by: {estimated_arrival.strftime('%Y-%m-%d %H:%M')}")

Understanding Regression Results

# `features` is a placeholder for an input record: a dict keyed by column name,
# like property_features in the Complete Implementation example.
result = predictor.predict(features)

# The prediction
result.prediction          # Numeric value (e.g., 450000.0)

# Tracking
result.prediction_uuid     # UUID for feedback

# Input warnings
result.guardrails          # Warnings for unusual values

Guardrails and Warnings

# Deliberately implausible inputs, used to show guardrail warnings.
result = predictor.predict({
    "square_feet": 50000,    # Unusually large
    "bedrooms": 100,         # Unrealistic
    "year_built": 3000       # Year in the future
})

# guardrails is iterated as a mapping of column name -> warning. An empty
# mapping is falsy, so this whole block is skipped when there are no warnings.
if result.guardrails:
    print("Input warnings:")
    for column, warning in result.guardrails.items():
        print(f"  {column}: {warning}")
    # result.prediction is still read when warnings exist; it is printed with
    # a caveat rather than discarded.
    print(f"Prediction may be unreliable: ${result.prediction:,.2f}")

Sending Feedback

Track actual values to improve future models:

# After sale closes
actual_price = 465000

featrix.prediction_feedback(
    prediction_uuid=result.prediction_uuid,
    ground_truth=actual_price
)

Feature Importance

Understand what drives predictions:

# Ask for per-feature importances alongside the prediction.
result = predictor.predict(features, feature_importance=True)

print(f"Predicted price: ${result.prediction:,.2f}")
print("Top factors:")

# Rank by magnitude, strongest first, so a large negative driver is listed
# ahead of a small positive one.
ranked_factors = sorted(
    result.feature_importance.items(),
    key=lambda pair: abs(pair[1]),
    reverse=True,
)
for name, score in ranked_factors[:5]:
    # The "+" flag always shows the sign; ".3f" prints three decimals.
    print(f"  {name}: {score:+.3f}")

Example output:

Predicted price: $485,000.00
Top factors:
  square_feet: +0.450
  neighborhood: +0.280
  bedrooms: +0.150
  year_built: +0.120
  lot_size: +0.080

Grid Search for Optimization

Find optimal parameter combinations:

# Candidate values to explore on each axis
prices = [19.99, 24.99, 29.99, 34.99, 39.99]
ad_spends = [100, 250, 500, 1000, 2000]

# Create a prediction grid sized to the candidate lists, so every cell gets a
# prediction. A fixed shape larger than the data (such as (10, 8) for 5 x 5
# candidates) would leave most grid positions unfilled.
# NOTE(review): assumes grid_shape uses the same (first axis, second axis)
# order as grid_position — confirm against the SDK reference.
grid = predictor.predict_grid(
    degrees_of_freedom=2,
    grid_shape=(len(prices), len(ad_spends)),
)
grid.set_axis_labels(["Price", "Ad Spend"])

# Explore price and marketing spend combinations; "category" is held constant.
for i, price in enumerate(prices):
    for j, spend in enumerate(ad_spends):
        grid.predict(
            {"price": price, "ad_spend": spend, "category": "electronics"},
            grid_position=(i, j)
        )

# NOTE(review): presumably runs the predictions registered above as one
# batch — confirm against the SDK reference.
grid.process_batch(show_progress=True)

# Find optimal combination
optimal = grid.get_optimal_position()
print(f"Optimal position: {optimal}")

# Visualize
grid.plot_heatmap()

Production API

Python

# `endpoint` is the object returned by predictor.create_api_endpoint(...) in
# step 5 of the Complete Implementation example.
# Only three of the eight fields from property_features are supplied here;
# how omitted fields are treated is not shown on this page.
result = endpoint.predict(
    {"square_feet": 2500, "bedrooms": 4, "neighborhood": "downtown"},
    api_key=endpoint.api_key
)
print(f"Price: ${result.prediction:,.2f}")

HTTP

# "price_api_v1" in the URL path is the endpoint name chosen in step 5.
# Replace your-api-key with the endpoint's real key; it is sent in the
# X-API-Key header. The JSON body is the same record shape used in Python.
curl -X POST "https://sphere-api.featrix.com/endpoint/price_api_v1/predict" \
  -H "X-API-Key: your-api-key" \
  -H "Content-Type: application/json" \
  -d '{"square_feet": 2500, "bedrooms": 4, "neighborhood": "downtown"}'

Response

{
  "prediction": 485000.0,
  "prediction_uuid": "550e8400-e29b-41d4-a716-446655440000",
  "guardrails": {}
}

Best Practices

  1. Exclude IDs and timestamps - They don't carry predictive information
  2. Include all relevant features - The model learns relationships automatically
  3. Monitor prediction distribution - Watch for drift in predicted values
  4. Send feedback with actual values - Real outcomes improve accuracy
  5. Use guardrails - Pay attention to warnings about unusual inputs
  6. Test edge cases - Verify behavior at extreme values