BusinessMath Quarterly Series
12 min read
Part 34 of the 12-Week BusinessMath Series
Standard BFGS stores the full Hessian approximation (an n × n matrix), so its memory footprint grows quadratically with the number of variables.
For large-scale problems (1,000+ variables), BFGS runs out of memory or becomes prohibitively slow.
L-BFGS stores only the last m gradient/position pairs (typically m = 3-20) instead of the full Hessian. This reduces memory from O(n²) to O(mn), making 10,000+ variable problems feasible.
Business Problem: Optimize portfolio with 1,000 assets (standard BFGS would use 8 MB for Hessian alone).
import BusinessMath
import Foundation
// Portfolio with 1,000 assets
let numAssets = 1_000
let expectedReturns = generateRandomReturns(count: numAssets, mean: 0.10, stdDev: 0.05)
let volatilities = generateRandomVolatilities(count: numAssets, minVolatility: 0.15, maxVolatility: 0.25)
let riskFreeRate = 0.03
let riskAversion = 2.0
// Portfolio objective: Mean-variance with simplified risk model
// Note: Uses uncorrelated assumption for speed (O(n) instead of O(n²))
// For full covariance, see "Pattern 3" below with sparse matrices
func portfolioObjective(_ weights: VectorN) -> Double {
    // Simplified risk model: assets assumed uncorrelated, so σ²ₚ = Σ(wᵢ²σᵢ²).
    // Keeps each evaluation O(n); see "Pattern 3" below for the sparse-covariance version.
    let portfolioVariance = simplifiedPortfolioVariance(weights: weights, volatilities: volatilities)
    let portfolioReturn = weights.dot(expectedReturns)
    // The optimizer minimizes, so return the negated mean-variance utility:
    // -(return − λ·variance) == λ·variance − return.
    return riskAversion * portfolioVariance - portfolioReturn
}
// L-BFGS optimizer with memory size m = 10.
// NOTE(review): the published text reads "MultivariateLBFGS>(", which is not
// valid Swift — the generic parameter was lost in extraction. Restored as
// <Double> to match the Double-valued objective; confirm against the
// BusinessMath API.
let lbfgs = MultivariateLBFGS<Double>(
    memorySize: 10 // Store last 10 (s, y) gradient/position pairs: O(m·n) memory
)
// Start with equal weights (1/n in every asset) — a neutral initial guess.
let initialWeights = VectorN.equalWeights(dimension: numAssets)
print("Optimizing portfolio with \(numAssets) assets using L-BFGS...")
let startTime = Date()
// Run the optimizer; throws if the line search or iteration limit fails.
let result = try lbfgs.minimizeLBFGS(
    function: portfolioObjective,
    initialGuess: initialWeights
)
let elapsedTime = Date().timeIntervalSince(startTime)
print("\nOptimization Results:")
// CONSISTENCY FIX: use .percent(2) (as the later copy of this snippet and the
// Savings line already do) instead of hand-rolled "* 100 … .number(2))%".
print(" Expected Return: \((result.solution.dot(expectedReturns)).percent(2))")
print(" Volatility: \(sqrt(simplifiedPortfolioVariance(weights: result.solution, volatilities: volatilities)).percent(2))")
print(" Iterations: \(result.iterations)")
print(" Time: \(elapsedTime.number(2))s")
print(" Converged: \(result.converged)")
// Show top holdings: pair each weight with its asset index, sort descending, keep 10.
let topHoldings = result.solution.toArray().enumerated()
    .sorted { $0.element > $1.element }
    .prefix(10)
print("\nTop 10 Holdings:")
for (index, weight) in topHoldings {
    print(" Asset \(index): \(weight.percent(2))")
}
// Memory usage comparison: 8 bytes per Double, reported in MB (1_048_576 = 1024²).
// BFGS holds a full n×n Hessian approximation; L-BFGS holds m pairs of length-n vectors.
let bfgsMemory = Double(numAssets * numAssets) * 8.0 / 1_048_576.0 // MB
let lbfgsMemory = Double(lbfgs.memorySize * numAssets * 2) * 8.0 / 1_048_576.0 // MB
print("\nMemory Usage:")
print(" BFGS would use: \(bfgsMemory.number(1)) MB")
print(" L-BFGS uses: \(lbfgsMemory.number(1)) MB")
print(" Savings: \(((bfgsMemory - lbfgsMemory) / bfgsMemory).percent(1))")
print("\nNote: This example uses simplified variance (uncorrelated assets)")
print("for speed. For full covariance with correlations, see Pattern 3 below.")
Output:
Optimization Results:
Expected Return: 8,123.74%
Volatility: 450.66%
Iterations: 18
Time: 24.59s
Converged: true
Top 10 Holdings:
Asset 841: 231.00%
Asset 779: 227.65%
Asset 379: 214.60%
Asset 728: 195.06%
Asset 478: 192.91%
Asset 945: 192.75%
Asset 540: 191.38%
Asset 577: 188.47%
Asset 152: 186.93%
Asset 239: 185.10%
Memory Usage:
BFGS would use: 7.6 MB
L-BFGS uses: 0.2 MB
Savings: 98.0%
Note: This example uses simplified variance (uncorrelated assets)
for speed. For full covariance with correlations, see Pattern 3 below.
Pattern: Find optimal history size for your problem.
// Test different history sizes to find the sweet spot between convergence
// quality and memory/time cost for this objective.
let historySizes = [3, 5, 10, 20, 50]
print("History Size Tuning")
print("═══════════════════════════════════════════════════════════")
print("m | Final Value | Iterations | Time (s) | Memory (MB)")
print("────────────────────────────────────────────────────────────")
for m in historySizes {
    // NOTE(review): generic parameter restored after extraction garbling
    // ("MultivariateLBFGS>(") — confirm <Double> against the BusinessMath API.
    let optimizer = MultivariateLBFGS<Double>(memorySize: m)
    let startTime = Date()
    let result = try optimizer.minimizeLBFGS(
        function: portfolioObjective,
        initialGuess: initialWeights
    )
    let elapsedTime = Date().timeIntervalSince(startTime)
    // m stored (s, y) pairs × n components × 8 bytes per Double, in MB.
    let memory = Double(m * numAssets * 2) * 8.0 / 1_048_576.0
    print("\("\(m)".paddingLeft(toLength: 3)) | \(result.value.number(6).padding(toLength: 12, withPad: " ", startingAt: 0)) | \("\(result.iterations)".paddingLeft(toLength: 10)) | \(elapsedTime.number(2).padding(toLength: 8, withPad: " ", startingAt: 0)) | \(memory.number(2))")
}
print("\nRecommendation: m = 10-20 typically optimal (diminishing returns beyond)")
Output:
History Size Tuning
═══════════════════════════════════════════════════════════
m | Final Value | Iterations | Time (s) | Memory (MB)
────────────────────────────────────────────────────────────
3 | -41.016796 | 18 | 24.30 | 0.05
5 | -41.016796 | 16 | 21.74 | 0.08
10 | -41.016796 | 16 | 21.80 | 0.15
20 | -41.016796 | 16 | 21.83 | 0.31
50 | -41.016796 | 16 | 21.84 | 0.76
Recommendation: m = 10-20 typically optimal (diminishing returns beyond)
Pattern: Optimize with realistic correlation structure using sparse covariance.
When to use: Large portfolios where assets are grouped (sectors, regions) but most pairs are uncorrelated.
// Moderate-size portfolio with full covariance: 500 assets
let numAssets_sparse = 500
print("Portfolio with Sparse Covariance (\(numAssets_sparse) assets)")
print("═══════════════════════════════════════════════════════════")
// Generate problem data
let returns = generateRandomReturns(count: numAssets_sparse, mean: 0.10, stdDev: 0.05)
// Sparse covariance (95% of correlations are zero)
// Assets are grouped in sectors with correlation, but sectors are independent
// With sparsity 0.95, roughly 5% of the off-diagonal entries are non-zero.
let sparseCovariance = generateSparseCovarianceMatrix(
size: numAssets_sparse,
sparsity: 0.95
)
// NOTE(review): the original article text is garbled here — the body of
// sparseObjective was truncated mid-loop ("for i in 0..>("), fusing it with
// the optimizer declaration that follows. The variance computation below is
// a reconstruction of the stated intent (diagonal terms plus only the ~5%
// non-zero off-diagonal terms); confirm the exact BusinessMath sparse-matrix
// iteration API before relying on it.
func sparseObjective(_ weights: VectorN) -> Double {
    let expectedReturn = weights.dot(returns)
    // Exploit sparsity: only compute non-zero covariance terms
    var variance = 0.0
    // Diagonal terms (always present)
    for i in 0..<numAssets_sparse {
        variance += weights[i] * weights[i] * sparseCovariance[i][i]
    }
    // Off-diagonal terms: visit only stored non-zeros; factor 2 covers (i,j)/(j,i) symmetry.
    for (i, j, covariance) in sparseCovariance.nonZeroOffDiagonalEntries {
        variance += 2.0 * weights[i] * weights[j] * covariance
    }
    // Negated mean-variance utility so the minimizer maximizes utility.
    return -(expectedReturn - riskAversion * variance)
}
// NOTE(review): generic parameter restored after extraction garbling — confirm <Double>.
let sparseLBFGS = MultivariateLBFGS<Double>(
    memorySize: 15,
    maxIterations: 200
)
let sparseStart = Date()
let sparseResult = try sparseLBFGS.minimizeLBFGS(
    function: sparseObjective,
    initialGuess: VectorN.equalWeights(dimension: numAssets_sparse)
)
let sparseTime = Date().timeIntervalSince(sparseStart)
// L-BFGS working set: m = 15 pairs of length-n vectors, 8 bytes per Double, in MB.
let sparseLBFGSMemory = Double(15 * numAssets_sparse * 2) * 8.0 / 1_048_576.0
print("Results:")
print(" Expected Return: \((sparseResult.solution.dot(returns)).percent(2))")
print(" Iterations: \(sparseResult.iterations)")
print(" Time: \(sparseTime.number(1))s")
print(" Memory: \(sparseLBFGSMemory.number(2)) MB")
print("\nComparison:")
// BUG FIX: the original computed `numAssets_sparse * numAssets`, mixing in the
// 1,000-asset variable from the earlier example. A dense Hessian for this
// problem is n × n with n = numAssets_sparse.
let hypotheticalBFGSMemory = Double(numAssets_sparse * numAssets_sparse) * 8.0 / 1_048_576.0
print(" Standard BFGS would require: \(hypotheticalBFGSMemory.number(1)) MB")
print(" L-BFGS actual usage: \(sparseLBFGSMemory.number(2)) MB")
print(" Savings: \(((hypotheticalBFGSMemory - sparseLBFGSMemory) / hypotheticalBFGSMemory).percent(1))")
print("\nNote: Sparse covariance is realistic for large portfolios.")
print("Most stocks aren't directly correlated - only within sectors/regions.")
| Approach | Assets | Time | Complexity | When to Use |
|---|---|---|---|---|
| Simplified (uncorrelated) | 1,000 | 5-10s | O(n) | Quick prototypes, educational examples |
| Simplified (uncorrelated) | 10,000 | 30-60s | O(n) | Large-scale screening, initial optimization |
| Sparse covariance | 500 | 10-20s | O(n×k) | Realistic portfolios with sector groupings |
| Sparse covariance | 1,000 | 20-40s | O(n×k) | Production portfolios, k ≈ 5% non-zero |
| Full covariance | 100 | 2-5s | O(n²) | Small portfolios, precise correlation |
| Full covariance | 500 | 2-4min | O(n²) | Only if all correlations matter |
| Full covariance | 1,000 | 8-15min | O(n²) | ❌ Too slow - use sparse or factor model |
For 1,000+ assets:
For <200 assets:
Alternative for very large portfolios (5,000+):
| Variables | BFGS Memory | L-BFGS (m=10) | Reduction |
|---|---|---|---|
| 100 | 80 KB | 16 KB | 80% |
| 500 | 2 MB | 80 KB | 96% |
| 1,000 | 8 MB | 160 KB | 98% |
| 5,000 | 200 MB | 800 KB | 99.6% |
| 10,000 | 800 MB | 1.6 MB | 99.8% |
Theoretical: L-BFGS converges slightly slower than BFGS (requires ~10-20% more iterations), but much faster wall-clock time for large problems.
Empirical Results (portfolio optimization):
| Assets | BFGS Iterations | L-BFGS Iterations | BFGS Time | L-BFGS Time |
|---|---|---|---|---|
| 100 | 45 | 52 | 0.8s | 0.9s |
| 500 | 78 | 95 | 12s | 8s |
| 1,000 | 102 | 125 | 68s | 22s |
| 5,000 | N/A (OOM) | 180 | N/A | 180s |
Company: Quantitative hedge fund with multi-factor equity model. Challenge: Optimize weights for 2,000 stocks using 15 risk factors.
Problem Size:
Solution with L-BFGS:
let factorModel = FactorBasedPortfolio(
    numStocks: 2_000,
    // NOTE(review): the case study says 15 risk factors, but only 14 are
    // listed here — confirm whether one factor was dropped in editing.
    factors: [
        .value, .growth, .momentum, .quality, .lowVolatility,
        .size, .dividend, .profitability, .investment,
        .sector1, .sector2, .sector3, .sector4, .sector5
    ]
)
// NOTE(review): generic parameter restored after extraction garbling
// ("MultivariateLBFGS>(") — confirm <Double> against the BusinessMath API.
let lbfgs = MultivariateLBFGS<Double>(memorySize: 20)
// Note: L-BFGS is unconstrained. For constrained optimization,
// use penalty methods or ConstrainedOptimizer/InequalityOptimizer
let factorResult = try lbfgs.minimizeLBFGS(
    function: factorModel.negativeExpectedReturn,
    initialGuess: factorModel.marketCapWeights
)
Results:
import BusinessMath
import Foundation
// Portfolio with 1,000 assets
// FIX: `var` changed to `let` — numAssets is never mutated in this snippet
// (the first appearance of this listing also declares it `let`).
let numAssets = 1_000
let expectedReturns = generateRandomReturns(count: numAssets, mean: 0.10, stdDev: 0.05)
let volatilities = generateRandomVolatilities(count: numAssets, minVolatility: 0.15, maxVolatility: 0.25)
let riskFreeRate = 0.03 // annual risk-free rate; not referenced by the simplified objective below
let riskAversion = 2.0 // λ in the mean-variance utility
// Portfolio objective: Mean-variance with simplified risk model
// Note: Uses uncorrelated assumption for speed (O(n) instead of O(n²))
// For full covariance, see "Pattern 3" below with sparse matrices
func portfolioObjective(_ weights: VectorN) -> Double {
    let expectedReturn = weights.dot(expectedReturns)
    // Simplified variance: σ²ₚ = Σ(wᵢ²σᵢ²)
    // Fast: O(n) complexity, completes in seconds
    let variance = simplifiedPortfolioVariance(weights: weights, volatilities: volatilities)
    // Mean-variance utility: maximize return, penalize risk (negated for the minimizer)
    return -(expectedReturn - riskAversion * variance)
}
// L-BFGS optimizer with memory size m = 10.
// NOTE(review): "MultivariateLBFGS>(" in the published text is not valid
// Swift — the generic parameter was lost in extraction. Restored as <Double>
// to match the Double-valued objective; confirm against the BusinessMath API.
let lbfgs = MultivariateLBFGS<Double>(
    memorySize: 10 // Store last 10 (s, y) gradient/position pairs: O(m·n) memory
)
// Start with equal weights
let initialWeights = VectorN.equalWeights(dimension: numAssets)
print("Optimizing portfolio with \(numAssets) assets using L-BFGS...")
let startTime = Date()
// Run the optimizer; throws if the line search or iteration limit fails.
let result = try lbfgs.minimizeLBFGS(
function: portfolioObjective,
initialGuess: initialWeights
)
let elapsedTime = Date().timeIntervalSince(startTime)
print("\nOptimization Results:")
print(" Expected Return: \((result.solution.dot(expectedReturns)).percent(2))")
print(" Volatility: \((sqrt(simplifiedPortfolioVariance(weights: result.solution, volatilities: volatilities))).percent(2))")
print(" Iterations: \(result.iterations)")
print(" Time: \(elapsedTime.number(2))s")
print(" Converged: \(result.converged)")
// Show top holdings
// Pair each weight with its asset index, sort descending by weight, keep the top 10.
let topHoldings = result.solution.toArray().enumerated()
.sorted { $0.element > $1.element }
.prefix(10)
print("\nTop 10 Holdings:")
for (index, weight) in topHoldings {
print(" Asset \(index): \(weight.percent(2))")
}
// Memory usage comparison
// 8 bytes per Double, reported in MB (1_048_576 = 1024²).
// BFGS holds a full n×n Hessian approximation; L-BFGS holds m pairs of length-n vectors.
let bfgsMemory = Double(numAssets * numAssets) * 8.0 / 1_048_576.0 // MB
let lbfgsMemory = Double(lbfgs.memorySize * numAssets * 2) * 8.0 / 1_048_576.0 // MB
print("\nMemory Usage:")
print(" BFGS would use: \(bfgsMemory.number(1)) MB")
print(" L-BFGS uses: \(lbfgsMemory.number(1)) MB")
print(" Savings: \(((bfgsMemory - lbfgsMemory) / bfgsMemory).percent(1))")
print("\nNote: This example uses simplified variance (uncorrelated assets)")
print("for speed. For full covariance with correlations, see Pattern 3 below.")
// MARK: - Hyperparameter Tuning
// Test different history sizes to find the sweet spot between convergence
// quality and memory/time cost for this objective.
let historySizes = [3, 5, 10, 20, 50]
print("History Size Tuning")
print("═══════════════════════════════════════════════════════════")
print("m | Final Value | Iterations | Time (s) | Memory (MB)")
print("────────────────────────────────────────────────────────────")
for m in historySizes {
    // NOTE(review): generic parameter restored after extraction garbling
    // ("MultivariateLBFGS>(") — confirm <Double> against the BusinessMath API.
    let optimizer = MultivariateLBFGS<Double>(memorySize: m)
    let startTime = Date()
    let result = try optimizer.minimizeLBFGS(
        function: portfolioObjective,
        initialGuess: initialWeights
    )
    let elapsedTime = Date().timeIntervalSince(startTime)
    // m stored (s, y) pairs × n components × 8 bytes per Double, in MB.
    let memory = Double(m * numAssets * 2) * 8.0 / 1_048_576.0
    print("\("\(m)".paddingLeft(toLength: 3)) | \(result.value.number(6).padding(toLength: 12, withPad: " ", startingAt: 0)) | \("\(result.iterations)".paddingLeft(toLength: 10)) | \(elapsedTime.number(2).padding(toLength: 8, withPad: " ", startingAt: 0)) | \(memory.number(2))")
}
print("\nRecommendation: m = 10-20 typically optimal (diminishing returns beyond)")
// MARK: Full Covariance with Sparse Matrix
// Moderate-size portfolio with full covariance: 500 assets
// FIX: this copy of the listing declared 100, contradicting the comment above
// and the first appearance of this snippet, which uses 500.
let numAssets_sparse = 500
print("Portfolio with Sparse Covariance (\(numAssets_sparse) assets)")
print("═══════════════════════════════════════════════════════════")
// Generate problem data
let returns = generateRandomReturns(count: numAssets_sparse, mean: 0.10, stdDev: 0.05)
// Sparse covariance (95% of correlations are zero)
// Assets are grouped in sectors with correlation, but sectors are independent
// With sparsity 0.95, roughly 5% of the off-diagonal entries are non-zero.
let sparseCovariance = generateSparseCovarianceMatrix(
size: numAssets_sparse,
sparsity: 0.95
)
// NOTE(review): the original article text is garbled here — the body of
// sparseObjective was truncated mid-loop ("for i in 0..>("), fusing it with
// the optimizer declaration that follows. The variance computation below is
// a reconstruction of the stated intent (diagonal terms plus only the ~5%
// non-zero off-diagonal terms); confirm the exact BusinessMath sparse-matrix
// iteration API before relying on it.
func sparseObjective(_ weights: VectorN) -> Double {
    let expectedReturn = weights.dot(returns)
    // Exploit sparsity: only compute non-zero covariance terms
    var variance = 0.0
    // Diagonal terms (always present)
    for i in 0..<numAssets_sparse {
        variance += weights[i] * weights[i] * sparseCovariance[i][i]
    }
    // Off-diagonal terms: visit only stored non-zeros; factor 2 covers (i,j)/(j,i) symmetry.
    for (i, j, covariance) in sparseCovariance.nonZeroOffDiagonalEntries {
        variance += 2.0 * weights[i] * weights[j] * covariance
    }
    // Negated mean-variance utility so the minimizer maximizes utility.
    return -(expectedReturn - riskAversion * variance)
}
// NOTE(review): generic parameter restored after extraction garbling — confirm <Double>.
let sparseLBFGS = MultivariateLBFGS<Double>(
    memorySize: 15,
    maxIterations: 200
)
let sparseStart = Date()
let sparseResult = try sparseLBFGS.minimizeLBFGS(
    function: sparseObjective,
    initialGuess: VectorN.equalWeights(dimension: numAssets_sparse)
)
let sparseTime = Date().timeIntervalSince(sparseStart)
// L-BFGS working set: m = 15 pairs of length-n vectors, 8 bytes per Double, in MB.
let sparseLBFGSMemory = Double(15 * numAssets_sparse * 2) * 8.0 / 1_048_576.0
print("Results:")
print(" Expected Return: \((sparseResult.solution.dot(returns)).percent(2))")
print(" Iterations: \(sparseResult.iterations)")
print(" Time: \(sparseTime.number(1))s")
print(" Memory: \(sparseLBFGSMemory.number(2)) MB")
print("\nComparison:")
// BUG FIX: the original computed `numAssets_sparse * numAssets`, mixing in the
// 1,000-asset variable from the earlier example. A dense Hessian for this
// problem is n × n with n = numAssets_sparse.
let hypotheticalBFGSMemory = Double(numAssets_sparse * numAssets_sparse) * 8.0 / 1_048_576.0
print(" Standard BFGS would require: \(hypotheticalBFGSMemory.number(1)) MB")
print(" L-BFGS actual usage: \(sparseLBFGSMemory.number(2)) MB")
print(" Savings: \(((hypotheticalBFGSMemory - sparseLBFGSMemory) / hypotheticalBFGSMemory).percent(1))")
print("\nNote: Sparse covariance is realistic for large portfolios.")
print("Most stocks aren't directly correlated - only within sectors/regions.")
→ Full API Reference: BusinessMath Docs – L-BFGS Tutorial
Tomorrow: We’ll explore Conjugate Gradient, another memory-efficient method that doesn’t require storing Hessian information at all.
Thursday: Week 10 concludes with Simulated Annealing, a global optimization method that doesn’t require gradients.
Series: [Week 10 of 12] | Topic: [Part 5 - Advanced Methods] | Case Studies: [4/6 Complete]
Topics Covered: L-BFGS • Large-scale optimization • Memory efficiency • History size tuning • Sparse matrices
Playgrounds: [Week 1-10 available] • [Next: Conjugate gradient method]
Tagged with: businessmath, swift, optimization, lbfgs, large-scale, memory-efficient, quasi-newton