Introduction

In this bonus practical we build an optimization workflow using actual data.

Please ensure fmxdat has been updated to the latest version.

library(tidyverse);library(lubridate);library(tbl2xts); library(glue); library(RiskPortfolios)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Load data and look at it:

cpi <- fmxdat::cpi

Rets <- fmxdat::mvo_example_data

ref <- fmxdat::mvo_example_data_ref

constraints <- fmxdat::mvo_ex_constraints

From the above, note that you are given:

Monthly returns data on local and global assets (all converted to ZAR).
The ref dataframe gives the full names for the asset classes.
The constraints given give us individual constraints (lbub) and group constraints (group_LB and group_UB).
Nominal return forecasts to use include are here: Nominal_FC
- We need to provide the optimizer with real (ZAR) return forecasts, so it is imperative to have a view on inflation and currency depreciation too.
- For this, use the constraints$CPI & constraints$ZAR_Dep values.

Suppose we want to:

Find the optimal combinations using a MVO framework, using both our nominal forecasts and sample return forecasts.
Construct a MVO plot that shows the different assets on there and the efficient frontier too.
Also construct a flexible workflow that shows your own portfolios on this graph.

Let’s go.

Step 1

We first need to convert our nominal returns into real and get sample return inputs too.
Thereafter, we convert our constraints and inputs defined in the previous practical’s matrix notation to be used as inputs in our optimization calls.

Sample forecasts and converting Nominal to Real:

CPI <- constraints$CPI
ZAR_dep <- constraints$ZAR_Dep

# Sample Real Returns:
Return_Forecasts <-
Rets %>% 
  gather(Alias, Returns, -date) %>% 
  left_join(., ref, by = "Alias") %>% 
  group_by(Alias) %>% 
  summarise(
                Smpl_Geom_Mean = exp(mean(log(1+Returns)))^12 - 1,
                # For your own sanity, the above is effectively the same as doing:
                # Smpl_Geom_Mean_Sanity = prod(1+Returns_Use)^(12/n())-1,
                Smpl_SD = sd(Returns) * sqrt(12)
                ) %>% 
  
  left_join(., constraints$Nominal_FC) %>% 
  
  mutate( across(c(-Alias, -Smpl_SD),  ~ifelse( !grepl("LCL", Alias), . + ZAR_dep , .))) %>% 
  mutate( across(c(-Alias, -Smpl_SD),  ~. - CPI))

## Joining with `by = join_by(Alias)`

Return_Forecasts <- Return_Forecasts %>% filter(Alias %in% ref$Alias)

Return_Forecasts

## # A tibble: 16 × 5
##    Alias        Smpl_Geom_Mean Smpl_SD Default Default2
##    <chr>                 <dbl>   <dbl>   <dbl>    <dbl>
##  1 Cred_Corp_ST         0.0601 0.148    0.0315   0.017 
##  2 Cred_Gl              0.0638 0.134    0.0463   0.04  
##  3 Csh_Gl               0.0478 0.158    0.018    0.018 
##  4 Csh_LCL              0.0198 0.00531  0.015    0.0262
##  5 Eq_EM                0.0732 0.149    0.094    0.0618
##  6 Eq_LCL               0.0525 0.146    0.07     0.0455
##  7 Eq_World             0.108  0.147    0.0599   0.052 
##  8 FI_EM                0.0818 0.126    0.0786   0.031 
##  9 FI_Glb               0.0524 0.142    0.0165   0.018 
## 10 FI_LCL               0.0352 0.0772   0.057    0.062 
## 11 Gold                 0.116  0.195    0.028    0.028 
## 12 ILB_LCL              0.0262 0.0577   0.05     0.05  
## 13 ILB_US               0.0673 0.153    0.022    0.026 
## 14 Infra                0.100  0.135    0.052    0.067 
## 15 REIT                 0.0622 0.172    0.053    0.05  
## 16 REIT_LCL             0.0405 0.194    0.045    0.045

The above is now the real forecasts using the sample geometric mean, and our two default nominal forecasts converted to real.

Notice how global stock returns are: Nominal + Currency Depr - CPI, while Local is simply Nominal - CPI.

Our Currency and Inflation forecasts are therefore critical inputs.

Step 2

The next step is to put our constraints into usable matrix notation to be passed to the non-linear optimizer engine.

Let’s do this:

# If there's a missing inflation number, replace NA with:
cpireplacement <- 0.06/12

# Make all my ZAR returns real (offshore returns are already in ZAR):
Real_Rets <- 
left_join(Rets %>% gather(Alias, Returns, -date) %>% fmxdat::YM(), cpi %>% select(-date), by = "YM") %>% mutate(cpi = coalesce(cpi, cpireplacement)) %>% mutate(Returns_Use = (1+Returns)/(1+cpi)-1) %>% 
  filter(Alias %in% ref$Alias)
        
order <- ref$Alias %>% unique

# I ensure my constraints match by the order specified...
UB <- constraints$lbub %>% arrange(match(Alias, order)) %>% select(Alias, UB)
LB <- constraints$lbub %>% arrange(match(Alias, order)) %>% select(Alias, LB)

avec_lt <- 
  constraints$group_LB %>% 
  filter(Alias %in% order) %>% 
  mutate(across(-Alias, ~ifelse(is.na(.x), 0, 1))) %>% 
  # Get our ordering right:
  arrange(match(Alias, order)) %>% select(-Alias)

avec_gt <- 
  constraints$group_UB %>% 
  filter(Alias %in% order) %>% 
  mutate(across(-Alias, ~ifelse(is.na(.x), 0, 1))) %>% 
  # Get our ordering right:
  arrange(match(Alias, order)) %>% select(-Alias)

bvec_lt <- constraints$group_LB %>% select(-Alias) %>% apply(., 2, min, na.rm=T)
bvec_gt <- constraints$group_UB %>% select(-Alias) %>% apply(., 2, max, na.rm=T)

# You can now check that these constraints are valid....:
# ncol(avec_lt) == length(bvec_lt)
if( sum(avec_gt) != sum(avec_lt) ) stop("\n\nCheck const_gt and const_lt input dimensions match....If there is a value for one asset in a column, it must have an entry for both... \n\n")


mvo_constraints <- list()

mvo_constraints$order <- order
# Individual UB / LB constraints:
mvo_constraints$Indiv$UB <- UB %>% pull(UB)
mvo_constraints$Indiv$LB <- LB %>% pull(LB)

# Amat for LB and UB:::
mvo_constraints$Group$amat <- 
  bind_cols(avec_lt %>% purrr::set_names(names(avec_gt)), 
            avec_gt %>% purrr::set_names(glue("LB_{names(avec_lt)}"))) 

# bvec for LB and UB:::
mvo_constraints$Group$UB <- bvec_gt %>% purrr::set_names(glue("{names(bvec_gt)}")) 
mvo_constraints$Group$LB <- bvec_lt %>% purrr::set_names(glue("LB_{names(bvec_lt)}"))

# Long only constraint:::

mvo_constraints$Active$amat <- data.frame(long = rep(1,length(order)), short = 0, neutral = 0) %>% as.matrix()
mvo_constraints$Active$bvec <- data.frame(long = 1, short = 0, neutral = 0) %>% as.matrix()
    
# Forecast_Choice = "Default"
Forecast_Choice_List <- Return_Forecasts %>% select(-Alias, -Smpl_SD) %>% names() %>% as.list()
gamma_List <- seq(0,1,by = 0.001) %>% as.list()

LetsOptimize <- function(Real_Rets, Cov_Start_Date = ymd(19000101), Return_Forecasts, Forecast_Choice_List, mvo_constraints, gamma_List, order){
  
  # FC_Choice <- Forecast_Choice_List[[1]]
  FC_Choice <- Forecast_Choice_List

# Calculate the covariance matrix to use:
return_mat <- 
  Real_Rets %>% filter(date >= Cov_Start_Date) %>% 
  select(date, Alias, Returns_Use) %>% 
  spread(Alias, Returns_Use) %>% select(-date) %>% as.matrix()

if( any(is.na(return_mat)) ) stop("Return matrix not full... check.")

# Use Ledoit-Wolfe covariance shrinkageL
Sigma <- RiskPortfolios::covEstimation(return_mat, control = list(cov_type = "lw"))
diag(Sigma) <- diag(Sigma) + 1e-8

# Again, order matters big time here!
Sigma <- Sigma[order,order]

# Calculate forecast returns to use:

mu <- Return_Forecasts %>% select(Alias, Mu = all_of(FC_Choice)) %>% arrange(match(Alias, order)) %>% pull(Mu)
names(mu) <- order

  # Remember, this is a quadratic program solve....
safeOpt <- purrr::safely(quadprog::solve.QP)

# Loop across the different risk paramater settings:

optim_loop <- function(safeOpt, gamma_List, mvo_constraints, mu, Sigma){
  
# gamma_choice <- gamma_List[[1]]
gamma_choice <- gamma_List

# Remember::::> set UB as -1! 
groups_mat <- mvo_constraints$Group$amat %>% mutate(across(!starts_with("LB"), ~.*-1)) %>% as.matrix()

n <- length(mu)
dvec <- mu
 
# Let's run the optimizer and save:
Amat <- cbind(1, -dvec, -diag(n), diag(n), groups_mat)
bvec <- c(1, -gamma_choice, -mvo_constraints$Indiv$UB, mvo_constraints$Indiv$LB, -mvo_constraints$Group$UB, mvo_constraints$Group$LB)
opt <- safeOpt(Sigma, dvec, Amat, bvec, meq = 1)
  
opt_weights <- opt$result$solution
if(!is.null(opt_weights)) { 
  names(opt_weights) <- order
  
  return(tibble(Assets = order, Weights = round(opt_weights, 5), gamma = gamma_choice, mu = dvec, Sigma = list(Sigma)))
  
} else {
  
    return(NULL)
  
  }

  
}

Optimum_df <- gamma_List %>% map_df(~optim_loop(safeOpt, ., mvo_constraints, mu, Sigma))

if(!is.null(Optimum_df)){
  
  return(Optimum_df %>% mutate(Type = FC_Choice))
  
} else {
  
  return(NULL)
}


}

Result_all_mu <- 
Forecast_Choice_List %>% map_df(~LetsOptimize(Real_Rets, Cov_Start_Date = ymd(19000101), Return_Forecasts, ., mvo_constraints, gamma_List, order))

Step 3

Right, now for each risk level (gamma) we’ve estimated an optimal level (where applicable). Where gammas don’t converge - no value is given.

What remains now is to graphically represent the above in a mean-variance frontier plot, and also to plot the efficient frontier.

For the efficient frontier (or any portfolio with weights you can provide), the following holds (see here for the matrix theory):

Remember, in R: matrix multiplying the above is done using %*%

# So let's create and plot the efficient frontier line
TypeL <- Result_all_mu$Type %>% unique %>% as.list()
gamma_L <- Result_all_mu %>% group_split(gamma,Type)
N <- Result_all_mu$Assets %>% unique %>% length

# Also, we want to make returns nominal now again...
CPI_Add <- constraints$CPI

EF_creator <- function(gamma_L, N, CPI_Add){
  
  # gsel <- gamma_L[[3]]
  gsel <- gamma_L
  
  if(nrow(gsel) > N) stop("Multiple Optimal points...{unique(gsel$gamma)} | {unique(gsel$Type)}")
  
  Sigma <- gsel %>% slice(1) %>% select(Sigma) %>% pull(Sigma)
  
  Sigma <- Sigma[[1]][gsel$Assets,gsel$Assets]
  wt <- gsel %>% pull(Weights)
  mu <- gsel %>% mutate(mu = mu + CPI_Add) %>% pull(mu)
  
  # Now, math from above:
    tibble(
      gamma = unique(gsel$gamma),
      Eff_Mu = as.numeric(wt %*% mu),
      Eff_SD = sqrt( as.numeric( t(wt) %*% Sigma %*% wt ) ) * sqrt(12),
      Type = unique(gsel$Type)
    )
      
  }
  
  EFLine <- gamma_L %>% map_df(~EF_creator(., N, CPI_Add))

Portfolios

Let’s now add portfolios to this efficient frontier.

Suppose we have two combinations that we want to test:

Port_A <- tibble("Eq_LCL" = 0.4,
           "ILB_US" = 0.1,
           "Eq_World" = 0.23,
           "Csh_LCL" = 0.05,
           "Gold" = 0.07,
           "Eq_EM" = 0.05,
           "FI_Glb" = 0.1
)

# Increase holdings of Eq_World and Gold           
Port_B <- tibble("Eq_LCL" = 0.4,
           "ILB_US" = 0.1,
           "Eq_World" = 0.28,
           "Csh_LCL" = 0.05,
           "Gold" = 0.1,
           "Eq_EM" = 0.05,
           "FI_Glb" = 0.02
)
           
CPI_Add <- constraints$CPI

Ports <-            
full_join( Port_A %>% gather(Assets, Port_A), 
            Port_B %>% gather(Assets, Port_B),
            by = "Assets") %>% mutate(across(-Assets, ~coalesce(., 0))) %>% 
  gather(Portfolio, Weight, -Assets)

Ports <- 
left_join(Ports, 
          Result_all_mu %>% select(Assets, mu, Sigma, Type) %>% unique %>% mutate(mu = mu + CPI_Add),
          by = "Assets",
          relationship = "many-to-many"
)
     
# Let's create a function to do the matrix magic quick:
# build the function using:
# R1 <- Ports %>% filter(Type == first(Type), Portfolio == first(Portfolio))


Optim_creator <- function(R1){
Sig <- R1 %>% select(Sigma) %>% pull(Sigma) %>% .[[1]] %>% .[all_of(R1$Assets), all_of(R1$Assets)]
Wt <- R1 %>% arrange(match(Assets, R1$Assets)) %>% pull(Weight)
Mu <- R1 %>% arrange(match(Assets, R1$Assets)) %>% pull(mu)

tibble(
Mu = as.numeric(Wt %*% Mu),
SD = sqrt( as.numeric( t(Wt) %*% Sig %*% Wt ) ) * sqrt(12),
Portfolio = unique(R1$Portfolio),
Type = unique(R1$Type)
)
}


Plot_ports <- 
  Ports %>% group_split(Type, Portfolio) %>% map_df(~Optim_creator(.))

## Warning: Using `all_of()` outside of a selecting function was deprecated in tidyselect
## 1.2.0.
## ℹ See details at
##   <https://tidyselect.r-lib.org/reference/faq-selection-context.html>
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Final step

Right, now we have all we need to make the EF plot:

# Choose which forecasts to use:
Type_Sel = "Default"

g_EF <- 
EFLine %>% filter(Type %in% Type_Sel) %>% ggplot() + 
  geom_line(aes(Eff_Mu, Eff_SD, color = Type), color = "steelblue", linewidth = 1.2, alpha = 0.65) + 
  geom_point(aes(Eff_Mu, Eff_SD), alpha = 0.65, size = 1.2, color = "steelblue") + 
  coord_flip() + 
  labs(x = "Sample Volatility", y = "Mean Returns") 

# Add assets:
library(fmxdat)

Ret_FC_Nominal <- Return_Forecasts %>% mutate(across(c(-Alias, -Smpl_SD), ~. + CPI_Add))


g_EF + 
  geom_point(data = Ret_FC_Nominal, 
             aes(Default, Smpl_SD, color = Alias), size = 3) +  scale_color_hue(l=60, c=85) + 
  geom_label(data = Ret_FC_Nominal, 
             aes(Default, Smpl_SD, label = Alias), alpha = 0.3, vjust = 1) + 
  geom_point(data = Plot_ports %>% filter(Type %in% Type_Sel), 
             aes(Mu, SD), size = 3, color = "gray50", shape  = 19) +  
  geom_label(data = Plot_ports %>% filter(Type %in% Type_Sel), 
             aes(Mu, SD, label = Portfolio, fill = Portfolio), alpha = 0.3, vjust = 1) + 
    guides(fill = "none", color = "none") + 
  scale_x_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::pretty_breaks(10)) + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::pretty_breaks(10)) + 
  
  labs(title = glue::glue("Efficient Frontier: {Type_Sel}"),
       subtitle = glue("Real USD Forecasts | Inflation forecast: {round(constraints$CPI, 2)*100}% | Currency Depreciation: {round(constraints$ZAR_Dep, 2)*100}%"),
       caption = "All values are nominal and annualised. Source: Fin Metrics Course. Data: Bloomberg."
) + fmxdat::theme_fmx(title.size = ggpts(45),
                      subtitle.size = ggpts(39), 
                      CustomCaption = T,
                      caption.size = ggpts(28))

And that’s a wrap.

Hope you found working through this tutorial useful.

Nico

Financial Econometrics Practical

Practical 3 Bonus: Optimization Worked Example

NF Katzke