Brand Choice Modelling
Contents
Brand Choice Modelling#
Imports#
import pandas as pd
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
import numpy as np
import pickle
import joblib
import itertools
from sklearn.linear_model import LogisticRegression
sns.set()
Read and Prepare Dataset#
df = pd.read_csv("purchase data.csv"); df.head()
ID | Day | Incidence | Brand | Quantity | Last_Inc_Brand | Last_Inc_Quantity | Price_1 | Price_2 | Price_3 | ... | Promotion_3 | Promotion_4 | Promotion_5 | Sex | Marital status | Age | Education | Income | Occupation | Settlement size | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 200000001 | 1 | 0 | 0 | 0 | 0 | 0 | 1.59 | 1.87 | 2.01 | ... | 0 | 0 | 0 | 0 | 0 | 47 | 1 | 110866 | 1 | 0 |
1 | 200000001 | 11 | 0 | 0 | 0 | 0 | 0 | 1.51 | 1.89 | 1.99 | ... | 0 | 0 | 0 | 0 | 0 | 47 | 1 | 110866 | 1 | 0 |
2 | 200000001 | 12 | 0 | 0 | 0 | 0 | 0 | 1.51 | 1.89 | 1.99 | ... | 0 | 0 | 0 | 0 | 0 | 47 | 1 | 110866 | 1 | 0 |
3 | 200000001 | 16 | 0 | 0 | 0 | 0 | 0 | 1.52 | 1.89 | 1.98 | ... | 0 | 0 | 0 | 0 | 0 | 47 | 1 | 110866 | 1 | 0 |
4 | 200000001 | 18 | 0 | 0 | 0 | 0 | 0 | 1.52 | 1.89 | 1.99 | ... | 0 | 0 | 0 | 0 | 0 | 47 | 1 | 110866 | 1 | 0 |
5 rows × 24 columns
def do_clustering(df, pipeline, drop_cols=None, sel_cols=None, do_fit=False):
y = None
df_new = df.copy()
if drop_cols: df_new = df_new.drop(columns=drop_cols, axis=1)
df_filter = df_new.copy()
if sel_cols: df_filter = df_new[sel_cols]
if do_fit:y = pipeline.fit_predict(df_filter)
else: y = pipeline.predict(df_filter)
if 'pca' in pipeline.named_steps:
m = pipeline.named_steps['pca']
comp_names = [f"PCA{i+1}" for i in range(m.n_components)]
transform_df = df_filter.copy()
for step in pipeline.named_steps:
transform_df = pipeline.named_steps[step].transform(transform_df)
if step == "pca": break
pca_df = pd.DataFrame(transform_df,
columns=comp_names,
index=df_filter.index)
df_new = pd.concat([df_new, pca_df], axis=1)
df_new['y'] = y+1
return df_new, pipeline
pipeline = joblib.load("cluster_pipeline.pkl"); pipeline
sel_cols = ['Sex','Marital status','Age','Education','Income','Occupation','Settlement size']
df_segments, _ = do_clustering(df, pipeline, sel_cols=sel_cols)
names = {1:"Standard",
2:"Career-Focussed",
3:"Fewer-Opportunities",
4:"Well-off"}
df_segments['labels'] = df_segments['y'].map(names)
df_segments.head().T
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
ID | 200000001 | 200000001 | 200000001 | 200000001 | 200000001 |
Day | 1 | 11 | 12 | 16 | 18 |
Incidence | 0 | 0 | 0 | 0 | 0 |
Brand | 0 | 0 | 0 | 0 | 0 |
Quantity | 0 | 0 | 0 | 0 | 0 |
Last_Inc_Brand | 0 | 0 | 0 | 0 | 0 |
Last_Inc_Quantity | 0 | 0 | 0 | 0 | 0 |
Price_1 | 1.59 | 1.51 | 1.51 | 1.52 | 1.52 |
Price_2 | 1.87 | 1.89 | 1.89 | 1.89 | 1.89 |
Price_3 | 2.01 | 1.99 | 1.99 | 1.98 | 1.99 |
Price_4 | 2.09 | 2.09 | 2.09 | 2.09 | 2.09 |
Price_5 | 2.66 | 2.66 | 2.66 | 2.66 | 2.66 |
Promotion_1 | 0 | 0 | 0 | 0 | 0 |
Promotion_2 | 1 | 0 | 0 | 0 | 0 |
Promotion_3 | 0 | 0 | 0 | 0 | 0 |
Promotion_4 | 0 | 0 | 0 | 0 | 0 |
Promotion_5 | 0 | 0 | 0 | 0 | 0 |
Sex | 0 | 0 | 0 | 0 | 0 |
Marital status | 0 | 0 | 0 | 0 | 0 |
Age | 47 | 47 | 47 | 47 | 47 |
Education | 1 | 1 | 1 | 1 | 1 |
Income | 110866 | 110866 | 110866 | 110866 | 110866 |
Occupation | 1 | 1 | 1 | 1 | 1 |
Settlement size | 0 | 0 | 0 | 0 | 0 |
PCA1 | 0.362152 | 0.362152 | 0.362152 | 0.362152 | 0.362152 |
PCA2 | -0.639557 | -0.639557 | -0.639557 | -0.639557 | -0.639557 |
PCA3 | 1.462706 | 1.462706 | 1.462706 | 1.462706 | 1.462706 |
PCA4 | -0.593242 | -0.593242 | -0.593242 | -0.593242 | -0.593242 |
y | 3 | 3 | 3 | 3 | 3 |
labels | Fewer-Opportunities | Fewer-Opportunities | Fewer-Opportunities | Fewer-Opportunities | Fewer-Opportunities |
features = "Brand|Price*|Promotion*|labels"
df_brand_choice = df_segments[df_segments.Incidence == 1].filter(regex=features).drop(columns=["Last_Inc_Brand"]).reset_index(drop=True); df_brand_choice.head()
Brand | Price_1 | Price_2 | Price_3 | Price_4 | Price_5 | Promotion_1 | Promotion_2 | Promotion_3 | Promotion_4 | Promotion_5 | labels | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2 | 1.50 | 1.90 | 1.99 | 2.09 | 2.67 | 0 | 0 | 0 | 0 | 0 | Fewer-Opportunities |
1 | 5 | 1.39 | 1.90 | 1.91 | 2.12 | 2.62 | 1 | 0 | 0 | 0 | 1 | Fewer-Opportunities |
2 | 1 | 1.47 | 1.90 | 1.99 | 1.97 | 2.67 | 0 | 0 | 0 | 1 | 0 | Fewer-Opportunities |
3 | 4 | 1.21 | 1.35 | 1.99 | 2.16 | 2.68 | 1 | 1 | 0 | 0 | 0 | Fewer-Opportunities |
4 | 2 | 1.46 | 1.88 | 1.97 | 1.89 | 2.37 | 1 | 0 | 0 | 1 | 1 | Fewer-Opportunities |
df_brand_choice.filter(regex="Pr")
Price_1 | Price_2 | Price_3 | Price_4 | Price_5 | Promotion_1 | Promotion_2 | Promotion_3 | Promotion_4 | Promotion_5 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1.50 | 1.90 | 1.99 | 2.09 | 2.67 | 0 | 0 | 0 | 0 | 0 |
1 | 1.39 | 1.90 | 1.91 | 2.12 | 2.62 | 1 | 0 | 0 | 0 | 1 |
2 | 1.47 | 1.90 | 1.99 | 1.97 | 2.67 | 0 | 0 | 0 | 1 | 0 |
3 | 1.21 | 1.35 | 1.99 | 2.16 | 2.68 | 1 | 1 | 0 | 0 | 0 |
4 | 1.46 | 1.88 | 1.97 | 1.89 | 2.37 | 1 | 0 | 0 | 1 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
14633 | 1.48 | 1.89 | 2.01 | 2.18 | 2.69 | 0 | 0 | 0 | 0 | 0 |
14634 | 1.35 | 1.57 | 2.02 | 2.21 | 2.70 | 1 | 1 | 0 | 0 | 0 |
14635 | 1.50 | 1.85 | 2.06 | 2.24 | 2.79 | 1 | 1 | 0 | 0 | 0 |
14636 | 1.42 | 1.51 | 1.97 | 2.24 | 2.78 | 0 | 0 | 0 | 0 | 0 |
14637 | 1.51 | 1.82 | 2.09 | 2.24 | 2.80 | 0 | 0 | 0 | 0 | 0 |
14638 rows × 10 columns
Brand choice probability#
model_brand = LogisticRegression(multi_class='multinomial', solver='sag')
model_brand.fit(df_brand_choice.filter(regex="Pr").values, df_brand_choice['Brand'].values)
LogisticRegression(multi_class='multinomial', solver='sag')
model_brand.coef_.shape
(5, 10)
df_brand_choice.filter(regex="Pr").columns.tolist(), df_brand_choice.Brand.unique().tolist()
(['Price_1',
'Price_2',
'Price_3',
'Price_4',
'Price_5',
'Promotion_1',
'Promotion_2',
'Promotion_3',
'Promotion_4',
'Promotion_5'],
[2, 5, 1, 4, 3])
df_brand_coeff = pd.DataFrame(model_brand.coef_, columns=df_brand_choice.filter(regex="Pr").columns.tolist(), index=model_brand.classes_)
fig, ax = plt.subplots(figsize=(12,6))
sns.heatmap(data=df_brand_coeff, cmap="RdYlBu", annot=True, ax=ax)
<AxesSubplot:>
df_brand_coeff.loc[1, 'Price_1']
-4.469319087099478
prices = np.arange(0.5, 3.5,0.01); prices.shape, prices
((300,),
array([0.5 , 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 ,
0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7 , 0.71,
0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8 , 0.81, 0.82,
0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9 , 0.91, 0.92, 0.93,
0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1. , 1.01, 1.02, 1.03, 1.04,
1.05, 1.06, 1.07, 1.08, 1.09, 1.1 , 1.11, 1.12, 1.13, 1.14, 1.15,
1.16, 1.17, 1.18, 1.19, 1.2 , 1.21, 1.22, 1.23, 1.24, 1.25, 1.26,
1.27, 1.28, 1.29, 1.3 , 1.31, 1.32, 1.33, 1.34, 1.35, 1.36, 1.37,
1.38, 1.39, 1.4 , 1.41, 1.42, 1.43, 1.44, 1.45, 1.46, 1.47, 1.48,
1.49, 1.5 , 1.51, 1.52, 1.53, 1.54, 1.55, 1.56, 1.57, 1.58, 1.59,
1.6 , 1.61, 1.62, 1.63, 1.64, 1.65, 1.66, 1.67, 1.68, 1.69, 1.7 ,
1.71, 1.72, 1.73, 1.74, 1.75, 1.76, 1.77, 1.78, 1.79, 1.8 , 1.81,
1.82, 1.83, 1.84, 1.85, 1.86, 1.87, 1.88, 1.89, 1.9 , 1.91, 1.92,
1.93, 1.94, 1.95, 1.96, 1.97, 1.98, 1.99, 2. , 2.01, 2.02, 2.03,
2.04, 2.05, 2.06, 2.07, 2.08, 2.09, 2.1 , 2.11, 2.12, 2.13, 2.14,
2.15, 2.16, 2.17, 2.18, 2.19, 2.2 , 2.21, 2.22, 2.23, 2.24, 2.25,
2.26, 2.27, 2.28, 2.29, 2.3 , 2.31, 2.32, 2.33, 2.34, 2.35, 2.36,
2.37, 2.38, 2.39, 2.4 , 2.41, 2.42, 2.43, 2.44, 2.45, 2.46, 2.47,
2.48, 2.49, 2.5 , 2.51, 2.52, 2.53, 2.54, 2.55, 2.56, 2.57, 2.58,
2.59, 2.6 , 2.61, 2.62, 2.63, 2.64, 2.65, 2.66, 2.67, 2.68, 2.69,
2.7 , 2.71, 2.72, 2.73, 2.74, 2.75, 2.76, 2.77, 2.78, 2.79, 2.8 ,
2.81, 2.82, 2.83, 2.84, 2.85, 2.86, 2.87, 2.88, 2.89, 2.9 , 2.91,
2.92, 2.93, 2.94, 2.95, 2.96, 2.97, 2.98, 2.99, 3. , 3.01, 3.02,
3.03, 3.04, 3.05, 3.06, 3.07, 3.08, 3.09, 3.1 , 3.11, 3.12, 3.13,
3.14, 3.15, 3.16, 3.17, 3.18, 3.19, 3.2 , 3.21, 3.22, 3.23, 3.24,
3.25, 3.26, 3.27, 3.28, 3.29, 3.3 , 3.31, 3.32, 3.33, 3.34, 3.35,
3.36, 3.37, 3.38, 3.39, 3.4 , 3.41, 3.42, 3.43, 3.44, 3.45, 3.46,
3.47, 3.48, 3.49]))
brand = 5
df_prices = pd.DataFrame({"mean":df_brand_choice.filter(regex="Price").mean(),
"min":df_brand_choice.filter(regex="Price").min(),
"max":df_brand_choice.filter(regex="Price").max()}).reset_index().melt(id_vars='index')
df_prices
index | variable | value | |
---|---|---|---|
0 | Price_1 | mean | 1.384559 |
1 | Price_2 | mean | 1.764717 |
2 | Price_3 | mean | 2.006694 |
3 | Price_4 | mean | 2.159658 |
4 | Price_5 | mean | 2.654296 |
5 | Price_1 | min | 1.100000 |
6 | Price_2 | min | 1.260000 |
7 | Price_3 | min | 1.870000 |
8 | Price_4 | min | 1.760000 |
9 | Price_5 | min | 2.110000 |
10 | Price_1 | max | 1.590000 |
11 | Price_2 | max | 1.900000 |
12 | Price_3 | max | 2.140000 |
13 | Price_4 | max | 2.260000 |
14 | Price_5 | max | 2.800000 |
regex="Price"
def get_df_summary(df_brand_choice, regex="Price"):
df = pd.DataFrame({"mean":df_brand_choice.filter(regex=regex).mean(),
"min":df_brand_choice.filter(regex=regex).min(),
"max":df_brand_choice.filter(regex=regex).max()}).reset_index().melt(id_vars='index')
df['id'] = df['index'].str.cat(df['variable'], sep="-")
df = df.drop(columns=['index', 'variable'], axis=1)
df = df.set_index('id').T.reset_index(drop=True)
return df
get_df_summary(df_brand_choice,regex="Promotion")
id | Promotion_1-mean | Promotion_2-mean | Promotion_3-mean | Promotion_4-mean | Promotion_5-mean | Promotion_1-min | Promotion_2-min | Promotion_3-min | Promotion_4-min | Promotion_5-min | Promotion_1-max | Promotion_2-max | Promotion_3-max | Promotion_4-max | Promotion_5-max |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.372455 | 0.349638 | 0.043858 | 0.128091 | 0.04543 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
include_promo = "min"
brand = 5
df_brand_elasticity=pd.DataFrame()
df_brand_elasticity['prices'] = prices
df_brand_elasticity['promo']= 1
df_brand_elasticity['brand']= brand
df_brand_elasticity = df_brand_elasticity.join(get_df_summary(df_brand_choice,regex="Price")).ffill()
df_brand_elasticity = df_brand_elasticity.join(get_df_summary(df_brand_choice,regex="Promotion")).ffill()
for br in model_brand.classes_:
df_input = df_brand_elasticity.filter(regex=f"(Price.*mean|Promo.*{include_promo})").copy()
df_input[f"Price_{br}-mean"] = df_brand_elasticity.prices
probabilities = model_brand.predict_proba(df_input.values)
df_brand_elasticity.loc[:, f"Probabilities_{br}" ] = probabilities[:,br-1]
if br == brand: df_brand_elasticity.loc[:, f"Elasticity_{br}" ] = df_brand_coeff.loc[brand, f'Price_{brand}']*df_brand_elasticity.prices*(1- df_brand_elasticity[f"Probabilities_{br}"])
else: df_brand_elasticity.loc[:, f"Elasticity_{br}" ] = -1*df_brand_coeff.loc[brand, f'Price_{brand}']*df_brand_elasticity.prices*df_brand_elasticity[f"Probabilities_{br}"]
df_brand_elasticity
prices | promo | brand | Price_1-mean | Price_2-mean | Price_3-mean | Price_4-mean | Price_5-mean | Price_1-min | Price_2-min | ... | Probabilities_1 | Elasticity_1 | Probabilities_2 | Elasticity_2 | Probabilities_3 | Elasticity_3 | Probabilities_4 | Elasticity_4 | Probabilities_5 | Elasticity_5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.50 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 9.183243e-01 | 0.265457 | 0.853568 | 0.246738 | 0.002126 | 0.000615 | 0.652661 | 0.188662 | 0.709821 | -0.083881 |
1 | 0.51 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 9.143896e-01 | 0.269606 | 0.850910 | 0.250889 | 0.002180 | 0.000643 | 0.650092 | 0.191678 | 0.708717 | -0.085884 |
2 | 0.52 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 9.102819e-01 | 0.273657 | 0.848212 | 0.254997 | 0.002234 | 0.000672 | 0.647513 | 0.194661 | 0.707608 | -0.087901 |
3 | 0.53 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 9.059955e-01 | 0.277606 | 0.845475 | 0.259062 | 0.002290 | 0.000702 | 0.644924 | 0.197611 | 0.706493 | -0.089933 |
4 | 0.54 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 9.015243e-01 | 0.281448 | 0.842696 | 0.263083 | 0.002348 | 0.000733 | 0.642325 | 0.200529 | 0.705373 | -0.091980 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
295 | 3.45 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 7.788910e-07 | 0.000002 | 0.010487 | 0.020917 | 0.057650 | 0.114986 | 0.036948 | 0.073695 | 0.222011 | -1.551743 |
296 | 3.46 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 7.335952e-07 | 0.000001 | 0.010264 | 0.020532 | 0.056995 | 0.114010 | 0.036359 | 0.072730 | 0.220519 | -1.559226 |
297 | 3.47 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 6.909231e-07 | 0.000001 | 0.010046 | 0.020153 | 0.056344 | 0.113033 | 0.035777 | 0.071774 | 0.219033 | -1.566714 |
298 | 3.48 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 6.507233e-07 | 0.000001 | 0.009832 | 0.019781 | 0.055696 | 0.112056 | 0.035204 | 0.070826 | 0.217553 | -1.574206 |
299 | 3.49 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 6.128534e-07 | 0.000001 | 0.009623 | 0.019416 | 0.055053 | 0.111079 | 0.034638 | 0.069888 | 0.216080 | -1.581702 |
300 rows × 43 columns
df_brand_elasticity['Price_5-max'][0]
2.8
o = 5 #Own Brand
c = 4 #Cross Brand
fig, axes = plt.subplots(1,2, figsize=(12,6))
sns.lineplot(data=df_brand_elasticity, x="prices", y=f"Elasticity_{o}", ax=axes[0]).set(title=f"Own_brand_{o}")
axes[0].axvspan(df_brand_elasticity[f'Price_{o}-min'][0], df_brand_elasticity[f'Price_{o}-max'][0], alpha=.2, color='red')
axes[0].axvline(df_brand_elasticity[f'Price_{o}-mean'][0], color='blue')
sns.lineplot(data=df_brand_elasticity, x="prices", y=f"Elasticity_{c}", ax=axes[1]).set(title=f"Cross_brand_{c}")
axes[1].axvspan(df_brand_elasticity[f'Price_{c}-min'][0], df_brand_elasticity[f'Price_{c}-max'][0], alpha=.2, color='red')
axes[1].axvline(df_brand_elasticity[f'Price_{c}-mean'][0], color='blue')
<matplotlib.lines.Line2D at 0xf1f6c080eb0>
Brand choice probability by segments#
segments = df_segments['labels'].unique().tolist(); segments
segments.insert(0, None)
segments
[None, 'Fewer-Opportunities', 'Well-off', 'Career-Focussed', 'Standard']
def get_brand_elasticity_df(df_brand_choice, segment=None, brand=5, include_promo='min', max_iter=1000):
label = "Aggregate"
df = df_brand_choice.copy()
if segment:
label = segment
df = df_brand_choice[df_brand_choice['labels'] == segment].copy()
# Model Training
model_brand = LogisticRegression(multi_class='multinomial', solver='sag', max_iter=max_iter)
model_brand.fit(df.filter(regex="Pr").values, df['Brand'].values)
df_brand_coeff = pd.DataFrame(model_brand.coef_, columns=df.filter(regex="Pr").columns.tolist(), index=model_brand.classes_)
prices = np.arange(0.5, 3.5,0.01)
df_brand_elasticity = pd.DataFrame()
df_brand_elasticity['prices'] = prices
df_brand_elasticity['promo']= 1
df_brand_elasticity['brand']= brand
df_brand_elasticity = df_brand_elasticity.join(get_df_summary(df,regex="Price")).ffill()
df_brand_elasticity = df_brand_elasticity.join(get_df_summary(df,regex="Promotion")).ffill()
for br in model_brand.classes_:
df_input = df_brand_elasticity.filter(regex=f"(Price.*mean|Promo.*{include_promo})").copy()
df_input[f"Price_{br}-mean"] = df_brand_elasticity.prices
probabilities = model_brand.predict_proba(df_input.values)
df_brand_elasticity.loc[:, f"Probabilities_{br}" ] = probabilities[:,br-1]
if br == brand: df_brand_elasticity.loc[:, f"Elasticity_{br}" ] = df_brand_coeff.loc[brand, f'Price_{brand}']*df_brand_elasticity.prices*(1- df_brand_elasticity[f"Probabilities_{br}"])
else: df_brand_elasticity.loc[:, f"Elasticity_{br}" ] = -1*df_brand_coeff.loc[brand, f'Price_{brand}']*df_brand_elasticity.prices*df_brand_elasticity[f"Probabilities_{br}"]
df_brand_elasticity['label'] = label
return df_brand_elasticity
a = get_brand_elasticity_df(df_brand_choice,segment=segments[0])
a.head()
prices | promo | brand | Price_1-mean | Price_2-mean | Price_3-mean | Price_4-mean | Price_5-mean | Price_1-min | Price_2-min | ... | Elasticity_1 | Probabilities_2 | Elasticity_2 | Probabilities_3 | Elasticity_3 | Probabilities_4 | Elasticity_4 | Probabilities_5 | Elasticity_5 | label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.50 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 0.265461 | 0.853569 | 0.246742 | 0.002126 | 0.000614 | 0.652661 | 0.188665 | 0.709819 | -0.083883 | Aggregate |
1 | 0.51 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 0.269610 | 0.850911 | 0.250893 | 0.002179 | 0.000643 | 0.650092 | 0.191681 | 0.708715 | -0.085886 | Aggregate |
2 | 0.52 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 0.273662 | 0.848213 | 0.255001 | 0.002234 | 0.000672 | 0.647513 | 0.194664 | 0.707606 | -0.087903 | Aggregate |
3 | 0.53 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 0.277611 | 0.845475 | 0.259066 | 0.002290 | 0.000702 | 0.644924 | 0.197614 | 0.706491 | -0.089935 | Aggregate |
4 | 0.54 | 1 | 5 | 1.384559 | 1.764717 | 2.006694 | 2.159658 | 2.654296 | 1.1 | 1.26 | ... | 0.281453 | 0.842697 | 0.263087 | 0.002347 | 0.000733 | 0.642325 | 0.200531 | 0.705370 | -0.091982 | Aggregate |
5 rows × 44 columns
brands = [1,2,3,4,5]
df_brand_elasticity_all = pd.concat([get_brand_elasticity_df(df_brand_choice, segment=s, brand=b) for s,b in itertools.product(segments, brands)]).reset_index(drop=True)
df_brand_elasticity_all.shape
(7500, 44)