Skip to main content
Became Hot Network Question
add some missing context (imports)
Source Link
J_H
  • 43.3k
  • 3
  • 38
  • 158
from itertools import cycle

import pandas as pd

row, col = 5 * len(data['Product'].unique().tolist()), 5 * len(data['Product'].unique().tolist()) + 1
df_corr_name = pd.DataFrame.from_records([[0.5]*col]*row)
df_corr_name = df_corr_name.loc[ : , df_corr_name.columns != 0]
df_corr_name

#CREATE NEW COLUMNS


#year
tenor_list = cycle(['Year 1', 'Year 2', 'Year 3', 'Year 4', 'Year 5'])
df_corr_name['Year'] = [next(tenor_list) for i in range(len(df_corr_name))]
df_corr_name.insert(0, 'Year', df_corr_name.pop('Year'))


#product
name_list = data['Product'].unique().tolist()
rep = 5
df_corr_name['Product'] = [ele for ele in name_list for i in range(rep)]
df_corr_name.insert(1, 'Product', df_corr_name.pop('Product'))

#rating
df_tcc_quality = data[['Product', 'Rating']].drop_duplicates()
quality_list = [list(i) for i in zip(df_tcc_quality['Product'], df_tcc_quality['Rating'])]
tcc_list_100 = df_corr_name['Product'].tolist()
L = []
for i in range(len(tcc_list_100)):
    for j in range(len(quality_list)):
        if tcc_list_100[i] == quality_list[j][0]:
            L.append(quality_list[j][1])
df_corr_name['Rating'] = L
df_corr_name.insert(2, 'Rating', df_corr_name.pop('Rating'))


#HEADERS
#Year
df_corr_name.loc[-1] = ['', '', ''] + [next(tenor_list) for i in range(len(df_corr_name))]
df_corr_name.iloc[-1] = df_corr_name.iloc[-1].astype(str)
df_corr_name.index = df_corr_name.index + 1 
df_corr_name = df_corr_name.sort_index()

#Name
df_corr_name.loc[-1] = ['', '', ''] + [ele for ele in name_list for i in range(rep)]
df_corr_name.iloc[-1] = df_corr_name.iloc[-1].astype(str)
df_corr_name.index = df_corr_name.index + 1  
df_corr_name = df_corr_name.sort_index()

#Quality
df_corr_name.loc[-1] = ['', '', ''] + L
df_corr_name.iloc[-1] = df_corr_name.iloc[-1].astype(str)
df_corr_name.index = df_corr_name.index + 1  
df_corr_name = df_corr_name.sort_index()


new_labels = pd.MultiIndex.from_arrays([df_corr_name.columns, df_corr_name.iloc[0], df_corr_name.iloc[1]], names=['Year', 'Rating', 'Product'])
df_corr_name = df_corr_name.set_axis(new_labels, axis=1).iloc[3:].reset_index().drop('index', axis = 1)


#POPULATE CORRELATION
for i, j in df_corr_name.iterrows(): 
    i = df_corr_name.index.tolist()[0]
    while i <= len(df_corr_name.index):
        df_corr_name.iloc[i:i+5, i+3:i+8] = 1.0
        i += 5


for i, j in df_corr_name.iterrows():
    df_corr_name.iloc[i][i+1] = float(0)
row, col = 5 * len(data['Product'].unique().tolist()), 5 * len(data['Product'].unique().tolist()) + 1
df_corr_name = pd.DataFrame.from_records([[0.5]*col]*row)
df_corr_name = df_corr_name.loc[ : , df_corr_name.columns != 0]
df_corr_name

#CREATE NEW COLUMNS


#year
tenor_list = cycle(['Year 1', 'Year 2', 'Year 3', 'Year 4', 'Year 5'])
df_corr_name['Year'] = [next(tenor_list) for i in range(len(df_corr_name))]
df_corr_name.insert(0, 'Year', df_corr_name.pop('Year'))


#product
name_list = data['Product'].unique().tolist()
rep = 5
df_corr_name['Product'] = [ele for ele in name_list for i in range(rep)]
df_corr_name.insert(1, 'Product', df_corr_name.pop('Product'))

#rating
df_tcc_quality = data[['Product', 'Rating']].drop_duplicates()
quality_list = [list(i) for i in zip(df_tcc_quality['Product'], df_tcc_quality['Rating'])]
tcc_list_100 = df_corr_name['Product'].tolist()
L = []
for i in range(len(tcc_list_100)):
    for j in range(len(quality_list)):
        if tcc_list_100[i] == quality_list[j][0]:
            L.append(quality_list[j][1])
df_corr_name['Rating'] = L
df_corr_name.insert(2, 'Rating', df_corr_name.pop('Rating'))


#HEADERS
#Year
df_corr_name.loc[-1] = ['', '', ''] + [next(tenor_list) for i in range(len(df_corr_name))]
df_corr_name.iloc[-1] = df_corr_name.iloc[-1].astype(str)
df_corr_name.index = df_corr_name.index + 1 
df_corr_name = df_corr_name.sort_index()

#Name
df_corr_name.loc[-1] = ['', '', ''] + [ele for ele in name_list for i in range(rep)]
df_corr_name.iloc[-1] = df_corr_name.iloc[-1].astype(str)
df_corr_name.index = df_corr_name.index + 1  
df_corr_name = df_corr_name.sort_index()

#Quality
df_corr_name.loc[-1] = ['', '', ''] + L
df_corr_name.iloc[-1] = df_corr_name.iloc[-1].astype(str)
df_corr_name.index = df_corr_name.index + 1  
df_corr_name = df_corr_name.sort_index()


new_labels = pd.MultiIndex.from_arrays([df_corr_name.columns, df_corr_name.iloc[0], df_corr_name.iloc[1]], names=['Year', 'Rating', 'Product'])
df_corr_name = df_corr_name.set_axis(new_labels, axis=1).iloc[3:].reset_index().drop('index', axis = 1)


#POPULATE CORRELATION
for i, j in df_corr_name.iterrows(): 
    i = df_corr_name.index.tolist()[0]
    while i <= len(df_corr_name.index):
        df_corr_name.iloc[i:i+5, i+3:i+8] = 1.0
        i += 5


for i, j in df_corr_name.iterrows():
    df_corr_name.iloc[i][i+1] = float(0)
from itertools import cycle

import pandas as pd

row, col = 5 * len(data['Product'].unique().tolist()), 5 * len(data['Product'].unique().tolist()) + 1
df_corr_name = pd.DataFrame.from_records([[0.5]*col]*row)
df_corr_name = df_corr_name.loc[ : , df_corr_name.columns != 0]
df_corr_name

#CREATE NEW COLUMNS


#year
tenor_list = cycle(['Year 1', 'Year 2', 'Year 3', 'Year 4', 'Year 5'])
df_corr_name['Year'] = [next(tenor_list) for i in range(len(df_corr_name))]
df_corr_name.insert(0, 'Year', df_corr_name.pop('Year'))


#product
name_list = data['Product'].unique().tolist()
rep = 5
df_corr_name['Product'] = [ele for ele in name_list for i in range(rep)]
df_corr_name.insert(1, 'Product', df_corr_name.pop('Product'))

#rating
df_tcc_quality = data[['Product', 'Rating']].drop_duplicates()
quality_list = [list(i) for i in zip(df_tcc_quality['Product'], df_tcc_quality['Rating'])]
tcc_list_100 = df_corr_name['Product'].tolist()
L = []
for i in range(len(tcc_list_100)):
    for j in range(len(quality_list)):
        if tcc_list_100[i] == quality_list[j][0]:
            L.append(quality_list[j][1])
df_corr_name['Rating'] = L
df_corr_name.insert(2, 'Rating', df_corr_name.pop('Rating'))


#HEADERS
#Year
df_corr_name.loc[-1] = ['', '', ''] + [next(tenor_list) for i in range(len(df_corr_name))]
df_corr_name.iloc[-1] = df_corr_name.iloc[-1].astype(str)
df_corr_name.index = df_corr_name.index + 1 
df_corr_name = df_corr_name.sort_index()

#Name
df_corr_name.loc[-1] = ['', '', ''] + [ele for ele in name_list for i in range(rep)]
df_corr_name.iloc[-1] = df_corr_name.iloc[-1].astype(str)
df_corr_name.index = df_corr_name.index + 1  
df_corr_name = df_corr_name.sort_index()

#Quality
df_corr_name.loc[-1] = ['', '', ''] + L
df_corr_name.iloc[-1] = df_corr_name.iloc[-1].astype(str)
df_corr_name.index = df_corr_name.index + 1  
df_corr_name = df_corr_name.sort_index()


new_labels = pd.MultiIndex.from_arrays([df_corr_name.columns, df_corr_name.iloc[0], df_corr_name.iloc[1]], names=['Year', 'Rating', 'Product'])
df_corr_name = df_corr_name.set_axis(new_labels, axis=1).iloc[3:].reset_index().drop('index', axis = 1)


#POPULATE CORRELATION
for i, j in df_corr_name.iterrows(): 
    i = df_corr_name.index.tolist()[0]
    while i <= len(df_corr_name.index):
        df_corr_name.iloc[i:i+5, i+3:i+8] = 1.0
        i += 5


for i, j in df_corr_name.iterrows():
    df_corr_name.iloc[i][i+1] = float(0)
edited title
Link
Laura
  • 81
  • 4

Alternatives to iterrow loops in python correlation matricespandas dataframes

added 146 characters in body
Source Link
Laura
  • 81
  • 4
  • Values at diagonal are 0
  • If any cell has the same column and row’s product names, its value is 1, otherwise 0.5, such as the below output: enter image description hereProduct correlation matrix should look like this

enter image description hereExpected rating correlation matrix

    df_name = df_corr_name.iloc[:, 3:]
    df_quality = df_corr_quality.iloc[:, 3:]
    df_pkl = df_name.to_numpy() * df_quality.to_numpy()    
    
    s = data [['Price']].to_numpy()
    v = df_pkl
    t = np.multiply(s, s.transpose())
    u = np.multiply(t, v)
    z = pd.DataFrame(u)

The final output is:

print(z)

Expected outputs

  • Values at diagonal are 0
  • If any cell has the same column and row’s product names, its value is 1, otherwise 0.5, such as the below output: enter image description here

enter image description here

    df_name = df_corr_name.iloc[:, 3:]
    df_quality = df_corr_quality.iloc[:, 3:]
    df_pkl = df_name.to_numpy() * df_quality.to_numpy()    
    
    s = data [['Price']].to_numpy()
    v = df_pkl
    t = np.multiply(s, s.transpose())
    u = np.multiply(t, v)
    z = pd.DataFrame(u)
  • Values at diagonal are 0
  • If any cell has the same column and row’s product names, its value is 1, otherwise 0.5, such as the below output: Product correlation matrix should look like this

Expected rating correlation matrix

    df_name = df_corr_name.iloc[:, 3:]
    df_quality = df_corr_quality.iloc[:, 3:]
    df_pkl = df_name.to_numpy() * df_quality.to_numpy()    
    
    s = data [['Price']].to_numpy()
    v = df_pkl
    t = np.multiply(s, s.transpose())
    u = np.multiply(t, v)
    z = pd.DataFrame(u)

The final output is:

print(z)

Expected outputs

deleted 33 characters in body
Source Link
toolic
  • 16.4k
  • 6
  • 29
  • 221
Loading
Source Link
Laura
  • 81
  • 4
Loading