Social Network Analysis

by lksfr

social_network_anaylsis/friendship_predictor.ipynb

Social Network Analysis

#importing packages
%matplotlib inline
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#creating the graph
G = nx.Graph()
#adding all friendship edges in one call; order matches first appearance,
#so the node insertion order is identical to adding them one by one
friendships = [
    ('Jack', 'Mike'), ('Jack', 'Pete'), ('Jack', 'Cindy'),
    ('Cindy', 'Liz'), ('Mike', 'Liz'), ('Mike', 'Roger'),
    ('Roger', 'Liz'), ('Joana', 'Liz'), ('Elle', 'Pete'),
    ('Cindy', 'Mark'), ('Steve', 'Mark'), ('Elle', 'Steve'),
    ('Steve', 'Ronald'), ('Ronald', 'Mark'),
]
G.add_edges_from(friendships)
#first visualization
plt.figure(figsize=(10, 8))
plt.title('Visualization of Social Network', fontsize=20) #adding a title
pos = nx.circular_layout(G) #specifying a circular layout
nx.draw_networkx(G, pos, alpha=0.7, with_labels=False, edge_color='.4')
plt.axis('off') #turning off the axis

#adding the name labels to the nodes
#coordinates were read off the circular layout by hand; a single data-driven
#loop replaces the eleven copy-pasted plt.text calls (same calls, same order)
label_positions = {
    'Jack': (1.12, 0.07),
    'Mike': (0.87, 0.64),
    'Pete': (0.54, 0.9),
    'Cindy': (0, 1),
    'Liz': (-0.654, 0.85),
    'Roger': (-1.1, 0.28173262),
    'Joana': (-1.1, -0.3),
    'Elle': (-0.65486072, -0.89),
    'Mark': (-0.14231501, -1.13),
    'Steve': (0.41541511, -1.06),
    'Ronald': (0.89125346, -0.7),
}
for person, (x, y) in label_positions.items():
    plt.text(x, y, s=person, fontsize=14,
             bbox=dict(facecolor='red', alpha=0.5),
             horizontalalignment='center')
Text(0.891253,-0.7,'Ronald')
#printing some general info
#nx.info was deprecated in networkx 2.6 and removed in 3.0,
#so the summary statistics are printed directly from the graph
print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")
print(f"Average degree: {2 * G.number_of_edges() / G.number_of_nodes():.4f}")
Name: 
Type: Graph
Number of nodes: 11
Number of edges: 14
Average degree:   2.5455
#creating an (initially column-less) df indexed by the nodes of the graph
df = pd.DataFrame(index=list(G.nodes()))
#taking a look at the empty df
df
Jack
Mike
Pete
Cindy
Liz
Roger
Joana
Elle
Mark
Steve
Ronald
#adding centrality characteristics of nodes
#each measure returns a {node: value} dict; wrapping it in a Series
#aligns the values with the node index of df
df['clustering'] = pd.Series(nx.clustering(G))
#bug fix: the original pd.Series(dict(G.degree()).values())[0] selected a
#single scalar (the first node's degree); building the Series directly from
#the degree dict gives every node its own degree, aligned by name
df['degree'] = pd.Series(dict(G.degree()))
df['degree_centrality'] = pd.Series(nx.degree_centrality(G))
df['closeness'] = pd.Series(nx.closeness_centrality(G))
df['betweeness'] = pd.Series(nx.betweenness_centrality(G, normalized=True))
df['pr'] = pd.Series(nx.pagerank(G))
#taking a look at more specific node importance measures
df
clustering degree degree_centrality closeness betweeness pr
Jack 0.000000 3 0.3 0.500000 0.255556 0.103966
Mike 0.333333 3 0.3 0.434783 0.125926 0.103670
Pete 0.000000 2 0.2 0.416667 0.125926 0.075299
Cindy 0.000000 3 0.3 0.555556 0.400000 0.102197
Liz 0.166667 4 0.4 0.476190 0.329630 0.139678
Roger 1.000000 2 0.2 0.384615 0.000000 0.072691
Joana 0.000000 1 0.1 0.333333 0.000000 0.043318
Elle 0.000000 2 0.2 0.370370 0.074074 0.075776
Mark 0.333333 3 0.3 0.476190 0.274074 0.103838
Steve 0.333333 3 0.3 0.400000 0.103704 0.106371
Ronald 1.000000 2 0.2 0.370370 0.000000 0.073195
#common-neighbor count for every pair of nodes that is not yet connected
cn = [(u, v, len(list(nx.common_neighbors(G, u, v)))) for u, v in nx.non_edges(G)]
len(cn)
41
#jaccard coefficient for every non-edge
jc = list(nx.jaccard_coefficient(G))
len(jc)
41
#resource allocation index for every non-edge
ra = list(nx.resource_allocation_index(G))
len(ra)
41
#adamic/adar index for every non-edge
aa = list(nx.adamic_adar_index(G))
len(aa)
41
#preferential attachment score for every non-edge
pa = list(nx.preferential_attachment(G))
len(pa)
41
#extracting the (node, node) pair index from the score results
index = pd.Series([(e[0], e[1]) for e in cn])
#initializing new empty df
df = pd.DataFrame()
#extracting the raw score out of each (u, v, score) triple
cn = pd.Series([e[2] for e in cn])
jc = pd.Series([e[2] for e in jc])
ra = pd.Series([e[2] for e in ra])
aa = pd.Series([e[2] for e in aa])
pa = pd.Series([e[2] for e in pa])
#adding scores to df
df['common_neigh'] = cn
df['jaccard'] = jc
df['resource_alloc'] = ra
df['adamic_adar'] = aa
df['pref_attachment'] = pa
#scaling each score column to the [0, 1] range
from sklearn.preprocessing import MinMaxScaler
#adding index to df
df.index = index
scaler = MinMaxScaler()
#casting to float first avoids the DataConversionWarning that MinMaxScaler
#raised for the mixed int64/float64 columns
scaled_df = pd.DataFrame(scaler.fit_transform(df.astype(float)))
scaled_df.columns = ['common_neigh', 'jaccard', 'resource_alloc', 'adamic_adar', 'pref_attachment']
#averaging the five scaled scores into a single mean score column
score_columns = ['common_neigh', 'jaccard', 'resource_alloc',
                 'adamic_adar', 'pref_attachment']
scaled_df['mean_score'] = scaled_df[score_columns].mean(axis=1)
scaled_df.index = index
#displaying df sorted by mean score in descending order
scaled_df.sort_values('mean_score', ascending=False)
common_neigh jaccard resource_alloc adamic_adar pref_attachment mean_score
(Jack, Liz) 1.0 0.800000 1.000 1.000000 1.0 0.960000
(Cindy, Mike) 1.0 1.000000 0.875 0.896241 0.7 0.894248
(Pete, Steve) 0.5 0.500000 0.750 0.792481 0.4 0.588496
(Jack, Elle) 0.5 0.500000 0.750 0.792481 0.4 0.588496
(Mark, Liz) 0.5 0.333333 0.500 0.500000 1.0 0.566667
(Steve, Cindy) 0.5 0.400000 0.500 0.500000 0.7 0.520000
(Mark, Jack) 0.5 0.400000 0.500 0.500000 0.7 0.520000
(Ronald, Cindy) 0.5 0.500000 0.500 0.500000 0.4 0.480000
(Jack, Roger) 0.5 0.500000 0.500 0.500000 0.4 0.480000
(Pete, Cindy) 0.5 0.500000 0.500 0.500000 0.4 0.480000
(Pete, Mike) 0.5 0.500000 0.500 0.500000 0.4 0.480000
(Mark, Elle) 0.5 0.500000 0.500 0.500000 0.4 0.480000
(Ronald, Elle) 0.5 0.666667 0.500 0.500000 0.2 0.473333
(Joana, Roger) 0.5 1.000000 0.375 0.396241 0.0 0.454248
(Roger, Cindy) 0.5 0.500000 0.375 0.396241 0.4 0.434248
(Joana, Mike) 0.5 0.666667 0.375 0.396241 0.1 0.407581
(Joana, Cindy) 0.5 0.666667 0.375 0.396241 0.1 0.407581
(Steve, Liz) 0.0 0.000000 0.000 0.000000 1.0 0.200000
(Steve, Mike) 0.0 0.000000 0.000 0.000000 0.7 0.140000
(Jack, Steve) 0.0 0.000000 0.000 0.000000 0.7 0.140000
(Mark, Mike) 0.0 0.000000 0.000 0.000000 0.7 0.140000
(Elle, Liz) 0.0 0.000000 0.000 0.000000 0.6 0.120000
(Pete, Liz) 0.0 0.000000 0.000 0.000000 0.6 0.120000
(Ronald, Liz) 0.0 0.000000 0.000 0.000000 0.6 0.120000
(Pete, Mark) 0.0 0.000000 0.000 0.000000 0.4 0.080000
(Steve, Roger) 0.0 0.000000 0.000 0.000000 0.4 0.080000
(Mike, Elle) 0.0 0.000000 0.000 0.000000 0.4 0.080000
(Mark, Roger) 0.0 0.000000 0.000 0.000000 0.4 0.080000
(Cindy, Elle) 0.0 0.000000 0.000 0.000000 0.4 0.080000
(Ronald, Jack) 0.0 0.000000 0.000 0.000000 0.4 0.080000
(Ronald, Mike) 0.0 0.000000 0.000 0.000000 0.4 0.080000
(Roger, Elle) 0.0 0.000000 0.000 0.000000 0.2 0.040000
(Ronald, Roger) 0.0 0.000000 0.000 0.000000 0.2 0.040000
(Ronald, Pete) 0.0 0.000000 0.000 0.000000 0.2 0.040000
(Pete, Roger) 0.0 0.000000 0.000 0.000000 0.2 0.040000
(Joana, Jack) 0.0 0.000000 0.000 0.000000 0.1 0.020000
(Mark, Joana) 0.0 0.000000 0.000 0.000000 0.1 0.020000
(Joana, Steve) 0.0 0.000000 0.000 0.000000 0.1 0.020000
(Pete, Joana) 0.0 0.000000 0.000 0.000000 0.0 0.000000
(Joana, Elle) 0.0 0.000000 0.000 0.000000 0.0 0.000000
(Ronald, Joana) 0.0 0.000000 0.000 0.000000 0.0 0.000000
#extracting first and second name from the tuple index and turning them into separate columns
#idiom: list comprehensions over the index pairs replace the manual append loop
pairs = list(scaled_df.index)
first_person = pd.Series([p[0] for p in pairs])
second_person = pd.Series([p[1] for p in pairs])

#reset_index moves the tuple index into an 'index' column; the name columns
#then align with the resulting RangeIndex
scaled_df.reset_index(inplace=True)
scaled_df = pd.concat([scaled_df, first_person.rename('first_p'), second_person.rename('second_p')], axis=1)
scaled_df
index common_neigh jaccard resource_alloc adamic_adar pref_attachment mean_score first_p second_p
0 (Ronald, Pete) 0.0 0.000000 0.000 0.000000 0.2 0.040000 Ronald Pete
1 (Ronald, Joana) 0.0 0.000000 0.000 0.000000 0.0 0.000000 Ronald Joana
2 (Ronald, Jack) 0.0 0.000000 0.000 0.000000 0.4 0.080000 Ronald Jack
3 (Ronald, Roger) 0.0 0.000000 0.000 0.000000 0.2 0.040000 Ronald Roger
4 (Ronald, Cindy) 0.5 0.500000 0.500 0.500000 0.4 0.480000 Ronald Cindy
5 (Ronald, Mike) 0.0 0.000000 0.000 0.000000 0.4 0.080000 Ronald Mike
6 (Ronald, Elle) 0.5 0.666667 0.500 0.500000 0.2 0.473333 Ronald Elle
7 (Ronald, Liz) 0.0 0.000000 0.000 0.000000 0.6 0.120000 Ronald Liz
8 (Pete, Mark) 0.0 0.000000 0.000 0.000000 0.4 0.080000 Pete Mark
9 (Pete, Joana) 0.0 0.000000 0.000 0.000000 0.0 0.000000 Pete Joana
10 (Pete, Steve) 0.5 0.500000 0.750 0.792481 0.4 0.588496 Pete Steve
11 (Pete, Roger) 0.0 0.000000 0.000 0.000000 0.2 0.040000 Pete Roger
12 (Pete, Cindy) 0.5 0.500000 0.500 0.500000 0.4 0.480000 Pete Cindy
13 (Pete, Mike) 0.5 0.500000 0.500 0.500000 0.4 0.480000 Pete Mike
14 (Pete, Liz) 0.0 0.000000 0.000 0.000000 0.6 0.120000 Pete Liz
15 (Mark, Joana) 0.0 0.000000 0.000 0.000000 0.1 0.020000 Mark Joana
16 (Mark, Jack) 0.5 0.400000 0.500 0.500000 0.7 0.520000 Mark Jack
17 (Mark, Roger) 0.0 0.000000 0.000 0.000000 0.4 0.080000 Mark Roger
18 (Mark, Mike) 0.0 0.000000 0.000 0.000000 0.7 0.140000 Mark Mike
19 (Mark, Elle) 0.5 0.500000 0.500 0.500000 0.4 0.480000 Mark Elle
20 (Mark, Liz) 0.5 0.333333 0.500 0.500000 1.0 0.566667 Mark Liz
21 (Joana, Jack) 0.0 0.000000 0.000 0.000000 0.1 0.020000 Joana Jack
22 (Joana, Steve) 0.0 0.000000 0.000 0.000000 0.1 0.020000 Joana Steve
23 (Joana, Roger) 0.5 1.000000 0.375 0.396241 0.0 0.454248 Joana Roger
24 (Joana, Cindy) 0.5 0.666667 0.375 0.396241 0.1 0.407581 Joana Cindy
25 (Joana, Mike) 0.5 0.666667 0.375 0.396241 0.1 0.407581 Joana Mike
26 (Joana, Elle) 0.0 0.000000 0.000 0.000000 0.0 0.000000 Joana Elle
27 (Jack, Steve) 0.0 0.000000 0.000 0.000000 0.7 0.140000 Jack Steve
28 (Jack, Elle) 0.5 0.500000 0.750 0.792481 0.4 0.588496 Jack Elle
29 (Jack, Roger) 0.5 0.500000 0.500 0.500000 0.4 0.480000 Jack Roger
30 (Jack, Liz) 1.0 0.800000 1.000 1.000000 1.0 0.960000 Jack Liz
31 (Steve, Cindy) 0.5 0.400000 0.500 0.500000 0.7 0.520000 Steve Cindy
32 (Steve, Mike) 0.0 0.000000 0.000 0.000000 0.7 0.140000 Steve Mike
33 (Steve, Roger) 0.0 0.000000 0.000 0.000000 0.4 0.080000 Steve Roger
34 (Steve, Liz) 0.0 0.000000 0.000 0.000000 1.0 0.200000 Steve Liz
35 (Roger, Cindy) 0.5 0.500000 0.375 0.396241 0.4 0.434248 Roger Cindy
36 (Roger, Elle) 0.0 0.000000 0.000 0.000000 0.2 0.040000 Roger Elle
37 (Cindy, Mike) 1.0 1.000000 0.875 0.896241 0.7 0.894248 Cindy Mike
38 (Cindy, Elle) 0.0 0.000000 0.000 0.000000 0.4 0.080000 Cindy Elle
39 (Mike, Elle) 0.0 0.000000 0.000 0.000000 0.4 0.080000 Mike Elle
40 (Elle, Liz) 0.0 0.000000 0.000 0.000000 0.6 0.120000 Elle Liz
#defining a function to easily look up most likely connection for every individual
def most_likely_friend(name, frame=None):
    """
    Look up the most likely new connections for one individual.

    Parameters
    ----------
    name : str
        Name of the individual to look up.
    frame : pandas.DataFrame, optional
        Scored frame with 'first_p', 'second_p' and 'mean_score' columns.
        Defaults to the notebook-level ``scaled_df``, so existing calls
        keep working; passing a frame lets the same function serve any
        scored table (e.g. the community-aware scores).

    Returns
    -------
    pandas.DataFrame
        Columns 'Person 1', 'Person 2', 'Probability of Connection',
        sorted by score in descending order.
    """
    if frame is None:
        frame = scaled_df  # fall back to the notebook-global score table
    #keep every pair the person appears in, highest score first
    mask = (frame.first_p == name) | (frame.second_p == name)
    result_df = frame[mask].sort_values('mean_score', ascending=False)
    result_df.reset_index(drop=True, inplace=True)

    result_df = result_df[['first_p', 'second_p', 'mean_score']]
    result_df.columns = ['Person 1', 'Person 2', 'Probability of Connection']

    return result_df
#looking up the most promising new connections for Mark
most_likely_friend('Mark')
Person 1 Person 2 Probability of Connection
0 Mark Liz 0.566667
1 Mark Jack 0.520000
2 Mark Elle 0.480000
3 Mark Mike 0.140000
4 Pete Mark 0.080000
5 Mark Roger 0.080000
6 Mark Joana 0.020000
#taking the community structure into account
#assigning each node to a community (0 or 1)
#note: Graph.node was removed in networkx 2.4; set_node_attributes writes the
#same 'community' attribute on every node in one idiomatic call
communities = {
    'Jack': 0, 'Mike': 0, 'Pete': 0, 'Cindy': 0,
    'Liz': 0, 'Roger': 0, 'Joana': 0,
    'Elle': 1, 'Mark': 1, 'Steve': 1, 'Ronald': 1,
}
nx.set_node_attributes(G, communities, 'community')
#calculating the common neighbors and resource allocation score according to Soundarajan Hopcroft
sh = list(nx.cn_soundarajan_hopcroft(G))
cra = list(nx.ra_index_soundarajan_hopcroft(G))
#(node, node) pairs serve as the row index
index2 = pd.Series([(u, v) for u, v, _ in sh])
#creating a data frame
df2 = pd.DataFrame() #initializing empty df

#extracting only the score out of each (u, v, score) triple
sh = pd.Series([score for _, _, score in sh])
cra = pd.Series([score for _, _, score in cra])

#adding score columns to df
df2['sh'] = sh
df2['cra'] = cra

df2.index = index2 #adding index to df
df2
sh cra
(Ronald, Pete) 0 0.000000
(Ronald, Joana) 0 0.000000
(Ronald, Jack) 0 0.000000
(Ronald, Roger) 0 0.000000
(Ronald, Cindy) 1 0.000000
(Ronald, Mike) 0 0.000000
(Ronald, Elle) 2 0.333333
(Ronald, Liz) 0 0.000000
(Pete, Mark) 0 0.000000
(Pete, Joana) 0 0.000000
(Pete, Steve) 1 0.000000
(Pete, Roger) 0 0.000000
(Pete, Cindy) 2 0.333333
(Pete, Mike) 2 0.333333
(Pete, Liz) 0 0.000000
(Mark, Joana) 0 0.000000
(Mark, Jack) 1 0.000000
(Mark, Roger) 0 0.000000
(Mark, Mike) 0 0.000000
(Mark, Elle) 2 0.333333
(Mark, Liz) 1 0.000000
(Joana, Jack) 0 0.000000
(Joana, Steve) 0 0.000000
(Joana, Roger) 2 0.250000
(Joana, Cindy) 2 0.250000
(Joana, Mike) 2 0.250000
(Joana, Elle) 0 0.000000
(Jack, Steve) 0 0.000000
(Jack, Elle) 1 0.000000
(Jack, Roger) 2 0.333333
(Jack, Liz) 4 0.666667
(Steve, Cindy) 1 0.000000
(Steve, Mike) 0 0.000000
(Steve, Roger) 0 0.000000
(Steve, Liz) 0 0.000000
(Roger, Cindy) 2 0.250000
(Roger, Elle) 0 0.000000
(Cindy, Mike) 4 0.583333
(Cindy, Elle) 0 0.000000
(Mike, Elle) 0 0.000000
(Elle, Liz) 0 0.000000
#normalizing the scores and adding an index
#casting to float first avoids the DataConversionWarning that MinMaxScaler
#raised for the int64 'sh' column
scaled_df2 = pd.DataFrame(scaler.fit_transform(df2.astype(float)))
scaled_df2.columns = ['common_neigh', 'resource_alloc']
scaled_df2.index = index2
#taking the mean of the two scaled scores
scaled_df2['mean_score'] = (scaled_df2['common_neigh'] + scaled_df2['resource_alloc'])/2
#displaying df sorted by mean score
scaled_df2.sort_values('mean_score', ascending=False)
common_neigh resource_alloc mean_score
(Jack, Liz) 1.00 1.000 1.0000
(Cindy, Mike) 1.00 0.875 0.9375
(Pete, Cindy) 0.50 0.500 0.5000
(Mark, Elle) 0.50 0.500 0.5000
(Jack, Roger) 0.50 0.500 0.5000
(Ronald, Elle) 0.50 0.500 0.5000
(Pete, Mike) 0.50 0.500 0.5000
(Joana, Mike) 0.50 0.375 0.4375
(Joana, Roger) 0.50 0.375 0.4375
(Joana, Cindy) 0.50 0.375 0.4375
(Roger, Cindy) 0.50 0.375 0.4375
(Pete, Steve) 0.25 0.000 0.1250
(Steve, Cindy) 0.25 0.000 0.1250
(Mark, Jack) 0.25 0.000 0.1250
(Ronald, Cindy) 0.25 0.000 0.1250
(Jack, Elle) 0.25 0.000 0.1250
(Mark, Liz) 0.25 0.000 0.1250
(Steve, Roger) 0.00 0.000 0.0000
(Steve, Liz) 0.00 0.000 0.0000
(Steve, Mike) 0.00 0.000 0.0000
(Roger, Elle) 0.00 0.000 0.0000
(Cindy, Elle) 0.00 0.000 0.0000
(Jack, Steve) 0.00 0.000 0.0000
(Joana, Elle) 0.00 0.000 0.0000
(Mike, Elle) 0.00 0.000 0.0000
(Ronald, Pete) 0.00 0.000 0.0000
(Joana, Steve) 0.00 0.000 0.0000
(Joana, Jack) 0.00 0.000 0.0000
(Ronald, Joana) 0.00 0.000 0.0000
(Mark, Mike) 0.00 0.000 0.0000
(Mark, Roger) 0.00 0.000 0.0000
(Mark, Joana) 0.00 0.000 0.0000
(Pete, Liz) 0.00 0.000 0.0000
(Pete, Roger) 0.00 0.000 0.0000
(Pete, Joana) 0.00 0.000 0.0000
(Pete, Mark) 0.00 0.000 0.0000
(Ronald, Liz) 0.00 0.000 0.0000
(Ronald, Mike) 0.00 0.000 0.0000
(Ronald, Roger) 0.00 0.000 0.0000
(Ronald, Jack) 0.00 0.000 0.0000
(Elle, Liz) 0.00 0.000 0.0000
#taking the names out of the index into separate columns
#idiom: list comprehensions over the index pairs replace the manual append loop
pairs2 = list(scaled_df2.index)
first_person = pd.Series([p[0] for p in pairs2])
second_person = pd.Series([p[1] for p in pairs2])

#reset_index moves the tuple index into an 'index' column; the name columns
#then align with the resulting RangeIndex
scaled_df2.reset_index(inplace=True)
scaled_df2 = pd.concat([scaled_df2, first_person.rename('first_p'), second_person.rename('second_p')], axis=1)
scaled_df2
index common_neigh resource_alloc mean_score first_p second_p
0 (Ronald, Pete) 0.00 0.000 0.0000 Ronald Pete
1 (Ronald, Joana) 0.00 0.000 0.0000 Ronald Joana
2 (Ronald, Jack) 0.00 0.000 0.0000 Ronald Jack
3 (Ronald, Roger) 0.00 0.000 0.0000 Ronald Roger
4 (Ronald, Cindy) 0.25 0.000 0.1250 Ronald Cindy
5 (Ronald, Mike) 0.00 0.000 0.0000 Ronald Mike
6 (Ronald, Elle) 0.50 0.500 0.5000 Ronald Elle
7 (Ronald, Liz) 0.00 0.000 0.0000 Ronald Liz
8 (Pete, Mark) 0.00 0.000 0.0000 Pete Mark
9 (Pete, Joana) 0.00 0.000 0.0000 Pete Joana
10 (Pete, Steve) 0.25 0.000 0.1250 Pete Steve
11 (Pete, Roger) 0.00 0.000 0.0000 Pete Roger
12 (Pete, Cindy) 0.50 0.500 0.5000 Pete Cindy
13 (Pete, Mike) 0.50 0.500 0.5000 Pete Mike
14 (Pete, Liz) 0.00 0.000 0.0000 Pete Liz
15 (Mark, Joana) 0.00 0.000 0.0000 Mark Joana
16 (Mark, Jack) 0.25 0.000 0.1250 Mark Jack
17 (Mark, Roger) 0.00 0.000 0.0000 Mark Roger
18 (Mark, Mike) 0.00 0.000 0.0000 Mark Mike
19 (Mark, Elle) 0.50 0.500 0.5000 Mark Elle
20 (Mark, Liz) 0.25 0.000 0.1250 Mark Liz
21 (Joana, Jack) 0.00 0.000 0.0000 Joana Jack
22 (Joana, Steve) 0.00 0.000 0.0000 Joana Steve
23 (Joana, Roger) 0.50 0.375 0.4375 Joana Roger
24 (Joana, Cindy) 0.50 0.375 0.4375 Joana Cindy
25 (Joana, Mike) 0.50 0.375 0.4375 Joana Mike
26 (Joana, Elle) 0.00 0.000 0.0000 Joana Elle
27 (Jack, Steve) 0.00 0.000 0.0000 Jack Steve
28 (Jack, Elle) 0.25 0.000 0.1250 Jack Elle
29 (Jack, Roger) 0.50 0.500 0.5000 Jack Roger
30 (Jack, Liz) 1.00 1.000 1.0000 Jack Liz
31 (Steve, Cindy) 0.25 0.000 0.1250 Steve Cindy
32 (Steve, Mike) 0.00 0.000 0.0000 Steve Mike
33 (Steve, Roger) 0.00 0.000 0.0000 Steve Roger
34 (Steve, Liz) 0.00 0.000 0.0000 Steve Liz
35 (Roger, Cindy) 0.50 0.375 0.4375 Roger Cindy
36 (Roger, Elle) 0.00 0.000 0.0000 Roger Elle
37 (Cindy, Mike) 1.00 0.875 0.9375 Cindy Mike
38 (Cindy, Elle) 0.00 0.000 0.0000 Cindy Elle
39 (Mike, Elle) 0.00 0.000 0.0000 Mike Elle
40 (Elle, Liz) 0.00 0.000 0.0000 Elle Liz
#adapting previously defined function to the community-aware scores
def most_likely_friend2(name, frame=None):
    """
    Look up the most likely new connections for one individual using
    the community-aware (Soundarajan-Hopcroft) scores.

    Parameters
    ----------
    name : str
        Name of the individual to look up.
    frame : pandas.DataFrame, optional
        Scored frame with 'first_p', 'second_p' and 'mean_score' columns.
        Defaults to the notebook-level ``scaled_df2``, so existing calls
        keep working; passing a frame makes the function reusable for any
        scored table.

    Returns
    -------
    pandas.DataFrame
        Columns 'Person 1', 'Person 2', 'Probability of Connection',
        sorted by score in descending order.
    """
    if frame is None:
        frame = scaled_df2  # fall back to the community-aware score table
    #keep every pair the person appears in, highest score first
    mask = (frame.first_p == name) | (frame.second_p == name)
    result_df = frame[mask].sort_values('mean_score', ascending=False)
    result_df.reset_index(drop=True, inplace=True)

    result_df = result_df[['first_p', 'second_p', 'mean_score']]
    result_df.columns = ['Person 1', 'Person 2', 'Probability of Connection']

    return result_df
#looking up Mark's most promising new connections with the community-aware scores
most_likely_friend2('Mark')
Person 1 Person 2 Probability of Connection
0 Mark Elle 0.500
1 Mark Jack 0.125
2 Mark Liz 0.125
3 Pete Mark 0.000
4 Mark Joana 0.000
5 Mark Roger 0.000
6 Mark Mike 0.000