Start from a clean slate and free up memory

rm(list = ls())
gc()

Load R packages

packages <- c("statnet", "igraph", "RSiena", "EpiModel", "kableExtra", "netdiffuseR", 
    "sna", "ergm", "coda", "lattice", "plyr", "dplyr", "tidyr", "magrittr", 
    "mosaic", "snatools", "tidyverse", "ggplot2", "ggnetwork", "visNetwork", 
    "GGally", "ggraph", "networkD3", "ndtv", "amen", "knitr", "rio", "maps", 
    "googleAuthR", "ggmap", "smacof", "grid", "rworldmap", "visNetwork", "networkD3", 
    "ggraph", "tidygraph", "polyclip", "tweenr", "ROAuth", "twitteR", "tweetscores", 
    "devtools", "streamR", "gganimate", "gifski_renderer", "av", "r2d3")
missing.packages <- which(!packages %in% installed.packages()[, "Package"])
if (length(missing.packages)) install.packages(packages[missing.packages])
lapply(packages, require, character.only = T)
library(devtools)
install_github("pablobarbera/twitter_ideology/pkg/tweetscores")
install.packages("gifski", type = "source")

1 Handling Network Data

1.1 Matrix Operations

Creating a matrix

m <- matrix(data = 1, nrow = 5, ncol = 4)  # a 5 x 4 matrix filled with 1s
dim(m)
## [1] 5 4
m <- matrix(1:10, 10, 10)  # the vector 1:10 is recycled down each of the 10 columns

Select matrix elements:

m[2, 3]  # Matrix m, row 2, column 3 - a single cell
## [1] 2
m[2, ]  # The whole second row of m as a vector
##  [1] 2 2 2 2 2 2 2 2 2 2
m[, 2]  # The whole second column of m as a vector
##  [1]  1  2  3  4  5  6  7  8  9 10
m[1:2, 4:6]  # submatrix: rows 1 and 2, columns 4, 5 and 6
##      [,1] [,2] [,3]
## [1,]    1    1    1
## [2,]    2    2    2
m[-1, ]  # all rows *except* the first one
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    2    2    2    2    2    2    2    2    2     2
##  [2,]    3    3    3    3    3    3    3    3    3     3
##  [3,]    4    4    4    4    4    4    4    4    4     4
##  [4,]    5    5    5    5    5    5    5    5    5     5
##  [5,]    6    6    6    6    6    6    6    6    6     6
##  [6,]    7    7    7    7    7    7    7    7    7     7
##  [7,]    8    8    8    8    8    8    8    8    8     8
##  [8,]    9    9    9    9    9    9    9    9    9     9
##  [9,]   10   10   10   10   10   10   10   10   10    10

Import Camp data. These data were collected by Steve Borgatti, Russ Bernard, Bert Pelto and Gery Ryan at the 1992 NSF Summer Institute on Research Methods in Cultural Anthropology, a 3-week course given to 14 carefully selected participants. Network data were collected at the end of each week; the data used here were collected at the end of the second week. They were gathered by placing each person’s name on a card and asking each respondent to sort the cards in order of how much interaction they had had with that person since the beginning of the course (known informally as “camp”). The result is rank-order data in which a “1” indicates the most interaction and a “17” the least interaction.

camp92 <- as.matrix(rio::import("./data/camp92.txt"))[,-1]
camp92 <- apply(camp92, 2, as.numeric)
rownames(camp92) <- colnames(camp92)
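
As a quick sanity check on the rank-order structure described above, every row should contain the diagonal 0 plus each rank 1 through 17 exactly once (a minimal sketch, assuming camp92 was imported as above):

# Each row should consist of the diagonal 0 plus the ranks 1..17, each exactly once
all(apply(camp92, 1, function(x) all(sort(x) == 0:17)))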

Transpose matrix:

print(camp92)
##         HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL BILL LEE DON
## HOLLY       0      2    15   8   4     12      10   5       3   11  13   1
## BRAZEY      1      0    12   2  10     11       5   7       9   17   3   8
## CAROL      17     15     0   1   2      4       6  12       7   16  11  10
## PAM         9      5     6   0   3      4       1   2       8   15  16  13
## PAT         4     10     8   3   0      1       2  14       9   16   7  13
## JENNIE     11      9     4   2   1      0       7   3      15   16  10  14
## PAULINE    14     10     5   1   3      4       0   7       6   17  12  11
## ANN         8      9    12   2  10      1       7   0       3   15  17   5
## MICHAEL     3     11     4   6   9     13       7   8       0   17  10   1
## BILL        3      4     8  15  10     16      17  13       2    0  11   1
## LEE         5      6     9  14   7     15      12  17       8   13   0   2
## DON         3      9    13  14  12     17      15  16       1   10   4   0
## JOHN       17     11     1   5   9     15       3  16       8   14  12   6
## HARRY       4     17     5  11  14     16       9   8       1   12   3   2
## GERY       11      6     9  13  15     12      17  16       4   10   8   7
## STEVE      10      9     5  12   8     15       7  14      11   17   3   6
## BERT        7      4     9   6  13     11      14  15      10   16   5   8
## RUSS        2      9    10  17  16     11      15  14      13    3   8   7
##         JOHN HARRY GERY STEVE BERT RUSS
## HOLLY     16     9   17     7    6   14
## BRAZEY    15    13   16     6    4   14
## CAROL      3     5   13     8    9   14
## PAM        7    12   17    11   10   14
## PAT       11    12   17     5    6   15
## JENNIE    12    13   17     5    6    8
## PAULINE    2    13   16     9    8   15
## ANN       16     4   13    11    6   14
## MICHAEL   14     2   15    12    5   16
## BILL       9    14   12     6    7    5
## LEE       11     4   16     1    3   10
## DON       11     2    6     8    5    7
## JOHN       0     4    7    10   13    2
## HARRY      6     0   13    10    7   15
## GERY       5    14    0     2    3    1
## STEVE     16    13    4     0    1    2
## BERT      17    12    3     1    0    2
## RUSS       6    12    1     4    5    0
t(camp92)
##         HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL BILL LEE DON
## HOLLY       0      1    17   9   4     11      14   8       3    3   5   3
## BRAZEY      2      0    15   5  10      9      10   9      11    4   6   9
## CAROL      15     12     0   6   8      4       5  12       4    8   9  13
## PAM         8      2     1   0   3      2       1   2       6   15  14  14
## PAT         4     10     2   3   0      1       3  10       9   10   7  12
## JENNIE     12     11     4   4   1      0       4   1      13   16  15  17
## PAULINE    10      5     6   1   2      7       0   7       7   17  12  15
## ANN         5      7    12   2  14      3       7   0       8   13  17  16
## MICHAEL     3      9     7   8   9     15       6   3       0    2   8   1
## BILL       11     17    16  15  16     16      17  15      17    0  13  10
## LEE        13      3    11  16   7     10      12  17      10   11   0   4
## DON         1      8    10  13  13     14      11   5       1    1   2   0
## JOHN       16     15     3   7  11     12       2  16      14    9  11  11
## HARRY       9     13     5  12  12     13      13   4       2   14   4   2
## GERY       17     16    13  17  17     17      16  13      15   12  16   6
## STEVE       7      6     8  11   5      5       9  11      12    6   1   8
## BERT        6      4     9  10   6      6       8   6       5    7   3   5
## RUSS       14     14    14  14  15      8      15  14      16    5  10   7
##         JOHN HARRY GERY STEVE BERT RUSS
## HOLLY     17     4   11    10    7    2
## BRAZEY    11    17    6     9    4    9
## CAROL      1     5    9     5    9   10
## PAM        5    11   13    12    6   17
## PAT        9    14   15     8   13   16
## JENNIE    15    16   12    15   11   11
## PAULINE    3     9   17     7   14   15
## ANN       16     8   16    14   15   14
## MICHAEL    8     1    4    11   10   13
## BILL      14    12   10    17   16    3
## LEE       12     3    8     3    5    8
## DON        6     2    7     6    8    7
## JOHN       0     6    5    16   17    6
## HARRY      4     0   14    13   12   12
## GERY       7    13    0     4    3    1
## STEVE     10    10    2     0    1    4
## BERT      13     7    3     1    0    5
## RUSS       2    15    1     2    2    0

1.2 Dichotomizing (thinning network)

campnet <- ifelse(camp92 > 15, 1, 0)  # dichotomize at a cut-off of 15: cells above 15 become 1, all others 0
print(campnet)
##         HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL BILL LEE DON
## HOLLY       0      0     0   0   0      0       0   0       0    0   0   0
## BRAZEY      0      0     0   0   0      0       0   0       0    1   0   0
## CAROL       1      0     0   0   0      0       0   0       0    1   0   0
## PAM         0      0     0   0   0      0       0   0       0    0   1   0
## PAT         0      0     0   0   0      0       0   0       0    1   0   0
## JENNIE      0      0     0   0   0      0       0   0       0    1   0   0
## PAULINE     0      0     0   0   0      0       0   0       0    1   0   0
## ANN         0      0     0   0   0      0       0   0       0    0   1   0
## MICHAEL     0      0     0   0   0      0       0   0       0    1   0   0
## BILL        0      0     0   0   0      1       1   0       0    0   0   0
## LEE         0      0     0   0   0      0       0   1       0    0   0   0
## DON         0      0     0   0   0      1       0   1       0    0   0   0
## JOHN        1      0     0   0   0      0       0   1       0    0   0   0
## HARRY       0      1     0   0   0      1       0   0       0    0   0   0
## GERY        0      0     0   0   0      0       1   1       0    0   0   0
## STEVE       0      0     0   0   0      0       0   0       0    1   0   0
## BERT        0      0     0   0   0      0       0   0       0    1   0   0
## RUSS        0      0     0   1   1      0       0   0       0    0   0   0
##         JOHN HARRY GERY STEVE BERT RUSS
## HOLLY      1     0    1     0    0    0
## BRAZEY     0     0    1     0    0    0
## CAROL      0     0    0     0    0    0
## PAM        0     0    1     0    0    0
## PAT        0     0    1     0    0    0
## JENNIE     0     0    1     0    0    0
## PAULINE    0     0    1     0    0    0
## ANN        1     0    0     0    0    0
## MICHAEL    0     0    0     0    0    1
## BILL       0     0    0     0    0    0
## LEE        0     0    1     0    0    0
## DON        0     0    0     0    0    0
## JOHN       0     0    0     0    0    0
## HARRY      0     0    0     0    0    0
## GERY       0     0    0     0    0    0
## STEVE      1     0    0     0    0    0
## BERT       1     0    0     0    0    0
## RUSS       0     0    0     0    0    0
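
The cut-off is a substantive choice. Since the description above codes a rank of 1 as the most interaction, one could instead keep only each respondent's three strongest ties. A hedged alternative (not used in the rest of this section):

# Keep only ranks 1-3, i.e. each person's three strongest ties; the diagonal 0 stays 0
campnet.strong <- ifelse(camp92 >= 1 & camp92 <= 3, 1, 0)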

1.3 Transposing and multiplying networks

# Transposing
t(campnet)
##         HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL BILL LEE DON
## HOLLY       0      0     1   0   0      0       0   0       0    0   0   0
## BRAZEY      0      0     0   0   0      0       0   0       0    0   0   0
## CAROL       0      0     0   0   0      0       0   0       0    0   0   0
## PAM         0      0     0   0   0      0       0   0       0    0   0   0
## PAT         0      0     0   0   0      0       0   0       0    0   0   0
## JENNIE      0      0     0   0   0      0       0   0       0    1   0   1
## PAULINE     0      0     0   0   0      0       0   0       0    1   0   0
## ANN         0      0     0   0   0      0       0   0       0    0   1   1
## MICHAEL     0      0     0   0   0      0       0   0       0    0   0   0
## BILL        0      1     1   0   1      1       1   0       1    0   0   0
## LEE         0      0     0   1   0      0       0   1       0    0   0   0
## DON         0      0     0   0   0      0       0   0       0    0   0   0
## JOHN        1      0     0   0   0      0       0   1       0    0   0   0
## HARRY       0      0     0   0   0      0       0   0       0    0   0   0
## GERY        1      1     0   1   1      1       1   0       0    0   1   0
## STEVE       0      0     0   0   0      0       0   0       0    0   0   0
## BERT        0      0     0   0   0      0       0   0       0    0   0   0
## RUSS        0      0     0   0   0      0       0   0       1    0   0   0
##         JOHN HARRY GERY STEVE BERT RUSS
## HOLLY      1     0    0     0    0    0
## BRAZEY     0     1    0     0    0    0
## CAROL      0     0    0     0    0    0
## PAM        0     0    0     0    0    1
## PAT        0     0    0     0    0    1
## JENNIE     0     1    0     0    0    0
## PAULINE    0     0    1     0    0    0
## ANN        1     0    1     0    0    0
## MICHAEL    0     0    0     0    0    0
## BILL       0     0    0     1    1    0
## LEE        0     0    0     0    0    0
## DON        0     0    0     0    0    0
## JOHN       0     0    0     1    1    0
## HARRY      0     0    0     0    0    0
## GERY       0     0    0     0    0    0
## STEVE      0     0    0     0    0    0
## BERT       0     0    0     0    0    0
## RUSS       0     0    0     0    0    0
# Matrix multiplication as sum of vector dot-products
# Multiplying the adjacency matrix by itself (A^2): entry (i,j) counts the directed 2-paths
# from i to j, and the diagonal counts node i's reciprocated ties (2-paths from i back to i).
# E.g. entry (2,1) checks for 1s in both the second row (Brazey's out-ties) and the first
# column (Holly's in-ties); each match is a directed two-path from Brazey to Holly.
campnet %*% campnet
##         HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL BILL LEE DON
## HOLLY       1      0     0   0   0      0       1   2       0    0   0   0
## BRAZEY      0      0     0   0   0      1       2   1       0    0   0   0
## CAROL       0      0     0   0   0      1       1   0       0    0   0   0
## PAM         0      0     0   0   0      0       1   2       0    0   0   0
## PAT         0      0     0   0   0      1       2   1       0    0   0   0
## JENNIE      0      0     0   0   0      1       2   1       0    0   0   0
## PAULINE     0      0     0   0   0      1       2   1       0    0   0   0
## ANN         1      0     0   0   0      0       0   2       0    0   0   0
## MICHAEL     0      0     0   1   1      1       1   0       0    0   0   0
## BILL        0      0     0   0   0      0       0   0       0    2   0   0
## LEE         0      0     0   0   0      0       1   1       0    0   1   0
## DON         0      0     0   0   0      0       0   0       0    1   1   0
## JOHN        0      0     0   0   0      0       0   0       0    0   1   0
## HARRY       0      0     0   0   0      0       0   0       0    2   0   0
## GERY        0      0     0   0   0      0       0   0       0    1   1   0
## STEVE       1      0     0   0   0      1       1   1       0    0   0   0
## BERT        1      0     0   0   0      1       1   1       0    0   0   0
## RUSS        0      0     0   0   0      0       0   0       0    1   1   0
##         JOHN HARRY GERY STEVE BERT RUSS
## HOLLY      0     0    0     0    0    0
## BRAZEY     0     0    0     0    0    0
## CAROL      1     0    1     0    0    0
## PAM        0     0    1     0    0    0
## PAT        0     0    0     0    0    0
## JENNIE     0     0    0     0    0    0
## PAULINE    0     0    0     0    0    0
## ANN        0     0    1     0    0    0
## MICHAEL    0     0    0     0    0    0
## BILL       0     0    2     0    0    0
## LEE        1     0    0     0    0    0
## DON        1     0    1     0    0    0
## JOHN       2     0    1     0    0    0
## HARRY      0     0    2     0    0    0
## GERY       1     0    1     0    0    0
## STEVE      0     0    0     0    0    0
## BERT       0     0    0     0    0    0
## RUSS       0     0    2     0    0    0
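
To see the dot-product logic for a single cell, the Brazey-to-Holly entry can be recomputed by hand from Brazey's out-ties and Holly's in-ties:

# Number of directed two-paths Brazey -> k -> Holly: dot product of Brazey's row with Holly's column
sum(campnet["BRAZEY", ] * campnet[, "HOLLY"])
## [1] 0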
print(campnet)
##         HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL BILL LEE DON
## HOLLY       0      0     0   0   0      0       0   0       0    0   0   0
## BRAZEY      0      0     0   0   0      0       0   0       0    1   0   0
## CAROL       1      0     0   0   0      0       0   0       0    1   0   0
## PAM         0      0     0   0   0      0       0   0       0    0   1   0
## PAT         0      0     0   0   0      0       0   0       0    1   0   0
## JENNIE      0      0     0   0   0      0       0   0       0    1   0   0
## PAULINE     0      0     0   0   0      0       0   0       0    1   0   0
## ANN         0      0     0   0   0      0       0   0       0    0   1   0
## MICHAEL     0      0     0   0   0      0       0   0       0    1   0   0
## BILL        0      0     0   0   0      1       1   0       0    0   0   0
## LEE         0      0     0   0   0      0       0   1       0    0   0   0
## DON         0      0     0   0   0      1       0   1       0    0   0   0
## JOHN        1      0     0   0   0      0       0   1       0    0   0   0
## HARRY       0      1     0   0   0      1       0   0       0    0   0   0
## GERY        0      0     0   0   0      0       1   1       0    0   0   0
## STEVE       0      0     0   0   0      0       0   0       0    1   0   0
## BERT        0      0     0   0   0      0       0   0       0    1   0   0
## RUSS        0      0     0   1   1      0       0   0       0    0   0   0
##         JOHN HARRY GERY STEVE BERT RUSS
## HOLLY      1     0    1     0    0    0
## BRAZEY     0     0    1     0    0    0
## CAROL      0     0    0     0    0    0
## PAM        0     0    1     0    0    0
## PAT        0     0    1     0    0    0
## JENNIE     0     0    1     0    0    0
## PAULINE    0     0    1     0    0    0
## ANN        1     0    0     0    0    0
## MICHAEL    0     0    0     0    0    1
## BILL       0     0    0     0    0    0
## LEE        0     0    1     0    0    0
## DON        0     0    0     0    0    0
## JOHN       0     0    0     0    0    0
## HARRY      0     0    0     0    0    0
## GERY       0     0    0     0    0    0
## STEVE      1     0    0     0    0    0
## BERT       1     0    0     0    0    0
## RUSS       0     0    0     0    0    0
campnet %*% t(campnet)  # Multiplying by the transpose: entry (i,j) counts the out-ties i and j have in common (alters both nominate); the diagonal is each node's out-degree.
##         HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL BILL LEE DON
## HOLLY       2      1     0   1   1      1       1   1       0    0   1   0
## BRAZEY      1      2     1   1   2      2       2   0       1    0   1   0
## CAROL       0      1     2   0   1      1       1   0       1    0   0   0
## PAM         1      1     0   2   1      1       1   1       0    0   1   0
## PAT         1      2     1   1   2      2       2   0       1    0   1   0
## JENNIE      1      2     1   1   2      2       2   0       1    0   1   0
## PAULINE     1      2     1   1   2      2       2   0       1    0   1   0
## ANN         1      0     0   1   0      0       0   2       0    0   0   0
## MICHAEL     0      1     1   0   1      1       1   0       2    0   0   0
## BILL        0      0     0   0   0      0       0   0       0    2   0   1
## LEE         1      1     0   1   1      1       1   0       0    0   2   1
## DON         0      0     0   0   0      0       0   0       0    1   1   2
## JOHN        0      0     1   0   0      0       0   0       0    0   1   1
## HARRY       0      0     0   0   0      0       0   0       0    1   0   1
## GERY        0      0     0   0   0      0       0   0       0    1   1   1
## STEVE       1      1     1   0   1      1       1   1       1    0   0   0
## BERT        1      1     1   0   1      1       1   1       1    0   0   0
## RUSS        0      0     0   0   0      0       0   0       0    0   0   0
##         JOHN HARRY GERY STEVE BERT RUSS
## HOLLY      0     0    0     1    1    0
## BRAZEY     0     0    0     1    1    0
## CAROL      1     0    0     1    1    0
## PAM        0     0    0     0    0    0
## PAT        0     0    0     1    1    0
## JENNIE     0     0    0     1    1    0
## PAULINE    0     0    0     1    1    0
## ANN        0     0    0     1    1    0
## MICHAEL    0     0    0     1    1    0
## BILL       0     1    1     0    0    0
## LEE        1     0    1     0    0    0
## DON        1     1    1     0    0    0
## JOHN       2     0    1     0    0    0
## HARRY      0     2    0     0    0    0
## GERY       1     0    2     0    0    0
## STEVE      0     0    0     2    2    0
## BERT       0     0    0     2    2    0
## RUSS       0     0    0     0    0    2
# Element-wise multiplication (both matrices must have same dimension)
campnet * t(campnet)  # Note that * is element-wise multiplication: a 1 survives only where both i->j and j->i exist, i.e. the symmetric adjacency matrix under the minimum/strong (AND) rule.
##         HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL BILL LEE DON
## HOLLY       0      0     0   0   0      0       0   0       0    0   0   0
## BRAZEY      0      0     0   0   0      0       0   0       0    0   0   0
## CAROL       0      0     0   0   0      0       0   0       0    0   0   0
## PAM         0      0     0   0   0      0       0   0       0    0   0   0
## PAT         0      0     0   0   0      0       0   0       0    0   0   0
## JENNIE      0      0     0   0   0      0       0   0       0    1   0   0
## PAULINE     0      0     0   0   0      0       0   0       0    1   0   0
## ANN         0      0     0   0   0      0       0   0       0    0   1   0
## MICHAEL     0      0     0   0   0      0       0   0       0    0   0   0
## BILL        0      0     0   0   0      1       1   0       0    0   0   0
## LEE         0      0     0   0   0      0       0   1       0    0   0   0
## DON         0      0     0   0   0      0       0   0       0    0   0   0
## JOHN        1      0     0   0   0      0       0   1       0    0   0   0
## HARRY       0      0     0   0   0      0       0   0       0    0   0   0
## GERY        0      0     0   0   0      0       1   0       0    0   0   0
## STEVE       0      0     0   0   0      0       0   0       0    0   0   0
## BERT        0      0     0   0   0      0       0   0       0    0   0   0
## RUSS        0      0     0   0   0      0       0   0       0    0   0   0
##         JOHN HARRY GERY STEVE BERT RUSS
## HOLLY      1     0    0     0    0    0
## BRAZEY     0     0    0     0    0    0
## CAROL      0     0    0     0    0    0
## PAM        0     0    0     0    0    0
## PAT        0     0    0     0    0    0
## JENNIE     0     0    0     0    0    0
## PAULINE    0     0    1     0    0    0
## ANN        1     0    0     0    0    0
## MICHAEL    0     0    0     0    0    0
## BILL       0     0    0     0    0    0
## LEE        0     0    0     0    0    0
## DON        0     0    0     0    0    0
## JOHN       0     0    0     0    0    0
## HARRY      0     0    0     0    0    0
## GERY       0     0    0     0    0    0
## STEVE      0     0    0     0    0    0
## BERT       0     0    0     0    0    0
## RUSS       0     0    0     0    0    0

1.4 Symmetrizing

sna::symmetrize(campnet, rule = "weak")  # OR rule, if either i->j or i<-j exist;
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
##  [1,]    0    0    1    0    0    0    0    0    0     0     0     0     1
##  [2,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [3,]    1    0    0    0    0    0    0    0    0     1     0     0     0
##  [4,]    0    0    0    0    0    0    0    0    0     0     1     0     0
##  [5,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [6,]    0    0    0    0    0    0    0    0    0     1     0     1     0
##  [7,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [8,]    0    0    0    0    0    0    0    0    0     0     1     1     1
##  [9,]    0    0    0    0    0    0    0    0    0     1     0     0     0
## [10,]    0    1    1    0    1    1    1    0    1     0     0     0     0
## [11,]    0    0    0    1    0    0    0    1    0     0     0     0     0
## [12,]    0    0    0    0    0    1    0    1    0     0     0     0     0
## [13,]    1    0    0    0    0    0    0    1    0     0     0     0     0
## [14,]    0    1    0    0    0    1    0    0    0     0     0     0     0
## [15,]    1    1    0    1    1    1    1    1    0     0     1     0     0
## [16,]    0    0    0    0    0    0    0    0    0     1     0     0     1
## [17,]    0    0    0    0    0    0    0    0    0     1     0     0     1
## [18,]    0    0    0    1    1    0    0    0    1     0     0     0     0
##       [,14] [,15] [,16] [,17] [,18]
##  [1,]     0     1     0     0     0
##  [2,]     1     1     0     0     0
##  [3,]     0     0     0     0     0
##  [4,]     0     1     0     0     1
##  [5,]     0     1     0     0     1
##  [6,]     1     1     0     0     0
##  [7,]     0     1     0     0     0
##  [8,]     0     1     0     0     0
##  [9,]     0     0     0     0     1
## [10,]     0     0     1     1     0
## [11,]     0     1     0     0     0
## [12,]     0     0     0     0     0
## [13,]     0     0     1     1     0
## [14,]     0     0     0     0     0
## [15,]     0     0     0     0     0
## [16,]     0     0     0     0     0
## [17,]     0     0     0     0     0
## [18,]     0     0     0     0     0
sna::symmetrize(campnet, rule = "strong")  # AND rule, iff i<->j exist;
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
##  [1,]    0    0    0    0    0    0    0    0    0     0     0     0     1
##  [2,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [3,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [4,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [5,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [6,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [7,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [8,]    0    0    0    0    0    0    0    0    0     0     1     0     1
##  [9,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [10,]    0    0    0    0    0    1    1    0    0     0     0     0     0
## [11,]    0    0    0    0    0    0    0    1    0     0     0     0     0
## [12,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [13,]    1    0    0    0    0    0    0    1    0     0     0     0     0
## [14,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [15,]    0    0    0    0    0    0    1    0    0     0     0     0     0
## [16,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [17,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [18,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##       [,14] [,15] [,16] [,17] [,18]
##  [1,]     0     0     0     0     0
##  [2,]     0     0     0     0     0
##  [3,]     0     0     0     0     0
##  [4,]     0     0     0     0     0
##  [5,]     0     0     0     0     0
##  [6,]     0     0     0     0     0
##  [7,]     0     1     0     0     0
##  [8,]     0     0     0     0     0
##  [9,]     0     0     0     0     0
## [10,]     0     0     0     0     0
## [11,]     0     0     0     0     0
## [12,]     0     0     0     0     0
## [13,]     0     0     0     0     0
## [14,]     0     0     0     0     0
## [15,]     0     0     0     0     0
## [16,]     0     0     0     0     0
## [17,]     0     0     0     0     0
## [18,]     0     0     0     0     0
sna::symmetrize(campnet, rule = "upper")  # copy the upper triangle over the lower;
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
##  [1,]    0    0    0    0    0    0    0    0    0     0     0     0     1
##  [2,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [3,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [4,]    0    0    0    0    0    0    0    0    0     0     1     0     0
##  [5,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [6,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [7,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [8,]    0    0    0    0    0    0    0    0    0     0     1     0     1
##  [9,]    0    0    0    0    0    0    0    0    0     1     0     0     0
## [10,]    0    1    1    0    1    1    1    0    1     0     0     0     0
## [11,]    0    0    0    1    0    0    0    1    0     0     0     0     0
## [12,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [13,]    1    0    0    0    0    0    0    1    0     0     0     0     0
## [14,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [15,]    1    1    0    1    1    1    1    0    0     0     1     0     0
## [16,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [17,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [18,]    0    0    0    0    0    0    0    0    1     0     0     0     0
##       [,14] [,15] [,16] [,17] [,18]
##  [1,]     0     1     0     0     0
##  [2,]     0     1     0     0     0
##  [3,]     0     0     0     0     0
##  [4,]     0     1     0     0     0
##  [5,]     0     1     0     0     0
##  [6,]     0     1     0     0     0
##  [7,]     0     1     0     0     0
##  [8,]     0     0     0     0     0
##  [9,]     0     0     0     0     1
## [10,]     0     0     0     0     0
## [11,]     0     1     0     0     0
## [12,]     0     0     0     0     0
## [13,]     0     0     0     0     0
## [14,]     0     0     0     0     0
## [15,]     0     0     0     0     0
## [16,]     0     0     0     0     0
## [17,]     0     0     0     0     0
## [18,]     0     0     0     0     0
sna::symmetrize(campnet, rule = "lower")  # copy the lower triangle over the upper;
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
##  [1,]    0    0    1    0    0    0    0    0    0     0     0     0     1
##  [2,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [3,]    1    0    0    0    0    0    0    0    0     0     0     0     0
##  [4,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [5,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [6,]    0    0    0    0    0    0    0    0    0     1     0     1     0
##  [7,]    0    0    0    0    0    0    0    0    0     1     0     0     0
##  [8,]    0    0    0    0    0    0    0    0    0     0     1     1     1
##  [9,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [10,]    0    0    0    0    0    1    1    0    0     0     0     0     0
## [11,]    0    0    0    0    0    0    0    1    0     0     0     0     0
## [12,]    0    0    0    0    0    1    0    1    0     0     0     0     0
## [13,]    1    0    0    0    0    0    0    1    0     0     0     0     0
## [14,]    0    1    0    0    0    1    0    0    0     0     0     0     0
## [15,]    0    0    0    0    0    0    1    1    0     0     0     0     0
## [16,]    0    0    0    0    0    0    0    0    0     1     0     0     1
## [17,]    0    0    0    0    0    0    0    0    0     1     0     0     1
## [18,]    0    0    0    1    1    0    0    0    0     0     0     0     0
##       [,14] [,15] [,16] [,17] [,18]
##  [1,]     0     0     0     0     0
##  [2,]     1     0     0     0     0
##  [3,]     0     0     0     0     0
##  [4,]     0     0     0     0     1
##  [5,]     0     0     0     0     1
##  [6,]     1     0     0     0     0
##  [7,]     0     1     0     0     0
##  [8,]     0     1     0     0     0
##  [9,]     0     0     0     0     0
## [10,]     0     0     1     1     0
## [11,]     0     0     0     0     0
## [12,]     0     0     0     0     0
## [13,]     0     0     1     1     0
## [14,]     0     0     0     0     0
## [15,]     0     0     0     0     0
## [16,]     0     0     0     0     0
## [17,]     0     0     0     0     0
## [18,]     0     0     0     0     0
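
The weak and strong rules can also be reproduced with the plain matrix operations from section 1.3 (a sketch; note that sna::symmetrize returns the same patterns but drops the row and column names):

weak.sym   <- pmax(campnet, t(campnet))  # OR (weak) rule: a tie if either direction is present
strong.sym <- campnet * t(campnet)       # AND (strong) rule: a tie only if it is reciprocated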

1.5 Network Modes

Brief intro to dataset: “The National Longitudinal Study of Adolescent to Adult Health (Add Health) is a longitudinal study of a nationally representative sample of adolescents in grades 7-12 in the United States during the 1994-95 school year. Add Health combines longitudinal survey data on respondents’ social, economic, psychological and physical well-being with contextual data on the family, neighborhood, community, school, friendships, peer groups, and romantic relationships, providing unique opportunities to study how social environments and behaviors in adolescence are linked to health and achievement outcomes in young adulthood.”

Tidyverse grammar:

  • Selecting: always refers to selecting the columns you want.
  • Arranging: reorders rows with respect to columns.
  • Mutating: creates a new variable based on operations performed on another variable.
  • Filtering: filters by rows.
  • Renaming: relabels column names.
  • Gathering: gathers columns to transform a wide data set into a long one.
  • Summarizing: generates summary statistics for a given variable.
  • Separating: splits delimited values in one column into multiple columns.
  • Distinct: eliminates all duplicate values.
  • Joining: merges data sets using a key variable.
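
A minimal illustration of a few of these verbs on a toy data frame (the object and column names here are made up for the example):

toy <- data.frame(id = 1:3, f1 = c(2, NA, 5), f2 = c(4, 6, NA))
toy %>%
    gather(friend, alter, f1:f2, na.rm = TRUE) %>%  # gather: wide -> long
    filter(alter > 2) %>%                           # filter: keep rows meeting a condition
    arrange(id)                                     # arrange: reorder rows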

  • Import and arrange AddHealth data
AHS_WPVAR <- rio::import('./data/ahs_wpvar.csv')
commcnt sdummy ego_nid mfnid_1 mfnid_2 mfnid_3 mfnid_4 mfnid_5 mfact_1 mfact_2 mfact_3 mfact_4 mfact_5
      1      1       1      52   99999      NA      NA      NA       0       0      NA      NA      NA
      1      1       2   99999      33      57      59      62       1       1       1       1       1
      1      1       3      NA      NA      NA      NA      NA      NA      NA      NA      NA      NA
      1      1       4      NA      NA      NA      NA      NA      NA      NA      NA      NA      NA
      1      1       5   99999      NA      NA      NA      NA       2      NA      NA      NA      NA
      1      1       6   99999      19      26      47      NA       1       1       1       0      NA

In the column names, mf stands for “male friend” and ff for “female friend”; mfnid gives the nominated friend’s node ID and mfact the number of shared activities (“acts”).

  • Transform the data from wide to long so as to capture all ties between ego and alters (the alters are currently spread across separate columns). Two variables result: the tie and the frequency of interaction (number of acts).
AHS_Edges <- AHS_WPVAR %>%
      select(ego_nid, mfnid_1:mfnid_5, ffnid_1:ffnid_5, commcnt, sex) %>% 
      filter(commcnt == 7) %>%
      gather(Alter_Label, value, mfnid_1:mfnid_5, ffnid_1:ffnid_5, na.rm = TRUE) %>% 
      arrange(ego_nid, sex) %>% 
      filter(value != 99999) %>%
      select(ego_nid, value) %>%  
      rename(Sender = `ego_nid`, Target = `value`)     
  • Creating list of nodes
AHS_Nodes <- AHS_Edges %>% 
  gather(Alter_Label, value, Sender, Target, na.rm = TRUE) %>%
  select(value) %>% 
  rename(ego_nid = `value`)
AHS_Nodes <- AHS_Nodes %>% distinct(ego_nid)

AHS_Nodes <- AHS_Nodes %>% 
  add_rownames() %>%  # note: add_rownames() is deprecated; tibble::rownames_to_column() is the modern replacement
  rename(Sender_ID = rowname) %>%
  mutate(Sender_ID = as.numeric(Sender_ID)) 
  • Merging/Joining Numeric IDs into the Edgelist
AHS_Nodes <- AHS_Nodes %>% rename(Sender = `ego_nid`)
AHS_Edges <- AHS_Edges %>% left_join(AHS_Nodes, by = c("Sender"))
AHS_Nodes <- AHS_Nodes %>% rename(Target = `Sender`, Target_ID = `Sender_ID`)
AHS_Edges <- AHS_Edges %>% left_join(AHS_Nodes, by = c("Target"))
  • Tidying up edgelists and list of nodes
AHS_Edges <- AHS_Edges %>%
  select(Sender_ID, Target_ID)  %>%
  rename(Sender = `Sender_ID`, Target = `Target_ID`)

AHS_Nodes <- AHS_Nodes %>%
  rename (ego_nid = `Target`, ID = `Target_ID`)
  • Adding attributes
AHS_Attributes <- AHS_WPVAR %>%
  select(commcnt, ego_nid, sex, grade, race5) %>%
  filter(commcnt == 7)

AHS_Nodes <- AHS_Nodes %>%
  left_join(AHS_Attributes, by = c("ego_nid"))

save(AHS_Edges,file="AHS_Edges.Rda")
save(AHS_Nodes, file="AHS_Nodes.Rda")
  • Creating adjacency matrix
number.nodes <- length(unique(c(AHS_Edges$Sender, AHS_Edges$Target)))
ties <- cbind(a=AHS_Edges$Sender, b=AHS_Edges$Target)  # two-column matrix of (row, column) index pairs
ahs.adjmat <- matrix(0, number.nodes, number.nodes)
ahs.adjmat[ties] <- 1  # matrix indexing: set cell (Sender, Target) to 1 for every tie
ahs.adjmat[1:15, 1:15]
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
##  [1,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [2,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [3,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [4,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [5,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [6,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [7,]    0    0    0    0    0    0    0    0    0     0     0     1     0
##  [8,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [9,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [10,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [11,]    0    0    0    0    0    0    0    0    0     1     0     0     0
## [12,]    0    0    0    0    1    0    1    0    0     0     0     0     0
## [13,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [14,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [15,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##       [,14] [,15]
##  [1,]     0     0
##  [2,]     0     0
##  [3,]     0     0
##  [4,]     0     0
##  [5,]     0     0
##  [6,]     0     0
##  [7,]     0     0
##  [8,]     0     0
##  [9,]     0     0
## [10,]     0     0
## [11,]     0     0
## [12,]     0     0
## [13,]     0     0
## [14,]     0     0
## [15,]     0     0
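
As a quick check (assuming AHS_Edges contains no duplicate ties), every row of the edgelist should have produced exactly one 1 in the matrix, so the following should be TRUE:

sum(ahs.adjmat) == nrow(AHS_Edges)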

1.5.1 One-mode network

1.5.1.1 with statnet: from an edgelist

print(AHS_Edges[1:20,])
##    Sender Target
## 1       1    108
## 2       1    112
## 3       1    166
## 4       1    113
## 5       1    132
## 6       1    151
## 7       1    204
## 8       2     51
## 9       3     85
## 10      3    117
## 11      3    183
## 12      3    235
## 13      3     56
## 14      3     57
## 15      3     64
## 16      3    182
## 17      3    214
## 18      4    131
## 19      5    243
## 20      6     85
ahs.net <- network::network(AHS_Edges, matrix.type = "edgelist")
print(ahs.net)
##  Network attributes:
##   vertices = 440 
##   directed = TRUE 
##   hyper = FALSE 
##   loops = FALSE 
##   multiple = FALSE 
##   bipartite = FALSE 
##   total edges= 2099 
##     missing edges= 0 
##     non-missing edges= 2099 
## 
##  Vertex attribute names: 
##     vertex.names 
## 
##  Edge attribute names not shown

1.5.1.2 with statnet: from an adjacency matrix

adj.mat <- network::as.matrix.network(ahs.net)
adj.mat[1:20, 1:20]
##    1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1  0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 2  0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 3  0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 4  0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 5  0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 6  0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 7  0 0 0 0 0 0 0 0 0  0  0  1  0  0  0  0  0  0  0  0
## 8  0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 9  0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 10 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 11 0 0 0 0 0 0 0 0 0  1  0  0  0  0  0  0  0  0  0  0
## 12 0 0 0 0 1 0 1 0 0  0  0  0  0  0  0  0  0  0  0  0
## 13 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 14 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 15 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 16 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 17 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 18 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 19 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
## 20 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0
ahs.net <- network::network(adj.mat, matrix.type = "adjacency")
print(ahs.net)
##  Network attributes:
##   vertices = 440 
##   directed = TRUE 
##   hyper = FALSE 
##   loops = FALSE 
##   multiple = FALSE 
##   bipartite = FALSE 
##   total edges= 2099 
##     missing edges= 0 
##     non-missing edges= 2099 
## 
##  Vertex attribute names: 
##     vertex.names 
## 
##  Edge attribute names not shown

1.5.1.3 with igraph: from an edgelist

print(as.matrix(AHS_Edges[1:10,]))
##    Sender Target
## 1       1    108
## 2       1    112
## 3       1    166
## 4       1    113
## 5       1    132
## 6       1    151
## 7       1    204
## 8       2     51
## 9       3     85
## 10      3    117
ahs.net <- igraph::graph_from_edgelist(as.matrix(AHS_Edges))
print(ahs.net)
## IGRAPH 9045073 D--- 440 2099 -- 
## + edges from 9045073:
##  [1]  1->108  1->112  1->166  1->113  1->132  1->151  1->204  2-> 51
##  [9]  3-> 85  3->117  3->183  3->235  3-> 56  3-> 57  3-> 64  3->182
## [17]  3->214  4->131  5->243  6-> 85  6->117  6->167  6->183  6->185
## [25]  6-> 56  6->165  6->179  6->214  7-> 12  7-> 68  7->149  8->111
## [33]  8->183  8->186  8->241  8-> 86  8->215  9->102  9->181  9->207
## [41]  9->236  9-> 55  9-> 56  9-> 67 10->411 10-> 33 11->411 11->243
## [49] 11-> 10 12->  5 12->  7 12-> 71 13-> 88 13->216 13->412 14->248
## [57] 14-> 71 14-> 83 14->149 15-> 30 15->130 15->178 15-> 81 15->106
## [65] 15->151 15->215 16->413 16->186 16->220 16-> 57 16->135 16->211
## + ... omitted several edges

Let’s understand the information contained in an igraph object (a small illustration follows this list):

  • IGRAPH simply annotates g as an igraph object
  • 704e0f7 or whatever follows IGRAPH is simply how igraph identifies the g for itself
  • D--- refers to descriptive details of g :
    • U would tell us that g is an undirected graph
    • D tells us that g is directed graph
    • N would tell us that g is a named graph, in that the vertices have a name attribute
    • -- refers to attributes not applicable to g , but you may see them in the future:
      • W would refer to a weighted graph, where edges have a weight attribute
      • B would refer to a bipartite graph, where vertices have a type attribute
  • 440 refers to the number of vertices in g
  • 2099 refers to the number of edges in g
  • attr: would display a list of attributes within the graph. There are no attributes in this network. But, in cases where you load networks that use names instead of numbers, you will see name listed after attr: . You will see multiple attributes in the future.
    • (v/c) , which will appear following name , tells us that it is a vertex attribute of a character data type. character is simply what R calls a string.
    • In the future we will also see:
      • (e/c) or (e/n) referring to edge attributes that are of character or numeric data types
      • (g/c) or (g/n) referring to graph attributes that are of character or numeric data types
  • + edges from 704e0f7: lists a sample of g ’s edges using the names of the vertices which they connect.
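
A tiny hypothetical graph makes these flags concrete; the toy object below is directed (D) and named (N), and adding an edge weight attribute turns on the weighted (W) flag:

g <- igraph::graph_from_literal(A -+ B, B -+ C)  # a directed, named toy graph
igraph::E(g)$weight <- c(1, 2)                   # adding an edge weight attribute sets the W flag
summary(g)                                       # header flags read D N W -, and attr: lists name (v/c), weight (e/n)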

1.5.1.4 with igraph: from an adjacency matrix

ahs.net <- igraph::graph.adjacency(ahs.adjmat, mode="directed", weighted=NULL, diag=FALSE)
print(ahs.net)
## IGRAPH ba2f6b9 D--- 440 2099 -- 
## + edges from ba2f6b9:
##  [1]  1->108  1->112  1->113  1->132  1->151  1->166  1->204  2-> 51
##  [9]  3-> 56  3-> 57  3-> 64  3-> 85  3->117  3->182  3->183  3->214
## [17]  3->235  4->131  5->243  6-> 56  6-> 85  6->117  6->165  6->167
## [25]  6->179  6->183  6->185  6->214  7-> 12  7-> 68  7->149  8-> 86
## [33]  8->111  8->183  8->186  8->215  8->241  9-> 55  9-> 56  9-> 67
## [41]  9->102  9->181  9->207  9->236 10-> 33 10->411 11-> 10 11->243
## [49] 11->411 12->  5 12->  7 12-> 71 13-> 88 13->216 13->412 14-> 71
## [57] 14-> 83 14->149 14->248 15-> 30 15-> 81 15->106 15->130 15->151
## [65] 15->178 15->215 16-> 57 16->135 16->186 16->211 16->214 16->220
## + ... omitted several edges

1.5.1.5 From a nodelist (adjacency list)? igraph is the only one of these packages that handles nodelists, so in general use edgelists instead.

ahs.nodelist <- NULL
for (i in unique(AHS_Edges$Sender)) {
    nodelist <- unlist(strsplit(paste(c(i, AHS_Edges$Target[AHS_Edges$Sender == 
        i]), collapse = ","), ","))
    names(nodelist) <- c("sender", paste0(rep("target", length(nodelist) - 1), 
        1:(length(nodelist) - 1)))
    ahs.nodelist <- plyr::rbind.fill(ahs.nodelist, data.frame(t(as.matrix(nodelist))))
}
ahs.nodelist <- as.matrix(ahs.nodelist)[, -1]  # drop the sender column (check first that the row order matches the sender IDs)
ahs.nodelist <- split(ahs.nodelist, 1:nrow(ahs.nodelist))
ahs.nodelist <- lapply(ahs.nodelist, function(x) x[!is.na(x)])
print(ahs.nodelist[1:10])
## $`1`
## [1] "108" "112" "166" "113" "132" "151" "204"
## 
## $`2`
## [1] "51"
## 
## $`3`
## [1] "85"  "117" "183" "235" "56"  "57"  "64"  "182" "214"
## 
## $`4`
## [1] "131"
## 
## $`5`
## [1] "243"
## 
## $`6`
## [1] "85"  "117" "167" "183" "185" "56"  "165" "179" "214"
## 
## $`7`
## [1] "12"  "68"  "149"
## 
## $`8`
## [1] "111" "183" "186" "241" "86"  "215"
## 
## $`9`
## [1] "102" "181" "207" "236" "55"  "56"  "67" 
## 
## $`10`
## [1] "411" "33"
ahs.net <- igraph::graph_from_adj_list(ahs.nodelist, mode = "out")
print(ahs.net)
## IGRAPH acde59e D--- 440 2099 -- 
## + edges from acde59e:
##  [1]  1->108  1->112  1->166  1->113  1->132  1->151  1->204  2-> 51
##  [9]  3-> 85  3->117  3->183  3->235  3-> 56  3-> 57  3-> 64  3->182
## [17]  3->214  4->131  5->243  6-> 85  6->117  6->167  6->183  6->185
## [25]  6-> 56  6->165  6->179  6->214  7-> 12  7-> 68  7->149  8->111
## [33]  8->183  8->186  8->241  8-> 86  8->215  9->102  9->181  9->207
## [41]  9->236  9-> 55  9-> 56  9-> 67 10->411 10-> 33 11->411 11->243
## [49] 11-> 10 12->  5 12->  7 12-> 71 13-> 88 13->216 13->412 14->248
## [57] 14-> 71 14-> 83 14->149 15-> 30 15->130 15->178 15-> 81 15->106
## [65] 15->151 15->215 16->413 16->186 16->220 16-> 57 16->135 16->211
## + ... omitted several edges

1.5.2 Working with Network Objects

1.5.2.1 with statnet

  • Attributes
ahs.net <- network::network(AHS_Edges, matrix.type = "edgelist")
# Node attributes
network::set.vertex.attribute(ahs.net, "sex", AHS_Nodes$sex)
library(statnet)
ahs.net %v% "race" <- AHS_Nodes$race5
detach("package:statnet", unload=TRUE)
network::list.vertex.attributes(ahs.net)
## [1] "na"           "race"         "sex"          "vertex.names"
network::get.vertex.attribute(ahs.net, "race")
##   [1] 3 1 1 1 3 1 1 1 1 3 1 1 1 1 1 1 1 1 1 5 1 1 1 1 5 1 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 5 1 1 1 1 0 5 5 1 1 1 1 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 1 1
##  [71] 0 1 3 1 1 1 3 1 5 1 1 5 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 1 5 1 5 1 1 1 3
## [106] 1 1 1 1 1 1 1 1 1 1 5 1 3 1 1 1 1 1 5 1 1 5 1 1 1 1 1 1 5 1 1 1 1 3 1
## [141] 1 1 1 1 1 1 1 3 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 5 1 1 1 1 1
## [176] 5 1 1 1 1 1 1 3 5 1 1 1 5 1 1 1 1 1 1 1 1 5 3 1 3 5 3 3 5 4 1 1 1 5 1
## [211] 1 1 1 1 1 1 1 1 5 1 1 1 1 1 1 1 1 1 5 1 1 1 1 1 1 1 1 1 5 1 1 1 1 1 3
## [246] 1 1 1 1 1 1 5 1 5 0 1 5 1 1 0 1 1 5 1 1 1 3 1 1 1 1 5 5 1 3 3 1 1 1 5
## [281] 1 1 1 1 3 1 1 1 1 0 1 1 1 1 5 1 1 1 1 3 1 1 1 1 5 3 1 1 1 1 1 1 5 1 1
## [316] 1 5 3 1 4 5 1 1 3 1 1 1 1 0 1 5 5 1 1 1 1 5 1 1 5 1 1 1 1 1 1 5 1 1 5
## [351] 1 1 5 1 1 1 1 1 1 1 1 1 1 1 1 5 1 1 1 1 1 1 1 3 5 3 1 1 5 1 1 5 3 5 3
## [386] 5 1 1 1 1 1 1 5 1 3 1 1 1 1 5 0 1 1 1 1 5 1 1 1 1 1 1 3 1 5 3 3 3 5 5
## [421] 1 5 2 5 3 5 1 1 5 1 5 5 1 1 0 5 1 5 3 1
network::delete.vertex.attribute(ahs.net, "sex")

# Edge attributes
network::set.edge.attribute(ahs.net, "value", 1)
ahs.net %e% "value" <- 1
network::list.edge.attributes(ahs.net)
## [1] "na"    "value"
# Network attributes
network::set.network.attribute(ahs.net, "degree distribution", 1:10)
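# Read the attribute back to confirm it was stored
network::get.network.attribute(ahs.net, "degree distribution")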
  • Visualization
ahs.net <- network::network(AHS_Edges, matrix.type = "edgelist")
AHS_Nodes$gender <- ifelse(AHS_Nodes$sex==1, "M", "F")
network::set.vertex.attribute(ahs.net, "gender", AHS_Nodes$gender)
nodeColors <- ifelse(AHS_Nodes$gender == "M", "hotpink", "dodgerblue")
gender.label <- network::get.vertex.attribute(ahs.net, "gender")
gplot(ahs.net, gmode = "digraph", displaylabels = TRUE, vertex.col = nodeColors, 
    label = gender.label, label.cex = 0.5, label.col = nodeColors, mode="fruchtermanreingold",
    usearrows = FALSE, usecurve = TRUE, edge.curve = 0.1)

  • Export to other formats
head(network::as.matrix.network(ahs.net, matrix.type = "edgelist")) # Edgelist
network::as.matrix.network(ahs.net, matrix.type="adjacency")[1:5, 1:5] # Adjacency Matrix
head(sna::as.edgelist.sna(ahs.net))
sna::as.sociomatrix.sna(ahs.net)[1:5, 1:5]

1.5.2.2 with igraph

  • Attributes
ahs.net <- igraph::graph.data.frame(AHS_Edges)
ahs.net <- igraph::set.vertex.attribute(ahs.net, "gender", index=igraph::V(ahs.net), AHS_Nodes$gender)
names(igraph::get.vertex.attribute(ahs.net))
## [1] "name"   "gender"
summary(ahs.net)
## IGRAPH fd8ddc4 DN-- 440 2099 -- 
## + attr: name (v/c), gender (v/c)
  • Network metadata
igraph::ecount(ahs.net)
## [1] 2099
igraph::vcount(ahs.net)
## [1] 440
igraph::V(ahs.net)[1:10]
## + 10/440 vertices, named, from fd8ddc4:
##  [1] 1  2  3  4  5  6  7  8  9  10
igraph::no.clusters(ahs.net)
## [1] 4
igraph::is.connected(ahs.net)
## [1] FALSE
igraph::is.directed(ahs.net)
## [1] TRUE
igraph::get.edgelist(ahs.net)[1:10,]
##       [,1] [,2] 
##  [1,] "1"  "108"
##  [2,] "1"  "112"
##  [3,] "1"  "166"
##  [4,] "1"  "113"
##  [5,] "1"  "132"
##  [6,] "1"  "151"
##  [7,] "1"  "204"
##  [8,] "2"  "51" 
##  [9,] "3"  "85" 
## [10,] "3"  "117"
  • Visualization
ahs_layout <- igraph::layout.fruchterman.reingold(ahs.net)
vertex.colors <- igraph::get.vertex.attribute(ahs.net,"gender")
vertex.colors <- ifelse(vertex.colors == "M", "blue", "pink")
plot(ahs.net, layout=ahs_layout, vertex.color=vertex.colors, 
     vertex.label=NA, edge.arrow.size=.1, vertex.size=3)

1.5.2.3 with snatools

ahs.net <- igraph::graph.data.frame(AHS_Edges)
summary(ahs.net)
## IGRAPH 0f6eab6 DN-- 440 2099 -- 
## + attr: name (v/c)
ahs.net <- ahs.net %>% snatools::as_network()
print(ahs.net)
##  Network attributes:
##   vertices = 440 
##   directed = TRUE 
##   hyper = FALSE 
##   loops = FALSE 
##   multiple = FALSE 
##   bipartite = FALSE 
##   total edges= 2099 
##     missing edges= 0 
##     non-missing edges= 2099 
## 
##  Vertex attribute names: 
##     .vrt_id vertex.names 
## 
##  Edge attribute names not shown
ahs.net <- ahs.net %>% snatools::as_igraph()
summary(ahs.net)
## IGRAPH 03ecf9e DN-- 440 2099 -- 
## + attr: .vrt_id (v/n), name (v/c), .edg_id (e/n)

1.5.3 Two-mode networks

Suppose we want to analyse how people are connected based on their voting patterns: that is, we want to draw a network where a tie is present between a dyad if they voted the same way on a bill. To do this we need to perform a two-mode to one-mode conversion.

Case in point: Brexit indicative votes. Indicative votes are where MPs vote on a series of options designed to test the will of Parliament, to see what, if anything, commands a majority. In the case of Brexit, supporters of indicative votes believe it could provide a way out of the current political stalemate. Usually the government has control over what happens day-to-day in Parliament, but MPs have backed a proposal by a cross-party group of MPs, including Labour’s Hilary Benn and Conservative Sir Oliver Letwin, to take control of the timetable.

Under the cross-party plan, MPs can put forward their preferred Brexit plans to Speaker John Bercow, who will select all or some of these options for debate. MPs may be asked to consider whether any plan agreed by Parliament should be put back to the public in another referendum, or whether the UK should stop Brexit altogether by revoking its notification of Article 50. Following a debate on the various options, at 7pm the Commons will be suspended for 30 minutes so MPs can vote on each plan. During the indicative votes, MPs will enter one of the division lobbies (the corridors in the House of Commons where votes are normally counted) and will receive a paper ballot. They can vote “yes” or “no” on as many options as they are prepared to support.

  • Analysing votes individually
## Import Brexit indicative votes
brexit.votes <- readRDS("./data/indicative_votes.rds")
brexit.edgelist <- data.frame(brexit.votes %>% select(Member, Party, Vote, description) %>% spread(description, Vote))
brexit.edgelist <- brexit.edgelist[,-ncol(brexit.edgelist)]
colnames(brexit.edgelist) <- c("mp", "party", "baron", "beckett", "boles", "cherry", "clarke", "corbyn", "eustace", "fysh")
votes <- c("baron", "beckett", "boles", "cherry", "clarke", "corbyn", "eustace", "fysh")
mp party baron beckett boles cherry clarke corbyn eustace fysh
Órfhlaith Begley Sinn Féin No Vote Recorded No Vote Recorded No Vote Recorded No Vote Recorded No Vote Recorded No Vote Recorded No Vote Recorded No Vote Recorded
Adam Afriyie Conservative Aye No No No No No No Aye
Adam Holloway Conservative Aye No No No No No No Aye
Adrian Bailey Labour No Aye Aye No Vote Recorded Aye Aye No Vote Recorded No
Afzal Khan Labour No Aye Aye No Vote Recorded Aye Aye No Vote Recorded No
Alan Brown Scottish National Party No Aye No Vote Recorded Aye No Vote Recorded No Vote Recorded No No
brexit.edgelist <- brexit.edgelist %>% mutate_at(votes, function(x) ifelse(x=="Aye", 1, ifelse(x=="No", 0, NA)))
mp party baron beckett boles cherry clarke corbyn eustace fysh
Órfhlaith Begley Sinn Féin NA NA NA NA NA NA NA NA
Adam Afriyie Conservative 1 0 0 0 0 0 0 1
Adam Holloway Conservative 1 0 0 0 0 0 0 1
Adrian Bailey Labour 0 1 1 NA 1 1 NA 0
Afzal Khan Labour 0 1 1 NA 1 1 NA 0
Alan Brown Scottish National Party 0 1 NA 1 NA NA 0 0
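
With the votes recoded to 1/0, an agreement network for any single motion can be sketched; the example below (an illustration only, using the baron column built above) links two MPs if they cast the same recorded vote on the Baron (No Deal) motion:

# Agreement on the Baron (No Deal) motion: 1 if two MPs cast the same recorded vote
baron.vote <- brexit.edgelist$baron
baron.agree <- outer(baron.vote, baron.vote, function(x, y) as.numeric(x == y))
baron.agree[is.na(baron.agree)] <- 0   # MPs with no recorded vote get no ties
diag(baron.agree) <- 0                 # ignore self-agreement
dimnames(baron.agree) <- list(as.character(brexit.edgelist$mp), as.character(brexit.edgelist$mp))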
  • Analysing all votes together
brexit.votes <- data.frame(readRDS("./data/indicative_votes.rds") %>% select(Member, Party, Vote, description))

# Keep only "Aye" votes (drop both "No" and "No Vote Recorded")
brexit.votes <- brexit.votes[-grep("No Vote Recorded|No", brexit.votes$Vote),]
brexit.votes <- data.frame(mp=brexit.votes$Member, party=brexit.votes$Party, motion=brexit.votes$description)
brexit.votes[1:10,]
mp              party         motion
Wendy Morton    Conservative  Baron (No Deal)
Nigel Mills     Conservative  Fysh (tariffs)
Nigel Mills     Conservative  Baron (No Deal)
Nick Herbert    Conservative  Clarke (CU)
Nick Herbert    Conservative  Boles (CM2.0)
Nick Herbert    Conservative  Eustace (EFTA)
Gloria De Piero Labour        Corbyn (Lab.)
Gloria De Piero Labour        Clarke (CU)
Gloria De Piero Labour        Boles (CM2.0)
Damian Green    Conservative  Clarke (CU)
brexit.votes.2mode.edgelist <- brexit.votes[,c(1,3)]
brexit.votes.2mode.adj <- as.matrix(table(brexit.votes.2mode.edgelist))
brexit.votes.2mode.adj[1:10, 1:7]
##                 motion
## mp               Baron (No Deal) Beckett (Public vote) Boles (CM2.0)
##   Adam Afriyie                 1                     0             0
##   Adam Holloway                1                     0             0
##   Adrian Bailey                0                     1             1
##   Afzal Khan                   0                     1             1
##   Alan Brown                   0                     1             0
##   Alan Campbell                0                     1             0
##   Alan Duncan                  0                     0             0
##   Alan Mak                     1                     0             0
##   Alan Whitehead               0                     1             1
##   Albert Owen                  0                     1             1
##                 motion
## mp               Cherry (Revocation) Clarke (CU) Corbyn (Lab.)
##   Adam Afriyie                     0           0             0
##   Adam Holloway                    0           0             0
##   Adrian Bailey                    0           1             1
##   Afzal Khan                       0           1             1
##   Alan Brown                       1           0             0
##   Alan Campbell                    0           1             1
##   Alan Duncan                      1           0             0
##   Alan Mak                         0           0             0
##   Alan Whitehead                   1           1             1
##   Albert Owen                      1           1             1
##                 motion
## mp               Eustace (EFTA)
##   Adam Afriyie                0
##   Adam Holloway               0
##   Adrian Bailey               0
##   Afzal Khan                  0
##   Alan Brown                  0
##   Alan Campbell               0
##   Alan Duncan                 0
##   Alan Mak                    0
##   Alan Whitehead              0
##   Albert Owen                 0
# Creating igraph object for 2-mode network
brexit.2mode.graph <- igraph::graph.incidence(brexit.votes.2mode.adj, mode="all")
igraph::V(brexit.2mode.graph)$color <- ""
igraph::V(brexit.2mode.graph)$color[1:(igraph::vcount(brexit.2mode.graph)-8)] <- "red"    # MP vertices (rows of the incidence matrix)
igraph::V(brexit.2mode.graph)$color[(igraph::vcount(brexit.2mode.graph)-7):igraph::vcount(brexit.2mode.graph)] <- "green"  # the 8 motion vertices (columns)
igraph::V(brexit.2mode.graph)$label <- igraph::V(brexit.2mode.graph)$name
igraph::V(brexit.2mode.graph)$label.color <- rgb(0,0,.2,.5)
igraph::V(brexit.2mode.graph)$label.cex <- .4
igraph::V(brexit.2mode.graph)$size <- 6
igraph::V(brexit.2mode.graph)$frame.color <- NA
igraph::E(brexit.2mode.graph)$color <- rgb(.5,.5,0,.2)
brexit_layout <- igraph::layout.fruchterman.reingold(brexit.2mode.graph)
plot(brexit.2mode.graph, layout=brexit_layout)

To convert a two-mode incidence matrix to a one-mode adjacency matrix, one can simply multiply the incidence matrix by its transpose, which sums the common 1’s between rows.

\[\begin{align*} AB = \left[ \begin{array}{cc} a & b \\ c & d \end{array} \right] \left[ \begin{array}{cc} e & f \\ g & h \end{array} \right] = \left[ \begin{array}{cc} ae+bg & af+bh \\ ce+dg & cf+dh \end{array} \right] \end{align*}\]

Now multiplying the matrix by its transpose:

\[\begin{align*} AA' = \left[ \begin{array}{cc} a & b \\ c & d \end{array} \right] \left[ \begin{array}{cc} a & c \\ b & d \end{array} \right] = \left[ \begin{array}{cc} aa+bb & ac+bd \\ ca+db & cc+dd \end{array} \right] \end{align*}\]

Because our incidence matrix consists of 0’s and 1’s, each off-diagonal entry of the product counts the columns (motions) that two rows (MPs) have in common, which is exactly what we wanted; the diagonal counts each row’s own 1’s.
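
A tiny toy example (a sketch with made-up names) makes this concrete:

# Toy incidence matrix: 3 people (rows) x 2 events (columns)
B <- matrix(c(1, 1,
              1, 1,
              0, 1),
            nrow = 3, byrow = TRUE,
            dimnames = list(c("ann", "bob", "cat"), c("e1", "e2")))
B %*% t(B)
# Off-diagonal entries count shared events (e.g. ann and bob share 2);
# diagonal entries count the events each person attended.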

# One-mode projection
brexit.votes.1mode.proj <- brexit.votes.2mode.adj %*% t(brexit.votes.2mode.adj)
# Alternatively, tcrossprod(brexit.votes.2mode.adj) gives the same result and is faster, as it avoids explicitly forming the transpose
brexit.votes.1mode.proj[1:5, 1:5] 
##                mp
## mp              Adam Afriyie Adam Holloway Adrian Bailey Afzal Khan
##   Adam Afriyie             2             2             0          0
##   Adam Holloway            2             2             0          0
##   Adrian Bailey            0             0             4          4
##   Afzal Khan               0             0             4          4
##   Alan Brown               0             0             1          1
##                mp
## mp              Alan Brown
##   Adam Afriyie           0
##   Adam Holloway          0
##   Adrian Bailey          1
##   Afzal Khan             1
##   Alan Brown             2
brexit.votes.1mode.graph <- igraph::graph.adjacency(brexit.votes.1mode.proj, mode="undirected", weighted=TRUE, diag=FALSE) # Weighted = TRUE (MPs vote on MULTIPLE motions together)
brexit_layout <- igraph::layout.fruchterman.reingold(brexit.votes.1mode.graph)
party <- data.frame(mp=as.character(igraph::V(brexit.votes.1mode.graph)$name)) %>% left_join(brexit.edgelist[,1:2])
party$party <- as.character(party$party)
party <- party %>% mutate(color = case_when(party=="Conservative" ~ "blue" , 
                                      party=="Democratic Unionist Party" ~ "pink",
                                      party=="Green Party" ~ "green", 
                                      party=="Independent" ~ "gray",
                                      party=="Labour" ~ "red", 
                                      party=="Liberal Democrat" ~ "orange",
                                      party=="Plaid Cymru" ~ "darkgreen",
                                      party=="Scottish National Party" ~ "yellow"))
igraph::V(brexit.votes.1mode.graph)$party <- as.character(party$party)
igraph::V(brexit.votes.1mode.graph)$color <- as.character(party$color)
igraph::V(brexit.votes.1mode.graph)$label.color <- "black"
igraph::E(brexit.votes.1mode.graph)$width <- igraph::E(brexit.votes.1mode.graph)$weight/20
igraph::E(brexit.votes.1mode.graph)$edge.color <- "gray10"
edge.start <- igraph::ends(brexit.votes.1mode.graph, es=igraph::E(brexit.votes.1mode.graph), names=F)[,1]
edge.col <- igraph::V(brexit.votes.1mode.graph)$color[edge.start]
plot(brexit.votes.1mode.graph, edge.arrow.size=.1, edge.curved=.1, layout=brexit_layout,
     vertex.size=3, vertex.label=NA, edge.color=edge.col)

library(ggraph)
library(gganimate)
library(tidygraph)
library(gifski)
library(stringi)

# Animation without edges

graph <- brexit.votes.1mode.graph %>% as_tbl_graph()
graph

layout_list <- list(
  list(layout = 'star'),
  list(layout = 'circle'),
  list(layout = 'gem'),
  list(layout = 'graphopt'),
  list(layout = 'grid'),
  list(layout = 'mds'),
  list(layout = 'randomly'),
  list(layout = 'fr'),
  list(layout = 'kk'),
  list(layout = 'nicely'),
  list(layout = 'lgl'),
  list(layout = 'drl'))

#filtered_graph <- graph %>% mutate(community = group_walktrap())

layouts <- graph %>% 
  invoke_map('create_layout', layout_list, graph = .) %>% 
  set_names(unlist(layout_list)) %>% 
  bind_rows(.id = 'layout')

dummy_layout <- create_layout(graph, 'nicely')
attr(layouts, 'graph') <- attr(dummy_layout, 'graph')
attr(layouts, 'circular') <- FALSE

# Plot with a single layout + edges
graph %>% ggraph(layout="nicely") +
    geom_node_point(aes(col = as.factor(color))) +
    geom_edge_arc(alpha = 0.01, colour = "gray", aes(edge_width = width/3)) + # fixed colour set outside aes()
    theme_graph() + theme(legend.position = 'none') +
    labs(title = "Commons' Brexit Parties",
      subtitle = 'Using nicely layout engine')
ggsave("./output/brexit_toolarge.png")


# Animation with multiple layouts
g <- ggraph(layouts) +  # the combined layout data frame created above
  geom_node_point(aes(col = as.factor(color))) +
  theme_graph() +
  theme(legend.position = 'none') +
  labs(title = "Commons' Brexit Parties",
       subtitle = 'Using {closest_state} layout engine') +
  transition_states(layout, 1, 2) +
  ease_aes('linear') +
  view_follow()

animated.graph <- animate(g, fps = 30, nframes = 1000)
gganimate::anim_save("brexit_animation.gif", animation = last_animation(), path = "./output/")
save(animated.graph, file="./data/animated_Brexit.RData")

1.5.4 Interactive visualization with visNetwork and networkD3

  • visNetwork
edges <- data.frame(igraph::as_edgelist(brexit.votes.1mode.graph, names=TRUE))
colnames(edges) <- c("from", "to")
edges$weight <- igraph::E(brexit.votes.1mode.graph)$weight
edges <- mutate(edges, width = weight/5)
edges[1:5,]
from          to               weight  width
Adam Afriyie  Adam Holloway         2    0.4
Adam Afriyie  Alan Mak              2    0.4
Adam Afriyie  Alister Jack          1    0.2
Adam Afriyie  Amanda Milling        1    0.2
Adam Afriyie  Andrea Jenkyns        2    0.4
nodes <- data.frame(id=igraph::V(brexit.votes.1mode.graph)$name,
                    color=igraph::V(brexit.votes.1mode.graph)$color)

visNetwork(nodes, edges) %>% visIgraphLayout(layout = "layout_with_fr") %>% 
  visEdges(arrows = "middle")
  • Network D3 graph with networkD3
# Build the igraph object once and reuse it
campnet.graph <- igraph::graph_from_adjacency_matrix(campnet)
edges_d3 <- data.frame(igraph::as_edgelist(campnet.graph, names=TRUE))
colnames(edges_d3) <- c("from", "to")
edges_d3$weight <- igraph::E(campnet.graph)$weight
nodes_d3 <- data.frame(label=igraph::V(campnet.graph)$name,
                  id=as.numeric(as.factor(igraph::V(campnet.graph)$name))-1)
edges_d3 <- edges_d3 %>% mutate_at(c("from", "to"), function(x) as.numeric(as.factor(x))-1)
forceNetwork(Links = edges_d3, Nodes = nodes_d3, Source = "from", Target = "to", 
             NodeID = "label", Group = "id", opacity = 0.9, fontSize = 16, zoom = TRUE)
#saveNetwork(d3, paste0(getwd(), "/data/", "d3plot.html"))
#options(browser="/Applications/Safari.app/Contents/MacOS/Safari")
#browseURL("./data/d3plot.html")

1.6 Export to Gephi

# This function takes an igraph object and converts to GEXF format. It needs two inputs:
# 1. g: igraph object
# 2. filepath: file location where output should be saved
saveAsGEXF = function(g, filepath){
  require(igraph)
  require(rgexf)
  
  # gexf nodes require two column data frame (id, label)
  # check if the input vertices has label already present
  # if not, just have the ids themselves as the label
  if(is.null(V(g)$label))
    V(g)$label <- as.character(V(g))
  
  # similarly, if the edges do not have a weight, add a default weight of 1
  if(is.null(E(g)$weight))
    E(g)$weight <- rep.int(1, ecount(g))
  
  nodes <- data.frame(cbind(V(g), V(g)$label))
  edges <- ends(g, E(g), names=FALSE)

  # combine all node attributes into a matrix (and take care of & for xml)
  vAttrNames <- setdiff(list.vertex.attributes(g), "label") 
  nodesAtt <- data.frame(sapply(vAttrNames, function(attr) sub("&", "&amp;", get.vertex.attribute(g, attr))))
  
  # combine all edge attributes into a matrix (and take care of & for xml)
  eAttrNames <- setdiff(list.edge.attributes(g), "weight") 
  edgesAtt <- data.frame(sapply(eAttrNames, function(attr) sub("&", "&amp;", get.edge.attribute(g, attr))))
  
  # combine all graph attributes into a meta-data
  graphAtt <- sapply(list.graph.attributes(g), function(attr) sub("&", "&amp;", get.graph.attribute(g, attr)))
  
  # generate the gexf object
  output <- write.gexf(nodes, edges, 
                       edgesWeight=E(g)$weight,
                       edgesAtt = edgesAtt,
                       nodesAtt = nodesAtt,
                       meta=c(list(creator="Adapted from Gopalakrishna Palem", description="igraph -> gexf converted file", keywords="igraph, gexf, R, rgexf"), graphAtt))
  
  print(output, filepath, replace=T)
}
saveAsGEXF(brexit.votes.1mode.graph, "./output/brexit_votes.gexf")

2 Practicum: Multidimensional Scaling

Let’s assume we do not have the geographical coordinates of a set of places; we only have a matrix of distances between each pair of points. Can we draw an accurate map using the between-places distances?

Multidimensional scaling finds a configuration of the observations (e.g. places, cities) in a low-dimensional map (we will work with 2D maps) such that the original pair-wise between-places distances are preserved as much as possible.

Distances between cities are, broadly, proximity scores, i.e., dis/similarity scores:

  • similarity: the higher the pair-wise score, the higher the proximity;
  • dissimilarity: the higher the pair-wise score, the higher the distance;

“Multidimensional scaling (MDS) is a technique for the analysis of similarity or dissimilarity data on a set of objects. MDS attempts to model such data as distances among points in a geometric space. The main reason for doing this is that one wants a graphical display of the structure of the data, one that is much easier to understand than an array of numbers and, moreover, one that displays the essential information in the data, smoothing out noise.” (Borg and Groenen, 2005)

MDS transforms the proximity values into disparities, using a transformation function such that:

\[disparity_{ij} = f(proximity_{ij})\]

The choice of transformation defines the types of MDS models (metric or non-metric). How the algorithm works:

  1. Assign points to arbitrary coordinates in p-dimensional space;
  2. Compute Euclidean distances among all pairs of points to form the \(\hat{\delta}\) matrix;
  3. Compare the \(\hat{\delta}\) matrix with the input \(\delta\) matrix by evaluating the stress function. The smaller the value, the greater the correspondence between the two;
  4. Adjust the coordinates of each point in the direction that best minimises the stress level;
  5. Repeat steps 2 through 4 until the stress can no longer be reduced (or falls below a chosen threshold).

\[\text{Stress-1} = \sigma_1 = \sqrt{\frac{ \sum[ \hat{d}_{ij} - d_{ij} ]^2 }{\sum d_{ij}^2}} = \sqrt{\frac{\sum[f(p_{ij}) - d_{ij}]^2}{\sum d_{ij}^2}}\] where \(\hat{d}_{ij} = f(p_{ij})\) are the disparities (the transformed input proximities) and \(d_{ij}\) are the pair-wise distances in the fitted configuration.

This statistic compares the (transformed) input data directly to the map distances. It goes to 0 if the MDS solution perfectly reproduces the measured distances. Stress levels (rule of thumb):

  • \(< 0.1\): excellent;
  • \(> 0.15\): unacceptable;

In general, longer distances tend to be more accurate than shorter distances, so larger patterns are still visible even when stress is high. High stress means that there are distortions of the input data, which can be spread out or concentrated. The stress function accentuates discrepancies in larger distances.
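
To make the formula concrete, here is a minimal sketch that recomputes Stress-1 by hand from a smacof fit. The slot names are an assumption: recent smacof versions expose $dhat and $confdist (older versions use $confdiss), and the two numbers should agree up to optimisation precision.

library(smacof)
fit <- mds(delta = eurodist, ndim = 2, type = "ratio")  # eurodist ships with base R
# Stress-1 from the disparities (dhat) and the fitted configuration distances
sigma1 <- sqrt(sum((fit$dhat - fit$confdist)^2) / sum(fit$confdist^2))
c(by_hand = sigma1, reported = fit$stress)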

The choice of the transformation defines the MDS model. There are three options: ratio MDS and interval MDS (both metric scaling), where \(p_{ij} \longrightarrow b \cdot p_{ij}\) and \(p_{ij} \longrightarrow a + b \cdot p_{ij}\), respectively; and ordinal MDS (non-metric), where the transformation function only preserves the rank order of the proximities, \(p_{ij} < p_{kl} \Rightarrow d_{ij} \leq d_{kl}\). The absolute values of distances in non-metric MDS are not meaningful, since the algorithm only tries to preserve the rank order, i.e. relative distances.
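
To see how the choice of transformation matters in practice, one can refit the same distance matrix under the three models and compare the resulting stress values (a sketch using the built-in UScitiesD data; the less restrictive ordinal model typically attains the lowest stress):

library(smacof)
# Same data, three transformation types; compare the resulting Stress-1 values
sapply(c("ratio", "interval", "ordinal"),
       function(tp) mds(delta = UScitiesD, ndim = 2, type = tp)$stress)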

In R, there are several functions/packages for MDS:

  • cmdscale in the stats package;
  • mds in the smacof package (Jan de Leeuw and Patrick Mair, 2009).

A note on interpretation:

  • axes are meaningless unless we scale them;
  • direction is arbitrary (see the sketch below);
  • we can never know how many dimensions there are; mathematically, a non-zero stress value occurs for only one reason: insufficient dimensionality.
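
A quick check of the “direction is arbitrary” point (a minimal sketch using the built-in UScitiesD data): reflecting a configuration leaves every inter-point distance, and hence the stress, unchanged.

conf.points <- cmdscale(UScitiesD, k = 2)        # classical MDS configuration (stats::cmdscale)
all.equal(dist(conf.points), dist(-conf.points)) # identical distances after reflection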

library(maps)
library(googleAuthR)
library(ggmap)
library(smacof)
library(grid)
library(rworldmap)

2.1 MDS using distance data between US cities. Steps:

    1. Get geocoded location of American cities
states <- map_data("state")
geo.city <-  c("Atlanta", "Boston", "Dallas", "Indianapolis", "Los Angeles", "Memphis", "St. Louis", "Spokane", "Tempa")
#register google API key to activate Geocoding
#register_google(key="YOUR_GOOGLE_API_KEY")  # never publish a real API key
has_google_key()
## [1] TRUE
geo.codes <- geocode(geo.city)
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Atlanta&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Boston&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Dallas&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Indianapolis&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Los+Angeles&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Memphis&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=St.+Louis&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Spokane&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Tempa&key=xxx
geo.data <- data.frame(name = geo.city, lon = geo.codes[, 1], lat = geo.codes[, 2])
  2. Plot map
usa <- data.frame(region=states$region, group=states$group, lon=states$long, lat=states$lat)
ggplot() + geom_polygon(data = usa, aes(x=lon, y=lat, group = group, fill=I("grey45"))) + 
    geom_path(color = "gray") + #coord_map(project="globular") +
    xlab("Longitude") + ylab("Latitude") +
    geom_point(data= geo.data, mapping=aes(x = geo.data$lon, y = geo.data$lat)) +
    geom_text(data=geo.data, aes(x = geo.data$lon, y = geo.data$lat, label=name),hjust=0, vjust=0)

  3. Get distances between each city dyad and run metric MDS
usacitydist <- as.matrix(UScitiesD)
print(usacitydist)
##               Atlanta Chicago Denver Houston LosAngeles Miami NewYork
## Atlanta             0     587   1212     701       1936   604     748
## Chicago           587       0    920     940       1745  1188     713
## Denver           1212     920      0     879        831  1726    1631
## Houston           701     940    879       0       1374   968    1420
## LosAngeles       1936    1745    831    1374          0  2339    2451
## Miami             604    1188   1726     968       2339     0    1092
## NewYork           748     713   1631    1420       2451  1092       0
## SanFrancisco     2139    1858    949    1645        347  2594    2571
## Seattle          2182    1737   1021    1891        959  2734    2408
## Washington.DC     543     597   1494    1220       2300   923     205
##               SanFrancisco Seattle Washington.DC
## Atlanta               2139    2182           543
## Chicago               1858    1737           597
## Denver                 949    1021          1494
## Houston               1645    1891          1220
## LosAngeles             347     959          2300
## Miami                 2594    2734           923
## NewYork               2571    2408           205
## SanFrancisco             0     678          2442
## Seattle                678       0          2329
## Washington.DC         2442    2329             0
mds.usa <- mds(delta = usacitydist,ndim = 2, type = "ratio")
print(mds.usa$stress)
## [1] 0.001851192
mds.usa$conf[,1] <- cmdscale(UScitiesD, k=2, eig=T)$points[,1]
mds.usa$conf[,2] <- cmdscale(UScitiesD, k=2, eig=T)$points[,2]
ggplot() + geom_point(data = data.frame(as.matrix(mds.usa$conf)), mapping = aes(x = mds.usa$conf[,1], y = mds.usa$conf[,2]), alpha = 0.5, color = "blue", size = 5) + 
    geom_text(data = as.data.frame(as.matrix(mds.usa$conf)), mapping = aes(x = mds.usa$conf[,1], y = mds.usa$conf[,2]), label = rownames(mds.usa$conf))

# Flip the axis
ggplot() +  geom_point(data = data.frame(as.matrix(mds.usa$conf)) , mapping = aes(x = -mds.usa$conf[,1] , y = -mds.usa$conf[,2]), alpha = 0.5 , color = "blue", size = 5 ) + geom_text(data = as.data.frame(as.matrix(mds.usa$conf)), mapping = aes(x = -mds.usa$conf[,1] , y = -mds.usa$conf[,2]), label = rownames(mds.usa$conf) )

  4. Re-scale MDS coordinates by the mean and standard deviation of the real longitude and latitude values. Formula: \(y_i = \mu_2 + (x_i-\mu_1) \cdot \frac{s_2}{s_1}\) (a reusable helper is sketched after the code below)
real.mean <- apply(geo.data[,2:3], 2, mean)
real.sd <- apply(geo.data[,2:3], 2, sd)
usacity.mds.loc <- data.frame(city = rownames(as.matrix(mds.usa$conf)), d1 = -data.frame(as.matrix(mds.usa$conf))$D1, d2 = -data.frame(as.matrix(mds.usa$conf))$D2)
usacity.mds.loc.rscaled <- usacity.mds.loc
# rescale dimension 1 with longitude scale
usacity.mds.loc.rscaled$d1 <- as.numeric(real.mean[which(names(real.mean)=="lon")]) + (usacity.mds.loc.rscaled$d1-mean(usacity.mds.loc.rscaled$d1)) * ( as.numeric(real.sd[which(names(real.mean)=="lon")])/sd(usacity.mds.loc.rscaled$d1))
# rescale dimension 2 with latitude scale
usacity.mds.loc.rscaled$d2 <- as.numeric(real.mean[which(names(real.mean)=="lat")]) + (usacity.mds.loc.rscaled$d2-mean(usacity.mds.loc.rscaled$d2)) * ( as.numeric(real.sd[which(names(real.mean)=="lat")])/sd(usacity.mds.loc.rscaled$d2)) 
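
The two re-scaling lines above can be wrapped in a small helper function (a sketch; rescale_to() is a hypothetical name used only for illustration):

# y_i = mu_2 + (x_i - mu_1) * s_2 / s_1 : give x the mean and sd of a reference vector
rescale_to <- function(x, ref) mean(ref) + (x - mean(x)) * sd(ref) / sd(x)
# Hypothetical usage, equivalent to the two lines above:
# usacity.mds.loc.rscaled$d1 <- rescale_to(usacity.mds.loc$d1, geo.data$lon)
# usacity.mds.loc.rscaled$d2 <- rescale_to(usacity.mds.loc$d2, geo.data$lat)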
  5. Join re-scaled MDS map with Cartesian map
ggplot() + geom_polygon(data = usa, aes(x = lon, y = lat, group = group, fill = I("grey45"))) + 
              geom_path(color = "gray") + xlab("Longitude") + ylab("Latitude") + #coord_map(project="globular") +
              geom_point(data = geo.data, mapping=aes(x = geo.data$lon, y = geo.data$lat)) +
              geom_text(data = geo.data, aes(x = geo.data$lon, y = geo.data$lat, label = name),hjust=0, vjust=0) +
              # Add rescaled MDS map as a new geom_point layer
              geom_point(data = usacity.mds.loc.rscaled , mapping = aes(x = usacity.mds.loc.rscaled$d1 , y = usacity.mds.loc.rscaled$d2), 
                alpha = 0.5 , color = "blue", size = 2 )

2.2 MDS between European cities

worldMap <- getMap()
europeanUnion <- c("Austria","Belgium","Bulgaria","Croatia","Cyprus",
                   "Czech Rep.","Denmark","Estonia","Finland","France",
                   "Germany","Greece","Hungary","Ireland","Italy","Latvia",
                   "Lithuania","Luxembourg","Malta","Netherlands","Poland",
                   "Portugal","Romania","Slovakia","Slovenia","Spain",
                   "Sweden","United Kingdom")
indEU <- which(worldMap$NAME%in%europeanUnion)
europeCoords <- lapply(indEU, function(i){
  df <- data.frame(worldMap@polygons[[i]]@Polygons[[1]]@coords)
  df$region =as.character(worldMap$NAME[i])
  colnames(df) <- list("long", "lat", "region")
  return(df)
})
europeCoords <- do.call("rbind", europeCoords)
value <- sample(x = seq(0,3,by = 0.1), size = length(europeanUnion),
                replace = TRUE)
europeanUnionTable <- data.frame(country = europeanUnion, value = value)
europeCoords$value <- europeanUnionTable$value[match(europeCoords$region,europeanUnionTable$country)]
ggplot() + geom_polygon(data = europeCoords, aes(x = long, y = lat, group = region, fill = I("grey45")),
                             colour = "black", size = 0.1) + coord_map(xlim = c(-13, 35),  ylim = c(32, 71))

europcitycoord <- rio::import("./data/european_cities_coord.xlsx")
europcitycoord$long <- as.numeric(europcitycoord$long)
ggplot() + geom_polygon(data = europeCoords, aes(x = long, y = lat, group = region, fill = I("grey45")),
                             colour = "black", size = 0.1) + coord_map(xlim = c(-13, 35),  ylim = c(32, 71)) +
          geom_point(data= europcitycoord, mapping=aes(x = europcitycoord$long, y = europcitycoord$lat)) +
          geom_text(data=europcitycoord, aes(x = europcitycoord$long, y = europcitycoord$lat, label=city), size=3, hjust=0, vjust=0)

  • Distance data and MDS
europcitydist <- as.matrix(eurodist)
europcitydist <- europcitydist[-9, -9]  # drop the 9th city (Gibraltar)
mds <- mds(delta = europcitydist,ndim = 2, type = "ratio" )
print(mds$stress)
## [1] 0.07819836
ggplot() +  geom_point(data = data.frame(as.matrix(mds$conf)) , mapping = aes(x = mds$conf[,1] , y = mds$conf[,2]), 
              alpha = 0.5 , color = "blue", size = 5 ) + geom_text(data = as.data.frame(as.matrix(mds$conf)), 
              mapping = aes(x = mds$conf[,1] , y = mds$conf[,2]), label = rownames(mds$conf) )

## Are the axes flipped? Do they need adjustment?
real.mean <- apply(europcitycoord[,2:3], 2, mean)
real.sd <- apply(europcitycoord[,2:3], 2, sd)
## Need to adjust d1 and d2 below with correct signs?
city.mds.loc <- data.frame(city = rownames(as.matrix(mds$conf)), d1 = data.frame(as.matrix(mds$conf))$D1, d2 = data.frame(as.matrix(mds$conf))$D2)
city.mds.loc.rscaled <- city.mds.loc
city.mds.loc.rscaled$d1 <- as.numeric(real.mean[2]) + (city.mds.loc.rscaled$d1-mean(city.mds.loc.rscaled$d1)) * ( as.numeric(real.sd[2])/sd(city.mds.loc.rscaled$d1)) # rescale dimension 1 with longitude scale
city.mds.loc.rscaled$d2 <- as.numeric(real.mean[1]) + (city.mds.loc.rscaled$d2-mean(city.mds.loc.rscaled$d2)) * ( as.numeric(real.sd[1])/sd(city.mds.loc.rscaled$d2)) # rescale dimension 2 with latitude scale
ggplot() + geom_polygon(data = europeCoords, aes(x = long, y = lat, group = region, fill = I("grey45")),
                             colour = "black", size = 0.1) + coord_map(xlim = c(-13, 35),  ylim = c(32, 71)) +
          geom_point(data= europcitycoord, mapping=aes(x = europcitycoord$long, y = europcitycoord$lat)) +
          geom_text(data=europcitycoord, aes(x = europcitycoord$long, y = europcitycoord$lat, label=city), size=3, hjust=0, vjust=0) +
          geom_point(data = city.mds.loc.rscaled , mapping = aes(x = city.mds.loc.rscaled$d1 , y = city.mds.loc.rscaled$d2), 
              alpha = 0.5 , color = "blue", size = 2 )

2.3 MDS on geodesic distances

campnet
##         HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL BILL LEE DON
## HOLLY       0      0     0   0   0      0       0   0       0    0   0   0
## BRAZEY      0      0     0   0   0      0       0   0       0    1   0   0
## CAROL       1      0     0   0   0      0       0   0       0    1   0   0
## PAM         0      0     0   0   0      0       0   0       0    0   1   0
## PAT         0      0     0   0   0      0       0   0       0    1   0   0
## JENNIE      0      0     0   0   0      0       0   0       0    1   0   0
## PAULINE     0      0     0   0   0      0       0   0       0    1   0   0
## ANN         0      0     0   0   0      0       0   0       0    0   1   0
## MICHAEL     0      0     0   0   0      0       0   0       0    1   0   0
## BILL        0      0     0   0   0      1       1   0       0    0   0   0
## LEE         0      0     0   0   0      0       0   1       0    0   0   0
## DON         0      0     0   0   0      1       0   1       0    0   0   0
## JOHN        1      0     0   0   0      0       0   1       0    0   0   0
## HARRY       0      1     0   0   0      1       0   0       0    0   0   0
## GERY        0      0     0   0   0      0       1   1       0    0   0   0
## STEVE       0      0     0   0   0      0       0   0       0    1   0   0
## BERT        0      0     0   0   0      0       0   0       0    1   0   0
## RUSS        0      0     0   1   1      0       0   0       0    0   0   0
##         JOHN HARRY GERY STEVE BERT RUSS
## HOLLY      1     0    1     0    0    0
## BRAZEY     0     0    1     0    0    0
## CAROL      0     0    0     0    0    0
## PAM        0     0    1     0    0    0
## PAT        0     0    1     0    0    0
## JENNIE     0     0    1     0    0    0
## PAULINE    0     0    1     0    0    0
## ANN        1     0    0     0    0    0
## MICHAEL    0     0    0     0    0    1
## BILL       0     0    0     0    0    0
## LEE        0     0    1     0    0    0
## DON        0     0    0     0    0    0
## JOHN       0     0    0     0    0    0
## HARRY      0     0    0     0    0    0
## GERY       0     0    0     0    0    0
## STEVE      1     0    0     0    0    0
## BERT       1     0    0     0    0    0
## RUSS       0     0    0     0    0    0
camp.geo <- igraph::distances(igraph::graph_from_adjacency_matrix(campnet, mode="directed"), mode="all")
camp.geo
##         HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL BILL LEE DON
## HOLLY       0      2     1   2   2      2       2   2       3    2   2   3
## BRAZEY      2      0     2   2   2      2       2   2       2    1   2   3
## CAROL       1      2     0   3   2      2       2   3       2    1   3   3
## PAM         2      2     3   0   2      2       2   2       2    3   1   3
## PAT         2      2     2   2   0      2       2   2       2    1   2   3
## JENNIE      2      2     2   2   2      0       2   2       2    1   2   1
## PAULINE     2      2     2   2   2      2       0   2       2    1   2   3
## ANN         2      2     3   2   2      2       2   0       4    3   1   1
## MICHAEL     3      2     2   2   2      2       2   4       0    1   3   3
## BILL        2      1     1   3   1      1       1   3       1    0   3   2
## LEE         2      2     3   1   2      2       2   1       3    3   0   2
## DON         3      3     3   3   3      1       3   1       3    2   2   0
## JOHN        1      3     2   3   3      3       3   1       3    2   2   2
## HARRY       3      1     3   3   3      1       3   3       3    2   3   2
## GERY        1      1     2   1   1      1       1   1       3    2   1   2
## STEVE       2      2     2   4   2      2       2   2       2    1   3   3
## BERT        2      2     2   4   2      2       2   2       2    1   3   3
## RUSS        3      3     3   1   1      3       3   3       1    2   2   4
##         JOHN HARRY GERY STEVE BERT RUSS
## HOLLY      1     3    1     2    2    3
## BRAZEY     3     1    1     2    2    3
## CAROL      2     3    2     2    2    3
## PAM        3     3    1     4    4    1
## PAT        3     3    1     2    2    1
## JENNIE     3     1    1     2    2    3
## PAULINE    3     3    1     2    2    3
## ANN        1     3    1     2    2    3
## MICHAEL    3     3    3     2    2    1
## BILL       2     2    2     1    1    2
## LEE        2     3    1     3    3    2
## DON        2     2    2     3    3    4
## JOHN       0     4    2     1    1    4
## HARRY      4     0    2     3    3    4
## GERY       2     2    0     3    3    2
## STEVE      1     3    3     0    2    3
## BERT       1     3    3     2    0    3
## RUSS       4     4    2     3    3    0
camp.mds <- smacof::mds(camp.geo, ndim=2, type="ordinal")
ggplot() +  geom_point(data = data.frame(as.matrix(camp.mds$conf)) , mapping = aes(x = camp.mds$conf[,1] , y = camp.mds$conf[,2]), alpha = 0.5 , color = "blue", size = 5 ) + geom_text(data = as.data.frame(as.matrix(camp.mds$conf)), mapping = aes(x = camp.mds$conf[,1] , y = camp.mds$conf[,2]), label = rownames(camp.mds$conf))

camp.mds$stress
## [1] 0.1002575

3 Practicum: Twitter network

library(twitteR)
library(ROAuth)
library(beepr)

3.1 Twitter authentication

  • streamR supports (and recommends!) authentication via OAuth. The same OAuth token can be used for both twitteR and streamR. After creating an application and obtaining the consumer key and consumer secret, we can create OAuth credentials using the ROAuth package, which can be saved to local disk for future sessions:
  • Here we bypass the OAuth authentication handshake by supplying the access_token and access_token_secret directly
# Step 1: set up credentials

credentials <- list(consumer_key = "CONSUMER_KEY",
   consumer_secret = "CONSUMER_SECRET",
   access_token="ACCESS_TOKEN",
   access_token_secret = "ACCESS_TOKEN_SECRET")
# Step 2: authenticate
my_oauth <- twitteR::setup_twitter_oauth(credentials$consumer_key,credentials$consumer_secret,credentials$access_token,credentials$access_token_secret)
save(my_oauth, file="./authentication/my_oauth.Rdata")
load("./authentication/my_oauth.Rdata")

3.2 Iterative following network

Let’s pick Elias Dinas and use his Twitter connections to construct a network of EUI scholars.

## Getting data for seed user
seed <- getUser("@EliasDinas")
seed.n <- seed$screenName
print(seed.n)
## [1] "EliasDinas"

Get the Twitter users Elias is following, and the users that follow Elias (we won’t use the latter in this lab).

following <- seed$getFriends()
following.n <- as.character(lapply(following, function(x) x$getScreenName()))
followers <- seed$getFollowers()
followers.n <- as.character(lapply(followers, function(x) x$getScreenName()))
head(following)
## $`777915348335091712`
## [1] "St_Theodorakis"
## 
## $`338899906`
## [1] "salvadostv"
## 
## $`264963674`
## [1] "tediperez"
## 
## $`1040534176599089152`
## [1] "ThistedPeter"
## 
## $`87291538`
## [1] "thistlejohn"
## 
## $`786537772987191296`
## [1] "Tsipras_int2"
head(following.n)
## [1] "St_Theodorakis" "salvadostv"     "tediperez"      "ThistedPeter"  
## [5] "thistlejohn"    "Tsipras_int2"
# storing in follow.list
follow.list <- list()
follow.list[[seed.n]] <- following.n
names(follow.list)
## [1] "EliasDinas"
follow.list[[seed.n]][1:5] # So far the list only has Elias and his friends
## [1] "St_Theodorakis" "salvadostv"     "tediperez"      "ThistedPeter"  
## [5] "thistlejohn"

Get the descriptions of the users Elias is following and search them for “EUI”-related strings.

descriptions <- as.character(lapply(following, function(x) x$getDescription()))
## extracting description of users
descriptions[1:5]
## [1] "Ο επικεφαλής του Ποταμιού συνομιλεί μαζί σας (ενίοτε με τη βοήθεια συνεργατών του)."                                                                                                                         
## [2] "\U0001f4fa Con @JordiEvole en laSexta | Somos @delbarriotv | También @salvadostv en Instagram y Facebook."                                                                                                   
## [3] "Promoteo. Limpio, fijo y doy esplendor a lo que haya que comunicar."                                                                                                                                         
## [4] "Professor of Political Science, Univ. of Copenhagen."                                                                                                                                                        
## [5] "Citizen of nowhere, HE tourist, second rate Canadian. Made in \U0001f1ec\U0001f1f7 born in \U0001f1e8\U0001f1e6 raised in \U0001f1e7\U0001f1ea loitered in \U0001f3f4󠁧󠁢󠁳󠁣󠁴󠁿 Westminster correspondent @thescotsman"
# function to subset only users tied to EUI
extract.eui <- function(descriptions){
    output <- grep('\\beui\\b|\\beuropean university institute\\b|\\bEuropeanUni\\b', descriptions, ignore.case=TRUE)
    return(output)
}
eui.following <- extract.eui(descriptions)
descriptions[eui.following][1:10]
##  [1] "@ICPSRSummer European Field Experiments Summer School, July 8-12 2019, @europeanuni"                                                                             
##  [2] "#eu, #migration, public policy, #bigdata (and small), research methods & design, #dataviz, politics, public opinion, #bureaucracy @PA_UniLeiden @EuropeanUni"    
##  [3] "Political Economist | Assistant Prof at @CUNEF | PhD from @Europeanuni | Jo sóc qui sóc, si vols veure'm em veus, Ovidi Montllor."                               
##  [4] "Postdoc @GSI_Muenchen. PhD @EuropeanUni. Comparative politics & political sociology. «Mourrons pour des idées, d'accord... mais de mort lente!». Own views, etc."
##  [5] "PhD researcher in #PoliSci at @EuropeanUni; co-organizer of the @BehaviourEui  | Previously @ISCTEIUL |  Political behaviour, radical right, protest politics."  
##  [6] "International hub of high quality research and reflection on the European Union, launched in 2018 & hosted by the @RobSchuCentre at the @EuropeanUni"            
##  [7] "Chair in Migration Studies at @europeanuni @robschucentre Deputy Director of @migrpolcentre. \nEconomics | Politics | Migration | Public Policy."                
##  [8] "Professor, European University Institute. Directs Cultural Pluralism Area. Teaches at College of Europe. Editor of Journal of Immigrant and Refugee Studies."    
##  [9] "I'm Director and Professor at the @RobSchuCentre and Director of the Global Governance Programme @EuropeanUni"                                                   
## [10] "The European University Institute (EUI) is a distinctly international postgraduate teaching and research institute, opened in 1976."
# Get screen names for twitter accounts descriptions matching "eui"
eui.users <- c(seed$screenName, following.n[eui.following])
# Elias is following 15 EUI-related twitter accounts
print(eui.users)[1:10]
##  [1] "EliasDinas"      "EFESS2017"       "DToshkov"       
##  [4] "moragasai"       "guillemvidal_"   "ValentimVicente"
##  [7] "EGPProg"         "MartinRuhs"      "triandafyllidou"
## [10] "BrigidLaffan"    "EuropeanUni"     "EUISoU"         
## [13] "RobSchuCentre"   "JeroenMoes"      "jamayoralda"    
## [16] "gpapak"
##  [1] "EliasDinas"      "EFESS2017"       "DToshkov"       
##  [4] "moragasai"       "guillemvidal_"   "ValentimVicente"
##  [7] "EGPProg"         "MartinRuhs"      "triandafyllidou"
## [10] "BrigidLaffan"

Loop over the Twitter accounts connected to Elias that mention EUI in their bio: retrieve their friends, parse the friends’ bios for EUI mentions, update the list of EUI users, and repeat.

while (length(eui.users) > length(follow.list)){

    # pick a random user not yet processed
    user <- sample(eui.users[!eui.users %in% names(follow.list)], 1)
    user <- getUser(user)
    user.n <- user$screenName
    cat(user.n, "\n")

    # download list of users he/she follows
    limit <- getCurRateLimitInfo()[55,3]
    while (limit == "0"){
        cat("sleeping for one minute")
        Sys.sleep(70)
        limit <- getCurRateLimitInfo()[55,3]
    }
    if(user$protected){
      follow.list[[user.n]] <- ""
      next}
    following <- user$getFriends()
    friends <- as.character(lapply(following, function(x) x$getScreenName()))
    follow.list[[user.n]] <- friends
    descriptions <- as.character(lapply(following, function(x) x$getDescription()))

    # subset and add users from EUI
    eui <- extract.eui(descriptions)
    new.users <- lapply(following[eui], function(x) x$getScreenName())
    new.users <- as.character(new.users)
    eui.users <- unique(c(eui.users, new.users))

    # if rate limit is hit, wait for a minute
    limit <- getCurRateLimitInfo()[44,3]
    while (limit == "0"){
        cat("sleeping for one minute")
        Sys.sleep(60)
        limit <- getCurRateLimitInfo()[44,3]
    }
    print(eui.users)
}
# This package will save your life. It beeps when the code stops running.
beepr::beep(sound=3)
length(eui.users) # EUI users identified in this iterative procedure
## [1] 283
length(follow.list) # EUI users for whom we retrieved list of followers
## [1] 47
# Plotting network of EUI users

eui.users.retrieved <- names(follow.list)
adjMatrix <- lapply(follow.list, function(x) (eui.users.retrieved %in% x)*1)
adjMatrix <- matrix(unlist(adjMatrix), nrow=length(eui.users.retrieved), byrow=TRUE, dimnames=list(eui.users.retrieved, eui.users.retrieved))
network <- igraph::graph.adjacency(adjMatrix)
igraph::V(network)$size <- igraph::degree(network, mode="in")
igraph::V(network)$label.cex <- (igraph::degree(network, mode="in")/max(igraph::degree(network, mode="in"))*1.25)+0.3
set.seed(777)
l <- igraph::layout.fruchterman.reingold(network, niter=5000)
plot(network, layout=l, edge.width=0.5, edge.arrow.size=0.1, vertex.shape="none", margin=-.2,
     vertex.label.font=1, vertex.label.color="gray40")

saveAsGEXF(network, "./output/eui.gexf")

3.3 Retweets network

library(streamR)
filterStream(file.name="./data/alabama-abortion-streaming-tweets.json", track=c("#AlabamaHatesWomen","#AlabamaSenate","#AlabamaAbortionBan"),locations=c(-125, 25, -66, 50),timeout=100, oauth = credentials)
tweets <- parseTweets("./data/alabama-abortion-streaming-tweets.json")
## 2179 tweets have been parsed.

We then use the maps library to see where most tweets are coming from. Note that tweets carry two types of geographic information: lat/lon (from geolocated tweets) and place_lat/place_lon (from tweets with place information). We will work with whichever is available.

library(maps)
library(ggplot2)
tweets$lat <- ifelse(is.na(tweets$lat), tweets$place_lat, tweets$lat)
tweets$lon <- ifelse(is.na(tweets$lon), tweets$place_lon, tweets$lon)
tweets.map <- tweets[!is.na(tweets$lat),]
states <- map.where("state", tweets.map$lon, tweets.map$lat)
head(sort(table(states), decreasing=TRUE))
## states
##        texas   california      florida         ohio pennsylvania 
##          204          122          102           72           65 
##      georgia 
##           64

Map of the exact locations of the tweets.

## First create a data frame with the map data 
map.data <- map_data("state")
# 1) map base
ggplot(map.data) + geom_map(aes(map_id = region), map = map.data, fill = "grey90", 
    color = "grey50", size = 0.25) + expand_limits(x = map.data$long, y = map.data$lat) + 
    # 2) limits for x and y axis
    scale_x_continuous(limits=c(-125,-66)) + scale_y_continuous(limits=c(25,50)) +
    # 3) adding the dot for each tweet
    geom_point(data = tweets, 
    aes(x = lon, y = lat), size = 1, alpha = 1, color = "blue") +
    # 4) removing unnecessary graph elements
    theme(axis.line = element_blank(), 
        axis.text = element_blank(), 
        axis.ticks = element_blank(), 
        axis.title = element_blank(), 
        panel.background = element_blank(), 
        panel.border = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        plot.background = element_blank()) 

  • Creating a network of retweets
# subset only RTs
rts <- tweets[grep("RT @", tweets$text),]

edges <- data.frame(
  node1 = rts$screen_name,
  node2 = gsub('.*RT @([a-zA-Z0-9_]+):? ?.*', "\\1", rts$text),
  stringsAsFactors=F
)

g <- simplify(igraph::graph_from_data_frame(d=edges, directed=TRUE))
plot(g, layout=igraph::layout.fruchterman.reingold(g, niter=5000), edge.width=0.1, edge.arrow.size=0.1, vertex.label=NA, vertex.size=2, edge.curved=.1)
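
To see what the regular expression extracts, here is a quick check on a made-up tweet text (hypothetical example):

example <- "RT @some_user: Alabama just passed the bill"
gsub('.*RT @([a-zA-Z0-9_]+):? ?.*', "\\1", example)
# The capture group keeps only the retweeted screen name, so this should return "some_user"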

4 Practicum: Centrality, Clustering & Transitivity

Import Brexit data

brexit.votes <- data.frame(readRDS("./data/indicative_votes.rds") %>% select(Member, Party, Vote, description))

# Keep only 'Aye' votes (drop 'No' and 'No Vote Recorded')
brexit.votes <- brexit.votes[-grep("No Vote Recorded|No", brexit.votes$Vote),]
brexit.votes <- data.frame(mp=brexit.votes$Member, party=brexit.votes$Party, motion=brexit.votes$description)
brexit.votes.2mode.edgelist <- brexit.votes[,c(1,3)]
brexit.votes.2mode.adj <- as.matrix(table(brexit.votes.2mode.edgelist))
brexit.votes.2mode.adj[1:10, 1:7]
##                 motion
## mp               Baron (No Deal) Beckett (Public vote) Boles (CM2.0)
##   Adam Afriyie                 1                     0             0
##   Adam Holloway                1                     0             0
##   Adrian Bailey                0                     1             1
##   Afzal Khan                   0                     1             1
##   Alan Brown                   0                     1             0
##   Alan Campbell                0                     1             0
##   Alan Duncan                  0                     0             0
##   Alan Mak                     1                     0             0
##   Alan Whitehead               0                     1             1
##   Albert Owen                  0                     1             1
##                 motion
## mp               Cherry (Revocation) Clarke (CU) Corbyn (Lab.)
##   Adam Afriyie                     0           0             0
##   Adam Holloway                    0           0             0
##   Adrian Bailey                    0           1             1
##   Afzal Khan                       0           1             1
##   Alan Brown                       1           0             0
##   Alan Campbell                    0           1             1
##   Alan Duncan                      1           0             0
##   Alan Mak                         0           0             0
##   Alan Whitehead                   1           1             1
##   Albert Owen                      1           1             1
##                 motion
## mp               Eustace (EFTA)
##   Adam Afriyie                0
##   Adam Holloway               0
##   Adrian Bailey               0
##   Afzal Khan                  0
##   Alan Brown                  0
##   Alan Campbell               0
##   Alan Duncan                 0
##   Alan Mak                    0
##   Alan Whitehead              0
##   Albert Owen                 0
# Creating igraph object for 2-mode network
brexit.2mode.graph <- igraph::graph.incidence(brexit.votes.2mode.adj, mode="all")

Project two-mode network onto one-mode

# 1. Get one mode projection from igraph function

igraph::V(brexit.2mode.graph)$type <- igraph::bipartite.mapping(brexit.2mode.graph)$type
brexit.votes.1mode.proj <- igraph::bipartite.projection(brexit.2mode.graph)
plot(brexit.votes.1mode.proj$proj1) # rows projection, [mp X mp] matrix

plot(brexit.votes.1mode.proj$proj2) # columns projection, [motion X motion] matrix

# 2. Without igraph's canned function

brexit.votes.1mode.proj <- brexit.votes.2mode.adj %*% t(brexit.votes.2mode.adj)
# Alternatively, tcrossprod(brexit.votes.2mode.adj) gives the same result and is faster, as it avoids explicitly forming the transpose
brexit.votes.1mode.proj[1:5, 1:5] 
##                mp
## mp              Adam Afriyie Adam Holloway Adrian Bailey Afzal Khan
##   Adam Afriyie             2             2             0          0
##   Adam Holloway            2             2             0          0
##   Adrian Bailey            0             0             4          4
##   Afzal Khan               0             0             4          4
##   Alan Brown               0             0             1          1
##                mp
## mp              Alan Brown
##   Adam Afriyie           0
##   Adam Holloway          0
##   Adrian Bailey          1
##   Afzal Khan             1
##   Alan Brown             2
brexit.votes.1mode.graph <- igraph::graph.adjacency(brexit.votes.1mode.proj, mode="undirected", weighted=TRUE, diag=FALSE) # Weighted = TRUE (MPs vote on MULTIPLE motions together)
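
As a quick sanity check (a sketch), the two approaches should agree: bipartite.projection() stores the number of motions two MPs co-voted on as the edge weight, which should match the off-diagonal of the matrix product.

proj1 <- igraph::bipartite.projection(brexit.2mode.graph, multiplicity = TRUE)$proj1
manual <- brexit.votes.2mode.adj %*% t(brexit.votes.2mode.adj)
all(igraph::as_adjacency_matrix(proj1, attr = "weight", sparse = FALSE) ==
      manual - diag(diag(manual)))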

# Set graph, edge and node attributes
brexit_layout <- igraph::layout.fruchterman.reingold(brexit.votes.1mode.graph)
party <- data.frame(mp=as.character(igraph::V(brexit.votes.1mode.graph)$name)) %>% left_join(brexit.edgelist[,1:2])
## Joining, by = "mp"
## Warning: Column `mp` joining factor and character vector, coercing into
## character vector
party$party <- as.character(party$party)
party <- party %>% mutate(color = case_when(party=="Conservative" ~ "blue" , 
                                      party=="Democratic Unionist Party" ~ "pink",
                                      party=="Green Party" ~ "green", 
                                      party=="Independent" ~ "gray",
                                      party=="Labour" ~ "red", 
                                      party=="Liberal Democrat" ~ "orange",
                                      party=="Plaid Cymru" ~ "darkgreen",
                                      party=="Scottish National Party" ~ "yellow"))
igraph::V(brexit.votes.1mode.graph)$party <- as.character(party$party)
igraph::V(brexit.votes.1mode.graph)$color <- as.character(party$color)
igraph::V(brexit.votes.1mode.graph)$label.color <- "black"
igraph::E(brexit.votes.1mode.graph)$width <- igraph::E(brexit.votes.1mode.graph)$weight/20
igraph::E(brexit.votes.1mode.graph)$edge.color <- "gray10"
edge.start <- igraph::ends(brexit.votes.1mode.graph, es=igraph::E(brexit.votes.1mode.graph), names=F)[,1]
edge.col <- igraph::V(brexit.votes.1mode.graph)$color[edge.start]

4.1 Local network measures

local.net.stats <- function(graph.obj) {
  require(igraph)
  g <- graph.obj
  if (igraph::is.directed(g)) {
    # Centrality
      V(g)$indegree <- degree(g, mode = "in")
      V(g)$outdegree <- degree(g, mode = "out")
      V(g)$ineigenvector <-eigen_centrality(g, directed = TRUE)$vector # Only returns in-eigenvector centrality
      V(g)$eigenvector <- eigen_centrality(as.undirected(g, mode = "collapse"))$vector
      if(!is.connected(g)) print("Warning: the graph is disconnected; closeness centrality is undefined")
      V(g)$incloseness <- closeness(g, mode = "in")
      V(g)$outcloseness <- closeness(g, mode = "out")
      V(g)$betweenness <- betweenness(g, directed = TRUE)
    # Cutpoints
      V(g)$cutpoints <- 0
      V(g)$cutpoints[articulation.points(g)] <- 1
    # Transitivity
      V(g)$transitivity <-transitivity(g, type = "local") # The local transitivity of a vertex is the ratio of the triangles connected to the
      # vertex and the triples centered on the vertex. For directed graphs the direction of the edges is ignored.
    # Edge Betweenness
      E(g)$edge.betwe <-edge_betweenness(g, directed = T, weights = NA) # Edge betweenness
      print(E(g)[which(E(g)$edge.betwe == max(E(g)$edge.betwe))]) ## EDGE WITH HIGHEST BETWEENNESS CENTRALITY
    msg <- paste0("Do you want to run the bridges routine? Based on the number of edges, it may take ",round((-99.3 + length(E(g))*0.0155)/60, digits=0)," minutes to run the procedure.")
    do.bridges <- menu(c("Yes", "No"), title=msg)
    if(as.integer(do.bridges)==1){
      #Bridges
        gcompstr.length <- length(lapply(unique( membership(clusters(g, mode="strong"))), function(x) induced.subgraph(g, which( membership(clusters(g, mode="strong"))==x))))
        E(g)$bridges <- 0
        for (e in 1:length(E(g))){
          g.del <- delete_edges(g, e)
          members <- membership(clusters(g.del, mode="strong"))
          g.del.length <- length(lapply(unique(members), function(x) induced.subgraph(g, which(members==x))))
          if(g.del.length>gcompstr.length) E(g)$bridges[e] <- 1
        }
        rm(g.del, g.del.length)
    }
    # Component membership
      sc <- clusters(g, mode = "strong")
      wc <- clusters(g, mode = "weak")
      V(g)$strong.component <- sc$membership
      V(g)$weak.component <- wc$membership
  } else {
    # Centrality
      V(g)$degree <- degree(g)
      V(g)$eigenvector <- eigen_centrality(g)$vector
      if(!is.connected(g)) print("Warning: the graph is disconnected; closeness centrality is undefined")
      V(g)$closeness <- closeness(g)
      V(g)$betweenness <- betweenness(g)
    # Transitivity
      V(g)$transitivity <-transitivity(g, type = "local") # The local transitivity of a vertex is the ratio of the triangles connected to the
      # vertex and the triples centered on the vertex. For directed graphs the direction of the edges is ignored.
    # Edge Betweenness
      E(g)$edge.betwe <-edge_betweenness(g, directed = F, weights = NA) # Edge betweenness
      print(E(g)[which(E(g)$edge.betwe == max(E(g)$edge.betwe))]) ## EDGE WITH HIGHEST BETWEENNESS CENTRALITY
    # Component membership
      V(g)$component <- clusters(g, mode = "weak")$membership
    # Cutpoints
      V(g)$cutpoints <- 0
      V(g)$cutpoints[articulation.points(g)] <- 1
    msg <- paste0("Do you want to run the bridges routine? Based on the number of edges, it may take ",round((-99.3 + length(E(g))*0.0155)/60, digits=0)," minutes to run the procedure.")
    do.bridges <- menu(c("Yes", "No"), title=msg)
    if(as.integer(do.bridges)==1){
      # Bridges
        gcompstr.length <- length(lapply(unique( membership(clusters(g, mode="weak"))), function(x) induced.subgraph(g, which( membership(clusters(g, mode="weak"))==x))))
        E(g)$bridges <- 0
        for (e in 1:length(E(g))){
          g.del <- delete_edges(g, e)
          members <- membership(clusters(g.del, mode="weak"))
          g.del.length <- length(lapply(unique(members), function(x) induced.subgraph(g, which(members==x))))
          if(g.del.length>gcompstr.length) E(g)$bridges[e] <- 1
        }
        rm(g.del, g.del.length)
    }
  }
  return(g)
}

4.2 Global network measures

global.net.stats <- function(graph.obj) {
  require(igraph)
  g <- graph.obj
  # Density
    g <- set.graph.attribute(g, 'density', graph.density(g))
  if(igraph::is.directed(g)){
      # Centralization
        g <- set.graph.attribute(g, 'centr.indeg', centr_degree(g, mode="in", normalized=T)$centralization)
        g <- set.graph.attribute(g, 'centr.outdeg', centr_degree(g, mode="out", normalized=T)$centralization)
        g <- set.graph.attribute(g, 'centr.inclos', centr_clo(g, mode="in", normalized=T)$centralization)
        g <- set.graph.attribute(g, 'centr.outclos', centr_clo(g, mode="out", normalized=T)$centralization)
        g <- set.graph.attribute(g, 'centr.betw', centr_betw(g, directed=T, normalized=T)$centralization)
      # NORMALIZED refers to graph-level centralization measure, NOT node level
        g <- set.graph.attribute(g, 'centr.eigen', centr_eigen(g, directed=T, normalized=T)$centralization)
      # Geodesic distances
        g <- set.graph.attribute(g, 'ingeodesic', distances(g, mode="in"))
        g <- set.graph.attribute(g, 'outgeodesic', distances(g, mode="out"))
        g <- set.graph.attribute(g, 'geodesic', distances(g, mode="all"))
      # Shortest paths
        g <- set.graph.attribute(g, 'shortest.path.in', as.matrix(shortest.paths(g, mode='in')))
        #To retrieve use: get.graph.attribute(g, 'shortest.path.in')
        g <- set.graph.attribute(g, 'shortest.path.out', as.matrix(shortest.paths(g, mode='out')))
      # Reciprocity
        g <- set.graph.attribute(g, 'reciprocity', reciprocity(g)) # Proportion of mutual connections
        print(paste0("This network has a reciprocity of ", reciprocity(g), " compared to ",
                    round(reciprocity(sample_gnm(vcount(g), ecount(g), directed=TRUE)), digits=2) ,
                    " of a random network with same # of nodes and edges."))
      # Transitivity
        g <- set.graph.attribute(g, "transitivity", transitivity(g, type="global")) # Directed ties ignored
      # Triad Census
        census_labels <- c('003','012','102','021D','021U','021C','111D','111U',
                           '030T','030C','201','120D','120U','120C','210','300')
        g <- set.graph.attribute(g, "triad.census", data.frame(triad_type = census_labels,count = triad.census(g)))
      # Components
        g <- set.graph.attribute(g, 'n.weak.component', clusters(g, mode="weak"))
        g <- set.graph.attribute(g, 'n.strong.component', clusters(g, mode="strong"))
        
  } else {
    # Centralization
      g <- set.graph.attribute(g, 'centr.degree', centr_degree(g, mode="all", normalized=T)$centralization)
      g <- set.graph.attribute(g, 'centr.clo', centr_clo(g, mode="all", normalized=T)$centralization)
      g <- set.graph.attribute(g, 'centr.betw', centr_betw(g, directed=F, normalized=T)$centralization)
      g <- set.graph.attribute(g, 'centr.eigen', centr_eigen(g, directed=F, normalized=T)$centralization)
    # Geodesic distances
      g <- set.graph.attribute(g, 'geodesic', distances(g, mode="all"))
    # Shortest paths
      g <- set.graph.attribute(g, 'shortest.path', as.matrix(shortest.paths(g, mode='all')))
    # Global Transitivity: # the ratio of the triangles and the connected triples in the graph.
      g <- set.graph.attribute(g, "transitivity", transitivity(g, type="global")) 
    # Components
      g <- set.graph.attribute(g, 'n.weak.component', clusters(g, mode="weak"))
  }
  return(g)
}
graph <- brexit.votes.1mode.graph
graph <- global.net.stats(graph)
graph <- local.net.stats(graph)
## + 2/74414 edges from 4fbaeda (vertex names):
## [1] Huw Merriman--Joseph Johnson Huw Merriman--Sam Gyimah
graph
## IGRAPH 4fbaeda UNW- 547 74414 -- 
## + attr: density (g/n), centr.degree (g/n), centr.clo (g/n),
## | centr.betw (g/n), centr.eigen (g/n), geodesic (g/n),
## | shortest.path (g/n), transitivity (g/n), n.weak.component (g/x),
## | name (v/c), party (v/c), color (v/c), label.color (v/c), degree
## | (v/n), eigenvector (v/n), closeness (v/n), betweenness (v/n),
## | transitivity (v/n), component (v/n), cutpoints (v/n), weight
## | (e/n), width (e/n), edge.color (e/c), edge.betwe (e/n)
## + edges from 4fbaeda (vertex names):
## [1] Adam Afriyie--Adam Holloway  Adam Afriyie--Alan Mak      
## [3] Adam Afriyie--Alister Jack   Adam Afriyie--Amanda Milling
## + ... omitted several edges
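
With the attributes attached, they can be pulled back into a data frame for inspection, e.g. to list the most central MPs by betweenness (a minimal sketch):

centrality <- data.frame(
  mp          = igraph::V(graph)$name,
  party       = igraph::V(graph)$party,
  degree      = igraph::V(graph)$degree,
  betweenness = igraph::V(graph)$betweenness)
head(centrality[order(-centrality$betweenness), ])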