Start from clean slate and free up memory
Install some packages
# Ensure the Bioconductor package RBGL is installed, then attach it.
# Bug fix: the original called library(RBGL) only in the else branch,
# so on a machine without BiocManager the package was installed but
# never loaded for the rest of the session.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("RBGL", quietly = TRUE)) {
  BiocManager::install("RBGL")
}
library(RBGL)
## Warning: package 'RBGL' was built under R version 3.5.3
## Loading required package: graph
## Warning: package 'graph' was built under R version 3.5.1
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 3.5.1
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colMeans, colnames, colSums, dirname, do.call, duplicated,
## eval, evalq, Filter, Find, get, grep, grepl, intersect,
## is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which, which.max,
## which.min
Interstate alliance data
## ── Attaching packages ─────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.5.2
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## ── Conflicts ────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ stringr::boundary() masks graph::boundary()
## ✖ dplyr::combine() masks BiocGenerics::combine()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## Loading required package: tergm
## Loading required package: ergm
## Loading required package: network
## Warning: package 'network' was built under R version 3.5.2
## network: Classes for Relational Data
## Version 1.15 created on 2019-04-01.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
##
## ergm: version 3.9.4, created on 2018-08-15
## Copyright (c) 2018, Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Carter T. Butts, University of California -- Irvine
## Steven M. Goodreau, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Martina Morris, University of Washington
## with contributions from
## Li Wang
## Kirk Li, University of Washington
## Skye Bender-deMoll, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm").
## NOTE: Versions before 3.6.1 had a bug in the implementation of the
## bd() constriant which distorted the sampled distribution somewhat.
## In addition, Sampson's Monks datasets had mislabeled vertices. See
## the NEWS and the documentation for more details.
## Loading required package: networkDynamic
##
## networkDynamic: version 0.9.0, created on 2016-01-12
## Copyright (c) 2016, Carter T. Butts, University of California -- Irvine
## Ayn Leslie-Cook, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Skye Bender-deMoll, University of Washington
## with contributions from
## Zack Almquist, University of California -- Irvine
## David R. Hunter, Penn State University
## Li Wang
## Kirk Li, University of Washington
## Steven M. Goodreau, University of Washington
## Jeffrey Horner
## Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("networkDynamic").
##
## tergm: version 3.5.2, created on 2018-08-18
## Copyright (c) 2018, Pavel N. Krivitsky, University of Wollongong
## Mark S. Handcock, University of California -- Los Angeles
## with contributions from
## David R. Hunter, Penn State University
## Steven M. Goodreau, University of Washington
## Martina Morris, University of Washington
## Nicole Bohme Carnegie, New York University
## Carter T. Butts, University of California -- Irvine
## Ayn Leslie-Cook, University of Washington
## Skye Bender-deMoll
## Li Wang
## Kirk Li, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("tergm").
## Loading required package: ergm.count
## Loading required package: statnet.common
## Warning: package 'statnet.common' was built under R version 3.5.2
##
## Attaching package: 'statnet.common'
## The following objects are masked from 'package:ergm':
##
## colMeans.mcmc.list, sweep.mcmc.list
## The following object is masked from 'package:BiocGenerics':
##
## order
## The following object is masked from 'package:base':
##
## order
##
## ergm.count: version 3.3.0, created on 2018-08-25
## Copyright (c) 2018, Pavel N. Krivitsky, University of Wollongong
## with contributions from
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm.count").
## NOTE: The form of the term 'CMP' has been changed in version 3.2
## of 'ergm.count'. See the news or help('CMP') for more information.
## Loading required package: sna
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
##
## Attaching package: 'sna'
## The following object is masked from 'package:graph':
##
## degree
## Loading required package: tsna
##
## statnet: version 2018.10, created on 2018-10-17
## Copyright (c) 2018, Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Carter T. Butts, University of California -- Irvine
## Steven M. Goodreau, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Skye Bender-deMoll
## Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("statnet").
## unable to reach CRAN
## [1] "net" "labels"
# 'net' is the adjacency matrix; 'labels' is a vector of country names.
# Build an undirected network object from the alliance adjacency matrix.
# (Use FALSE rather than F: T/F are ordinary variables and can be
# reassigned, whereas TRUE/FALSE are reserved.)
AllyNet <- network(allylist$net, directed = FALSE)
# Attach the states' names as vertex labels.
network.vertex.names(AllyNet) <- allylist$labels
# Basic plot of the alliance network.
gplot(AllyNet, label = network.vertex.names(AllyNet), gmode = "graph")
# A more readable plot: grey edges, smaller labels.
gplot(AllyNet, label = network.vertex.names(AllyNet), edge.col = "grey55",
      label.cex = 0.75, gmode = "graph")
# Drop the isolates for an even cleaner picture.
gplot(AllyNet, label = network.vertex.names(AllyNet), edge.col = "grey55",
      label.cex = 0.75, gmode = "graph", displayisolates = FALSE)
Senate Co-Sponsorship data
# Read in the cosponsorship data (an R object serialized with dput():
# a list with elements $net and $labels).
senlist <- dget("./data/sennet.txt")
# Initialize a (directed, by default) network from the count matrix.
SenNet <- network(senlist$net)
# Attach the senators' names as vertex labels.
network.vertex.names(SenNet) <- senlist$labels
# Plot the full network.
gplot(SenNet, label = network.vertex.names(SenNet))
# Thin the network: keep only ties with more than 10 cosponsorships
# (the logical matrix senlist$net > 10 becomes the new adjacency).
SenNet2 <- network(senlist$net > 10)
# Plot the thinned network. Labels are deliberately taken from SenNet,
# since SenNet2 was never assigned vertex names; node order is the same.
gplot(SenNet2, label = network.vertex.names(SenNet), edge.col="grey55", label.cex=.75)
Campnet data
# Read the campnet adjacency matrix; the first CSV column supplies row
# names. Use TRUE rather than T (T is reassignable, TRUE is reserved).
campnet <- read.csv(file="./data/campnet.csv", header=TRUE, row.names=1, as.is=TRUE)
# Read the node-level attribute table (names, gender, role).
campnet.attr <- read.csv(file="./data/campnet-attr.csv", header=TRUE, as.is=TRUE)
## Network attributes:
## vertices = 18
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges = 54
## missing edges = 0
## non-missing edges = 54
## density = 0.1764706
##
## Vertex attributes:
## vertex.names:
## character valued attribute
## 18 valid vertex names
##
## No edge attributes
##
## Network edgelist matrix:
## [,1] [,2]
## [1,] 5 1
## [2,] 9 1
## [3,] 12 1
## [4,] 14 1
## [5,] 11 2
## [6,] 5 3
## [7,] 7 3
## [8,] 1 4
## [9,] 3 4
## [10,] 6 4
## [11,] 7 4
## [12,] 8 4
## [13,] 1 5
## [14,] 3 5
## [15,] 6 5
## [16,] 7 5
## [17,] 4 6
## [18,] 5 6
## [19,] 8 6
## [20,] 3 7
## [21,] 4 7
## [22,] 8 7
## [23,] 13 7
## [24,] 4 8
## [25,] 6 8
## [26,] 10 9
## [27,] 12 9
## [28,] 14 9
## [29,] 15 9
## [30,] 2 11
## [31,] 16 11
## [32,] 17 11
## [33,] 1 12
## [34,] 9 12
## [35,] 10 12
## [36,] 14 12
## [37,] 9 14
## [38,] 10 14
## [39,] 12 14
## [40,] 13 15
## [41,] 18 15
## [42,] 2 16
## [43,] 11 16
## [44,] 15 16
## [45,] 17 16
## [46,] 18 16
## [47,] 2 17
## [48,] 11 17
## [49,] 16 17
## [50,] 18 17
## [51,] 13 18
## [52,] 15 18
## [53,] 16 18
## [54,] 17 18
# Attach node-level attributes from campnet.attr to the camp.net network
# object (camp.net is created earlier in the document -- not shown here).
set.vertex.attribute(camp.net, "vertex.names", campnet.attr$Name) # names as a node attribute
set.vertex.attribute(camp.net, "Gender", campnet.attr$Gender) # gender as a node attribute
set.vertex.attribute(camp.net, "Role", campnet.attr$Role) # role as a node attribute
# List all vertex attributes now present on the network.
list.vertex.attributes(camp.net)
## [1] "Gender" "na" "Role" "vertex.names"
## Network attributes:
## vertices = 18
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges = 54
## missing edges = 0
## non-missing edges = 54
## density = 0.1764706
##
## Vertex attributes:
##
## Gender:
## integer valued attribute
## 18 values
##
## Role:
## integer valued attribute
## 18 values
## vertex.names:
## character valued attribute
## 18 valid vertex names
##
## No edge attributes
##
## Network edgelist matrix:
## [,1] [,2]
## [1,] 5 1
## [2,] 9 1
## [3,] 12 1
## [4,] 14 1
## [5,] 11 2
## [6,] 5 3
## [7,] 7 3
## [8,] 1 4
## [9,] 3 4
## [10,] 6 4
## [11,] 7 4
## [12,] 8 4
## [13,] 1 5
## [14,] 3 5
## [15,] 6 5
## [16,] 7 5
## [17,] 4 6
## [18,] 5 6
## [19,] 8 6
## [20,] 3 7
## [21,] 4 7
## [22,] 8 7
## [23,] 13 7
## [24,] 4 8
## [25,] 6 8
## [26,] 10 9
## [27,] 12 9
## [28,] 14 9
## [29,] 15 9
## [30,] 2 11
## [31,] 16 11
## [32,] 17 11
## [33,] 1 12
## [34,] 9 12
## [35,] 10 12
## [36,] 14 12
## [37,] 9 14
## [38,] 10 14
## [39,] 12 14
## [40,] 13 15
## [41,] 18 15
## [42,] 2 16
## [43,] 11 16
## [44,] 15 16
## [45,] 17 16
## [46,] 18 16
## [47,] 2 17
## [48,] 11 17
## [49,] 16 17
## [50,] 18 17
## [51,] 13 18
## [52,] 15 18
## [53,] 16 18
## [54,] 17 18
# Network density: observed ties / possible ties.
# In this and other sna functions, mode is "digraph" for directed
# and "graph" for symmetric networks.
gden(camp.net, mode="digraph")
## [1] 0.1764706
## Mut
## 19
# Reciprocity - proportion of symmetric dyads. Available measures:
#   dyadic         - ratio of dyads where (i,j)==(j,i) to all dyads
#                    (counts mutual AND mutually-absent dyads)
#   dyadic.nonnull - ratio of dyads where (i,j)==1 AND (j,i)==1 to all
#                    dyads where (i,j)==1
#   edgewise       - ratio of reciprocated edges to all edges where (i,j)==1
grecip(camp.net, measure="dyadic")
## Mut
## 0.8954248
## Mut
## 0.5428571
## Mut
## 0.7037037
# Geodesic (shortest-path) distances.
# By default, nodes that cannot reach each other get a distance of Inf
# (the constant for infinity). Here we replace Inf with the longest
# theoretically possible path length in an 18-node network plus 1 = 18,
# so downstream arithmetic stays finite.
camp.geo <- geodist(camp.net, inf.replace=18)
# $counts = number of shortest paths per pair; $gdist = the distances.
camp.geo
## $counts
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [2,] 2 1 2 2 2 4 2 2 2 0 1 2 0
## [3,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [4,] 2 0 1 1 2 1 1 1 2 0 0 2 0
## [5,] 1 0 1 3 1 1 1 1 1 0 0 1 0
## [6,] 1 0 1 1 1 1 2 1 1 0 0 1 0
## [7,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [8,] 2 0 1 1 2 1 1 1 2 0 0 2 0
## [9,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [10,] 3 0 3 3 3 6 3 3 1 1 0 1 0
## [11,] 2 1 2 2 2 4 2 2 2 0 1 2 0
## [12,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [13,] 2 3 1 1 1 2 1 1 1 0 3 1 1
## [14,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [15,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [16,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [17,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [18,] 1 2 1 1 1 2 1 1 1 0 2 1 0
## [,14] [,15] [,16] [,17] [,18]
## [1,] 1 0 0 0 0
## [2,] 2 2 1 1 2
## [3,] 1 0 0 0 0
## [4,] 2 0 0 0 0
## [5,] 1 0 0 0 0
## [6,] 1 0 0 0 0
## [7,] 1 0 0 0 0
## [8,] 2 0 0 0 0
## [9,] 1 0 0 0 0
## [10,] 1 0 0 0 0
## [11,] 2 2 1 1 2
## [12,] 1 0 0 0 0
## [13,] 1 1 2 1 1
## [14,] 1 0 0 0 0
## [15,] 1 1 1 2 1
## [16,] 1 1 1 1 1
## [17,] 1 1 1 1 1
## [18,] 1 1 1 1 1
##
## $gdist
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 18 2 1 1 2 2 2 2 18 18 1 18
## [2,] 5 0 7 6 6 7 7 7 4 18 1 5 18
## [3,] 2 18 0 1 1 2 1 2 4 18 18 3 18
## [4,] 3 18 2 0 2 1 1 1 5 18 18 4 18
## [5,] 1 18 1 2 0 1 2 2 3 18 18 2 18
## [6,] 2 18 2 1 1 0 2 1 4 18 18 3 18
## [7,] 2 18 1 1 1 2 0 2 4 18 18 3 18
## [8,] 3 18 2 1 2 1 1 0 5 18 18 4 18
## [9,] 1 18 3 2 2 3 3 3 0 18 18 1 18
## [10,] 2 18 4 3 3 4 4 4 1 0 18 1 18
## [11,] 5 1 7 6 6 7 7 7 4 18 0 5 18
## [12,] 1 18 3 2 2 3 3 3 1 18 18 0 18
## [13,] 3 4 2 2 2 3 1 3 2 18 3 3 0
## [14,] 1 18 3 2 2 3 3 3 1 18 18 1 18
## [15,] 2 3 4 3 3 4 4 4 1 18 2 2 18
## [16,] 4 2 6 5 5 6 6 6 3 18 1 4 18
## [17,] 4 2 6 5 5 6 6 6 3 18 1 4 18
## [18,] 3 3 5 4 4 5 5 5 2 18 2 3 18
## [,14] [,15] [,16] [,17] [,18]
## [1,] 2 18 18 18 18
## [2,] 5 3 1 1 2
## [3,] 4 18 18 18 18
## [4,] 5 18 18 18 18
## [5,] 3 18 18 18 18
## [6,] 4 18 18 18 18
## [7,] 4 18 18 18 18
## [8,] 5 18 18 18 18
## [9,] 1 18 18 18 18
## [10,] 1 18 18 18 18
## [11,] 5 3 1 1 2
## [12,] 1 18 18 18 18
## [13,] 3 1 2 2 1
## [14,] 0 18 18 18 18
## [15,] 2 0 1 2 1
## [16,] 4 2 0 1 1
## [17,] 4 2 1 0 1
## [18,] 3 1 1 1 0
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 18 2 1 1 2 2 2 2 18 18 1 18
## [2,] 5 0 7 6 6 7 7 7 4 18 1 5 18
## [3,] 2 18 0 1 1 2 1 2 4 18 18 3 18
## [4,] 3 18 2 0 2 1 1 1 5 18 18 4 18
## [5,] 1 18 1 2 0 1 2 2 3 18 18 2 18
## [6,] 2 18 2 1 1 0 2 1 4 18 18 3 18
## [7,] 2 18 1 1 1 2 0 2 4 18 18 3 18
## [8,] 3 18 2 1 2 1 1 0 5 18 18 4 18
## [9,] 1 18 3 2 2 3 3 3 0 18 18 1 18
## [10,] 2 18 4 3 3 4 4 4 1 0 18 1 18
## [11,] 5 1 7 6 6 7 7 7 4 18 0 5 18
## [12,] 1 18 3 2 2 3 3 3 1 18 18 0 18
## [13,] 3 4 2 2 2 3 1 3 2 18 3 3 0
## [14,] 1 18 3 2 2 3 3 3 1 18 18 1 18
## [15,] 2 3 4 3 3 4 4 4 1 18 2 2 18
## [16,] 4 2 6 5 5 6 6 6 3 18 1 4 18
## [17,] 4 2 6 5 5 6 6 6 3 18 1 4 18
## [18,] 3 3 5 4 4 5 5 5 2 18 2 3 18
## [,14] [,15] [,16] [,17] [,18]
## [1,] 2 18 18 18 18
## [2,] 5 3 1 1 2
## [3,] 4 18 18 18 18
## [4,] 5 18 18 18 18
## [5,] 3 18 18 18 18
## [6,] 4 18 18 18 18
## [7,] 4 18 18 18 18
## [8,] 5 18 18 18 18
## [9,] 1 18 18 18 18
## [10,] 1 18 18 18 18
## [11,] 5 3 1 1 2
## [12,] 1 18 18 18 18
## [13,] 3 1 2 2 1
## [14,] 0 18 18 18 18
## [15,] 2 0 1 2 1
## [16,] 4 2 0 1 1
## [17,] 4 2 1 0 1
## [18,] 3 1 1 1 0
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [2,] 2 1 2 2 2 4 2 2 2 0 1 2 0
## [3,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [4,] 2 0 1 1 2 1 1 1 2 0 0 2 0
## [5,] 1 0 1 3 1 1 1 1 1 0 0 1 0
## [6,] 1 0 1 1 1 1 2 1 1 0 0 1 0
## [7,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [8,] 2 0 1 1 2 1 1 1 2 0 0 2 0
## [9,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [10,] 3 0 3 3 3 6 3 3 1 1 0 1 0
## [11,] 2 1 2 2 2 4 2 2 2 0 1 2 0
## [12,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [13,] 2 3 1 1 1 2 1 1 1 0 3 1 1
## [14,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [15,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [16,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [17,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [18,] 1 2 1 1 1 2 1 1 1 0 2 1 0
## [,14] [,15] [,16] [,17] [,18]
## [1,] 1 0 0 0 0
## [2,] 2 2 1 1 2
## [3,] 1 0 0 0 0
## [4,] 2 0 0 0 0
## [5,] 1 0 0 0 0
## [6,] 1 0 0 0 0
## [7,] 1 0 0 0 0
## [8,] 2 0 0 0 0
## [9,] 1 0 0 0 0
## [10,] 1 0 0 0 0
## [11,] 2 2 1 1 2
## [12,] 1 0 0 0 0
## [13,] 1 1 2 1 1
## [14,] 1 0 0 0 0
## [15,] 1 1 1 2 1
## [16,] 1 1 1 1 1
## [17,] 1 1 1 1 1
## [18,] 1 1 1 1 1
## Average Distance/Path length
# Convert the campnet adjacency matrix to an igraph object and compute
# the average geodesic distance (unreachable pairs are handled by igraph).
camp.graph <- igraph::graph_from_adjacency_matrix(as.matrix(campnet))
# NOTE(review): average.path.length is the legacy igraph name; newer
# igraph versions call this mean_distance() -- confirm before upgrading.
igraph::average.path.length(camp.graph) # Average distance of 2.8
## [1] 2.873786
## [1] 7
## [1] 7
# How many connected components does the network have? components()
# connected="strong": v1 and v2 are connected if there is a directed
#   path from v1 to v2 AND from v2 to v1.
# connected="weak": v1 and v2 are connected if there is a semi-path
#   (a path ignoring link direction) between them.
# Namespaced explicitly because igraph also exports components().
sna::components(camp.net, connected="strong")
## Node 1, Reach 10, Total 10
## Node 2, Reach 16, Total 26
## Node 3, Reach 10, Total 36
## Node 4, Reach 10, Total 46
## Node 5, Reach 10, Total 56
## Node 6, Reach 10, Total 66
## Node 7, Reach 10, Total 76
## Node 8, Reach 10, Total 86
## Node 9, Reach 10, Total 96
## Node 10, Reach 11, Total 107
## Node 11, Reach 16, Total 123
## Node 12, Reach 10, Total 133
## Node 13, Reach 17, Total 150
## Node 14, Reach 10, Total 160
## Node 15, Reach 16, Total 176
## Node 16, Reach 16, Total 192
## Node 17, Reach 16, Total 208
## Node 18, Reach 16, Total 224
## [1] 4
## [1] 1
## Node 1, Reach 10, Total 10
## Node 2, Reach 16, Total 26
## Node 3, Reach 10, Total 36
## Node 4, Reach 10, Total 46
## Node 5, Reach 10, Total 56
## Node 6, Reach 10, Total 66
## Node 7, Reach 10, Total 76
## Node 8, Reach 10, Total 86
## Node 9, Reach 10, Total 96
## Node 10, Reach 11, Total 107
## Node 11, Reach 16, Total 123
## Node 12, Reach 10, Total 133
## Node 13, Reach 17, Total 150
## Node 14, Reach 10, Total 160
## Node 15, Reach 16, Total 176
## Node 16, Reach 16, Total 192
## Node 17, Reach 16, Total 208
## Node 18, Reach 16, Total 224
## [1] 0.2222222
# Which node belongs to which component? component.dist() returns
# $membership (component id per node), $csize (component sizes), and
# $cdist (distribution of component sizes).
camp.comp <- component.dist(camp.net, connected="strong")
## Node 1, Reach 10, Total 10
## Node 2, Reach 16, Total 26
## Node 3, Reach 10, Total 36
## Node 4, Reach 10, Total 46
## Node 5, Reach 10, Total 56
## Node 6, Reach 10, Total 66
## Node 7, Reach 10, Total 76
## Node 8, Reach 10, Total 86
## Node 9, Reach 10, Total 96
## Node 10, Reach 11, Total 107
## Node 11, Reach 16, Total 123
## Node 12, Reach 10, Total 133
## Node 13, Reach 17, Total 150
## Node 14, Reach 10, Total 160
## Node 15, Reach 16, Total 176
## Node 16, Reach 16, Total 192
## Node 17, Reach 16, Total 208
## Node 18, Reach 16, Total 224
## $membership
## [1] 1 2 1 1 1 1 1 1 1 3 2 1 4 1 2 2 2 2
##
## $csize
## [1] 10 6 1 1
##
## $cdist
## [1] 2 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
## [1] 1 2 1 1 1 1 1 1 1 3 2 1 4 1 2 2 2 2
## [1] 10 6 1 1
## [1] 2 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
Which nodes in the network, if removed, will increase the number of components we find?
## [1] 1 5 11 12 18
# Let's remove one of the cutpoints and count components again.
camp.net.cut <- camp.net[-1,-1] # remember "-1" selects all but the first row/column.
# So camp.net.cut will be camp.net with node 1 removed
# (matrix indexing on a network object yields an adjacency matrix).
components(camp.net.cut, connected="strong")
## Node 1, Reach 9, Total 9
## Node 2, Reach 6, Total 15
## Node 3, Reach 6, Total 21
## Node 4, Reach 6, Total 27
## Node 5, Reach 6, Total 33
## Node 6, Reach 6, Total 39
## Node 7, Reach 6, Total 45
## Node 8, Reach 3, Total 48
## Node 9, Reach 4, Total 52
## Node 10, Reach 9, Total 61
## Node 11, Reach 3, Total 64
## Node 12, Reach 16, Total 80
## Node 13, Reach 3, Total 83
## Node 14, Reach 9, Total 92
## Node 15, Reach 9, Total 101
## Node 16, Reach 9, Total 110
## Node 17, Reach 9, Total 119
## [1] 5
# Fragmentation (distance-weighted): 1 minus the mean reciprocal geodesic
# distance over all unordered dyads. Unreachable pairs contribute 0.
#
# @param matrix  square adjacency matrix (parameter name shadows
#                base::matrix() -- kept for backward compatibility).
# @return        scalar in [0, 1]; higher = more fragmented.
fragmentation.dw <- function(matrix){
  # 1/gdist directly, instead of allocating a matrix of ones to divide.
  reciprocal <- 1/geodist(matrix)$gdist
  # Diagonal (distance 0) and unreachable pairs (distance Inf) both
  # become Inf after reciprocal inversion is applied to 0 or stay Inf;
  # zero them out so they carry no weight.
  reciprocal[is.infinite(reciprocal)] <- 0
  n <- nrow(matrix)
  1 - ((2*sum(reciprocal[lower.tri(reciprocal)]))/(n*(n-1)))
}
fragmentation.dw(campnet)
## [1] 0.6095549
# Reachability matrix: entry (i,j) is 1 when a directed path from i to j
# exists in the original graph.
# Vertices which are adjacent in the reachability graph are connected by
# one or more directed paths in the original graph; thus, structural
# equivalence classes in the reachability graph are synonymous with
# strongly connected components in the original structure.
reachability.matrix <- reachability(camp.net)
## Node 1, Reach 10, Total 10
## Node 2, Reach 16, Total 26
## Node 3, Reach 10, Total 36
## Node 4, Reach 10, Total 46
## Node 5, Reach 10, Total 56
## Node 6, Reach 10, Total 66
## Node 7, Reach 10, Total 76
## Node 8, Reach 10, Total 86
## Node 9, Reach 10, Total 96
## Node 10, Reach 11, Total 107
## Node 11, Reach 16, Total 123
## Node 12, Reach 10, Total 133
## Node 13, Reach 17, Total 150
## Node 14, Reach 10, Total 160
## Node 15, Reach 16, Total 176
## Node 16, Reach 16, Total 192
## Node 17, Reach 16, Total 208
## Node 18, Reach 16, Total 224
igraph
First, we can find the number of cliques in a network using the count_max_cliques() function. Again, we are only interested in cliques sized 3 or greater. Note: By default, igraph treats networks as being undirected when running all operations on cliques. If you have a directed network, then you will get a warning about this every time you do a clique analysis.
# Build a directed igraph object from campnet and count maximal cliques
# of size >= 3. igraph ignores edge direction for clique operations,
# which triggers the warning shown below.
campnet.g <- igraph::graph_from_adjacency_matrix(as.matrix(campnet))
igraph::count_max_cliques(campnet.g, min=3) # counts the number of cliques (minimum size=3)
## Warning in igraph::count_max_cliques(campnet.g, min = 3): At ./
## maximal_cliques_template.h:203 :Edge directions are ignored for maximal
## clique calculation
## [1] 10
Better to symmetrize campnet
# Symmetrize campnet first with the "strong" rule (keep a tie only if it
# is reciprocated), then rebuild as an explicitly undirected graph.
campnet.g <- igraph::graph_from_adjacency_matrix(sna::symmetrize(as.matrix(campnet), rule="strong"),
mode="undirected") # Symmetrize with "strong" rule
# Carry the actor names over as vertex names.
igraph::V(campnet.g)$name <- rownames(campnet)
igraph::count_max_cliques(campnet.g, min=3) # 4 cliques of size >= 3 in this network
## [1] 4
## [1] 3
From here we can extract the cliques.
## [[1]]
## + 3/18 vertices, named, from f10bb52:
## [1] JENNIE PAM ANN
##
## [[2]]
## + 3/18 vertices, named, from f10bb52:
## [1] MICHAEL DON HARRY
##
## [[3]]
## + 3/18 vertices, named, from f10bb52:
## [1] LEE STEVE BERT
##
## [[4]]
## + 3/18 vertices, named, from f10bb52:
## [1] STEVE BERT RUSS
mc <- igraph::max_cliques(campnet.g, min=3) # Save the maximal cliques (size >= 3) as 'mc'; view them all by typing mc, or one at a time with double square braces ([[]])
mc[[1]]
## + 3/18 vertices, named, from f10bb52:
## [1] JENNIE PAM ANN
Plot largest cliques
## [[1]]
## + 3/18 vertices, named, from f10bb52:
## [1] RUSS STEVE BERT
##
## [[2]]
## + 3/18 vertices, named, from f10bb52:
## [1] BERT LEE STEVE
##
## [[3]]
## + 3/18 vertices, named, from f10bb52:
## [1] HARRY MICHAEL DON
##
## [[4]]
## + 3/18 vertices, named, from f10bb52:
## [1] PAM JENNIE ANN
# Get the largest cliques of campnet and plot each one as an induced
# subgraph.
cliq <- igraph::largest_cliques(campnet.g)
plot(igraph::induced_subgraph(campnet.g, cliq[[1]]))
par(mfrow = c(1, 2)) # plotting grid: 1 row, 2 columns
# seq_along() instead of 1:length(): safe even if cliq were empty.
for (i in seq_along(cliq)) { # plot every largest clique
  plot(igraph::induced_subgraph(campnet.g, cliq[[i]]))
}
statnet
# Symmetrize camp.net by setting its "directed" attribute to FALSE: this
# adds a tie (j,i) wherever (i,j) exists. To symmetrize by the minimum
# instead, use symmetrize() with rule="strong".
camp.sym <- camp.net
camp.sym <- set.network.attribute(camp.sym, "directed", FALSE)
# The clique census returns a list with several important elements;
# assign it to camp.net.cliques. clique.comembership takes "none" (no
# co-membership computed), "sum" (total shared cliques per node pair),
# or "bysize" (co-membership computed separately for each clique size).
camp.net.cliques <- clique.census(camp.sym, mode = "graph", clique.comembership="sum")
camp.net.cliques # an object that now contains the results of the clique census
## $clique.count
## Agg BRAZEY BILL JOHN ANN CAROL HARRY LEE JENNIE PAULINE BERT DON STEVE
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 2 5 2 0 0 1 2 1 1 0 1 0 0 0
## 3 7 0 0 2 3 1 1 3 2 0 0 0 0
## 4 3 1 1 0 0 0 0 0 0 2 1 1 2
## PAM PAT RUSS GERY MICHAEL HOLLY
## 1 0 0 0 0 0 0
## 2 1 0 1 0 0 0
## 3 1 0 2 2 1 3
## 4 0 2 0 1 1 0
##
## $clique.comemb
## BRAZEY BILL JOHN ANN CAROL HARRY LEE JENNIE PAULINE BERT DON STEVE
## BRAZEY 3 0 0 1 1 0 0 0 1 0 0 1
## BILL 0 1 0 0 0 0 0 0 0 0 1 0
## JOHN 0 0 2 1 1 0 2 0 0 0 0 0
## ANN 1 0 1 4 0 1 2 2 0 0 0 0
## CAROL 1 0 1 0 3 1 1 0 0 0 0 0
## HARRY 0 0 0 1 1 2 0 1 0 0 0 0
## LEE 0 0 2 2 1 0 4 1 0 0 0 0
## JENNIE 0 0 0 2 0 1 1 2 0 0 0 0
## PAULINE 1 0 0 0 0 0 0 0 3 1 0 2
## BERT 0 0 0 0 0 0 0 0 1 1 0 1
## DON 0 1 0 0 0 0 0 0 0 0 1 0
## STEVE 1 0 0 0 0 0 0 0 2 1 0 2
## PAM 0 0 0 0 0 0 1 0 0 0 0 0
## PAT 1 0 0 0 0 0 0 0 2 1 0 2
## RUSS 0 0 0 0 0 0 0 0 1 0 0 0
## GERY 0 1 0 0 0 0 0 0 0 0 1 0
## MICHAEL 0 1 0 0 0 0 0 0 0 0 1 0
## HOLLY 0 0 0 0 0 0 0 0 0 0 0 0
## PAM PAT RUSS GERY MICHAEL HOLLY
## BRAZEY 0 1 0 0 0 0
## BILL 0 0 0 1 1 0
## JOHN 0 0 0 0 0 0
## ANN 0 0 0 0 0 0
## CAROL 0 0 0 0 0 0
## HARRY 0 0 0 0 0 0
## LEE 1 0 0 0 0 0
## JENNIE 0 0 0 0 0 0
## PAULINE 0 2 1 0 0 0
## BERT 0 1 0 0 0 0
## DON 0 0 0 1 1 0
## STEVE 0 2 0 0 0 0
## PAM 2 0 1 0 0 1
## PAT 0 2 0 0 0 0
## RUSS 1 0 3 1 0 2
## GERY 0 0 1 3 2 2
## MICHAEL 0 0 0 2 2 1
## HOLLY 1 0 2 2 1 3
##
## $cliques
## $cliques[[1]]
## NULL
##
## $cliques[[2]]
## $cliques[[2]][[1]]
## [1] 9 15
##
## $cliques[[2]][[2]]
## [1] 5 6
##
## $cliques[[2]][[3]]
## [1] 1 5
##
## $cliques[[2]][[4]]
## [1] 7 13
##
## $cliques[[2]][[5]]
## [1] 1 4
##
##
## $cliques[[3]]
## $cliques[[3]][[1]]
## [1] 15 16 18
##
## $cliques[[3]][[2]]
## [1] 4 6 8
##
## $cliques[[3]][[3]]
## [1] 13 15 18
##
## $cliques[[3]][[4]]
## [1] 4 7 8
##
## $cliques[[3]][[5]]
## [1] 3 5 7
##
## $cliques[[3]][[6]]
## [1] 3 4 7
##
## $cliques[[3]][[7]]
## [1] 16 17 18
##
##
## $cliques[[4]]
## $cliques[[4]][[1]]
## [1] 9 10 12 14
##
## $cliques[[4]][[2]]
## [1] 1 9 12 14
##
## $cliques[[4]][[3]]
## [1] 2 11 16 17
# The first element of the result list is clique.count: a matrix with
# the number of cliques of each size (size = number of nodes in the
# clique). The first column (Agg) gives the total number of cliques of
# each size; the remaining columns show how many cliques each node
# participates in.
# Note that this includes cliques of sizes 1 & 2, which occur when the
# largest fully connected structure includes just 1 or 2 nodes.
camp.net.cliques$clique.count
## Agg BRAZEY BILL JOHN ANN CAROL HARRY LEE JENNIE PAULINE BERT DON STEVE
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 2 5 2 0 0 1 2 1 1 0 1 0 0 0
## 3 7 0 0 2 3 1 1 3 2 0 0 0 0
## 4 3 1 1 0 0 0 0 0 0 2 1 1 2
## PAM PAT RUSS GERY MICHAEL HOLLY
## 1 0 0 0 0 0 0
## 2 1 0 1 0 0 0
## 3 1 0 2 2 1 3
## 4 0 2 0 1 1 0
## BRAZEY BILL JOHN ANN CAROL HARRY LEE JENNIE PAULINE BERT DON STEVE
## BRAZEY 3 0 0 1 1 0 0 0 1 0 0 1
## BILL 0 1 0 0 0 0 0 0 0 0 1 0
## JOHN 0 0 2 1 1 0 2 0 0 0 0 0
## ANN 1 0 1 4 0 1 2 2 0 0 0 0
## CAROL 1 0 1 0 3 1 1 0 0 0 0 0
## HARRY 0 0 0 1 1 2 0 1 0 0 0 0
## LEE 0 0 2 2 1 0 4 1 0 0 0 0
## JENNIE 0 0 0 2 0 1 1 2 0 0 0 0
## PAULINE 1 0 0 0 0 0 0 0 3 1 0 2
## BERT 0 0 0 0 0 0 0 0 1 1 0 1
## DON 0 1 0 0 0 0 0 0 0 0 1 0
## STEVE 1 0 0 0 0 0 0 0 2 1 0 2
## PAM 0 0 0 0 0 0 1 0 0 0 0 0
## PAT 1 0 0 0 0 0 0 0 2 1 0 2
## RUSS 0 0 0 0 0 0 0 0 1 0 0 0
## GERY 0 1 0 0 0 0 0 0 0 0 1 0
## MICHAEL 0 1 0 0 0 0 0 0 0 0 1 0
## HOLLY 0 0 0 0 0 0 0 0 0 0 0 0
## PAM PAT RUSS GERY MICHAEL HOLLY
## BRAZEY 0 1 0 0 0 0
## BILL 0 0 0 1 1 0
## JOHN 0 0 0 0 0 0
## ANN 0 0 0 0 0 0
## CAROL 0 0 0 0 0 0
## HARRY 0 0 0 0 0 0
## LEE 1 0 0 0 0 0
## JENNIE 0 0 0 0 0 0
## PAULINE 0 2 1 0 0 0
## BERT 0 1 0 0 0 0
## DON 0 0 0 1 1 0
## STEVE 0 2 0 0 0 0
## PAM 2 0 1 0 0 1
## PAT 0 2 0 0 0 0
## RUSS 1 0 3 1 0 2
## GERY 0 0 1 3 2 2
## MICHAEL 0 0 0 2 2 1
## HOLLY 1 0 2 2 1 3
# The third element of the clique census result is a nested list of all
# found cliques, grouped by clique size (node indices, not names).
camp.net.cliques$cliques # a full list of cliques, all sizes
## [[1]]
## NULL
##
## [[2]]
## [[2]][[1]]
## [1] 9 15
##
## [[2]][[2]]
## [1] 5 6
##
## [[2]][[3]]
## [1] 1 5
##
## [[2]][[4]]
## [1] 7 13
##
## [[2]][[5]]
## [1] 1 4
##
##
## [[3]]
## [[3]][[1]]
## [1] 15 16 18
##
## [[3]][[2]]
## [1] 4 6 8
##
## [[3]][[3]]
## [1] 13 15 18
##
## [[3]][[4]]
## [1] 4 7 8
##
## [[3]][[5]]
## [1] 3 5 7
##
## [[3]][[6]]
## [1] 3 4 7
##
## [[3]][[7]]
## [1] 16 17 18
##
##
## [[4]]
## [[4]][[1]]
## [1] 9 10 12 14
##
## [[4]][[2]]
## [1] 1 9 12 14
##
## [[4]][[3]]
## [1] 2 11 16 17
## NULL
## [[1]]
## [1] 9 15
##
## [[2]]
## [1] 5 6
##
## [[3]]
## [1] 1 5
##
## [[4]]
## [1] 7 13
##
## [[5]]
## [1] 1 4
## [[1]]
## [1] 15 16 18
##
## [[2]]
## [1] 4 6 8
##
## [[3]]
## [1] 13 15 18
##
## [[4]]
## [1] 4 7 8
##
## [[5]]
## [1] 3 5 7
##
## [[6]]
## [1] 3 4 7
##
## [[7]]
## [1] 16 17 18
## [[1]]
## [1] 9 10 12 14
##
## [[2]]
## [1] 1 9 12 14
##
## [[3]]
## [1] 2 11 16 17
BiocManager::RBGL
igraph
## HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL
## 2 1 2 2 2 2 2 2 2
## BILL LEE DON JOHN HARRY GERY STEVE BERT RUSS
## 0 2 2 0 2 1 2 2 2
# Compute each vertex's k-core number (coreness).
kcore <- igraph::coreness(campnet.g)
igraph::V(campnet.g)$core <- kcore # Store the coreness as a vertex attribute
# Color vertices by their core number.
igraph::plot.igraph(campnet.g, vertex.color=igraph::V(campnet.g)$core)
Extracting a subnetwork from k-cores
If you would like to extract one or more of the cores as a subnetwork, then first consider the values of the various cores. Remember that k-cores are assigned according to degree within each core. That means that the densest core will have the greatest values.
In this case, let’s extract the densest core. Then, we can relax our criterion a little and extract a slightly larger subset.
To start, consider the values of the various cores.
## HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL
## 2 1 2 2 2 2 2 2 2
## BILL LEE DON JOHN HARRY GERY STEVE BERT RUSS
## 0 2 2 0 2 1 2 2 2
## kcore
## 0 1 2
## 2 2 14
# Extract the densest core (vertices with coreness exactly 2) ...
g2c <- igraph::induced_subgraph(campnet.g, kcore==2)
# ... and a slightly relaxed subset (coreness greater than or equal to 2).
g2gc <- igraph::induced_subgraph(campnet.g, kcore>=2) # The value of the k-core is greater than or equal to 2
igraph::plot.igraph(g2c)
statnet
## BRAZEY BILL JOHN ANN CAROL HARRY LEE JENNIE PAULINE
## 3 3 3 3 3 3 3 3 3
## BERT DON STEVE PAM PAT RUSS GERY MICHAEL HOLLY
## 3 3 3 3 3 3 3 3 3
# Girvan-Newman community detection: repeatedly calculate the edge
# betweenness of the network, remove the edge with the highest
# betweenness, check whether that split the network into components,
# and if so compute modularity -- stopping once modularity starts to
# decrease. The underlying assumption is that edges with high
# betweenness bridge communities together.
campnet.g <- igraph::graph_from_adjacency_matrix(
  sna::symmetrize(as.matrix(campnet), rule = "weak"),
  mode = "undirected"
)
igraph::V(campnet.g)$name <- rownames(campnet)
com <- igraph::edge.betweenness.community(campnet.g)
# WARNING: never store this in a vertex attribute named "membership";
# that attribute name is reserved and assigning it will break parts of
# your analysis and visualization. Use something like "m" or "memb".
igraph::V(campnet.g)$memb <- com$membership
# Check the modularity score of the detected partition.
igraph::modularity(com)
## [1] 0.5497959
Selecting number of groups manually
By default, the edge betweenness function will produce a group structure that corresponds with the greatest modularity value. However, the greatest modularity value is not always the best representation of how the network should be parsed.
To get a quick look at the various ways that the algorithm could divide the network, use a dendrogram. In the dendrogram, the mode=“hclust” argument allows us to use the rect=2 argument to help identify what a two group solution would look like. Try changing the number of groups to see what the various solutions would look like.
The dendrogram should give you a little better idea of how the network may be parsed and the membership of each group as it is parsed using a particular algorithm. In this case, we can see how the Karate Club network would divide into two groups using edge betweenness.
If you would rather seek a two-community solution than the solution identified as “optimal” using modularity (the default for edge betweenness and other algorithms), you may specify that for any of the following algorithms. The example below uses edge betweenness, but you can substitute any other community detection algorithm that uses modularity.
# Record the modularity-optimal membership as a vertex attribute
igraph::V(campnet.g)$group <- igraph::membership(com)
# Override the optimal solution with a forced two-community cut
com$membership <- igraph::cut_at(com, no = 2)
plot(com, campnet.g, layout = igraph::layout_with_kk,
     main = "Two-Community Solution")
## HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL
## 1 3 1 1 2 1 1 1 2
## BILL LEE DON JOHN HARRY GERY STEVE BERT RUSS
## 2 3 2 3 2 3 3 3 3
# Store the k-means cluster assignment as a vertex attribute and also use
# it as the node color, then plot with small arrowheads.
igraph::V(campnet.g)$kmeans <- km.out$cluster
igraph::V(campnet.g)$color <- km.out$cluster
plot(campnet.g, edge.arrow.size = 0.1)
## [1] 24.57143
# Hierarchical clustering of campnet under three linkage rules, which differ
# in how the distance to a merged cluster is estimated.
# Compute the distance matrix once and reuse it.
campnet.dist <- dist(campnet)
hc.complete <- hclust(campnet.dist, method = "complete") # maximum distance
hc.average <- hclust(campnet.dist, method = "average")
hc.single <- hclust(campnet.dist, method = "single") # minimum distance
# Draw the three dendrograms side by side
par(mfrow = c(1, 3))
plot(hc.complete, main = "Complete Linkage", xlab = "", sub = "", cex = 0.9)
plot(hc.average, main = "Average Linkage", xlab = "", sub = "", cex = 0.9)
plot(hc.single, main = "Single Linkage", xlab = "", sub = "", cex = 0.9)
## HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL
## 1 1 1 1 1 1 1 1 1
## BILL LEE DON JOHN HARRY GERY STEVE BERT RUSS
## 1 1 1 2 1 1 1 1 1
## HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL
## 1 2 1 1 1 1 1 1 1
## BILL LEE DON JOHN HARRY GERY STEVE BERT RUSS
## 1 2 1 2 1 2 2 2 2
## HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL
## 1 1 1 1 1 1 1 1 1
## BILL LEE DON JOHN HARRY GERY STEVE BERT RUSS
## 1 1 1 2 1 1 1 1 1
## HOLLY BRAZEY CAROL PAM PAT JENNIE PAULINE ANN MICHAEL
## 1 2 1 1 1 1 1 1 3
## BILL LEE DON JOHN HARRY GERY STEVE BERT RUSS
## 3 2 3 4 3 2 2 2 2
# Use a different dissimilarity: 1 - Pearson correlation between rows
# (cor() works on columns, hence the transpose).
campnet.cor <- as.dist(1 - cor(t(campnet)))
plot(
  hclust(campnet.cor, method = "complete"),
  main = "Complete Linkage with Correlation-Based Distance",
  xlab = "", sub = ""
)
# Blockmodeling is the Classical SNA Approach
# Goal is to group nodes based on structural equivalence
# Run a block model, identifying the number of clusters
ALLYbm <- blockmodel(AllyNet, equiv.clust(AllyNet), k=6)
# Create Vector for block membership, and fill
bmem <- numeric(length(ALLYbm$block.content))
bmem[ALLYbm$order.vec] <- ALLYbm$block.membership
bcols <- c("black","white","red","blue","yellow","green")
# Take a look at them
gplot(AllyNet, label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, gmode="graph", displayisolates=F, vertex.col=bcols[bmem])
# Try it with 8 communities
ALLYbm <- blockmodel(AllyNet, equiv.clust(AllyNet), k=8)
bmem <- numeric(length(ALLYbm$block.content))
bmem[ALLYbm$order.vec] <- ALLYbm$block.membership
bcols <- c("black","white","red","blue","yellow", "grey50","brown","green")
gplot(AllyNet, label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, gmode="graph", displayisolates=F, vertex.col=bcols[bmem])
\[ \text{Modularity} = \frac{1}{2m}\sum_{ij}\left[A_{ij} - \frac{k_i k_j}{2m}\right]\delta(c_i, c_j) \]
where,
# Modularity-based community detection, popular in physics:
# dense ties within communities, sparse ties across them.
library(igraph)
# Convert the alliance matrix into an igraph object
AllyGR <- graph.adjacency(AllyNet, mode = "undirected")
# Seed with the blockmodel solution; igraph numbers groups from 0
mem0 <- bmem - 1
mem <- leading.eigenvector.community.step(AllyGR, membership = mem0)$membership
# Iterate the leading-eigenvector step until membership stops changing
i <- 0
while (!all(mem0 == mem)) {
  mem0 <- mem
  mem <- leading.eigenvector.community.step(AllyGR, membership = mem0)$membership
  i <- i + 1
}
# Shift back to 1-based group labels and plot
mem <- mem + 1
detach("package:igraph")
bcols <- c("black", "white", "red", "blue", "yellow", "grey50", "brown",
           "green", "pink", "orange")
gplot(AllyNet, label = network.vertex.names(AllyNet), edge.col = "grey55",
      label.cex = .75, gmode = "graph", displayisolates = FALSE,
      vertex.col = bcols[mem])
# How Many Communities? Try mode = mds, eigen, geodist, rmds
gplot(SenNet2, label = network.vertex.names(SenNet), edge.col = "grey55",
      label.cex = .75, displayisolates = FALSE, mode = "geodist")
# Run a blockmodel with k = 6 positions
SENbm <- blockmodel(SenNet2, equiv.clust(SenNet2), k = 6)
# Node-indexed membership vector: size by block.membership (one entry per
# node) rather than block.content (a length-1 description string), and use
# the full component name order.vector instead of relying on `$` partial
# matching of `order.vec`.
bmem <- numeric(length(SENbm$block.membership))
bmem[SENbm$order.vector] <- SENbm$block.membership
bcols <- c("black", "white", "red", "blue", "yellow", "grey50")
# Plot, coloring nodes by block
gplot(SenNet2, label = network.vertex.names(SenNet), edge.col = "grey55",
      label.cex = .75, displayisolates = FALSE, mode = "geodist",
      vertex.col = bcols[bmem])
# Key Player Problem / Negative: greedy search for the set of k nodes whose
# removal maximizes the distance-based fragmentation F of the remainder.
# x      adjacency matrix (square); row/column names are node labels
# k      number of key players to remove
# n.sim  number of random restarts (defaults to 100)
# Returns a one-row data.frame: optimal_set (bracketed label string) and
# max_fragmentation (best F found across restarts).
# NOTE(review): relies on geodist() from the sna package being attached.
kpp.neg <- function(x,k,n.sim){
# Default node labels to indices when the matrix is unnamed
if (length(rownames(x)) == 0) rownames(x) <- c(1:nrow(x))
if (length(colnames(x)) == 0) colnames(x) <- c(1:ncol(x))
if (missing(n.sim)) n.sim <- 100
# One row per restart: chosen set label and its fragmentation score
sims <- array(0, dim=c(n.sim, 2))
for (sim in 1:n.sim){
# Populate set S with k random nodes, without replacement
set <- strsplit(sample(colnames(x), k, replace = FALSE, prob = NULL), " ")
# Remove nodes from network x and calculate F = fit
matrix <- x[-which(rownames(x) %in% set), -which(colnames(x) %in% set)]
# Reciprocal geodesic distances; diagonal entries (1/0 = Inf) are zeroed
# so self-pairs do not count (unreachable pairs are already 1/Inf = 0)
reciprocal <- matrix(1, nrow(matrix), ncol(matrix))/geodist(matrix)$gdist
reciprocal[reciprocal==Inf] <- 0
# Distance-weighted fragmentation: 1 - mean reciprocal distance over dyads
fit <- 1 - ((2*sum(reciprocal[lower.tri(reciprocal)]))/(nrow(matrix)*(nrow(matrix)-1)))
non.set <- strsplit(rownames(matrix), " ")
# One row per candidate (member, non-member) swap: label and delta-F
pairs <- array(0, dim=c(k*length(non.set), 2))
#colnames(pairs) <- c("Pair", "Delta_F")
terminate <- FALSE
# Greedy hill climbing: apply the best single swap until none improves F
while (terminate==FALSE){
i <- 1
for (u in set){
for (v in non.set){
# Record the candidate swap "u,v" and the fragmentation change it yields
pairs[i,1] <- paste(set[[which(set %in% u)]],non.set[[which(non.set %in% v)]],sep=',')
set.new <- set
set.new[[which(set %in% u)]] <- non.set[[which(non.set %in% v)]]
matrix <- x[-which(rownames(x) %in% set.new), -which(colnames(x) %in% set.new)]
reciprocal <- matrix(1, nrow(matrix), ncol(matrix))/geodist(matrix)$gdist
reciprocal[reciprocal==Inf] <- 0
fit.new <- 1 - ((2*sum(reciprocal[lower.tri(reciprocal)]))/(nrow(matrix)*(nrow(matrix)-1)))
delta.fit <- fit.new - fit
pairs[i,2] <- delta.fit
i <- i+1
}
}
# Pick the best swap; break ties uniformly at random
if (length(pairs[which(as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1])>1){
all.pairs <- strsplit(pairs[which(as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1], ",") # All pairs that improve fit equally
pair.max <- strsplit(sample(all.pairs, 1, replace=FALSE)[[1]], " ")
} else {pair.max <- strsplit(strsplit(pairs[which(as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1], ",")[[1]]," ")}
# Stop once no swap improves fragmentation
terminate <- max(as.numeric(pairs[,2]))<=0
if (max(as.numeric(pairs[,2]))>0){
# Apply the winning swap and rebuild the candidate structures
fit <- fit + max(as.numeric(pairs[,2]))
set[[which(set %in% pair.max)]] <- pair.max[[which(!(pair.max %in% set))]]
matrix <- x[-which(rownames(x) %in% set), -which(colnames(x) %in% set)]
non.set <- strsplit(rownames(matrix), " ")
pairs <- array(0, dim=c(k*length(non.set), 2))
colnames(pairs) <- c("Pair", "Delta_F")
}
}
# Record this restart's local optimum
output <- data.frame(optimal_set=paste("[",paste(as.character(set), collapse=","),"]", sep=''),
max_fragmentation=fit)
sims[[sim,1]] <- as.character(output$optimal_set)
sims[[sim,2]] <- output$max_fragmentation
}
# Across restarts, keep the sets achieving the best F; among those, return
# the most frequent (remaining ties broken at random)
count <- table(sims[,1][sims[,2]==max(as.numeric(sims[,2]))])
output <- data.frame(optimal_set=sample(names(count)[count==max(count)], 1, replace=FALSE),
max_fragmentation=max(as.numeric(sims[,2])))
return(output)
}
optimal_set | max_fragmentation |
---|---|
[HOLLY,MICHAEL] | 0.7715278 |
# Key Player Problem / Positive: greedy search for the set of k nodes that
# maximizes distance-weighted reach into the rest of the network.
# x      adjacency matrix (square); row/column names are node labels
# k      number of key players to select
# n.sim  number of random restarts (defaults to 100)
# Returns a one-row data.frame: optimal_set, distance_w_reach (the
# distance-weighted reach score), nodes (count of nodes at distance 1 from,
# or in, the set), net_prop (proportion of the network reached by the set).
kpp.pos <- function(x,k,n.sim){
# NOTE(review): require() only warns on failure; sna provides geodist()
require(sna)
if (missing(n.sim)) n.sim <- 100
# One row per restart: set label, reach score, node count, reach proportion
sims <- array(0, dim=c(n.sim, 4))
for (sim in 1:n.sim){
# Sub-function: column-wise maxima of a matrix
colmax <- function(matrix) apply(matrix, 2, max)
# Main code
# Default node labels to indices when the matrix is unnamed
if (length(rownames(x)) == 0) rownames(x) <- c(1:nrow(x))
if (length(colnames(x)) == 0) colnames(x) <- c(1:ncol(x))
# Populate set S with k random nodes, without replacement
set <- strsplit(sample(colnames(x), k, replace = FALSE, prob = NULL), " ")
# Reciprocal geodesic distances; diagonal entries (1/0 = Inf) are recoded
# to 1 so a node fully "reaches" itself
reciprocal <- matrix(1, nrow(x), ncol(x))/geodist(x)$gdist
reciprocal[reciprocal==Inf] <- 1
rownames(reciprocal) <- rownames(x)
colnames(reciprocal) <- colnames(x)
non.set <- strsplit(rownames(x[-which(rownames(x) %in% set), -which(colnames(x) %in% set)]), " ")
# fit holds three statistics: [1,1] distance-weighted reach, [1,2] number
# of nodes at reciprocal distance 1 (adjacent to or in the set), [1,3]
# proportion of nodes with positive reach from the set
fit <- matrix(0,1,3)
fit[1,1] <- (sum(colmax(reciprocal[which(rownames(reciprocal) %in% set),which(colnames(reciprocal) %in% strsplit(colnames(x), " "))]) ))/ncol(x)
fit[1,2] <- length(which(colmax(reciprocal[which(rownames(reciprocal) %in% set),which(colnames(reciprocal) %in% strsplit(colnames(x), " "))])==1))
fit[1,3] <- length(which(colmax(reciprocal[which(rownames(reciprocal) %in% set),which(colnames(reciprocal) %in% strsplit(colnames(x), " "))])>0))/ncol(x)
#fit.alt <- (sum(colmax(reciprocal[which(rownames(reciprocal) %in% set),which(colnames(reciprocal) %in% non.set)])))/length(non.set)
# One row per candidate (member, non-member) swap
pairs <- array(0, dim=c(k*length(non.set), 4))
terminate <- FALSE
# Greedy hill climbing on the distance-weighted reach score
while (terminate==FALSE){
i <- 1
for (u in set){
for (v in non.set){
# Record the candidate swap "u,v" and the scores it would yield
pairs[i,1] <- paste(set[[which(set %in% u)]],non.set[[which(non.set %in% v)]],sep=',')
set.new <- set
set.new[[which(set %in% u)]] <- non.set[[which(non.set %in% v)]]
non.set.new <- non.set
non.set.new[[which(non.set %in% v)]] <- set[[which(set %in% u)]]
fit.new <- matrix(0,1,3)
fit.new[1,1] <- (sum(colmax(reciprocal[which(rownames(reciprocal) %in% set.new),
which(colnames(reciprocal) %in% strsplit(colnames(x), " "))])))/ncol(x)
fit.new[1,2] <- length(which(colmax(reciprocal[which(rownames(reciprocal) %in% set.new),
which(colnames(reciprocal) %in% strsplit(colnames(x), " "))])==1))
fit.new[1,3] <- length(which(colmax(reciprocal[which(rownames(reciprocal) %in% set.new),
which(colnames(reciprocal) %in% strsplit(colnames(x), " "))])>0))/ncol(x)
delta.fit <- fit.new[1,1] - fit[1,1]
pairs[i, 2] <- delta.fit
pairs[i, 3] <- fit.new[,2]
pairs[i, 4] <- fit.new[,3]
i <- i+1
}
}
if (length(pairs[which( as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1]) > 1){ # Do we have multiple optimal pairs?
all.pairs <- strsplit(pairs[which(as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1], ",") # All pairs that improve fit equally
pair.max <- strsplit(sample(all.pairs, 1, replace=FALSE)[[1]], " ")
} else {pair.max <- strsplit(strsplit(pairs[which(as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1], ",")[[1]]," ")}
terminate <- max(as.numeric(pairs[,2]))<=0 # terminate if delta.fit <= 0
if (max(as.numeric(pairs[,2]))>0){
# Apply the winning swap: update scores, the set, and the non-set
fit[1,1] <- as.numeric(fit[1,1] + max(as.numeric(pairs[,2])))
fit[1,2] <- as.numeric(pairs[which(pairs[,1] == paste(pair.max, collapse=",")),3])
fit[1,3] <- as.numeric(pairs[which(pairs[,1] == paste(pair.max, collapse=",")),4])
set[[which(set %in% pair.max)]] <- pair.max[[which(!(pair.max %in% set))]]
non.set[[which(non.set %in% pair.max)]] <- pair.max[[which(!(pair.max %in% non.set))]]
pairs <- array(0, dim=c(k*length(non.set), 4))
colnames(pairs) <- c("Pair", "Delta_F", "Nodes", "Net_Prop")
}
}
# Record this restart's local optimum
output <- data.frame(optimal_set=paste("[",paste(as.character(set), collapse=","),"]", sep=''),
distance_w_reach=fit[,1], nodes = fit[,2], net_prop = fit[,3] )
sims[[sim,1]] <- as.character(output$optimal_set)
sims[[sim,2]] <- output$distance_w_reach
sims[[sim,3]] <- output$nodes
sims[[sim,4]] <- output$net_prop
}
# Across restarts, keep sets achieving the best reach score; among those,
# return the most frequent (remaining ties broken at random)
count <- table(sims[,1][sims[,2]==max(as.numeric(sims[,2]))])
output <- data.frame(optimal_set=sample(names(count)[count==max(count)], 1, replace=FALSE),
distance_w_reach=max(as.numeric(sims[,2])), nodes = 0, net_prop = 0)
output$nodes <- as.numeric(sample(sims[,3][sims[,1] == as.character(output$optimal_set)], 1, replace=FALSE))
output$net_prop <- as.numeric(sample(sims[,4][sims[,1] == as.character(output$optimal_set)], 1, replace=FALSE))
return(output)
}
optimal_set | distance_w_reach | nodes | net_prop |
---|---|---|---|
[JOHN,BILL,LEE] | 0.8148148 | 12 | 1 |
Now assume the network is stochastic; We might have hypotheses about the stochastic process:
- Nodes that are similar are likely to form ties: Homophily
- Directed edges are likely to be reciprocated: Reciprocity
- A friend of a friend is a friend: Transitivity
What we have done thus far treats the network as fixed. This distinction defines the traditional divide between social and mathematical sciences approaches to network analysis.
Autocorrelation is a measure of how much those near to us influence our outcomes. This is easiest to understand in terms of time. Over time, the value of a particular stock - or any other thing of value - is very likely related to the value it held yesterday. We can observe similar situations with respect to physical proximity. The wealth of particular cities can be related to the wealth of the cities that are adjacent to them. Although it is possible for a wealthy city to be adjacent to much poorer cities, it is also likely that any increases in the wealth of a particular city can cause spillovers into neighboring communities. The spillover may not always be direct, but there is likely to be a relationship there. Network autocorrelation works in much the same way. Though, rather than using physical proximity, network autocorrelation uses network proximity to test whether individuals’ continuous attributes are related to those of their neighbors. In other words, we can ask questions about how a particular attribute appears to be related to the ties within that network. For example: Does wealth attract wealth? Do people with dissimilar wealth tend to form ties? Or, does wealth appear to have little to do with one’s immediate neighborhood?
Moran’s I ranges from -1 to 1 and is interpreted like Pearson’s Correlation Coefficient.
Using sna
package, nacf
computes the sample network covariance/correlation function for a specified variable on a given input network. Moran’s I and Geary’s C statistics at multiple orders may be computed as well.
# Extract the "wealth" attribute from the Florentine business network.
data(florentine)
family.wealth <- get.vertex.attribute(flobusiness, "wealth")
# Network autocorrelation of family wealth over the marriage network.
# nacf() returns autocorrelations by order starting at 0, so [2] picks the
# first-order Moran's I.
nacf(flomarriage, family.wealth, type="moran", mode="graph")[2]
## 1
## -0.3107353
Geary’s C ranges from 0 to 2.
# Compute the network correlation:
# Test the correlation using qaptest:
# qaptest(A.List.Of.Networks, Some.Network.Function, reps=100)
# where reps is the number of permuted graphs to be generated.
# Out of 100 permutations, how many had a correlation >= or <= the
# observed value?
# (the parameters g1=1 g2=2 tell the test to compare the first & second
# element of the list of networks that is the first argument of qaptest)
flo.qap <- qaptest(list(flomarriage,flobusiness), gcor, g1=1, g2=2, reps=100)
flo.qap
##
## QAP Test Results
##
## Estimated p-values:
## p(f(perm) >= f(d)): 0
## p(f(perm) <= f(d)): 1
# Create an attribute network: families are tied when they belong to the
# same political faction/party.
party <- rio::import("./data/florentine_attributes_subset.xlsx")
# Two-mode (family x faction) edge list converted to an igraph object
g <- igraph::graph.data.frame(party, directed = FALSE)
plot(g)
# Tag the two vertex classes so the graph is treated as bipartite
igraph::V(g)$type <- igraph::bipartite_mapping(g)$type
# Project onto the family mode: ties = shared membership
proj <- igraph::bipartite.projection(g)
Party_Membership <- proj$proj1
floparty <- snatools::as_network(Party_Membership)
# Make sure you got the right network
plot(floparty, displaylabels = TRUE)
Since the ties in this network are binary, the MR-QAP is in essence estimating a Linear Probability Model.
# MR-QAP linear regression: with binary ties this is in essence a linear
# probability model, tested against QAP permutations.
nl<-netlm(flobusiness, # Dependent variable/network
list(flomarriage, floparty), # List the independent variables/networks
reps=1000) # select the number of permutations
# Examine the results
summary(nl)
##
## OLS Network Model
##
## Residuals:
## 0% 25% 50% 75% 100%
## -0.46523297 -0.17437276 -0.02941059 -0.02941059 0.97058941
##
## Coefficients:
## Estimate Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept) 0.02941059 0.600 0.400 0.420
## x1 0.29086022 1.000 0.000 0.000
## x2 0.14496217 0.983 0.017 0.018
##
## Residual standard error: 0.3016 on 237 degrees of freedom
## Multiple R-squared: 0.1785 Adjusted R-squared: 0.1716
## F-statistic: 25.75 on 2 and 237 degrees of freedom, p-value: 7.616e-11
##
##
## Test Diagnostics:
##
## Null Hypothesis: qap
## Replications: 1000
## Coefficient Distribution Summary:
##
## (intercept) x1 x2
## Min -2.25541 -3.98010 -4.93651
## 1stQ 0.32231 -1.03740 -0.88153
## Median 0.97098 -0.11268 -0.02298
## Mean 0.91648 -0.01691 0.04040
## 3rdQ 1.59844 0.91773 0.86507
## Max 3.80390 5.03770 5.79937
# MR-QAP logistic regression: logit analogue of netlm for a binary
# dependent network, with the same predictors and QAP permutation tests
nlog<-netlogit(flobusiness, list(flomarriage, floparty),reps=1000)
# Examine the results
summary(nlog)
##
## Network Logit Model
##
## Coefficients:
## Estimate Exp(b) Pr(<=b) Pr(>=b) Pr(>=|b|)
## (intercept) -3.154996 0.04263857 0.000 1.000 0.000
## x1 1.937609 6.94213207 0.998 0.002 0.002
## x2 1.379351 3.97232431 0.979 0.021 0.023
##
## Goodness of Fit Statistics:
##
## Null deviance: 332.7106 on 240 degrees of freedom
## Residual deviance: 145.2121 on 237 degrees of freedom
## Chi-Squared test of fit improvement:
## 187.4985 on 3 degrees of freedom, p-value 0
## AIC: 151.2121 BIC: 161.654
## Pseudo-R^2 Measures:
## (Dn-Dr)/(Dn-Dr+dfn): 0.4385946
## (Dn-Dr)/Dn: 0.5635484
## Contingency Table (predicted (rows) x actual (cols)):
##
## Actual
## Predicted 0 1
## 0 198 20
## 1 12 10
##
## Total Fraction Correct: 0.8666667
## Fraction Predicted 1s Correct: 0.4545455
## Fraction Predicted 0s Correct: 0.9082569
## False Negative Rate: 0.6666667
## False Positive Rate: 0.05714286
##
## Test Diagnostics:
##
## Null Hypothesis: qap
## Replications: 1000
## Distribution Summary:
##
## (intercept) x1 x2
## Min -8.276983 -2.436778 -3.371643
## 1stQ -7.712238 -0.958048 -0.953743
## Median -7.493672 -0.005606 -0.021639
## Mean -7.492062 0.102127 0.011585
## 3rdQ -7.280232 1.012739 0.826992
## Max -6.604616 4.403690 4.755617
## Import the Add Health data
AHS_Base <- rio::import('./data/ahs_wpvar.csv')

# Keep the ego id, up to five male/female friend nominations, and the
# attribute columns, restricted to community 1.
AHS_adjlist <- AHS_Base %>%
  select(ego_nid, mfnid_1:mfnid_5, ffnid_1:ffnid_5,
         grade, sex, PSMOKES, commcnt) %>%
  filter(commcnt == 1)

# Reshape the nominations into an edge list (one row per ego-alter tie),
# dropping missing nominations and the 99999 "no nomination" code.
AHS_Edges <- AHS_adjlist %>%
  rename(id = ego_nid, gender = sex) %>%
  gather(Alter_Label, Target, mfnid_1:mfnid_5, ffnid_1:ffnid_5,
         na.rm = TRUE) %>%
  filter(Target != 99999) %>%
  select(id, Target)

# Build the network and attach vertex attributes
library(statnet)
g <- as.network(AHS_Edges)
g %v% "grade" <- AHS_adjlist$grade
g %v% "sex" <- AHS_adjlist$sex
g %v% "smokes" <- AHS_adjlist$PSMOKES

# Degree scores, stored as vertex attributes
outdegree <- degree(g, cmode = "outdegree")
indegree <- degree(g, cmode = "indegree")
g %v% "indegree" <- indegree
g %v% "outdegree" <- outdegree
g %v% "degree" <- degree(g)
plot(g, vertex.col = "grade")
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges
##
## Iterations: 6 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -2.7275 0.0591 0 -46.15 <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 2293 on 4969 degrees of freedom
##
## AIC: 2295 BIC: 2302 (Smaller is better.)
# Interpretation of the edges-only model coefficient: the log-odds of any
# tie existing is -2.7275; applying the inverse logit gives the implied
# tie probability, exp(-2.7275)/(1+exp(-2.7275)) = .0613
exp(-2.7275) / (1 + exp(-2.7275))
## [1] 0.06137001
## [1] 0.06136821
# Add homophily covariates on sex, grade, and smoking status
mod.homoph <- ergm(
  g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes")
)
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes")
##
## Iterations: 7 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -4.5787 0.1850 0 -24.752 < 1e-04 ***
## nodematch.sex 0.4621 0.1311 0 3.524 0.000425 ***
## nodematch.grade 3.0493 0.1422 0 21.446 < 1e-04 ***
## nodematch.smokes 0.4062 0.1483 0 2.738 0.006174 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 1712 on 4966 degrees of freedom
##
## AIC: 1720 BIC: 1746 (Smaller is better.)
# Add simple volume covariates: per-node sender and receiver effects
# (a p1-style specification)
mod.p1 <- ergm(g ~ edges + nodematch("sex") + nodematch("grade") +
                 nodematch("smokes") + sender + receiver)
## Observed statistic(s) sender4, sender5, sender15, sender17, sender42, sender57, sender63, receiver4, receiver17, and receiver44 are at their smallest attainable values. Their coefficients will be fixed at -Inf.
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes") +
## sender + receiver
##
## Iterations: 7 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -6.54235 1.39273 0 -4.698 <1e-04 ***
## nodematch.sex 0.72340 0.15509 0 4.664 <1e-04 ***
## nodematch.grade 3.60634 0.17189 0 20.981 <1e-04 ***
## nodematch.smokes 1.27256 0.20774 0 6.126 <1e-04 ***
## sender2 2.22577 1.25583 0 1.772 0.0763 .
## sender3 0.85358 1.31365 0 0.650 0.5158
## sender4 -Inf 0.00000 0 -Inf <1e-04 ***
## sender5 -Inf 0.00000 0 -Inf <1e-04 ***
## sender6 1.17264 1.29221 0 0.907 0.3642
## sender7 1.82007 1.28618 0 1.415 0.1570
## sender8 2.33419 1.27155 0 1.836 0.0664 .
## sender9 2.03254 1.27211 0 1.598 0.1101
## sender10 -0.12412 1.41704 0 -0.088 0.9302
## sender11 -0.74302 1.60190 0 -0.464 0.6428
## sender12 -1.01392 1.57899 0 -0.642 0.5208
## sender13 1.44915 1.30637 0 1.109 0.2673
## sender14 1.33064 1.32471 0 1.004 0.3152
## sender15 -Inf 0.00000 0 -Inf <1e-04 ***
## sender16 2.34335 1.25453 0 1.868 0.0618 .
## sender17 -Inf 0.00000 0 -Inf <1e-04 ***
## sender18 1.99117 1.29061 0 1.543 0.1229
## sender19 -0.07322 1.40150 0 -0.052 0.9583
## sender20 1.52207 1.28439 0 1.185 0.2360
## sender21 2.09979 1.26439 0 1.661 0.0968 .
## sender22 0.88576 1.34555 0 0.658 0.5104
## sender23 1.46870 1.28751 0 1.141 0.2540
## sender24 0.88576 1.34555 0 0.658 0.5104
## sender25 1.25861 1.34235 0 0.938 0.3484
## sender26 1.34868 1.27622 0 1.057 0.2906
## sender27 0.51817 1.35188 0 0.383 0.7015
## sender28 0.30073 1.35208 0 0.222 0.8240
## sender29 2.43662 1.28473 0 1.897 0.0579 .
## sender30 1.36028 1.32210 0 1.029 0.3035
## sender31 1.88595 1.28043 0 1.473 0.1408
## sender32 0.72578 1.32209 0 0.549 0.5830
## sender33 2.09444 1.25897 0 1.664 0.0962 .
## sender34 2.92916 1.26585 0 2.314 0.0207 *
## sender35 1.48498 1.28646 0 1.154 0.2484
## sender36 2.04011 1.27129 0 1.605 0.1085
## sender37 1.05055 1.32819 0 0.791 0.4290
## sender38 2.06804 1.28481 0 1.610 0.1075
## sender39 1.03670 1.40765 0 0.736 0.4614
## sender40 1.71434 1.26586 0 1.354 0.1756
## sender41 -0.86750 1.56951 0 -0.553 0.5805
## sender42 -Inf 0.00000 0 -Inf <1e-04 ***
## sender43 1.97972 1.28468 0 1.541 0.1233
## sender44 1.84499 1.28992 0 1.430 0.1526
## sender45 0.37849 1.35551 0 0.279 0.7801
## sender46 1.20985 1.30077 0 0.930 0.3523
## sender47 -0.07322 1.40150 0 -0.052 0.9583
## sender48 0.83024 1.30801 0 0.635 0.5256
## sender49 1.82994 1.29996 0 1.408 0.1592
## sender50 0.39527 1.35435 0 0.292 0.7704
## sender51 2.59763 1.28047 0 2.029 0.0425 *
## sender52 2.39671 1.28765 0 1.861 0.0627 .
## sender53 0.74770 1.30858 0 0.571 0.5677
## sender54 0.63491 1.34110 0 0.473 0.6359
## sender55 0.87978 1.41662 0 0.621 0.5346
## sender56 1.10414 1.30234 0 0.848 0.3965
## sender57 -Inf 0.00000 0 -Inf <1e-04 ***
## sender58 2.32900 1.29162 0 1.803 0.0714 .
## sender59 0.96191 1.31592 0 0.731 0.4648
## sender60 1.20635 1.29933 0 0.928 0.3532
## sender61 2.62317 1.27827 0 2.052 0.0402 *
## sender62 -0.79590 1.57732 0 -0.505 0.6138
## sender63 -Inf 0.00000 0 -Inf <1e-04 ***
## sender64 1.37381 1.28712 0 1.067 0.2858
## sender65 2.34097 1.29990 0 1.801 0.0717 .
## sender66 2.35099 1.28906 0 1.824 0.0682 .
## sender67 0.63782 1.41165 0 0.452 0.6514
## sender68 0.77762 1.32296 0 0.588 0.5567
## sender69 1.78102 1.27294 0 1.399 0.1618
## sender70 0.09125 1.43162 0 0.064 0.9492
## sender71 0.86733 1.45067 0 0.598 0.5499
## receiver2 -1.43489 1.07362 0 -1.337 0.1814
## receiver3 -1.69160 1.06305 0 -1.591 0.1115
## receiver4 -Inf 0.00000 0 -Inf <1e-04 ***
## receiver5 -0.79045 0.97874 0 -0.808 0.4193
## receiver6 -1.66543 1.06255 0 -1.567 0.1170
## receiver7 -0.66723 0.95563 0 -0.698 0.4851
## receiver8 0.22141 0.91166 0 0.243 0.8081
## receiver9 0.36172 0.86484 0 0.418 0.6758
## receiver10 -0.87903 0.95032 0 -0.925 0.3550
## receiver11 -1.25341 1.00146 0 -1.252 0.2107
## receiver12 -2.50486 1.27755 0 -1.961 0.0499 *
## receiver13 -1.65921 1.08480 0 -1.529 0.1261
## receiver14 -0.19915 0.93421 0 -0.213 0.8312
## receiver15 -0.21006 0.90663 0 -0.232 0.8168
## receiver16 -1.03081 0.99540 0 -1.036 0.3004
## receiver17 -Inf 0.00000 0 -Inf <1e-04 ***
## receiver18 -1.00664 1.02771 0 -0.979 0.3273
## receiver19 -0.94528 0.92624 0 -1.021 0.3075
## receiver20 0.13942 0.88488 0 0.158 0.8748
## receiver21 -1.59147 1.08360 0 -1.469 0.1419
## receiver22 -1.73458 1.09012 0 -1.591 0.1116
## receiver23 -0.12357 0.88993 0 -0.139 0.8896
## receiver24 -1.73458 1.09012 0 -1.591 0.1116
## receiver25 -0.42157 0.98180 0 -0.429 0.6676
## receiver26 -1.22260 0.96921 0 -1.261 0.2072
## receiver27 0.32993 0.87520 0 0.377 0.7062
## receiver28 -2.47186 1.27910 0 -1.932 0.0533 .
## receiver29 0.47522 0.91342 0 0.520 0.6029
## receiver30 0.12959 0.91004 0 0.142 0.8868
## receiver31 0.51347 0.87020 0 0.590 0.5551
## receiver32 -1.66313 1.06639 0 -1.560 0.1189
## receiver33 0.31993 0.87070 0 0.367 0.7133
## receiver34 1.66643 0.85103 0 1.958 0.0502 .
## receiver35 0.12686 0.87443 0 0.145 0.8846
## receiver36 0.57683 0.85120 0 0.678 0.4980
## receiver37 -0.54970 0.94518 0 -0.582 0.5608
## receiver38 0.20867 0.91396 0 0.228 0.8194
## receiver39 -0.30300 0.98849 0 -0.307 0.7592
## receiver40 -1.12114 0.97563 0 -1.149 0.2505
## receiver41 -1.32436 0.96960 0 -1.366 0.1720
## receiver42 0.75364 0.87979 0 0.857 0.3917
## receiver43 -0.14799 0.92850 0 -0.159 0.8734
## receiver44 -Inf 0.00000 0 -Inf <1e-04 ***
## receiver45 -1.25725 0.99393 0 -1.265 0.2059
## receiver46 -0.15188 0.90265 0 -0.168 0.8664
## receiver47 -0.94528 0.92624 0 -1.021 0.3075
## receiver48 -0.34516 0.87911 0 -0.393 0.6946
## receiver49 -0.28730 0.97578 0 -0.294 0.7684
## receiver50 -0.47568 0.91303 0 -0.521 0.6024
## receiver51 1.82942 0.84156 0 2.174 0.0297 *
## receiver52 0.59043 0.90601 0 0.652 0.5146
## receiver53 -1.74439 1.05338 0 -1.656 0.0977 .
## receiver54 0.41471 0.84789 0 0.489 0.6248
## receiver55 -1.61996 1.28711 0 -1.259 0.2082
## receiver56 -1.72440 1.07485 0 -1.604 0.1086
## receiver57 -1.70193 1.08087 0 -1.575 0.1153
## receiver58 0.05836 0.94460 0 0.062 0.9507
## receiver59 -0.63772 0.95194 0 -0.670 0.5029
## receiver60 0.34174 0.86306 0 0.396 0.6921
## receiver61 1.19676 0.87435 0 1.369 0.1711
## receiver62 -1.68211 1.08277 0 -1.554 0.1203
## receiver63 -0.43936 1.06709 0 -0.412 0.6805
## receiver64 -2.49890 1.28600 0 -1.943 0.0520 .
## receiver65 -0.37488 1.01075 0 -0.371 0.7107
## receiver66 0.68781 0.90364 0 0.761 0.4466
## receiver67 -0.02045 0.94255 0 -0.022 0.9827
## receiver68 -0.77370 0.94078 0 -0.822 0.4108
## receiver69 0.15329 0.88341 0 0.174 0.8622
## receiver70 -1.75929 1.08335 0 -1.624 0.1044
## receiver71 -0.47914 1.11343 0 -0.430 0.6670
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 2292 on 4826 degrees of freedom
##
## AIC: 2580 BIC: 3517 (Smaller is better.)
##
## Warning: The following terms have infinite coefficient estimates:
## sender4 sender5 sender15 sender17 sender42 sender57 sender63 receiver4 receiver17 receiver44
# The p1 model predicts better, but at a huge BIC cost! Simplify by
# replacing per-node effects with degree covariates plus reciprocity.
g %v% "indegree" <- indegree
g %v% "outdegree" <- outdegree
mod.p2 <- ergm(g ~ edges + nodematch("sex") + nodematch("grade") +
                 nodematch("smokes") + nodeicov("indegree") +
                 nodeocov("outdegree") + mutual)
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 20:
## Optimizing with step length 0.713993506134064.
## The log-likelihood improved by 3.018.
## Iteration 2 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.1633.
## Step length converged once. Increasing MCMC sample size.
## Iteration 3 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.00925.
## Step length converged twice. Stopping.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes") +
## nodeicov("indegree") + nodeocov("outdegree") + mutual
##
## Iterations: 3 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -8.35498 0.39260 0 -21.281 <1e-04 ***
## nodematch.sex 0.53119 0.13287 0 3.998 <1e-04 ***
## nodematch.grade 2.60635 0.15671 0 16.632 <1e-04 ***
## nodematch.smokes 0.87509 0.15571 0 5.620 <1e-04 ***
## nodeicov.indegree 0.29132 0.02890 0 10.082 <1e-04 ***
## nodeocov.outdegree 0.33057 0.03434 0 9.628 <1e-04 ***
## mutual 2.33339 0.23244 0 10.039 <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 1321 on 4963 degrees of freedom
##
## AIC: 1335 BIC: 1381 (Smaller is better.)
# Allow reciprocity to differ by sex: mutual(same = "sex", diff = TRUE)
# fits a separate mutuality term within each sex category
mod.gendmut <- ergm(g ~ edges + nodematch("sex") + nodematch("grade") +
                      nodematch("smokes") + nodeicov("indegree") +
                      nodeocov("outdegree") +
                      mutual(same = "sex", diff = TRUE))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 20:
## Optimizing with step length 0.881023501416677.
## The log-likelihood improved by 2.597.
## Iteration 2 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.5207.
## Step length converged once. Increasing MCMC sample size.
## Iteration 3 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.04907.
## Step length converged twice. Stopping.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes") +
## nodeicov("indegree") + nodeocov("outdegree") + mutual(same = "sex",
## diff = TRUE)
##
## Iterations: 3 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -8.20573 0.37771 0 -21.725 <1e-04 ***
## nodematch.sex -0.15054 0.17832 0 -0.844 0.399
## nodematch.grade 2.84346 0.16092 0 17.670 <1e-04 ***
## nodematch.smokes 0.95093 0.16384 0 5.804 <1e-04 ***
## nodeicov.indegree 0.30873 0.02787 0 11.079 <1e-04 ***
## nodeocov.outdegree 0.35157 0.03374 0 10.419 <1e-04 ***
## mutual.same.sex.1 2.77133 0.36424 0 7.609 <1e-04 ***
## mutual.same.sex.2 2.54105 0.34048 0 7.463 <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 1342 on 4962 degrees of freedom
##
## AIC: 1358 BIC: 1410 (Smaller is better.)
# Let's try a transitivity term: add `triangle` to the model. Raw triangle
# counts are notoriously degeneracy-prone in ERGMs, so we cap MCMLE at 2
# iterations (MCMLE.maxit = 2) to keep the (likely non-converging) run short.
mod.tran <- ergm(
  g ~ edges + nodematch("sex") + nodematch("grade")
    + nodematch("smokes") + nodeicov("indegree") + nodeocov("outdegree")
    + mutual + triangle,
  control = control.ergm(MCMLE.maxit = 2)
)
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 2:
## Optimizing with step length 0.824689501238882.
## The log-likelihood improved by 16.87.
## Iteration 2 of at most 2:
## Optimizing with step length 0.
## The log-likelihood improved by < 0.0001.
## MCMLE estimation did not converge after 2 iterations. The estimated coefficients may not be accurate. Estimation may be resumed by passing the coefficients as initial values; see 'init' under ?control.ergm for details.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes") +
## nodeicov("indegree") + nodeocov("outdegree") + mutual + triangle
##
## Iterations: 2 out of 2
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -8.69220 0.53825 30 -16.149 <1e-04 ***
## nodematch.sex 1.19523 0.61457 2 1.945 0.0518 .
## nodematch.grade 1.99104 0.28221 42 7.055 <1e-04 ***
## nodematch.smokes 0.86708 0.72095 6 1.203 0.2291
## nodeicov.indegree 0.27464 0.27408 1 1.002 0.3163
## nodeocov.outdegree 0.29896 0.25075 1 1.192 0.2332
## mutual 2.78462 0.26077 20 10.679 <1e-04 ***
## triangle 0.10297 0.00205 4 50.222 <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 1374 on 4962 degrees of freedom
##
## AIC: 1390 BIC: 1442 (Smaller is better.)
## Sample statistics summary:
##
## Iterations = 16384:1063936
## Thinning interval = 1024
## Number of chains = 1
## Sample size per chain = 1024
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## edges -4509.3 43.367 1.35523 27.4186
## nodematch.sex -2201.1 19.666 0.61457 10.4328
## nodematch.grade -624.7 1.241 0.03878 0.3835
## nodematch.smokes -3105.3 35.080 1.09624 21.9263
## nodeicov.indegree -19051.0 136.838 4.27620 68.1539
## nodeocov.outdegree -19218.4 138.503 4.32822 99.0748
## mutual -2321.3 22.250 0.69532 15.6739
## triangle -434870.8 6125.990 191.43718 4426.4630
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## edges -4529 -4526 -4525 -4525 -4388
## nodematch.sex -2211 -2209 -2208 -2208 -2145
## nodematch.grade -626 -625 -625 -625 -621
## nodematch.smokes -3121 -3119 -3118 -3118 -3007
## nodeicov.indegree -19114 -19104 -19100 -19100 -18665
## nodeocov.outdegree -19283 -19274 -19268 -19268 -18829
## mutual -2331 -2330 -2330 -2330 -2261
## triangle -437281 -437271 -437271 -437271 -418503
##
##
## Sample statistics cross-correlations:
## edges nodematch.sex nodematch.grade
## edges 1.0000000 0.9988032 0.9433416
## nodematch.sex 0.9988032 1.0000000 0.9433881
## nodematch.grade 0.9433416 0.9433881 1.0000000
## nodematch.smokes 0.9998941 0.9989236 0.9447889
## nodeicov.indegree 0.9995541 0.9991413 0.9432099
## nodeocov.outdegree 0.9997011 0.9985191 0.9418981
## mutual 0.9969002 0.9938991 0.9360663
## triangle 0.9960616 0.9922231 0.9330684
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## edges 0.9998941 0.9995541 0.9997011
## nodematch.sex 0.9989236 0.9991413 0.9985191
## nodematch.grade 0.9447889 0.9432099 0.9418981
## nodematch.smokes 1.0000000 0.9994449 0.9995926
## nodeicov.indegree 0.9994449 1.0000000 0.9991343
## nodeocov.outdegree 0.9995926 0.9991343 1.0000000
## mutual 0.9964466 0.9953367 0.9959886
## triangle 0.9954818 0.9941390 0.9951796
## mutual triangle
## edges 0.9969002 0.9960616
## nodematch.sex 0.9938991 0.9922231
## nodematch.grade 0.9360663 0.9330684
## nodematch.smokes 0.9964466 0.9954818
## nodeicov.indegree 0.9953367 0.9941390
## nodeocov.outdegree 0.9959886 0.9951796
## mutual 1.0000000 0.9997067
## triangle 0.9997067 1.0000000
##
## Sample statistics auto-correlation:
## Chain 1
## edges nodematch.sex nodematch.grade nodematch.smokes
## Lag 0 1.0000000 1.0000000 1.0000000 1.0000000
## Lag 1024 0.9964331 0.9963597 0.9766710 0.9965261
## Lag 2048 0.9926675 0.9929122 0.9571515 0.9928165
## Lag 3072 0.9884650 0.9890803 0.9382670 0.9886378
## Lag 4096 0.9837863 0.9846509 0.9193824 0.9839010
## Lag 5120 0.9788394 0.9800698 0.9017677 0.9789091
## nodeicov.indegree nodeocov.outdegree mutual triangle
## Lag 0 1.0000000 1.0000000 1.0000000 1.0000000
## Lag 1024 0.9965225 0.9961866 0.9960680 0.9962625
## Lag 2048 0.9928775 0.9923096 0.9922663 0.9922301
## Lag 3072 0.9888387 0.9880514 0.9880966 0.9878838
## Lag 4096 0.9842480 0.9834518 0.9835571 0.9832187
## Lag 5120 0.9794242 0.9785813 0.9788004 0.9782606
##
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## edges nodematch.sex nodematch.grade
## 123.79 79.32 11.53
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## 107.19 86.23 105.61
## mutual triangle
## 477.71 4877.96
##
## Individual P-values (lower = worse):
## edges nodematch.sex nodematch.grade
## 0.000000e+00 0.000000e+00 8.861949e-31
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## 0.000000e+00 0.000000e+00 0.000000e+00
## mutual triangle
## 0.000000e+00 0.000000e+00
## Joint P-value (lower = worse): 1.757294e-182 .
## Warning in formals(fun): argument is not a function
##
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
## Warning: You appear to be calling simulate.ergm() directly. simulate.ergm()
## is a method, and will not be exported in a future version of 'ergm'. Use
## simulate() instead, or getS3method() if absolutely necessary.
## Warning: You appear to be calling simulate.formula() directly.
## simulate.formula() is a method, and will not be exported in a future
## version of 'ergm'. Use simulate() instead, or getS3method() if absolutely
## necessary.
# Replace the unstable `triangle` term with `gwesp` (geometrically weighted
# edgewise shared partners), which captures transitivity while being far less
# prone to model degeneracy than a raw triangle count.
mod.gwesp <- ergm(
  g ~ edges + nodematch("sex") + nodematch("grade")
    + nodematch("smokes") + nodeicov("indegree") + nodeocov("outdegree")
    + mutual + gwesp
)
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 20:
## Optimizing with step length 0.445437751622392.
## The log-likelihood improved by 2.689.
## Iteration 2 of at most 20:
## Optimizing with step length 0.943302294176604.
## The log-likelihood improved by 3.117.
## Iteration 3 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.4807.
## Step length converged once. Increasing MCMC sample size.
## Iteration 4 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.05456.
## Step length converged twice. Stopping.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes") +
## nodeicov("indegree") + nodeocov("outdegree") + mutual + gwesp
##
## Iterations: 4 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -8.01473 0.37151 0 -21.573 <1e-04 ***
## nodematch.sex 0.50621 0.12844 0 3.941 <1e-04 ***
## nodematch.grade 2.20086 0.17799 0 12.365 <1e-04 ***
## nodematch.smokes 0.77823 0.14116 0 5.513 <1e-04 ***
## nodeicov.indegree 0.23843 0.03030 0 7.870 <1e-04 ***
## nodeocov.outdegree 0.27318 0.03336 0 8.189 <1e-04 ***
## mutual 2.05037 0.24496 0 8.370 <1e-04 ***
## gwesp 0.62792 0.15313 0 4.101 <1e-04 ***
## gwesp.decay 0.08652 0.17449 0 0.496 0.62
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 1296 on 4961 degrees of freedom
##
## AIC: 1314 BIC: 1373 (Smaller is better.)
## Sample statistics summary:
##
## Iterations = 16384:4209664
## Thinning interval = 1024
## Number of chains = 1
## Sample size per chain = 4096
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## edges 0.07935 17.924 0.2801 0.8829
## nodematch.sex 0.54321 13.047 0.2039 0.6322
## nodematch.grade -0.08008 14.306 0.2235 0.8135
## nodematch.smokes -1.61597 15.304 0.2391 0.8190
## nodeicov.indegree 5.86548 104.676 1.6356 4.9985
## nodeocov.outdegree 7.17627 101.860 1.5916 4.8338
## mutual -0.35376 7.415 0.1159 0.4284
## gwesp -0.01057 21.751 0.3399 1.1789
## gwesp.decay 0.59996 12.388 0.1936 0.5946
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## edges -35.00 -12.000 0.000 12.00 36.00
## nodematch.sex -26.00 -8.000 1.000 10.00 25.00
## nodematch.grade -27.62 -10.000 -1.000 10.00 28.00
## nodematch.smokes -33.00 -11.000 -2.000 9.00 28.00
## nodeicov.indegree -202.62 -65.000 6.000 77.00 211.62
## nodeocov.outdegree -191.00 -61.000 8.000 76.00 208.00
## mutual -15.00 -5.000 0.000 4.00 14.00
## gwesp -41.91 -14.818 -0.400 14.42 43.15
## gwesp.decay -24.42 -7.391 0.691 9.17 24.68
##
##
## Sample statistics cross-correlations:
## edges nodematch.sex nodematch.grade
## edges 1.0000000 0.7925771 0.8099673
## nodematch.sex 0.7925771 1.0000000 0.6295395
## nodematch.grade 0.8099673 0.6295395 1.0000000
## nodematch.smokes 0.8638189 0.6811886 0.7164714
## nodeicov.indegree 0.9161509 0.7145758 0.6594172
## nodeocov.outdegree 0.9386747 0.7165605 0.6969631
## mutual 0.7658096 0.6237399 0.7780840
## gwesp 0.9135359 0.7156119 0.8263799
## gwesp.decay 0.7660763 0.5737103 0.7764599
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## edges 0.8638189 0.9161509 0.9386747
## nodematch.sex 0.6811886 0.7145758 0.7165605
## nodematch.grade 0.7164714 0.6594172 0.6969631
## nodematch.smokes 1.0000000 0.7266461 0.7831243
## nodeicov.indegree 0.7266461 1.0000000 0.8725607
## nodeocov.outdegree 0.7831243 0.8725607 1.0000000
## mutual 0.6776136 0.6880536 0.6973553
## gwesp 0.7833972 0.8620165 0.8742512
## gwesp.decay 0.6615952 0.7411792 0.7436687
## mutual gwesp gwesp.decay
## edges 0.7658096 0.9135359 0.7660763
## nodematch.sex 0.6237399 0.7156119 0.5737103
## nodematch.grade 0.7780840 0.8263799 0.7764599
## nodematch.smokes 0.6776136 0.7833972 0.6615952
## nodeicov.indegree 0.6880536 0.8620165 0.7411792
## nodeocov.outdegree 0.6973553 0.8742512 0.7436687
## mutual 1.0000000 0.8038057 0.7318186
## gwesp 0.8038057 1.0000000 0.8586102
## gwesp.decay 0.7318186 0.8586102 1.0000000
##
## Sample statistics auto-correlation:
## Chain 1
## edges nodematch.sex nodematch.grade nodematch.smokes
## Lag 0 1.0000000 1.0000000 1.0000000 1.0000000
## Lag 1024 0.7057531 0.7080318 0.8116032 0.7099789
## Lag 2048 0.5573692 0.5637193 0.6927091 0.5749706
## Lag 3072 0.4611024 0.4609356 0.5985411 0.4733828
## Lag 4096 0.3916500 0.3895013 0.5234031 0.4136274
## Lag 5120 0.3477180 0.3389520 0.4593983 0.3684467
## nodeicov.indegree nodeocov.outdegree mutual gwesp
## Lag 0 1.0000000 1.0000000 1.0000000 1.0000000
## Lag 1024 0.7036684 0.6927780 0.8196033 0.7577874
## Lag 2048 0.5453234 0.5342161 0.7042373 0.6322601
## Lag 3072 0.4473504 0.4370588 0.6129940 0.5390470
## Lag 4096 0.3699024 0.3670505 0.5316627 0.4580071
## Lag 5120 0.3276913 0.3249946 0.4686226 0.4089477
## gwesp.decay
## Lag 0 1.0000000
## Lag 1024 0.7381692
## Lag 2048 0.5931037
## Lag 3072 0.5018208
## Lag 4096 0.4165635
## Lag 5120 0.3589826
##
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## edges nodematch.sex nodematch.grade
## 1.0071 1.0201 1.0380
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## 1.4949 0.8828 0.4282
## mutual gwesp gwesp.decay
## 1.2240 1.1908 0.4849
##
## Individual P-values (lower = worse):
## edges nodematch.sex nodematch.grade
## 0.3138785 0.3077024 0.2992761
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## 0.1349526 0.3773230 0.6685418
## mutual gwesp gwesp.decay
## 0.2209427 0.2337345 0.6277733
## Joint P-value (lower = worse): 0.631122 .
## Warning in formals(fun): argument is not a function
##
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
##
## Goodness-of-fit for in-degree
##
## obs min mean max MC p-value
## 0 3 3 7.81 13 0.06
## 1 4 3 8.28 14 0.08
## 2 15 4 9.19 15 0.02
## 3 11 5 9.70 17 0.70
## 4 10 2 8.10 17 0.52
## 5 4 2 6.79 13 0.30
## 6 9 1 4.80 10 0.10
## 7 6 0 4.23 9 0.42
## 8 5 0 3.40 9 0.54
## 9 1 0 2.51 7 0.38
## 10 1 0 1.94 5 0.88
## 11 1 0 1.57 5 1.00
## 12 1 0 0.79 3 1.00
## 13 0 0 0.59 3 1.00
## 14 0 0 0.50 2 1.00
## 15 0 0 0.28 2 1.00
## 16 0 0 0.15 1 1.00
## 17 0 0 0.16 1 1.00
## 18 0 0 0.12 1 1.00
## 19 0 0 0.04 1 1.00
## 20 0 0 0.03 1 1.00
## 21 0 0 0.01 1 1.00
## 22 0 0 0.01 1 1.00
##
## Goodness-of-fit for out-degree
##
## obs min mean max MC p-value
## 0 7 4 9.28 15 0.48
## 1 5 1 7.97 15 0.32
## 2 8 3 8.35 16 1.00
## 3 7 3 8.13 14 0.84
## 4 9 1 7.53 14 0.68
## 5 9 1 6.55 12 0.46
## 6 12 1 6.31 14 0.06
## 7 6 1 4.83 10 0.74
## 8 5 0 3.68 9 0.62
## 9 2 0 2.60 8 0.98
## 10 1 0 2.00 5 0.76
## 11 0 0 1.31 5 0.60
## 12 0 0 0.91 5 0.88
## 13 0 0 0.52 3 1.00
## 14 0 0 0.40 2 1.00
## 15 0 0 0.22 1 1.00
## 16 0 0 0.21 2 1.00
## 17 0 0 0.10 1 1.00
## 18 0 0 0.02 1 1.00
## 19 0 0 0.05 1 1.00
## 20 0 0 0.03 1 1.00
##
## Goodness-of-fit for edgewise shared partner
##
## obs min mean max MC p-value
## esp0 64 44 65.77 93 0.80
## esp1 96 64 94.51 116 1.00
## esp2 63 48 67.97 97 0.70
## esp3 54 21 40.26 57 0.08
## esp4 16 9 21.06 40 0.56
## esp5 9 0 9.00 23 1.00
## esp6 1 0 3.42 13 0.38
## esp7 2 0 1.58 5 0.92
## esp8 0 0 0.56 3 1.00
## esp9 0 0 0.19 2 1.00
## esp10 0 0 0.07 2 1.00
## esp11 0 0 0.01 1 1.00
##
## Goodness-of-fit for minimum geodesic distance
##
## obs min mean max MC p-value
## 1 305 264 304.40 353 0.98
## 2 606 653 892.44 1162 0.00
## 3 840 917 1308.98 1641 0.00
## 4 863 683 883.79 1138 0.96
## 5 639 80 297.63 558 0.00
## 6 414 2 65.87 202 0.00
## 7 244 0 11.22 50 0.00
## 8 182 0 1.65 30 0.00
## 9 112 0 0.08 2 0.00
## 10 51 0 0.00 0 0.00
## 11 28 0 0.00 0 0.00
## 12 5 0 0.00 0 0.00
## Inf 681 742 1203.94 1880 0.00
##
## Goodness-of-fit for model statistics
##
## obs min mean max MC p-value
## edges 305 264 304.40 353 0.98
## nodematch.sex 180 158 180.10 207 0.90
## nodematch.grade 233 198 231.81 272 0.98
## nodematch.smokes 230 201 229.42 269 0.96
## nodeicov.indegree 1807 1546 1802.06 2064 0.90
## nodeocov.outdegree 1777 1504 1774.57 2072 0.96
## mutual 85 69 85.08 100 1.00
## esp#1 96 64 94.51 116 1.00
## esp#2 63 48 67.97 97 0.70
## esp#3 54 21 40.26 57 0.08
## esp#4 16 9 21.06 40 0.56
## esp#5 9 0 9.00 23 1.00
## esp#6 1 0 3.42 13 0.38
## esp#7 2 0 1.58 5 0.92
## esp#8 0 0 0.56 3 1.00
## esp#9 0 0 0.19 2 1.00
## esp#10 0 0 0.07 2 1.00
## esp#11 0 0 0.01 1 1.00
Latent space models let us fit a network without having to specify the actual tie-formation patterns explicitly. The most user-friendly latent space model software is in the latentnet package; more recent models are provided by the amen package.
library(latentnet)

# Fit a latent space model with node positions in 2-D Euclidean space,
# timing the fit (MCMC estimation can be slow).
start.time <- Sys.time()
latent.fit <- ergmm(g ~ euclidean(d = 2))
runtime <- Sys.time() - start.time
runtime
summary(latent.fit)
plot(latent.fit)
mcmc.diagnostics(latent.fit)
plot(gof(latent.fit))

# Can add additional dimensions (3-D latent space)...
start.time <- Sys.time()
latent.fit <- ergmm(g ~ euclidean(d = 3))
runtime <- Sys.time() - start.time
runtime
plot(gof(latent.fit))

# ... latent groups (G = 5 clusters in the latent space) ...
start.time <- Sys.time()
latent.fit <- ergmm(g ~ euclidean(d = 2, G = 5))
runtime <- Sys.time() - start.time
runtime
plot(latent.fit)
plot(gof(latent.fit))

# ... or homophily effects alongside the latent positions.
start.time <- Sys.time()
latent.fit <- ergmm(g ~ nodematch("grade") + nodematch("sex") + euclidean(d = 2))
runtime <- Sys.time() - start.time
runtime
summary(latent.fit)