Chapter 8 The Movie Dataset

Neo4j comes with an example dataset called movies, which you can load with:

# Send the output of play_movies() to the database through `con`
# to load the example movies dataset.
call_neo4j(play_movies(), con)

8.1 Querying data

  • Returning data as data frames (each returned element is a tibble):
# Return the node whose `name` property is "Tom Hanks".
call_neo4j('MATCH (tom {name: "Tom Hanks"}) RETURN tom', con)
## $tom
## # A tibble: 1 x 2
##    born name     
##   <int> <chr>    
## 1  1956 Tom Hanks
## 
## attr(,"class")
## [1] "neo"  "list"
# Return the node whose `title` property is "Cloud Atlas".
call_neo4j(
  'MATCH (cloudAtlas {title: "Cloud Atlas"}) RETURN cloudAtlas',
  con
)
## $cloudAtlas
## # A tibble: 1 x 3
##   tagline                 title       released
##   <chr>                   <chr>          <int>
## 1 Everything is connected Cloud Atlas     2012
## 
## attr(,"class")
## [1] "neo"  "list"
# Return the `name` property of ten Person nodes (LIMIT 10).
call_neo4j('MATCH (people:Person) RETURN people.name LIMIT 10', con)
## $people.name
## # A tibble: 10 x 1
##    value             
##    <chr>             
##  1 Keanu Reeves      
##  2 Carrie-Anne Moss  
##  3 Laurence Fishburne
##  4 Hugo Weaving      
##  5 Lilly Wachowski   
##  6 Lana Wachowski    
##  7 Joel Silver       
##  8 Emil Eifrem       
##  9 Charlize Theron   
## 10 Al Pacino         
## 
## attr(,"class")
## [1] "neo"  "list"
# Return the titles of Movie nodes released from 1990 up to (not including) 2000.
call_neo4j(
  'MATCH (nineties:Movie) WHERE nineties.released >= 1990 AND nineties.released < 2000 RETURN nineties.title',
  con
)
## $nineties.title
## # A tibble: 20 x 1
##    value                 
##    <chr>                 
##  1 The Matrix            
##  2 The Devils Advocate   
##  3 A Few Good Men        
##  4 As Good as It Gets    
##  5 What Dreams May Come  
##  6 Snow Falling on Cedars
##  7 Youve Got Mail        
##  8 Sleepless in Seattle  
##  9 Joe Versus the Volcano
## 10 When Harry Met Sally  
## 11 That Thing You Do     
## 12 The Birdcage          
## 13 Unforgiven            
## 14 Johnny Mnemonic       
## 15 The Green Mile        
## 16 Hoffa                 
## 17 Apollo 13             
## 18 Twister               
## 19 Bicentennial Man      
## 20 A League of Their Own 
## 
## attr(,"class")
## [1] "neo"  "list"
# Return every distinct node reachable from Kevin Bacon through 1 to 4
# relationships of any type, in either direction.
call_neo4j(
  'MATCH (bacon:Person {name:"Kevin Bacon"})-[*1..4]-(hollywood) RETURN DISTINCT hollywood',
  con
)
## $hollywood
## # A tibble: 135 x 5
##     born name          tagline                        title        released
##    <int> <chr>         <chr>                          <chr>           <int>
##  1  1941 Nora Ephron   <NA>                           <NA>               NA
##  2  1968 Parker Posey  <NA>                           <NA>               NA
##  3  1963 Greg Kinnear  <NA>                           <NA>               NA
##  4  1961 Meg Ryan      <NA>                           <NA>               NA
##  5  1967 Steve Zahn    <NA>                           <NA>               NA
##  6  1973 Dave Chappel… <NA>                           <NA>               NA
##  7    NA <NA>          At odds in life... in love on… Youve Got M…     1998
##  8  1954 Madonna       <NA>                           <NA>               NA
##  9  1943 Penny Marsha… <NA>                           <NA>               NA
## 10  1962 Rosie ODonne… <NA>                           <NA>               NA
## # … with 125 more rows
## 
## attr(,"class")
## [1] "neo"  "list"
  • Returning data as graphs (with `type = "graph"`):
# Tom Hanks and the nodes he has an ACTED_IN relationship to,
# requested in graph form (type = "graph").
call_neo4j(
  'MATCH (tom:Person {name: "Tom Hanks"})-[:ACTED_IN]->(tomHanksMovies) RETURN tom,tomHanksMovies',
  con,
  type = "graph"
)
## $nodes
## # A tibble: 13 x 3
##    id    label     properties
##    <chr> <list>    <list>    
##  1 144   <chr [1]> <list [3]>
##  2 71    <chr [1]> <list [2]>
##  3 67    <chr [1]> <list [3]>
##  4 162   <chr [1]> <list [3]>
##  5 78    <chr [1]> <list [3]>
##  6 85    <chr [1]> <list [3]>
##  7 111   <chr [1]> <list [3]>
##  8 105   <chr [1]> <list [3]>
##  9 150   <chr [1]> <list [3]>
## 10 130   <chr [1]> <list [3]>
## 11 73    <chr [1]> <list [3]>
## 12 161   <chr [1]> <list [3]>
## 13 159   <chr [1]> <list [3]>
## 
## attr(,"class")
## [1] "neo"  "list"
# Nodes with a DIRECTED relationship to "Cloud Atlas", requested in graph form;
# extract_nodes() is then applied to the graph result.
extract_nodes(
  call_neo4j(
    'MATCH (cloudAtlas {title: "Cloud Atlas"})<-[:DIRECTED]-(directors) RETURN directors',
    con,
    type = "graph"
  )
)
## # A tibble: 3 x 3
##   id    label     properties
##   <chr> <list>    <list>    
## 1 108   <chr [1]> <list [2]>
## 2 6     <chr [1]> <list [2]>
## 3 5     <chr [1]> <list [2]>
# People who ACTED_IN the same node `m` as Tom Hanks, requested in graph form;
# unnest_graph() is then applied to the graph result.
unnest_graph(
  call_neo4j(
    'MATCH (tom:Person {name:"Tom Hanks"})-[:ACTED_IN]->(m)<-[:ACTED_IN]-(coActors) RETURN coActors',
    con,
    type = "graph"
  )
)
## $nodes
## # A tibble: 34 x 4
##    id    value   born name          
##    <chr> <chr>  <int> <chr>         
##  1 145   Person  1950 Ed Harris     
##  2 134   Person  1955 Gary Sinise   
##  3 19    Person  1958 Kevin Bacon   
##  4 146   Person  1955 Bill Paxton   
##  5 68    Person  1968 Parker Posey  
##  6 54    Person  1963 Greg Kinnear  
##  7 34    Person  1961 Meg Ryan      
##  8 70    Person  1967 Steve Zahn    
##  9 69    Person  1973 Dave Chappelle
## 10 163   Person  1954 Madonna       
## # … with 24 more rows
## 
## attr(,"class")
## [1] "neo"  "list"

8.2 Basic data manipulation

library(tidyverse)
# Every (per, mov) pair linked by an ACTED_IN relationship.
res <- call_neo4j('MATCH (per)-[act:ACTED_IN]->(mov) RETURN per, mov', con)
# Place the `per` and `mov` elements of the result side by side,
# matching rows by position.
df <- bind_cols(res[["per"]], res[["mov"]])
# Count rows per actor name and keep the 10 highest counts.
# `wt = n` is passed explicitly so top_n() does not fall back to the last
# column (which prints "Selecting by n"). top_n() keeps ties, so more than
# 10 rows can be returned.
df %>%
  count(name, sort = TRUE) %>%
  top_n(10, wt = n)
## # A tibble: 15 x 2
##    name                   n
##    <chr>              <int>
##  1 Tom Hanks             12
##  2 Keanu Reeves           7
##  3 Hugo Weaving           5
##  4 Jack Nicholson         5
##  5 Meg Ryan               5
##  6 Cuba Gooding Jr.       4
##  7 Ben Miles              3
##  8 Bill Paxton            3
##  9 Carrie-Anne Moss       3
## 10 Gene Hackman           3
## 11 Helen Hunt             3
## 12 Kevin Bacon            3
## 13 Laurence Fishburne     3
## 14 Robin Williams         3
## 15 Tom Cruise             3
# Number of distinct (title, released) pairs per release year.
movies_per_year <- count(distinct(df, title, released), released)

# Column chart of that count, one bar per year.
ggplot(movies_per_year, aes(released, n)) +
  geom_col(fill = viridis::viridis(1)) +
  labs(title = "Movies by year in the 'movies' dataset") +
  theme_minimal()