Chapter 8 The Movie Dataset
Neo4j comes with an example dataset called `movies`, which you can load with:
# Send whatever play_movies() returns to the server through the `con` connection.
call_neo4j(play_movies(), con)
8.1 Querying data
- Returning data as data.frames:
# Match any node whose `name` property is "Tom Hanks" and return it.
call_neo4j(
  'MATCH (tom {name: "Tom Hanks"}) RETURN tom',
  con
)
## $tom
## # A tibble: 1 x 2
## born name
## <int> <chr>
## 1 1956 Tom Hanks
##
## attr(,"class")
## [1] "neo" "list"
# Match any node whose `title` property is "Cloud Atlas" and return it.
call_neo4j(
  'MATCH (cloudAtlas {title: "Cloud Atlas"}) RETURN cloudAtlas',
  con
)
## $cloudAtlas
## # A tibble: 1 x 3
## tagline title released
## <chr> <chr> <int>
## 1 Everything is connected Cloud Atlas 2012
##
## attr(,"class")
## [1] "neo" "list"
# Return the `name` property of Person nodes, capped at ten results.
call_neo4j(
  'MATCH (people:Person) RETURN people.name LIMIT 10',
  con
)
## $people.name
## # A tibble: 10 x 1
## value
## <chr>
## 1 Keanu Reeves
## 2 Carrie-Anne Moss
## 3 Laurence Fishburne
## 4 Hugo Weaving
## 5 Lilly Wachowski
## 6 Lana Wachowski
## 7 Joel Silver
## 8 Emil Eifrem
## 9 Charlize Theron
## 10 Al Pacino
##
## attr(,"class")
## [1] "neo" "list"
# Titles of Movie nodes released in the 1990s (1990 <= released < 2000).
call_neo4j(
  'MATCH (nineties:Movie) WHERE nineties.released >= 1990 AND nineties.released < 2000 RETURN nineties.title',
  con
)
## $nineties.title
## # A tibble: 20 x 1
## value
## <chr>
## 1 The Matrix
## 2 The Devils Advocate
## 3 A Few Good Men
## 4 As Good as It Gets
## 5 What Dreams May Come
## 6 Snow Falling on Cedars
## 7 Youve Got Mail
## 8 Sleepless in Seattle
## 9 Joe Versus the Volcano
## 10 When Harry Met Sally
## 11 That Thing You Do
## 12 The Birdcage
## 13 Unforgiven
## 14 Johnny Mnemonic
## 15 The Green Mile
## 16 Hoffa
## 17 Apollo 13
## 18 Twister
## 19 Bicentennial Man
## 20 A League of Their Own
##
## attr(,"class")
## [1] "neo" "list"
# Every distinct node reachable from the Person named "Kevin Bacon" through
# one to four relationships, whatever their type or direction.
call_neo4j(
  'MATCH (bacon:Person {name:"Kevin Bacon"})-[*1..4]-(hollywood) RETURN DISTINCT hollywood',
  con
)
## $hollywood
## # A tibble: 135 x 5
## born name tagline title released
## <int> <chr> <chr> <chr> <int>
## 1 1941 Nora Ephron <NA> <NA> NA
## 2 1968 Parker Posey <NA> <NA> NA
## 3 1963 Greg Kinnear <NA> <NA> NA
## 4 1961 Meg Ryan <NA> <NA> NA
## 5 1967 Steve Zahn <NA> <NA> NA
## 6 1973 Dave Chappel… <NA> <NA> NA
## 7 NA <NA> At odds in life... in love on… Youve Got M… 1998
## 8 1954 Madonna <NA> <NA> NA
## 9 1943 Penny Marsha… <NA> <NA> NA
## 10 1962 Rosie ODonne… <NA> <NA> NA
## # … with 125 more rows
##
## attr(,"class")
## [1] "neo" "list"
- Returning data as graphs:
# Tom Hanks together with everything he has an outgoing ACTED_IN relationship
# to; type = "graph" asks for the result in graph form rather than as tables.
call_neo4j(
  'MATCH (tom:Person {name: "Tom Hanks"})-[:ACTED_IN]->(tomHanksMovies) RETURN tom,tomHanksMovies',
  con,
  type = "graph"
)
## $nodes
## # A tibble: 13 x 3
## id label properties
## <chr> <list> <list>
## 1 144 <chr [1]> <list [3]>
## 2 71 <chr [1]> <list [2]>
## 3 67 <chr [1]> <list [3]>
## 4 162 <chr [1]> <list [3]>
## 5 78 <chr [1]> <list [3]>
## 6 85 <chr [1]> <list [3]>
## 7 111 <chr [1]> <list [3]>
## 8 105 <chr [1]> <list [3]>
## 9 150 <chr [1]> <list [3]>
## 10 130 <chr [1]> <list [3]>
## 11 73 <chr [1]> <list [3]>
## 12 161 <chr [1]> <list [3]>
## 13 159 <chr [1]> <list [3]>
##
## attr(,"class")
## [1] "neo" "list"
# Nodes with a DIRECTED relationship pointing at "Cloud Atlas", fetched in
# graph form; extract_nodes() is then applied to the graph result.
extract_nodes(
  call_neo4j(
    'MATCH (cloudAtlas {title: "Cloud Atlas"})<-[:DIRECTED]-(directors) RETURN directors',
    con,
    type = "graph"
  )
)
## # A tibble: 3 x 3
## id label properties
## <chr> <list> <list>
## 1 108 <chr [1]> <list [2]>
## 2 6 <chr [1]> <list [2]>
## 3 5 <chr [1]> <list [2]>
# Co-actors of Tom Hanks: nodes with an ACTED_IN relationship to something
# Tom Hanks also ACTED_IN. The graph result is passed through unnest_graph(),
# which (judging by the printed result) spreads node labels and properties
# into ordinary columns.
unnest_graph(
  call_neo4j(
    'MATCH (tom:Person {name:"Tom Hanks"})-[:ACTED_IN]->(m)<-[:ACTED_IN]-(coActors) RETURN coActors',
    con,
    type = "graph"
  )
)
## $nodes
## # A tibble: 34 x 4
## id value born name
## <chr> <chr> <int> <chr>
## 1 145 Person 1950 Ed Harris
## 2 134 Person 1955 Gary Sinise
## 3 19 Person 1958 Kevin Bacon
## 4 146 Person 1955 Bill Paxton
## 5 68 Person 1968 Parker Posey
## 6 54 Person 1963 Greg Kinnear
## 7 34 Person 1961 Meg Ryan
## 8 70 Person 1967 Steve Zahn
## 9 69 Person 1973 Dave Chappelle
## 10 163 Person 1954 Madonna
## # … with 24 more rows
##
## attr(,"class")
## [1] "neo" "list"
8.2 Basic data manipulation
library(tidyverse)

# Fetch both ends of every ACTED_IN relationship. The result is a list that is
# indexed below by the returned variable names, "per" and "mov".
res <- call_neo4j(
  'MATCH (per)-[act:ACTED_IN]->(mov) RETURN per, mov',
  con
)

# Place the "per" columns and the "mov" columns side by side.
# NOTE(review): assumes row i of both tables comes from the same match - confirm.
df <- res %>%
  purrr::pluck("per") %>%
  bind_cols(purrr::pluck(res, "mov"))
# Names with the most rows in `df`, i.e. the most ACTED_IN matches.
# The ranking column is passed to top_n() explicitly (`n`, the count produced
# by count()), so it does not fall back on the last column and no longer
# prints "Selecting by n". Rows tied on the cut-off count are all kept, so
# more than ten rows can come back.
df %>%
  count(name, sort = TRUE) %>%
  top_n(10, n)
## # A tibble: 15 x 2
## name n
## <chr> <int>
## 1 Tom Hanks 12
## 2 Keanu Reeves 7
## 3 Hugo Weaving 5
## 4 Jack Nicholson 5
## 5 Meg Ryan 5
## 6 Cuba Gooding Jr. 4
## 7 Ben Miles 3
## 8 Bill Paxton 3
## 9 Carrie-Anne Moss 3
## 10 Gene Hackman 3
## 11 Helen Hunt 3
## 12 Kevin Bacon 3
## 13 Laurence Fishburne 3
## 14 Robin Williams 3
## 15 Tom Cruise 3
# Bar chart of the number of distinct titles per release year.
df %>%
  # `df` has one row per acting credit, so collapse to one row per title first.
  distinct(title, released) %>%
  count(released) %>%
  ggplot(mapping = aes(x = released, y = n)) +
  # A single colour taken from the viridis palette.
  geom_col(fill = viridis::viridis(n = 1)) +
  labs(title = "Movies by year in the 'movies' dataset") +
  theme_minimal()