---
title: "geoCat"
output:
flexdashboard::flex_dashboard:
vertical_layout: scroll
orientation: rows
social: menu
source_code: embed
---
# Cat Counts
Use these results without warranty or express or implied fitness for any purpose. The results are not vetted for accuracy. The results are not vetted for completeness. The results are not vetted for usability
- Limited to results from IACat and RyeCat
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE)
library(data.table)
library(tidyverse)
library(highcharter)
library(dplyr)
library(knitr)
library(maps)
library(plotly)
library(nrgiR)
data("county.fips")
data("uscountygeojson")
data("usgeojson")
theme_set(theme_minimal())
dfActivity <-
fread("../output/output_activity_count.csv_combined.csv")
# stop()
dfCountry <-
fread("../output/output_country_count.csv_combined.csv")
df <-
fread(
"../output/output_state_count.csv_combined.csv",
colClasses = c("COUNTYFP" = "character", "STATEFP" = "character")
)
df$NAME = paste0(df$STATE_NAME, ", ", df$NAME, "-", df$COUNTYFP)
# create a vector of the names that need to be mapped
mapNames <- function(df) {
namesToMap <- c("Rye8", "RyePhone", "Rye13")
# replace the names in the Name column with "RyeCat"
# if the names are not in the Name column, throw a warning
if(any(!namesToMap %in% df$Name)) warning("Some names are not in the data frame")
df$OriginalName <- df$Name
df$Name[df$Name %in% namesToMap] <- "RyeCat"
# get the names that start with sofia or that contain Papa or that start with tonga
tongaNames <- df$Name[grepl("^tonga", df$Name, ignore.case = TRUE)]
papaNames <- df$Name[grepl("papa", df$Name, ignore.case = TRUE)]
sofiaNames <- df$Name[grepl("^sofia", df$Name, ignore.case = TRUE)]
IACatNames <- c(tongaNames, papaNames, sofiaNames, "iha")
iaCatPatterns= c("^tonga", "papa", "^sofia", "iha")
# map the IACatNames to IACat
df$Name[df$Name %in% IACatNames] <- "IACat"
return(df)
}
df <- mapNames(df)
dfCountry <- mapNames(dfCountry)
nameLimits=c("RyeCat", "IACat")
# print the unique names that are not RyeCat or IACat
# kable(table()
missingNames <- unique(df$Name[!df$Name %in%nameLimits])
# subset the data frame to only include the rows where the Name column is equal to "RyeCat" or "IACat"
df <- df[Name %in% nameLimits, ]
# stop()
dfCountry <- dfCountry[Name %in%nameLimits, ]
toiso3 <- function(SOV_A3) {
# Need to convert "FR1" "US1" "GB1" "NL1" "CH1" to valid ISO3 codes
iso3 = gsub("FR1", "FRA", SOV_A3)
iso3 = gsub("US1", "USA", iso3)
iso3 = gsub("GB1", "GBR", iso3)
iso3 = gsub("NL1", "NLD", iso3)
iso3 = gsub("CH1", "CHE", iso3)
# convert cuba
iso3 = gsub("CU1", "CUB", iso3)
return(iso3)
}
print(iso)
dfCountry$iso3 = toiso3(dfCountry$SOV_A3)
# stop if there are any countries that are not in the iso data frame
if(any(!dfCountry$iso3 %in% iso$iso3)) stop("There are countries that are not in the iso data frame")
```
```{r echo=FALSE, message=FALSE, warning=FALSE}
# function to sum the counts column by a given column name
sum_by_column <- function(in_df, column_names) {
#group by the list of column names and sum the counts column
in_df <- in_df[, .(counts = sum(counts)), by = column_names]
return(in_df)
}
```
## World counts
### Total Cats per Country
```{r}
l <- list(color = toRGB("#d1d1d1"), width = 0.5)
#Specify map projection and options
g <- list(
showframe = FALSE,
showcoastlines = FALSE,
projection = list(type = 'orthographic'),
resolution = '200',
showcountries = TRUE,
countrycolor = '#d1d1d1',
showocean = TRUE,
oceancolor = '#c9d2e0',
showlakes = TRUE,
lakecolor = '#99c0db',
showrivers = TRUE,
rivercolor = '#99c0db'
)
sumByCountry <-
sum_by_column(in_df = dfCountry, column_names = c("iso3"))
sumByCountry$counts <- log10(sumByCountry$counts)
p <- plot_geo(sumByCountry) %>%
add_trace(
z = ~ counts,
color = ~ counts,
colors = 'viridis',
text = ~ iso3,
locations = ~ iso3,
marker = list(line = l)
) %>%
layout(title = 'log10 total counts', geo = g)
# remove the colorbar
hide_colorbar(p)
# stop()
```
### Cat wins per Country
```{r}
# create a data frame of the cat wins by country
sumByCountryCat <-
sum_by_column(in_df = dfCountry, column_names = c("iso3","Name"))
# log10 transform the counts column
sumByCountryCat$counts <- log10(sumByCountryCat$counts)
# create a data frame of the cat wins by country
sumByCountryCatWin= sumByCountryCat[, .SD[which.max(counts)], by = iso3]
# set counts won by IACat to negative
sumByCountryCatWin$counts[sumByCountryCatWin$Name == "RyeCat"] <-
-sumByCountryCatWin$counts[sumByCountryCatWin$Name == "RyeCat"]
p <- plot_geo(sumByCountryCatWin) %>%
add_trace(
z = ~ counts,
color = ~ Name,
colors = 'viridis',
text = ~ iso3,
locations = ~ iso3,
marker = list(line = l)
) %>%
layout(title = 'log10 counts won by', geo = g)
hide_colorbar(p)
```
## State counts
### Total Cats per State
```{r}
sumByState <-
sum_by_column(in_df = df, column_names = c("STATE_NAME"))
# log10 transform the counts column
sumByState$counts <- log10(sumByState$counts)
#create a chlorealpleth map of the US
highchart() %>%
hc_add_series_map(
usgeojson,
sumByState,
value = "counts",
joinBy = c("name", "STATE_NAME"),
name = "Count",
dataLabels = list(enabled = TRUE, format = '{point.name}')
) %>%
hc_mapNavigation(enabled = TRUE) %>%
hc_colorAxis(stops = color_stops()) %>%
hc_title(text = "Count of Tracks by State") %>%
hc_add_theme(hc_theme_smpl()) %>%
hc_legend(enabled = TRUE) %>%
hc_tooltip(pointFormat = "{point.name}: {point.value}")
```
### Count of Tracks by County
```{r}
df$COUNTY_FIP=paste0(df$STATEFP,df$COUNTYFP)
#
# stop()
# Shannon County, SD (FIPS code = 46113) was renamed Oglala Lakota County and assigned anew FIPS code (46102) effective in 2014.
df$COUNTY_FIP[df$COUNTY_FIP == "46102"] <- "46113"
sumByCounty <-
sum_by_column(in_df = df, column_names = c("COUNTY_FIP"))
# log10 transform the counts column
sumByCounty$counts <- log10(sumByCounty$counts)
#create a chlorealpleth map of the US
highchart() %>%
hc_add_series_map(
uscountygeojson,
sumByCounty,
value = "counts",
joinBy = c("fips", "COUNTY_FIP"),
name = "Count") %>%
hc_mapNavigation(enabled = TRUE) %>%
hc_colorAxis(stops = color_stops()) %>%
hc_title(text = "Count of Tracks by County") %>%
hc_add_theme(hc_theme_smpl()) %>%
hc_legend(enabled = FALSE)
# %>%
# hc_tooltip(pointFormat = "{point.name}: {point.value}")
```
## Cat wins
### cat counts per state
```{r}
# sum the counts column by the Name and STATE_NAME columns
sumByCatStateFull <-
sum_by_column(in_df = df, column_names = c("Name", "STATE_NAME"))
sumByCatStateWin <-
sumByCatStateFull[, .SD[which.max(counts)], by = STATE_NAME]
# create a color palette that maps RyeCat to bl
colorPalette <- c("#0000FF","#FF0000")
# create a named vector of colors
colorVector <- setNames(colorPalette, unique(sumByCatStateWin$Name))
# replace the color column with the color vector
sumByCatStateWin$color <- colorVector[sumByCatStateWin$Name]
# log10 transform the counts column
#create a chlorealpleth map of the US
highchart() %>%
hc_add_series_map(
usgeojson,
sumByCatStateWin,
value = "counts",
joinBy = c("name", "STATE_NAME"),
name = "Cat Winner",
dataLabels = list(enabled = TRUE, format = '{point.name}'),color =colorVector
) %>%
hc_mapNavigation(enabled = TRUE) %>%
hc_title(text = "Which Cat Wins Which State (+DC") %>%
hc_add_theme(hc_theme_smpl()) %>%
hc_legend(enabled = FALSE) %>%
hc_tooltip(pointFormat = "{point.name} is won by {point.Name}: {point.value} total tracks")
```
### States Won per Cat {data-width=250}
```{r}
# count the number of states that each cat won
total = sumByCatStateWin[, .N, by = Name]
#create a ggplot bar chart of the number of states that each cat won
# label by the cat name
# color by the colorPalette
# order by the number of states won
g = ggplot(total, aes(x = Name, y = N, fill = Name)) +
geom_bar(stat = "identity") +
geom_text(aes(label = Name), vjust = -0.5, size = 3) +
scale_fill_manual(values = colorVector) +
labs(x = "Cat", y = "Number of States(+DC) Won") +
theme_minimal() +
theme(legend.position = "none")
ggplotly(g)
```
### cat counts per county
```{r}
# sum the counts column by the Name and STATE_NAME columns
sumByCatCountyFull <-
sum_by_column(in_df = df,
column_names = c("Name", "COUNTY_FIP","NAME"))
sumByCatCountyWin <-
sumByCatCountyFull[, .SD[which.max(counts)], by = COUNTY_FIP]
sumByCatCountyWin$color <- colorVector[sumByCatCountyWin$Name]
#create a chlorealpleth map of the US
highchart() %>%
hc_add_series_map(
uscountygeojson,
sumByCatCountyWin,
value = "counts",
joinBy = c("fips", "COUNTY_FIP"),
name = "Cat Winner",
color = colorVector
) %>%
hc_mapNavigation(enabled = TRUE) %>%
hc_title(text = "Which Cat Wins Which County") %>%
hc_add_theme(hc_theme_smpl()) %>%
hc_legend(enabled = FALSE) %>%
hc_tooltip(pointFormat = "{point.name} is won by {point.Name}: {point.value} total tracks")
```
### Counties Won per Cat {data-width=250}
```{r}
# count the number of states that each cat won
total = sumByCatCountyWin[, .N, by = Name]
#create a ggplot bar chart of the number of states that each cat won
# label by the cat name
# color by the colorPalette
# order by the number of states won
g=ggplot(total, aes(x = Name, y = N, fill = Name)) +
geom_bar(stat = "identity") +
geom_text(aes(label = Name), vjust = -0.5, size = 3) +
scale_fill_manual(values = colorVector) +
labs(x = "Cat", y = "Number of Counties Won") +
theme_minimal() +
theme(legend.position = "none")
ggplotly(g)
```
## Tracks per Cat per state
### State diff per Cat
```{r}
# get the second highest count for each state
secondHighestState <-
sumByCatStateFull[, .SD[order(-counts)[2]], by = STATE_NAME]
# merge the sumByCatStateWin and secondHighestState data frames by the STATE_NAME column
diffByCatState <-
merge(sumByCatStateWin,
secondHighestState,
by = "STATE_NAME",
all = TRUE)
# calculate the difference between the counts columns
diffByCatState$diff <-
diffByCatState$counts.x - diffByCatState$counts.y
#log10 transform the diff column
# if the difference is NA, set it to counts.x
diffByCatState$diff[is.na(diffByCatState$diff)] <-
diffByCatState$counts.x[is.na(diffByCatState$diff)]
diffByCatState$log10diff <- log10(diffByCatState$diff)
diffByCatState$log10diff[diffByCatState$Name.x == "IACat"] <-
-diffByCatState$log10diff[diffByCatState$Name.x == "IACat"]
p0 <- ggplot(data = diffByCatState,
mapping = aes(
x = log10diff,
y = reorder(STATE_NAME, log10diff),
color = Name.x
)) + scale_color_manual(values = colorVector) + geom_vline(xintercept = 0,
color = "black",
linetype = "dashed") + geom_point()
# remove the legend
p0 <- p0 + theme(legend.position = "none")
# add x axis label
p0 <- p0 + labs(x = "log10 difference in number of tracks won")
# add y axis label
p0 <- p0 + labs(y = "State")
ggplotly(p0)
# stop()
```
### County diff per Cat
```{r}
secondHighestCounty <-
sumByCatCountyFull[, .SD[order(-counts)[2]], by = COUNTY_FIP]
secondHighestCounty$NAME = NULL
diffByCatCounty <-
merge(sumByCatCountyWin,
secondHighestCounty,
by = "COUNTY_FIP",
all = TRUE)
diffByCatCounty$diff <-
diffByCatCounty$counts.x - diffByCatCounty$counts.y
diffByCatCounty$diff[is.na(diffByCatCounty$diff)] <-
diffByCatCounty$counts.x[is.na(diffByCatCounty$diff)]
diffByCatCounty$log10diff <- log10(diffByCatCounty$diff)
# set non finite values to 0
diffByCatCounty$log10diff[!is.finite(diffByCatCounty$log10diff)] <-
0
diffByCatCounty$log10diff[diffByCatCounty$Name.x == "IACat"] <-
-diffByCatCounty$log10diff[diffByCatCounty$Name.x == "IACat"]
p1 <- ggplot(data = diffByCatCounty,
mapping = aes(x = log10diff,
y = reorder(NAME, log10diff),
color = Name.x)) + scale_color_manual(values = colorVector) + geom_vline(xintercept = 0,
color = "black",
linetype = "dashed") + geom_point()
# remove the legend
p1 <- p1 + theme(legend.position = "none")
# remove y axis labels
p1 <- p1 + theme(axis.text.y = element_blank())
# remove y axis ticks
p1 <- p1 + theme(axis.ticks.y = element_blank())
# remove y axis title
# p1 <- p1 + theme(axis.title.y = element_blank())
# remove all grid lines
p1 <- p1 + theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank())
p1 <- p1 + labs(x = "log10 difference in number of tracks won")
p1 <- p1 + labs(y = "County")
ggplotly(p1)
# stop()
#
```
Row {.tabset .tabset-fade}
-------------------------------------
### cats represented in the analysis
```{r}
kable(unique(df$OriginalName))
```
### cats not represented in the analysis
```{r}
kable(missingNames)
```
# Cat Acts
## Per Day
### cat activity counts per day
```{r}
dfActivity=mapNames(dfActivity)
# limt to Names in nameLimit
dfActivity <- dfActivity[Name %in% nameLimits,]
dfActivity$baseCounts =dfActivity$counts
# set IACat to negative
# column names of dfActivity are "Activity" "Name" "date" "counts"
# remove blank Activity
toRemove=c("","Unknown","Stationary","Driving")
dfActivity <- dfActivity[which(!dfActivity$Activity %in% toRemove), ]
dfActivity$date <- as.Date(dfActivity$date)
dfActivity$Year_Month <- format(as.Date(dfActivity$date), "%Y-%m")
sumByCatActivity <-sum_by_column(dfActivity, c("Name", "Activity", "Year_Month"))
# convert Year_Month to date, looks like this "2020-06"
sumByCatActivity$Year_Month <- as.Date(paste0(sumByCatActivity$Year_Month, "-01"))
dfActivity$counts[dfActivity$Name == "IACat"] <- -dfActivity$counts[dfActivity$Name == "IACat"]
# plot counts by day
p2 <- ggplot(data = dfActivity,
mapping = aes(x = date,
y = counts,
color = Name)) + geom_point(size=1) + scale_color_manual(values = colorVector) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + labs(x = "Date", y = "Number of Tracks")
p2 = p2 + facet_wrap( ~ Activity, scales = "free_y")
ggplotly(p2)
```
### cat activity days per count
```{r}
#histogram of counts colored by Name
p2 <- ggplot(data = dfActivity,
mapping = aes(x = baseCounts,
color = Name)) + geom_histogram(bins=100, position="identity",alpha=.5) + scale_color_manual(values = colorVector) + labs(x = "Counts Per Day", y = "Number of Days")
# p2 + facet_wrap( ~ Activity+Name, scales = "free_y")
#log scale the x axis
p2=p2 + facet_wrap( ~ Activity, scales = "free_y") + scale_x_log10()
ggplotly(p2)
```
## Per Month
### cat activity counts per month
```{r}
# plot counts by month
p3 <- ggplot(data = sumByCatActivity,
mapping = aes(x = Year_Month,
y = counts,
color = Name)) + geom_point(size=1) + scale_color_manual(values = colorVector) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + labs(x = "Date", y = "Number of Tracks")
p3=p3 + facet_wrap( ~ Activity, scales = "free_y")
ggplotly(p3)
```
### cat activity months per count
```{r}
#histogram of counts colored by Name
p3 <- ggplot(data = sumByCatActivity,
mapping = aes(x = counts,
color = Name)) + geom_histogram(bins = 100,
position = "identity",
alpha = .5) + scale_color_manual(values = colorVector) + labs(x = "Counts Per Month", y = "Number of Months")
# p3 + facet_wrap( ~ Activity+Name, scales = "free_y")
#log scale the x axis
p3 = p3 + facet_wrap(~ Activity, scales = "free_y") + scale_x_log10()
ggplotly(p3)