Cat Counts

Use these results without warranty or express or implied fitness for any purpose. The results are not vetted for accuracy. The results are not vetted for completeness. The results are not vetted for usability

World counts

Total Cats per Country

Cat wins per Country

State counts

Total Cats per State

Count of Tracks by County

Cat wins

cat counts per state

States Won per Cat

cat counts per county

Counties Won per Cat

Tracks per Cat per state

State diff per Cat

County diff per Cat

Row

cats represented in the analysis

x
Rye8
Rye13
Big Papa
Bigger Papa
tonga-moto-63b2
sofia-moto-fdb7
tonga-moto-f8db
RyePhone
sofia-moto-0538
sofia-moto-099b
sofia-moto-3582
sofia-moto-fc37
Lil Papa
iha
tonga-moto-7617
sofia-moto-084b
sofia-moto-3965
sofia-moto-555d
sofia-moto-6c2f
sofia-moto-70c2
sofia-moto-7c9a
sofia-moto-80cb
sofia-moto-9062
sofia-moto-95c9
sofia-moto-a24e
sofia-moto-b904
sofia-moto-bf20
sofia-moto-e67d
Papa G
sofia-moto-8302
sofia-moto-bf28
sofia-moto-d36e
LilPapa

cats not represented in the analysis

x
Chishiki
Twenty7
Kitty’s MacBook Pro
iPhone 8
Big Mamma
KK’s Work Phone
iPhone
Jonathan Carlomagno’s iPhone
Bob’s iPhone 6
Rathbone
Kayleigh’s iPhone
jl
marlin-Pixel-222d
jlc
P2
kek
pancho
moto-i-2
JLC
ubp52
Pam Ardis iPhone
tester
QP1A.191005.007.A3/Pixel XL
QPMS30.80-51-5/moto g power
QPMS30.80-51-8/moto g power

Cat Acts

Per Day

cat activity counts per day

cat activity days per count

Per Month

cat activity counts per month

cat activity months per count

---
title: "geoCat"
output: 
  flexdashboard::flex_dashboard:
    vertical_layout: scroll
    orientation: rows
    social: menu
    source_code: embed
---

# Cat Counts

Use these results without warranty or express or implied fitness for any purpose.  The results are not vetted for accuracy.  The results are not vetted for completeness.  The results are not vetted for usability

- Limited to results from IACat and RyeCat

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE)


library(data.table)
library(tidyverse)
library(highcharter)
library(dplyr)
library(knitr)
library(maps)
library(plotly)
library(nrgiR)

data("county.fips")
data("uscountygeojson")
data("usgeojson")

theme_set(theme_minimal())

dfActivity <-
  fread("../output/output_activity_count.csv_combined.csv")
# stop()
dfCountry <-
  fread("../output/output_country_count.csv_combined.csv")


df <-
  fread(
    "../output/output_state_count.csv_combined.csv",
    colClasses = c("COUNTYFP" = "character", "STATEFP" = "character")
  )

df$NAME = paste0(df$STATE_NAME, ", ", df$NAME, "-", df$COUNTYFP)
# create a vector of the names that need to be mapped
mapNames <- function(df) {
  namesToMap <- c("Rye8", "RyePhone", "Rye13")
  # replace the names in the Name column with "RyeCat"
  # if the names are not in the Name column, throw a warning
  if(any(!namesToMap %in% df$Name)) warning("Some names are not in the data frame")
  
  df$OriginalName <- df$Name
  df$Name[df$Name %in% namesToMap] <- "RyeCat"
  
  
  
  # get the names that start with sofia or that contain Papa or that start with tonga
  tongaNames <- df$Name[grepl("^tonga", df$Name, ignore.case = TRUE)]
  papaNames <- df$Name[grepl("papa", df$Name, ignore.case = TRUE)]
  sofiaNames <- df$Name[grepl("^sofia", df$Name, ignore.case = TRUE)]
  IACatNames <- c(tongaNames, papaNames, sofiaNames, "iha")
  iaCatPatterns= c("^tonga", "papa", "^sofia", "iha")
  
  # map the IACatNames to IACat
  df$Name[df$Name %in% IACatNames] <- "IACat"
  return(df)
}

df <- mapNames(df)
dfCountry <- mapNames(dfCountry)
nameLimits=c("RyeCat", "IACat")

# print the unique names that are not RyeCat or IACat
# kable(table()
missingNames <- unique(df$Name[!df$Name %in%nameLimits])


# subset the data frame to only include the rows where the Name column is equal to "RyeCat" or "IACat"
df <- df[Name %in% nameLimits, ]
# stop()
dfCountry <- dfCountry[Name %in%nameLimits, ]

toiso3 <- function(SOV_A3) {
  # Need to convert "FR1" "US1" "GB1" "NL1" "CH1" to valid ISO3 codes
  iso3 = gsub("FR1", "FRA", SOV_A3)
  iso3 = gsub("US1", "USA", iso3)
  iso3 = gsub("GB1", "GBR", iso3)
  iso3 = gsub("NL1", "NLD", iso3)
  iso3 = gsub("CH1", "CHE", iso3)
  # convert cuba
  iso3 = gsub("CU1", "CUB", iso3)
  return(iso3)
}


print(iso)
dfCountry$iso3 = toiso3(dfCountry$SOV_A3)
# stop if there are any countries that are not in the iso data frame
if(any(!dfCountry$iso3 %in% iso$iso3)) stop("There are countries that are not in the iso data frame")



```


```{r echo=FALSE, message=FALSE, warning=FALSE}

# function to sum the counts column by a given column name
sum_by_column <- function(in_df, column_names) {
  #group by the list of column names and sum the counts column
  in_df <- in_df[, .(counts = sum(counts)), by = column_names]
  return(in_df)
}

```

## World counts

### Total Cats per Country

```{r}
l <- list(color = toRGB("#d1d1d1"), width = 0.5)
#Specify map projection and options
g <- list(
  showframe = FALSE,
  showcoastlines = FALSE,
  projection = list(type = 'orthographic'),
  resolution = '200',
  showcountries = TRUE,
  countrycolor = '#d1d1d1',
  showocean = TRUE,
  oceancolor = '#c9d2e0',
  showlakes = TRUE,
  lakecolor = '#99c0db',
  showrivers = TRUE,
  rivercolor = '#99c0db'
)

sumByCountry <-
  sum_by_column(in_df = dfCountry, column_names = c("iso3"))

sumByCountry$counts <- log10(sumByCountry$counts)

p <- plot_geo(sumByCountry) %>%
  add_trace(
    z = ~ counts,
    color = ~ counts,
    colors = 'viridis',
    text = ~ iso3,
    locations = ~ iso3,
    marker = list(line = l)
  ) %>%
  layout(title = 'log10 total counts', geo = g)
# remove the colorbar
hide_colorbar(p)

# stop()
```

### Cat wins per Country

```{r}
# create a data frame of the cat wins by country
sumByCountryCat <-
  sum_by_column(in_df = dfCountry, column_names = c("iso3","Name"))
# log10 transform the counts column
sumByCountryCat$counts <- log10(sumByCountryCat$counts)
# create a data frame of the cat wins by country
sumByCountryCatWin= sumByCountryCat[, .SD[which.max(counts)], by = iso3]
# set counts won by IACat to negative
sumByCountryCatWin$counts[sumByCountryCatWin$Name == "RyeCat"] <-
  -sumByCountryCatWin$counts[sumByCountryCatWin$Name == "RyeCat"]

p <- plot_geo(sumByCountryCatWin) %>%
  add_trace(
    z = ~ counts,
    color = ~ Name,
    colors = 'viridis',
    text = ~ iso3,
    locations = ~ iso3,
    marker = list(line = l)
  ) %>%
  layout(title = 'log10 counts won by', geo = g)
hide_colorbar(p)

```


## State counts
   
### Total Cats per State
```{r}
sumByState <-
  sum_by_column(in_df = df, column_names = c("STATE_NAME"))
# log10 transform the counts column
sumByState$counts <- log10(sumByState$counts)
#create a chlorealpleth map of the US
highchart() %>%
  hc_add_series_map(
    usgeojson,
    sumByState,
    value = "counts",
    joinBy = c("name", "STATE_NAME"),
    name = "Count",
    dataLabels = list(enabled = TRUE, format = '{point.name}')
  ) %>%
  hc_mapNavigation(enabled = TRUE) %>%
  hc_colorAxis(stops = color_stops()) %>%
  hc_title(text = "Count of Tracks by State") %>%
  hc_add_theme(hc_theme_smpl()) %>%
  hc_legend(enabled = TRUE) %>%
  hc_tooltip(pointFormat = "{point.name}: {point.value}")

```


### Count of Tracks by County



```{r}
df$COUNTY_FIP=paste0(df$STATEFP,df$COUNTYFP)
# 
# stop()
# Shannon County, SD (FIPS code = 46113) was renamed Oglala Lakota County and assigned anew FIPS code (46102) effective in 2014.
df$COUNTY_FIP[df$COUNTY_FIP == "46102"] <- "46113"

sumByCounty <-
  sum_by_column(in_df = df, column_names = c("COUNTY_FIP"))
# log10 transform the counts column
sumByCounty$counts <- log10(sumByCounty$counts)
#create a chlorealpleth map of the US
highchart() %>%
  hc_add_series_map(
    uscountygeojson,
    sumByCounty,
    value = "counts",
    joinBy = c("fips", "COUNTY_FIP"),
    name = "Count") %>%
  hc_mapNavigation(enabled = TRUE) %>%
  hc_colorAxis(stops = color_stops()) %>%
  hc_title(text = "Count of Tracks by County") %>%
  hc_add_theme(hc_theme_smpl()) %>%
  hc_legend(enabled = FALSE) 
# %>%
#   hc_tooltip(pointFormat = "{point.name}: {point.value}")

```



## Cat wins

### cat counts per state

```{r}
# sum the counts column by the Name and STATE_NAME columns
sumByCatStateFull <-
  sum_by_column(in_df = df, column_names = c("Name", "STATE_NAME"))

sumByCatStateWin <-
  sumByCatStateFull[, .SD[which.max(counts)], by = STATE_NAME]

# create a color palette that maps RyeCat to bl
colorPalette <- c("#0000FF","#FF0000")
# create a named vector of colors
colorVector <- setNames(colorPalette, unique(sumByCatStateWin$Name))
# replace the color column with the color vector
sumByCatStateWin$color <- colorVector[sumByCatStateWin$Name]
# log10 transform the counts column
#create a chlorealpleth map of the US
highchart() %>%
  hc_add_series_map(
    usgeojson,
    sumByCatStateWin,
    value = "counts",
    joinBy = c("name", "STATE_NAME"),
    name = "Cat Winner",
    dataLabels = list(enabled = TRUE, format = '{point.name}'),color =colorVector
  ) %>%
  hc_mapNavigation(enabled = TRUE) %>%
  hc_title(text = "Which Cat Wins Which State (+DC") %>%
  hc_add_theme(hc_theme_smpl()) %>%
  hc_legend(enabled = FALSE) %>%
  hc_tooltip(pointFormat = "{point.name} is won by {point.Name}: {point.value} total tracks")


```


### States Won per Cat {data-width=250}



```{r}


# count the number of states that each cat won
total = sumByCatStateWin[, .N, by = Name]
#create a ggplot bar chart of the number of states that each cat won
# label by the cat name
# color by the colorPalette
# order by the number of states won
g = ggplot(total, aes(x = Name, y = N, fill = Name)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Name), vjust = -0.5, size = 3) +
  scale_fill_manual(values = colorVector) +
  labs(x = "Cat", y = "Number of States(+DC) Won") +
  theme_minimal() +
  theme(legend.position = "none")

ggplotly(g)


```


### cat counts per county

```{r}
# sum the counts column by the Name and STATE_NAME columns
sumByCatCountyFull <-
  sum_by_column(in_df = df,
                column_names = c("Name", "COUNTY_FIP","NAME"))
sumByCatCountyWin <-
  sumByCatCountyFull[, .SD[which.max(counts)], by = COUNTY_FIP]
sumByCatCountyWin$color <- colorVector[sumByCatCountyWin$Name]

#create a chlorealpleth map of the US
highchart() %>%
  hc_add_series_map(
    uscountygeojson,
    sumByCatCountyWin,
    value = "counts",
    joinBy = c("fips", "COUNTY_FIP"),
    name = "Cat Winner",
    color = colorVector
  ) %>%
  hc_mapNavigation(enabled = TRUE) %>%
  hc_title(text = "Which Cat Wins Which County") %>%
  hc_add_theme(hc_theme_smpl()) %>%
  hc_legend(enabled = FALSE) %>%
  hc_tooltip(pointFormat = "{point.name} is won by {point.Name}: {point.value} total tracks")

```


### Counties Won per Cat {data-width=250}

```{r}
# count the number of states that each cat won
total = sumByCatCountyWin[, .N, by = Name]
#create a ggplot bar chart of the number of states that each cat won
# label by the cat name
# color by the colorPalette
# order by the number of states won
g=ggplot(total, aes(x = Name, y = N, fill = Name)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Name), vjust = -0.5, size = 3) +
  scale_fill_manual(values = colorVector) +
  labs(x = "Cat", y = "Number of Counties Won") +
  theme_minimal() +
  theme(legend.position = "none")
ggplotly(g)


```


## Tracks per Cat per state



### State diff per Cat

```{r}

# get the second highest count for each state

secondHighestState <-
  sumByCatStateFull[, .SD[order(-counts)[2]], by = STATE_NAME]
# merge the sumByCatStateWin and secondHighestState data frames by the STATE_NAME column
diffByCatState <-
  merge(sumByCatStateWin,
        secondHighestState,
        by = "STATE_NAME",
        all = TRUE)
# calculate the difference between the counts columns
diffByCatState$diff <-
  diffByCatState$counts.x - diffByCatState$counts.y
#log10 transform the diff column
# if the difference is NA, set it to counts.x
diffByCatState$diff[is.na(diffByCatState$diff)] <-
  diffByCatState$counts.x[is.na(diffByCatState$diff)]

diffByCatState$log10diff <- log10(diffByCatState$diff)

diffByCatState$log10diff[diffByCatState$Name.x == "IACat"] <-
  -diffByCatState$log10diff[diffByCatState$Name.x == "IACat"]

p0 <- ggplot(data = diffByCatState,
             mapping = aes(
               x = log10diff,
               y = reorder(STATE_NAME, log10diff),
               color = Name.x
             )) + scale_color_manual(values = colorVector) + geom_vline(xintercept = 0,
                                                                         color = "black",
                                                                         linetype = "dashed") + geom_point()
# remove the legend
p0 <- p0 + theme(legend.position = "none")

# add x axis label
p0 <- p0 + labs(x = "log10 difference in number of tracks won")
# add y axis label
p0 <- p0 + labs(y = "State")
ggplotly(p0)


# stop()
```

### County diff per Cat

```{r}
secondHighestCounty <-
  sumByCatCountyFull[, .SD[order(-counts)[2]], by = COUNTY_FIP]
secondHighestCounty$NAME = NULL
diffByCatCounty <-
  merge(sumByCatCountyWin,
        secondHighestCounty,
        by = "COUNTY_FIP",
        all = TRUE)
diffByCatCounty$diff <-
  diffByCatCounty$counts.x - diffByCatCounty$counts.y
diffByCatCounty$diff[is.na(diffByCatCounty$diff)] <-
  diffByCatCounty$counts.x[is.na(diffByCatCounty$diff)]

diffByCatCounty$log10diff <- log10(diffByCatCounty$diff)
# set non finite values to 0
diffByCatCounty$log10diff[!is.finite(diffByCatCounty$log10diff)] <-
  0

diffByCatCounty$log10diff[diffByCatCounty$Name.x == "IACat"] <-
  -diffByCatCounty$log10diff[diffByCatCounty$Name.x == "IACat"]

p1 <- ggplot(data = diffByCatCounty,
             mapping = aes(x = log10diff,
                           y = reorder(NAME, log10diff),
                           color = Name.x)) + scale_color_manual(values = colorVector) + geom_vline(xintercept = 0,
                                                                                                    color = "black",
                                                                                                    linetype = "dashed") + geom_point()
# remove the legend
p1 <- p1 + theme(legend.position = "none")
# remove y axis labels
p1 <- p1 + theme(axis.text.y = element_blank())
# remove y axis ticks
p1 <- p1 + theme(axis.ticks.y = element_blank())
# remove y axis title
# p1 <- p1 + theme(axis.title.y = element_blank())
# remove all grid lines
p1 <- p1 + theme(panel.grid.major = element_blank(),
                 panel.grid.minor = element_blank(),
                 panel.background = element_blank())

p1 <- p1 + labs(x = "log10 difference in number of tracks won")
p1 <- p1 + labs(y = "County")

ggplotly(p1)

# stop()
# 
```


Row {.tabset .tabset-fade}
-------------------------------------


### cats represented in the analysis

```{r}

kable(unique(df$OriginalName))

```

### cats not represented in the analysis


```{r}
kable(missingNames)

```


# Cat Acts

## Per Day

### cat activity counts per day

```{r}
dfActivity=mapNames(dfActivity)
# limt to Names in nameLimit
dfActivity <- dfActivity[Name %in% nameLimits,]

dfActivity$baseCounts =dfActivity$counts
# set IACat to negative

# column names of dfActivity are "Activity" "Name"     "date"     "counts"  

# remove blank Activity
toRemove=c("","Unknown","Stationary","Driving")
dfActivity <- dfActivity[which(!dfActivity$Activity %in% toRemove), ]
dfActivity$date <- as.Date(dfActivity$date)
dfActivity$Year_Month <- format(as.Date(dfActivity$date), "%Y-%m")

sumByCatActivity <-sum_by_column(dfActivity, c("Name", "Activity", "Year_Month"))

# convert Year_Month to date, looks like this "2020-06"
sumByCatActivity$Year_Month <- as.Date(paste0(sumByCatActivity$Year_Month, "-01"))

dfActivity$counts[dfActivity$Name == "IACat"] <- -dfActivity$counts[dfActivity$Name == "IACat"]

# plot counts by day
p2 <- ggplot(data = dfActivity,
             mapping = aes(x = date,
                           y = counts,
                           color = Name)) + geom_point(size=1) + scale_color_manual(values = colorVector) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + labs(x = "Date", y = "Number of Tracks")

p2 = p2 + facet_wrap( ~ Activity, scales = "free_y")

ggplotly(p2)

```


### cat activity days per count

```{r}

#histogram of counts colored by Name
p2 <- ggplot(data = dfActivity,
             mapping = aes(x = baseCounts,
                           color = Name)) + geom_histogram(bins=100, position="identity",alpha=.5) + scale_color_manual(values = colorVector) + labs(x = "Counts Per Day", y = "Number of Days")
# p2 + facet_wrap( ~ Activity+Name, scales = "free_y")
#log scale the x axis
p2=p2 + facet_wrap( ~ Activity, scales = "free_y") + scale_x_log10()

ggplotly(p2)
```


## Per Month


### cat activity counts per month

```{r}
# plot counts by month
p3 <- ggplot(data = sumByCatActivity,
             mapping = aes(x = Year_Month,
                           y = counts,
                           color = Name)) + geom_point(size=1) + scale_color_manual(values = colorVector) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + labs(x = "Date", y = "Number of Tracks")
p3=p3 + facet_wrap( ~ Activity, scales = "free_y")

ggplotly(p3)

```

### cat activity months per count

```{r}

#histogram of counts colored by Name
p3 <- ggplot(data = sumByCatActivity,
             mapping = aes(x = counts,
                           color = Name)) + geom_histogram(bins = 100,
                                                           position = "identity",
                                                           alpha = .5) + scale_color_manual(values = colorVector) + labs(x = "Counts Per Month", y = "Number of Months")
# p3 + facet_wrap( ~ Activity+Name, scales = "free_y")
#log scale the x axis
p3 = p3 + facet_wrap(~ Activity, scales = "free_y") + scale_x_log10()

ggplotly(p3)