Randall County

Setup

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(readxl)
library(lubridate)

Import

Same process as before: read the county's Excel file, then glimpse it to check the column names and types.

# Load the raw Randall County spreadsheet.
randall <- read_excel(path = "data-raw/RandallCounty.xlsx")

# Peek at the column names, types and first few values.
glimpse(randall)
Rows: 904
Columns: 14
$ `SO #`        <dbl> 90243, 74194, 90242, 84882, 90272, 60488, 86201, 90268, …
$ name          <chr> "DUBIOS, JOSHUA WAYNE", "FLORES, RODRIGO LEE", "UST, CON…
$ age           <dbl> 22, 50, 21, 22, 39, 28, 24, 23, 23, 42, 60, 34, 23, 31, …
$ sex           <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "F", "…
$ race          <chr> "W", "W", "W", "B", "B", "W", "W", "W", "W", "W", "W", "…
$ height_ft     <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5,…
$ height_in     <dbl> 7, 11, 11, 8, 8, 9, 11, 10, 9, 11, 3, 10, 7, 3, 3, 6, 5,…
$ weight        <dbl> 145, 220, 160, 170, 150, 185, 150, 190, 135, 182, 120, 1…
$ address       <chr> "220 VICTORIA POINT\\nSchertz, TX 78154", "7804 CONODE\\…
$ date_arrest   <dttm> 2018-01-02, 2018-01-02, 2018-01-02, 2018-01-03, 2018-01…
$ time_arrest   <dttm> 1899-12-31 11:30:00, 1899-12-31 00:45:00, 1899-12-31 11…
$ agency_arrest <chr> "TPW", "APD", "TPW", "RCSO", "APD", "RCSO", "RCSO", "APD…
$ warrant_ref   <chr> "ONVIEW", "ONVIEW", "ONVIEW;ONVIEW", "26582B;26582B;2658…
$ charges       <chr> "POSS MARIJ <=2OZ", "POSS MARIJ <=2OZ", "POSS CS PG 2 >=…

Clean

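Drop the columns we don't need, combine date_arrest and time_arrest into a single datetime_arrest column (date_arrest itself is kept), and add empty address_arrest, agency_arrest and ethnicity placeholder columns.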

# Build the cleaned Randall County table:
#   * datetime_arrest combines the calendar date from date_arrest with the
#     clock time from time_arrest (time_arrest carries a placeholder
#     1899-12-31 date, so only its hour/minute/second parts are used).
#   * Source columns that are not needed downstream are dropped.
#   * address_arrest, agency_arrest and ethnicity are added as all-NA
#     placeholder columns.
# The timestamp is assembled from numeric components rather than by slicing
# the printed form of time_arrest with substr(): the character form of a
# POSIXct value can omit the clock part (e.g. at midnight on R >= 4.3), which
# leaves ymd_hms() with nothing to parse -- the likely source of the
# "failed to parse" warning. A missing date or time yields NA silently.
# mutate() is used instead of cbind() so the result stays a tibble.
# NOTE(review): the original agency_arrest values are discarded and replaced
# with NA -- confirm that is intended.
randall_clean <- randall |>
  mutate(
    datetime_arrest = make_datetime(
      year = year(date_arrest),
      month = month(date_arrest),
      day = day(date_arrest),
      hour = hour(time_arrest),
      min = minute(time_arrest),
      sec = second(time_arrest)
    )
  ) |>
  select(
    -"SO #",
    -time_arrest,
    -agency_arrest,
    -warrant_ref,
    -height_ft,
    -height_in,
    -weight,
    -address
  ) |>
  mutate(address_arrest = NA, agency_arrest = NA, ethnicity = NA)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `datetime_arrest = ymd_hms(datetime_arrest)`.
Caused by warning:
!  54 failed to parse.
# Inspect the cleaned table before exporting it.
glimpse(randall_clean)
Rows: 904
Columns: 10
$ name            <chr> "DUBIOS, JOSHUA WAYNE", "FLORES, RODRIGO LEE", "UST, C…
$ age             <dbl> 22, 50, 21, 22, 39, 28, 24, 23, 23, 42, 60, 34, 23, 31…
$ sex             <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "F",…
$ race            <chr> "W", "W", "W", "B", "B", "W", "W", "W", "W", "W", "W",…
$ date_arrest     <dttm> 2018-01-02, 2018-01-02, 2018-01-02, 2018-01-03, 2018-…
$ charges         <chr> "POSS MARIJ <=2OZ", "POSS MARIJ <=2OZ", "POSS CS PG 2 …
$ datetime_arrest <dttm> 2018-01-02 11:30:00, 2018-01-02 00:45:00, 2018-01-02 …
$ address_arrest  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ agency_arrest   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ethnicity       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

Export

# Save the cleaned table as CSV.
write_csv(randall_clean, file = "data-processed/Randall-County.csv")