Midland City

Setup

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(readxl)
library(lubridate)

Import

Same process as before: read the raw Excel file, then glimpse the columns to see what needs cleaning.

# Load the raw arrest records from the Excel workbook.
midland <- read_excel(path = "data-raw/MidlandCity.xlsx")

# Print every column of the raw data with its type and first values.
glimpse(midland)
Rows: 2,704
Columns: 29
$ age         <chr> "39", "39", "25", "29", "36", "37", "17", "19", "31", "21"…
$ officer     <dbl> 196, 26, 593, 523, 587, 46, 597, 597, 596, 58, 142, 504, 1…
$ charges     <chr> "POSS CS PG 1 >= 1G < 4G", "POSS CS PG 1 >= 1G < 4G", "ASS…
$ date_arrest <dttm> 2019-07-25, 2019-07-25, 2019-07-29, 2019-07-31, 2019-07-3…
$ district    <chr> "P5", "P5", "P2", "P2", "P7", "P6", "P2", "P2", "P4", "P2"…
$ gangrelat   <chr> NA, NA, "NO", NA, "NO", NA, NA, NA, "NO", "NO", "NO", NA, …
$ geox        <dbl> 1756171, 1756171, 1738124, 1743033, 1734635, 1735286, 1741…
$ geoy        <dbl> 10701051, 10701051, 10690065, 10696034, 10706882, 10676229…
$ name_id     <dbl> 10286735, 10286735, 10193951, 10137556, 10064002, 10304231…
$ race        <chr> "W", "W", "W", "W", "B", "W", "W", "W", "W", "W", "W", "W"…
$ reportarea  <chr> "0437", "0437", "0543", "0533", "0309", "0204", "0556", "0…
$ sex         <chr> "M", "M", "F", "F", "F", "F", "F", "M", "M", "M", "M", "M"…
$ time_arrest <chr> "1339", "1338", "2256", "0206", "1821", "1242", "0121", "0…
$ arr_chrg    <chr> "HSC 481.115(c)  F3", "HSC 481.115(c)  F3", "HSC 483.041(d…
$ chrg_cnt    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ charge_desc <chr> "POSS CS PG 1 >= 1G < 4G", "POSS CS PG 1 >= 1G < 4G", "POS…
$ fel_misd    <chr> "F", "F", "M", "M", "F", "F", "F", "F", "M", "M", "F", "M"…
$ ucr_code    <chr> "35A", "35A", "35A", "35A", "35A", "35A", "35A", "35A", "3…
$ ibr_code    <chr> "35A", "35A", "35A", "35A", "35A", "35A", "35A", "35A", "3…
$ firstname   <chr> "CARLTON", "CARLTON", "CHRISTINA", "KATELYN", "LATOYA", "H…
$ gangaffil   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ hair        <chr> "GRY", "GRY", NA, "BRO", "BRO", "BRO", "BRO", "BLK", "BLD"…
$ height      <chr> "507", "507", NA, "505", "507", "506", "505", "502", "507"…
$ lastname    <chr> "BLACKWELL", "BLACKWELL", "BALDERRAMA", "SANDERSON", "WILL…
$ middlename  <chr> NA, NA, NA, "MARIE", "DENISE", "DAWN", "ELIZABETH", NA, NA…
$ street      <chr> "N LORAINE ST", "N LORAINE ST", "ROOSEVELT DR", "AIRLINE R…
$ streetnbr   <chr> "601", "601", "4428", "7100", "1212", NA, "4715", "4309", …
$ weight      <dbl> 180, 180, 0, 155, 240, 200, 160, 130, 140, 180, 150, 100, …
$ age_group   <chr> "35 to 44", "35 to 44", "25 to 34", "25 to 34", "35 to 44"…

Select Columns

# Build the cleaned arrest table: derive a combined arrest timestamp and a
# full name, add three placeholder columns filled with NA, and keep only the
# ten columns listed in select().
midland_clean <- midland |>
  mutate(
    # Format the date explicitly so the pasted text is always
    # "YYYY-MM-DD HHMM", then parse it into a real date-time instead of
    # leaving it as a character string. Values that do not match the
    # format (e.g. a missing time) become NA.
    datetime_arrest = as.POSIXct(
      paste(format(date_arrest, "%Y-%m-%d"), time_arrest),
      format = "%Y-%m-%d %H%M",
      tz = "UTC"
    ),
    # NOTE(review): paste() turns a missing name part into the text "NA";
    # confirm whether firstname/lastname can be missing in this file.
    name = paste(firstname, lastname),
    # Placeholder columns with no values in this source; presumably present
    # to match a shared output layout -- TODO confirm.
    address_arrest = NA,
    agency_arrest = NA,
    ethnicity = NA
  ) |>
  # Name the columns to keep (rather than listing every column to drop) so
  # the output layout is visible at a glance. Adding the placeholders in
  # mutate() instead of cbind() keeps the result a tibble.
  select(
    age,
    charges,
    date_arrest,
    race,
    sex,
    datetime_arrest,
    name,
    address_arrest,
    agency_arrest,
    ethnicity
  )

# Check the cleaned table: column names, types and first values.
glimpse(midland_clean)
Rows: 2,704
Columns: 10
$ age             <chr> "39", "39", "25", "29", "36", "37", "17", "19", "31", …
$ charges         <chr> "POSS CS PG 1 >= 1G < 4G", "POSS CS PG 1 >= 1G < 4G", …
$ date_arrest     <dttm> 2019-07-25, 2019-07-25, 2019-07-29, 2019-07-31, 2019-…
$ race            <chr> "W", "W", "W", "W", "B", "W", "W", "W", "W", "W", "W",…
$ sex             <chr> "M", "M", "F", "F", "F", "F", "F", "M", "M", "M", "M",…
$ datetime_arrest <chr> "2019-07-25 1339", "2019-07-25 1338", "2019-07-29 2256…
$ name            <chr> "CARLTON BLACKWELL", "CARLTON BLACKWELL", "CHRISTINA B…
$ address_arrest  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ agency_arrest   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ethnicity       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

Export

# Write the cleaned table to the processed-data folder as CSV.
write_csv(midland_clean, file = "data-processed/Midland-City.csv")