── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.4
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
Data Import
# Load the raw subvention table; readr guesses the column types on import.
df <- read_csv(file = "data/sports_sss.csv")
Rows: 310 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): Year, NSA_name_en, NSA_name_tc, NSA_name_sc, Subvention_granted
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(df)
# A tibble: 6 × 5
Year NSA_name_en NSA_name_tc NSA_name_sc Subvention_granted
<chr> <chr> <chr> <chr> <chr>
1 2021-22 Hong Kong Archery Associat… 香港射箭總會…… 香港射箭总会…… 6,053,025
2 2021-22 Hong Kong Association of A… 香港田徑總會有限公司… 香港田径总会有限公司… 8,507,101
3 2021-22 Hong Kong Badminton Associ… 香港羽毛球總會有限公… 香港羽毛球总会有限公… 20,513,305
4 2021-22 The Hong Kong Baseball Ass… 香港棒球總會有限公司… 香港棒球总会有限公司… 8,950,382
5 2021-22 Hong Kong Basketball Assoc… 香港籃球總會有限公司… 香港篮球总会有限公司… 14,105,445
6 2021-22 Hong Kong Billiard Sports … 香港桌球總會有限公司… 香港桌球总会有限公司… 5,625,452
str(df)
spc_tbl_ [310 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Year : chr [1:310] "2021-22" "2021-22" "2021-22" "2021-22" ...
$ NSA_name_en : chr [1:310] "Hong Kong Archery Association" "Hong Kong Association of Athletics Affiliates Limited" "Hong Kong Badminton Association Limited" "The Hong Kong Baseball Association Limited" ...
$ NSA_name_tc : chr [1:310] "香港射箭總會" "香港田徑總會有限公司" "香港羽毛球總會有限公司" "香港棒球總會有限公司" ...
$ NSA_name_sc : chr [1:310] "香港射箭总会" "香港田径总会有限公司" "香港羽毛球总会有限公司" "香港棒球总会有限公司" ...
$ Subvention_granted: chr [1:310] "6,053,025" "8,507,101" "20,513,305" "8,950,382" ...
- attr(*, "spec")=
.. cols(
.. Year = col_character(),
.. NSA_name_en = col_character(),
.. NSA_name_tc = col_character(),
.. NSA_name_sc = col_character(),
.. Subvention_granted = col_character()
.. )
- attr(*, "problems")=<externalptr>
Data Cleaning
# Rename the columns to shorter names. NSA_name_sc is left unchanged.
df1 <- df |>
  rename(
    engname = NSA_name_en,
    chiname = NSA_name_tc,
    subsidy = Subvention_granted
  )

head(df1)
# A tibble: 6 × 5
Year engname chiname NSA_name_sc subsidy
<chr> <chr> <chr> <chr> <chr>
1 2021-22 Hong Kong Archery Association 香港射箭總會… 香港射箭总会…… 6,053,…
2 2021-22 Hong Kong Association of Athletics Affili… 香港田徑總會… 香港田径总会有限公司… 8,507,…
3 2021-22 Hong Kong Badminton Association Limited 香港羽毛球總… 香港羽毛球总会有限公… 20,513…
4 2021-22 The Hong Kong Baseball Association Limited 香港棒球總會… 香港棒球总会有限公司… 8,950,…
5 2021-22 Hong Kong Basketball Association Limited 香港籃球總會… 香港篮球总会有限公司… 14,105…
6 2021-22 Hong Kong Billiard Sports Control Council… 香港桌球總會… 香港桌球总会有限公司… 5,625,…
# Clean the subsidy figures and convert them from character to numeric values.
df2 <- df1 |>
  mutate(
    # Keep the first four characters of the fiscal-year label as an integer,
    # e.g. "2021-22" -> 2021.
    year = as.integer(substr(Year, 1, 4)),
    # Strip the thousands separators before converting. Entries that are not
    # plain numbers become NA (R emits a coercion warning) and are removed by
    # drop_na() below.
    subsidy2 = as.numeric(gsub(",", "", subsidy))
  ) |>
  drop_na(subsidy2)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `subsidy2 = as.numeric(gsub(",", "", subsidy))`.
Caused by warning:
! NAs introduced by coercion
head(df2)
# A tibble: 6 × 7
Year engname chiname NSA_name_sc subsidy year subsidy2
<chr> <chr> <chr> <chr> <chr> <int> <dbl>
1 2021-22 Hong Kong Archery Associat… 香港射箭總會… 香港射箭总会…… 6,053,… 2021 6053025
2 2021-22 Hong Kong Association of A… 香港田徑總會… 香港田径总会有限公司… 8,507,… 2021 8507101
3 2021-22 Hong Kong Badminton Associ… 香港羽毛球總… 香港羽毛球总会有限公… 20,513… 2021 20513305
4 2021-22 The Hong Kong Baseball Ass… 香港棒球總會… 香港棒球总会有限公司… 8,950,… 2021 8950382
5 2021-22 Hong Kong Basketball Assoc… 香港籃球總會… 香港篮球总会有限公司… 14,105… 2021 14105445
6 2021-22 Hong Kong Billiard Sports … 香港桌球總會… 香港桌球总会有限公司… 5,625,… 2021 5625452
# A tibble: 6 × 4
year engname chiname subsidy2
<int> <chr> <chr> <dbl>
1 2021 Hong Kong Archery Association 香港射箭總會… 6053025
2 2021 Hong Kong Association of Athletics Affiliates Limited 香港田徑總會… 8507101
3 2021 Hong Kong Badminton Association Limited 香港羽毛球總… 20513305
4 2021 The Hong Kong Baseball Association Limited 香港棒球總會… 8950382
5 2021 Hong Kong Basketball Association Limited 香港籃球總會… 14105445
6 2021 Hong Kong Billiard Sports Control Council Company Limi… 香港桌球總會… 5625452
# Save the cleaned data frame to an RData file in the working directory.
# NOTE(review): df_clean is not created in the code shown here; presumably it
# is the four-column table (year, engname, chiname, subsidy2) printed above.
# Confirm before running.
save(df_clean, file = "df_clean.RData")