Title: | Reshape Data Table |
---|---|
Description: | A grammar of data manipulation with 'data.table', providing a consistent a series of utility functions that help you solve the most common data manipulation challenges. |
Authors: | Jiena McLellan [aut, cre]
|
Maintainer: | Jiena McLellan <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.0.0 |
Built: | 2025-02-10 02:42:46 UTC |
Source: | https://github.com/cran/dataMojo |
create a new column which is the percentage of other columns
col_cal_percent(df, new_col_name, numerator_cols, denominator_cols)
col_cal_percent(df, new_col_name, numerator_cols, denominator_cols)
df |
input data frame |
new_col_name |
new column name |
numerator_cols |
numerator columns |
denominator_cols |
denominator columns |
data frame with a new percentage column
test_df <- data.frame( hc1 = c(2, 0, 1, 5, 6, 7, 10), hc2 = c(1, 0, 10, 12, 4, 1, 9 ), total = c(10, 2, 0, 39, 23, 27, 30) ) dataMojo::col_cal_percent(test_df, new_col_name = "hc_percentage", numerator_cols = c("hc1", "hc2"), denominator_cols = "total" )
test_df <- data.frame( hc1 = c(2, 0, 1, 5, 6, 7, 10), hc2 = c(1, 0, 10, 12, 4, 1, 9 ), total = c(10, 2, 0, 39, 23, 27, 30) ) dataMojo::col_cal_percent(test_df, new_col_name = "hc_percentage", numerator_cols = c("hc1", "hc2"), denominator_cols = "total" )
Anonymized sample data
data(dt_dates)
data(dt_dates)
a data table with dates
Jiena Gu McLellan, 2020-05-26
data(dt_dates)
data(dt_dates)
group by columns and return a summarized table
dt_group_by(dt, group_by_cols, summarize_at, operation)
dt_group_by(dt, group_by_cols, summarize_at, operation)
dt |
input data.table |
group_by_cols |
group by columns |
summarize_at |
column summarize at |
operation |
calculation operation, value should be one of following: sum, mean, median, max, min |
a summarized table
data("dt_groups") dataMojo::dt_group_by(dt_groups, group_by_cols = c("group1", "group2"), summarize_at = "A1", operation = "mean")
data("dt_groups") dataMojo::dt_group_by(dt_groups, group_by_cols = c("group1", "group2"), summarize_at = "A1", operation = "mean")
Anonymized sample data
data(dt_groups)
data(dt_groups)
a data table with groups
Jiena Gu McLellan, 2020-05-26
data(dt_groups)
data(dt_groups)
Anonymized sample data
data(dt_long)
data(dt_long)
a data table in long format
Jiena Gu McLellan, 2020-05-26
data(dt_long)
data(dt_long)
Anonymized sample data
data(dt_missing)
data(dt_missing)
a data table with missing values
Jiena Gu McLellan, 2020-05-26
data(dt_missing)
data(dt_missing)
Anonymized sample data
data(dt_values)
data(dt_values)
a data table with values
Jiena Gu McLellan, 2020-05-26
data(dt_values)
data(dt_values)
Fill missing values
fill_NA_with(dt, fill_cols, fill_value)
fill_NA_with(dt, fill_cols, fill_value)
dt |
input data table |
fill_cols |
filter by this columns |
fill_value |
fill NA with this value |
data table which NAs are filled
data("dt_missing") fill_NA_with(dt_missing, fill_cols = c("Full_name"), fill_value = "pending")
data("dt_missing") fill_NA_with(dt_missing, fill_cols = c("Full_name"), fill_value = "pending")
Filter all rows that meeting requirements
filter_all(dt, operator, cutoff_value)
filter_all(dt, operator, cutoff_value)
dt |
input data.table |
operator |
operator should be one of l, g. l means less than, g means greater than. |
cutoff_value |
threshold value |
filtered data table
data("dt_values") dataMojo::filter_all(dt_values, operator = "l", .2)
data("dt_values") dataMojo::filter_all(dt_values, operator = "l", .2)
Filter all rows that meet requirements with selected columns
filter_all_at(dt, operator, cutoff_value, selected_cols)
filter_all_at(dt, operator, cutoff_value, selected_cols)
dt |
input data table |
operator |
operator should be one of l, or g. l means less than, g means greater than |
cutoff_value |
cutoff value |
selected_cols |
selected columns from input data table |
filtered data table
data("dt_values") dataMojo::filter_all_at(dt_values, operator = "l", .1, c("A1", "A2"))
data("dt_values") dataMojo::filter_all_at(dt_values, operator = "l", .1, c("A1", "A2"))
Filter any rows that meeting requirements
filter_any(dt, operator, cutoff_value)
filter_any(dt, operator, cutoff_value)
dt |
input data.table |
operator |
operator should be one of l, g. l means less than, g means greater than. |
cutoff_value |
threshold value |
fitlered data table
data("dt_values") dataMojo::filter_any(dt_values, operator = "l", .1)
data("dt_values") dataMojo::filter_any(dt_values, operator = "l", .1)
Filter any rows that meet requirements with selected columns
filter_any_at(dt, operator, cutoff_value, selected_cols)
filter_any_at(dt, operator, cutoff_value, selected_cols)
dt |
input data table |
operator |
operator should be one of l, or g. l means less than, g means greater than |
cutoff_value |
cutoff value |
selected_cols |
selected columns from input data table |
filtered data table
data("dt_values") dataMojo::filter_all_at(dt_values, operator = "l", .1, c("A1", "A2"))
data("dt_values") dataMojo::filter_all_at(dt_values, operator = "l", .1, c("A1", "A2"))
Fetch one row from each grouped by group
get_row_group_by(dt, group_by_cols, fetch_row)
get_row_group_by(dt, group_by_cols, fetch_row)
dt |
input data table |
group_by_cols |
group by columns |
fetch_row |
first means to fetch first row and last means to fetch last row |
grouped by data table
data("dt_groups") dataMojo::get_row_group_by(dt_groups, group_by_cols = c("group1", "group2"), fetch_row = "first")
data("dt_groups") dataMojo::get_row_group_by(dt_groups, group_by_cols = c("group1", "group2"), fetch_row = "first")
Create an aggregated data table with all proportion of one selected column
pivot_percent_at(dt, question_col, aggregated_by_cols)
pivot_percent_at(dt, question_col, aggregated_by_cols)
dt |
data table |
question_col |
column selected as questions |
aggregated_by_cols |
grouped by columns |
aggregated data table
test_dt <- data.table::data.table( Question = c(rep("Good", 3), rep("OK", 3), rep("Bad", 3)), Gender = c(rep("F", 4), rep("M", 5)) ) dataMojo::pivot_percent_at(test_dt, question_col = "Question", aggregated_by_cols = "Gender")
test_dt <- data.table::data.table( Question = c(rep("Good", 3), rep("OK", 3), rep("Bad", 3)), Gender = c(rep("F", 4), rep("M", 5)) ) dataMojo::pivot_percent_at(test_dt, question_col = "Question", aggregated_by_cols = "Gender")
Create an aggragated data table with all proportion of multiple selected column
pivot_percent_at_multi(dt, question_col, aggregated_by_cols)
pivot_percent_at_multi(dt, question_col, aggregated_by_cols)
dt |
data table |
question_col |
columns selected as questions |
aggregated_by_cols |
grouped by columns |
an aggragated data table
test_dt <- data.table::data.table( Question1 = c(rep("Good", 3), rep("OK", 3), rep("Bad", 3)), Question2 = c(rep("Good", 2), rep("OK", 2), rep("Bad", 5)), Gender = c(rep("F", 4), rep("M", 5)) ) dataMojo::pivot_percent_at_multi(test_dt, question_col = c("Question1","Question2") , aggregated_by_cols = "Gender")
test_dt <- data.table::data.table( Question1 = c(rep("Good", 3), rep("OK", 3), rep("Bad", 3)), Question2 = c(rep("Good", 2), rep("OK", 2), rep("Bad", 5)), Gender = c(rep("F", 4), rep("M", 5)) ) dataMojo::pivot_percent_at_multi(test_dt, question_col = c("Question1","Question2") , aggregated_by_cols = "Gender")
Reshape data frame to a longer format
reshape_longer(dt, keep_cols, label_cols, value_cols)
reshape_longer(dt, keep_cols, label_cols, value_cols)
dt |
input data |
keep_cols |
columns to be kept |
label_cols |
column name that contains the melted columns |
value_cols |
column name that contains the value of melted columns |
data table in a longer format
data("dt_dates") reshape_longer(dt_dates, keep_cols = "Full_name", label_cols = c("Date_Type"), value_cols = "Exact_date")
data("dt_dates") reshape_longer(dt_dates, keep_cols = "Full_name", label_cols = c("Date_Type"), value_cols = "Exact_date")
Reshape data frame to a wider format
reshape_wider(dt, keep_cols, col_label, col_value)
reshape_wider(dt, keep_cols, col_label, col_value)
dt |
input data table |
keep_cols |
columns to be kept |
col_label |
columns that each unique values will be reshaped as a column name |
col_value |
columns that fill the reshaped columns |
reshaped widen data table
data("dt_long") dataMojo::reshape_wider(dt_long, keep_cols = c("Full_name"), col_label = c("Date_Type"), col_value = "Exact_date")
data("dt_long") dataMojo::reshape_wider(dt_long, keep_cols = c("Full_name"), col_label = c("Date_Type"), col_value = "Exact_date")
Expand row given start and end dates
row_expand_dates(dt, start_date_col, end_date_col, new_name)
row_expand_dates(dt, start_date_col, end_date_col, new_name)
dt |
input data table |
start_date_col |
start date column |
end_date_col |
end date column |
new_name |
new generated column name |
expanded data table
dt_dates_simple <- data.table::data.table( Start_Date = as.Date(c("2020-02-03", "2020-03-01") ), End_Date = as.Date(c("2020-02-05", "2020-03-02") ), group = c("A", "B") ) row_expand_dates(dt_dates_simple, "Start_Date", "End_Date", "Date")[]
dt_dates_simple <- data.table::data.table( Start_Date = as.Date(c("2020-02-03", "2020-03-01") ), End_Date = as.Date(c("2020-02-05", "2020-03-02") ), group = c("A", "B") ) row_expand_dates(dt_dates_simple, "Start_Date", "End_Date", "Date")[]
Expand row based on pattern
row_expand_pattern(dt, col_name, split_by_pattern, new_name)
row_expand_pattern(dt, col_name, split_by_pattern, new_name)
dt |
input data table |
col_name |
column to be expanded |
split_by_pattern |
split based on pattern |
new_name |
new generated column name |
expanded data table
data("starwars_simple") row_expand_pattern(starwars_simple, "films", ", ", "film")[]
data("starwars_simple") row_expand_pattern(starwars_simple, "films", ", ", "film")[]
Convert count to percentage
row_percent_convert(data, cols_rowsum)
row_percent_convert(data, cols_rowsum)
data |
data frame |
cols_rowsum |
columns need to be converted to percentage |
data frame with calculated row percentage
test_df <- data.frame( Group = c("A", "B", "C"), Female = c(2,3,5), Male = c(10,11, 13) ) dataMojo::row_percent_convert(test_df, cols_rowsum = c("Female", "Male"))
test_df <- data.frame( Group = c("A", "B", "C"), Female = c(2,3,5), Male = c(10,11, 13) ) dataMojo::row_percent_convert(test_df, cols_rowsum = c("Female", "Male"))
Select columns
select_cols(dt, cols)
select_cols(dt, cols)
dt |
input data table |
cols |
select columns |
data table with selected columns
data("dt_dates") select_cols(dt_dates, c("Start_Date", "Full_name"))
data("dt_dates") select_cols(dt_dates, c("Start_Date", "Full_name"))
starwars data
data(starwars_simple)
data(starwars_simple)
a data table as example
Jiena Gu McLellan, 2020-05-26
data(starwars_simple)
data(starwars_simple)
Split one column to multiple columns based on patterns
str_split_col(dt, by_col, by_pattern, match_to_names = NULL)
str_split_col(dt, by_col, by_pattern, match_to_names = NULL)
dt |
input data table |
by_col |
by this column |
by_pattern |
split by this patter |
match_to_names |
created new columns names |
data table with new columns
data("dt_dates") str_split_col(dt_dates, by_col = "Full_name", by_pattern = ", ", match_to_names = c("First Name", "Last Name"))
data("dt_dates") str_split_col(dt_dates, by_col = "Full_name", by_pattern = ", ", match_to_names = c("First Name", "Last Name"))