## 8.1
## Print `text` framed by a line of asterisks above and below.
##
## @param text The value to print between the two border lines.
## @return The border string, invisibly (the value of the final print()).
fancy_print <- function(text) {
  border <- "*****"
  print(border)
  print(text)
  print(border)
}
fancy_print("Testing my fancy_print function")
[1] "*****" [1] "Testing my fancy_print function" [1] "*****"
## 8.2
## Double a number.
##
## @param number A numeric value (or vector).
## @return `number` multiplied by two.
my_function <- function(number) {
  doubled <- number * 2
  doubled
}
print(my_function(3))
[1] 6
## 8.3
## Find the largest element of a list of numbers.
##
## @param list_of_things A list (or vector) of comparable numeric values.
## @return The largest element; -Inf when the input is empty.
largest_item <- function(list_of_things) {
  ## Seed with -Inf rather than -1 so that inputs consisting only of
  ## values below -1 still return their true maximum.
  largest <- -Inf
  for (item in list_of_things) {
    if (item > largest) {
      largest <- item
    }
  }
  largest
}
print(largest_item(list(1, 5, 4, 9, 14, 2)))
[1] 14
## 8.4
## Combine two numbers with a rule that depends on which one is larger.
##
## @param x First number.
## @param y Second number.
## @return `x * y - y` when x is greater than y, otherwise `2 * y - x`.
mathematics <- function(x, y) {
  if (x > y) {
    return(x * y - y)
  }
  2 * y - x
}
## 8.5
## Decide whether a single year is a leap year.
##
## @param year A single year number.
## @return TRUE for leap years, FALSE otherwise.
is_leap_year <- function(year) {
  ## A year divisible by 100 is a leap year only if it is also divisible
  ## by 400; any other year is a leap year if it is divisible by 4.
  required_divisor <- if (year %% 100 == 0) 400 else 4
  year %% required_divisor == 0
}
## 8.6
## Count the days in the whole years from `year1` up to, but not
## including, `year2`.
##
## @param year1 First year to count (inclusive).
## @param year2 Year at which counting stops (exclusive).
## @return Total number of days; 0 when `year2` is not after `year1`.
number_of_days <- function(year1, year2) {
  ## `year1:(year2 - 1)` would run backwards when year2 <= year1 and count
  ## years that are not in the interval, so iterate over an explicit,
  ## possibly empty, set of offsets instead.
  n_years <- max(year2 - year1, 0)
  total_days <- 0
  for (offset in seq_len(n_years)) {
    year <- year1 + offset - 1
    if (is_leap_year(year)) {
      total_days <- total_days + 366
    } else {
      total_days <- total_days + 365
    }
  }
  total_days
}
## Expected output: 365 (2023 alone), then 2557 (2023 through 2029).
print( number_of_days(2023, 2024) )
print( number_of_days(2023, 2030) )
[1] 365 [1] 2557
## 8.7
## this is a Python only exercise
## 8.8
## Build the dependent and independent data for respondents from
## country 752 (column V2) out of the survey file.
dependent_variable_name <- "V10"
independent_variables <- list("V4", "V5", "V6", "V7", "V8", "V9")

## Answer codes of the dependent variable and their labels.
recode <- list(
  "1" = "Very happy",
  "2" = "Rather happy",
  "3" = "Not very happy",
  "4" = "Not at all happy"
)

data <- read.csv("wvs.csv")

## Keep a row only when the country matches and the dependent answer is one
## of the codes `recode` knows about. Testing only `1 <= value` let codes
## above 4 through, which then failed in the `recode[[...]]` lookup.
## which() also drops rows where either column is NA.
selected_rows <- which(
  data[["V2"]] == 752 &
    as.character(data[[dependent_variable_name]]) %in% names(recode)
)

## One label per selected row, looked up by name rather than by position.
dependent_data <- lapply(selected_rows, function(rn) {
  recode[[as.character(data[rn, dependent_variable_name])]]
})

## One list per selected row holding that row's independent values, in the
## order given by `independent_variables`.
independent_data <- lapply(selected_rows, function(rn) {
  lapply(independent_variables, function(variable) data[rn, variable])
})

print(dependent_data)
print(independent_data)
## While this is a fun exercise, in R one usually does not need to operate on data at such a low level.
## caret lets you describe a model as an R formula and fit it directly to a data frame (see Chapter 9 for more),
## so the analysis would look more like the code below, although much more data cleaning would still be required.
## For example, limiting the data to just country 752 (V2) is not part of the code here,
## and neither are missing values properly addressed.
library('caret')
## Fit a model of V10 on V4..V9 with caret, using rpart.
## The argument has to be spelled `method`: train() does not recognise
## `methood`, so with the misspelling no model was selected and caret fell
## back to its default (a random forest) instead of rpart.
train(V10 ~ V4 + V5 + V6 + V7 + V8 + V9, data = data, method = "rpart")
[[1]] [1] "Very happy" [[2]] [1] "Not at all happy" [[3]] [1] "Not at all happy" [[4]] [1] "Not at all happy" [[5]] [1] "Not very happy" [[6]] [1] "Not at all happy" [[7]] [1] "Very happy" [[8]] [1] "Not very happy" [[9]] [1] "Very happy" [[10]] [1] "Not at all happy" [[1]] [[1]][[1]] [1] 2 [[1]][[2]] [1] 4 [[1]][[3]] [1] 3 [[1]][[4]] [1] 4 [[1]][[5]] [1] 0 [[1]][[6]] [1] 0 [[2]] [[2]][[1]] [1] 3 [[2]][[2]] [1] 1 [[2]][[3]] [1] 4 [[2]][[4]] [1] 3 [[2]][[5]] [1] 4 [[2]][[6]] [1] 3 [[3]] [[3]][[1]] [1] 4 [[3]][[2]] [1] 4 [[3]][[3]] [1] 3 [[3]][[4]] [1] 1 [[3]][[5]] [1] 1 [[3]][[6]] [1] 3 [[4]] [[4]][[1]] [1] 1 [[4]][[2]] [1] 1 [[4]][[3]] [1] 0 [[4]][[4]] [1] 0 [[4]][[5]] [1] 3 [[4]][[6]] [1] 3 [[5]] [[5]][[1]] [1] 0 [[5]][[2]] [1] 4 [[5]][[3]] [1] 4 [[5]][[4]] [1] 0 [[5]][[5]] [1] 2 [[5]][[6]] [1] 1 [[6]] [[6]][[1]] [1] 1 [[6]][[2]] [1] 2 [[6]][[3]] [1] 4 [[6]][[4]] [1] 2 [[6]][[5]] [1] 0 [[6]][[6]] [1] 4 [[7]] [[7]][[1]] [1] 0 [[7]][[2]] [1] 2 [[7]][[3]] [1] 0 [[7]][[4]] [1] 1 [[7]][[5]] [1] 2 [[7]][[6]] [1] 0 [[8]] [[8]][[1]] [1] 0 [[8]][[2]] [1] 0 [[8]][[3]] [1] 0 [[8]][[4]] [1] 1 [[8]][[5]] [1] 4 [[8]][[6]] [1] 4 [[9]] [[9]][[1]] [1] 1 [[9]][[2]] [1] 2 [[9]][[3]] [1] 3 [[9]][[4]] [1] 3 [[9]][[5]] [1] 3 [[9]][[6]] [1] 3 [[10]] [[10]][[1]] [1] 0 [[10]][[2]] [1] 3 [[10]][[3]] [1] 0 [[10]][[4]] [1] 2 [[10]][[5]] [1] 4 [[10]][[6]] [1] 2
Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. 
Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. 
Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. 
Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. 
Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. 
Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. 
Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?” Warning message in randomForest.default(x, y, mtry = param$mtry, ...): “The response has five or fewer unique values. Are you sure you want to do regression?”
Random Forest 50 samples 6 predictor No pre-processing Resampling: Bootstrapped (25 reps) Summary of sample sizes: 50, 50, 50, 50, 50, 50, ... Resampling results across tuning parameters: mtry RMSE Rsquared MAE 2 1.491547 0.04539493 1.293368 4 1.513259 0.05283877 1.308843 6 1.538556 0.05202806 1.331302 RMSE was used to select the optimal model using the smallest value. The final value used for the model was mtry = 2.
## 8.9
## 8.10
## 8.12
## Reference list of the known leap years between 1900 and 2022, written
## out by hand so the check does not depend on is_leap_year itself.
leap_years <- list(
  1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940,
  1944, 1948, 1952, 1956, 1960, 1964, 1968, 1972, 1976, 1980,
  1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012, 2016, 2020
)

## Every year on the reference list must be reported as a leap year.
for (potential_leap_year in leap_years) {
  if (!is_leap_year(potential_leap_year)) {
    print("Is leap year is not working correctly")
  }
}

## No year from 1900 through 2023 that is missing from the reference list
## may be reported as a leap year.
for (year in 1900:2023) {
  if (year %in% leap_years) {
    next
  }
  if (is_leap_year(year)) {
    print("Is leap year is not working correctly")
  }
}
print("If nothing printed above, is leap year correctly identified leap years and not-leap-years between 1900 and 2022")
[1] "If nothing printed above, is leap year correctly identified leap years and not-leap-years between 1900 and 2022"
## 8.13
## Tax owed: 20 % of income, reduced by 10 for every child.
##
## @param income Taxable income.
## @param children Number of children.
## @return The tax owed; when the deduction would push it below zero the
##   result is 10 instead.
tax_rate <- function(income, children) {
  owed <- 0.2 * income - 10 * children
  if (owed < 0) {
    return(10)
  }
  owed
}
## Alternative tax rule: 20 % of income, cut to two thirds of that amount
## for anyone with at least one child.
##
## @param income Taxable income.
## @param children Number of children.
## @return The tax owed.
tax_rate <- function(income, children) {
  base_tax <- 0.2 * income
  if (children > 0) {
    return(2/3 * base_tax)
  }
  base_tax
}
## 8.14
## Each stock is stored as a list of its levels over time; both start at 100.
stockA <- list(100)
stockB <- list(100)
## Inflow into stock A for the next step.
##
## @param stockA History of stock A; the last element is the current level.
## @param stockB History of stock B; the last element is the current level.
## @return 5 % of A's current level plus 5 % of B's current level.
inflowA <- function(stockA, stockB) {
  own_rate <- 0.05
  cross_rate <- 0.05
  current_a <- stockA[[length(stockA)]]
  current_b <- stockB[[length(stockB)]]
  own_rate * current_a + cross_rate * current_b
}
## Outflow from stock A for the next step.
##
## @param stockA History of stock A; the last element is the current level.
## @param stockB History of stock B. Not used here, but accepted so that all
##   four flow functions share one signature and B is available if needed.
## @return 5 % of A's current level.
outflowA <- function(stockA, stockB) {
  loss_rate <- 0.05
  current_a <- stockA[[length(stockA)]]
  loss_rate * current_a
}
## Inflow into stock B for the next step.
##
## @param stockA History of stock A; the last element is the current level.
## @param stockB History of stock B; the last element is the current level.
## @return 5 % of B's current level plus 5 % of A's current level.
inflowB <- function(stockA, stockB) {
  own_rate <- 0.05
  cross_rate <- 0.05
  current_a <- stockA[[length(stockA)]]
  current_b <- stockB[[length(stockB)]]
  own_rate * current_b + cross_rate * current_a
}
## Outflow from stock B for the next step.
##
## @param stockA History of stock A. Not used here; accepted so the flow
##   functions all take the same arguments.
## @param stockB History of stock B; the last element is the current level.
## @return 5 % of B's current level.
outflowB <- function(stockA, stockB) {
  loss_rate <- 0.05
  current_b <- stockB[[length(stockB)]]
  loss_rate * current_b
}
## Simulate 50 steps of the two-stock model and print both histories.
for (i in 1:50) {
  ## Work out both net changes from the stocks as they stand at the start
  ## of the step. Extending stockA before computing changeB would feed A's
  ## next-step level into B's flows and make the otherwise symmetric
  ## stocks drift apart.
  changeA <- inflowA(stockA, stockB) - outflowA(stockA, stockB)
  changeB <- inflowB(stockA, stockB) - outflowB(stockA, stockB)
  ## Only now append the new levels to each history.
  stockA <- c(stockA, stockA[[length(stockA)]] + changeA)
  stockB <- c(stockB, stockB[[length(stockB)]] + changeB)
}
print(unlist(stockA))
print(unlist(stockB))
[1] 100.0000 105.0000 110.2625 115.8007 121.6283 127.7600 134.2112 [8] 140.9978 148.1370 155.6465 163.5451 171.8526 180.5897 189.7782 [15] 199.4413 209.6029 220.2885 231.5249 243.3400 255.7636 268.8265 [22] 282.5615 297.0029 312.1868 328.1512 344.9359 362.5830 381.1365 [29] 400.6429 421.1509 442.7118 465.3794 489.2105 514.2647 540.6045 [36] 568.2958 597.4078 628.0134 660.1889 694.0150 729.5761 766.9611 [43] 806.2636 847.5817 891.0188 936.6834 984.6897 1035.1577 1088.2136 [50] 1143.9901 1202.6265 [1] 100.0000 105.2500 110.7631 116.5532 122.6346 129.0226 135.7331 [8] 142.7830 150.1899 157.9722 166.1495 174.7421 183.7716 193.2605 [15] 203.2325 213.7127 224.7271 236.3034 248.4704 261.2585 274.6999 [22] 288.8279 303.6781 319.2874 335.6950 352.9418 371.0709 390.1277 [29] 410.1599 431.2174 453.3530 476.6220 501.0825 526.7958 553.8260 [36] 582.2408 612.1112 643.5118 676.5213 711.2220 747.7008 786.0489 [43] 826.3621 868.7412 913.2921 960.1263 1009.3607 1061.1186 1115.5293 [50] 1172.7288 1232.8601
## 8.16
## Hand-written sum for the exercise: add up every element of a list.
## Note that this definition masks base R's sum() for the rest of the script.
##
## @param list_of_things A list (or vector) of numbers.
## @return The total of all elements; 0 for an empty input.
sum <- function(list_of_things) {
  ## Left fold starting from 0: ((0 + x1) + x2) + ...
  Reduce(`+`, list_of_things, 0)
}
## Hand-written arithmetic mean for the exercise.
## Note that this definition masks base R's mean() for the rest of the script.
##
## @param list_of_things A list (or vector) of numbers.
## @return The sum of the elements divided by how many there are.
mean <- function(list_of_things) {
  total <- sum(list_of_things)
  count <- length(list_of_things)
  total / count
}
## Collect every measurement per experimental condition, then report the
## mean for condition A.
experimental <- list(A = list(), B = list())
data <- read.csv("experiment.txt", header = FALSE)
## seq_len() yields an empty sequence for zero rows, whereas 1:nrow(data)
## would iterate over 1 and 0.
for (rn in seq_len(nrow(data))) {
  ## Column 2 names the condition. as.character() keeps the lookup by name
  ## even if the column was read as a factor.
  condition <- as.character(data[rn, 2])
  ## Columns 3 to 7 are appended to the list for that condition.
  for (i in 3:7) {
    experimental[[condition]] <- c(experimental[[condition]], data[rn, i])
  }
}
## means
print(paste("Mean for A", mean(experimental[["A"]])))
[1] "Mean for A 6"
## 8.17
## Report whether condition A outscores condition B by more than 1 on one
## of the measurements in the experiment file.
##
## @param index Which measurement to compare; 1 is the first measurement.
## @param path CSV file without a header row. Column 1 is the user name,
##   column 2 the condition ("A" or "B"), and measurements start at
##   column 3. Defaults to "experiment.txt".
## @return TRUE when the total for A minus the total for B is greater than
##   1, otherwise FALSE.
make_science_happen <- function(index, path = "experiment.txt") {
  conditionA <- 0
  conditionB <- 0
  data <- read.csv(path, header = FALSE)
  ## Columns 1 and 2 hold the user name and the condition, so measurement
  ## `index` lives two columns further right.
  value_column <- index + 2
  for (rn in seq_len(nrow(data))) {
    condition <- data[rn, 2]
    experimental_value <- data[rn, value_column]
    if (condition == "A") {
      conditionA <- conditionA + experimental_value
    } else if (condition == "B") {
      conditionB <- conditionB + experimental_value
    }
  }
  if (conditionA - conditionB > 1) {
    return(TRUE)
  }
  FALSE
}
## Run the comparison on the first measurement (column 3 of the file).
make_science_happen(1)