Working directory

# Show the current working directory
getwd()

# Set the working directory to the course folder, then confirm the change.
# The path is machine-specific: edit `course_dir` to match your own computer.
# file.path() joins the parts with "/", which R also accepts on Windows.
course_dir <- file.path(
  "D:", "Teaching_Clark", "GitRepo", "Spring2025",
  "geog-247", "docs", "Lectures", "Week01"
)
if (dir.exists(course_dir)) {
  setwd(course_dir)
} else {
  # Warn instead of aborting, so the rest of the script can still be run
  warning(
    "Folder not found, working directory unchanged: ", course_dir,
    call. = FALSE
  )
}
getwd()

Terminate script

# Endless loop (presumably intentional, for practising how to stop a script):
# `i` is never changed inside the loop, so the condition `i>0` stays TRUE and
# 'good' keeps printing until the run is interrupted manually
# (e.g., Esc or the Stop button in RStudio).
i <- 1
while (i>0) {
  print('good')
}

Get help for loaded (attached) libraries

# Open the help page for a topic; both forms below do the same lookup
help("dplyr")
?dplyr

# Search the help of all installed libraries; again two equivalent forms
??dplyr
help.search("dplyr")

Variables

# Create two numeric variables
x <- 10
y <- 10

# The result is stored as `total` rather than `sum`, so the name of base R's
# sum() function is not reused for a variable.
total <- x + y  # Add x and y
product <- x * y  # Multiply x and y

print(x)  # Output the value of x
print(total)  # Output the sum
print(product)  # Output the product

Changing Variable Values

# Assigning to an existing name replaces the value stored under it
x <- 15
print(x)  # Shows the updated value of x

# Variable Types: one example for each of three basic types
pi_value <- 3.14         # Numeric
is_great <- TRUE         # Logical
name <- "R Programming"  # Character

List

# Create a named list; its elements may have different types and lengths
my_list <- list(
  name   = "John Doe",       # character
  age    = 30,               # numeric
  scores = c(85, 90, 95),    # numeric vector of length 3
  passed = TRUE              # logical
)

# Show the whole list
print(my_list)

# Extract one element by its position with [[ ]]
print(my_list[[1]])  # Prints "John Doe"

# Extract one element by its name with [[ ]]
print(my_list[["age"]])  # Prints 30

# Extract one element by its name with $
print(my_list$scores)  # Prints 85 90 95

Clean environment

# Remove every object that ls() lists in the current environment, so the
# sections below start from an empty workspace. Only objects are removed;
# packages that were loaded stay loaded.
rm(list=ls())

Read csv

# Read parks_trees.csv into a data frame; the relative path is resolved
# against the current working directory
data <- read.csv(file = "parks_trees.csv")

# Preview the first few rows
head(data)

# Show the structure: one line per column with its type and first values
str(data)

# Per-column summary statistics
summary(data)

Display column names

# List the names of all columns in the data frame
colnames(data)

# Inspecting a specific column: $ extracts the `neighborhood` column by name
data$neighborhood

# Add a new column: the single string "Boston_GIS" is assigned to a column
# named `data_source`
data$data_source <- "Boston_GIS"

Download package

# 1. Install from CRAN when missing, then load.
# requireNamespace() only checks whether ggplot2 is installed. library() then
# attaches it and stops with an error if it is still unavailable. With
# `if (!require(pkg)) install.packages(pkg)` alone, a freshly installed
# package is never attached, so the first later call to ggplot() would fail.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# 2. Manually download and install a package.
#
# Use this for a package that was removed from the CRAN repository,
# e.g., TexMix, rgdal.

Regression

# Step 1: Simulate a noisy linear trend, y = 3 * x + noise
set.seed(123)  # Fixed seed so the random noise is reproducible
n <- 100  # Sample size
x <- seq_len(n)  # Serial data 1, 2, ..., n (x variable)
y <- 3 * x + rnorm(n, sd = 50)  # Normal noise: mean 0 (the default), sd 50

# Step 2: Collect both variables in a data frame (columns `x` and `y`)
data <- data.frame(x, y)

# Step 3: Fit a simple linear regression of y on x
model <- lm(y ~ x, data = data)

# Keep the model's R-squared for the plot annotation
r_squared <- summary(model)[["r.squared"]]

# Step 4: Plot the data, the regression line and its confidence interval
ggplot(data, aes(x = x, y = y)) +
  geom_point(color = "blue", size = 2) +  # Scatter plot
  # Regression line with 95% CI. Stating the formula explicitly avoids the
  # "`geom_smooth()` using formula = 'y ~ x'" message.
  geom_smooth(
    method = "lm", formula = y ~ x,
    color = "red", fill = "green", level = 0.95
  ) +
  # R-squared label placed at x = 10, 50 units below the largest y value.
  # sprintf() always prints two decimals and removes the doubled space that
  # paste("R-squared = ", ...) left after the equals sign.
  annotate(
    "text",
    x = 10, y = max(data$y) - 50,
    label = sprintf("R-squared = %.2f", r_squared),
    size = 5, color = "darkred"
  ) +
  labs(
    title = "Linear Regression with Confidence Interval",
    x = "Independent Variable (X)",
    y = "Dependent Variable (Y)"
  ) +
  theme_minimal()

3D regression

# Attach the packages used in this section
library(ISLR)
library(plot3D)

# Load the College dataset into the workspace
# NOTE(review): College is not used by the code shown below - confirm it is
# still needed.
data("College")

# Search the installed help pages for scatter3D
??scatter3D
## starting httpd help server ... done
# Step 1: Generate synthetic data
set.seed(123)  # For reproducibility
n <- 100  # Number of data points
x1 <- runif(n, 0, 10)  # Random values for x1, uniform on [0, 10]
x2 <- runif(n, 0, 10)  # Random values for x2, uniform on [0, 10]
# Linear relationship (intercept 5, slopes 2 and 3) plus normal noise, sd 5
y <- 5 + 2 * x1 + 3 * x2 + rnorm(n, mean = 0, sd = 5)

# Step 2: Create a data frame
data <- data.frame(x1 = x1, x2 = x2, y = y)

# Step 3: Fit a linear model with two predictors.
# `data = data` makes the model read its variables from the data frame built
# in Step 2 (as the simple regression above does) instead of from the global
# workspace. The fitted values are unchanged; only the printed Call differs.
model <- lm(y ~ x1 + x2, data = data)
summary(model)
## 
## Call:
## lm(formula = y ~ x1 + x2, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4970 -3.4107 -0.5429  2.8746 16.8313 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.2960     1.4309   3.002  0.00341 ** 
## x1            2.0486     0.1728  11.853  < 2e-16 ***
## x2            2.9852     0.1867  15.986  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.882 on 97 degrees of freedom
## Multiple R-squared:  0.7904, Adjusted R-squared:  0.7861 
## F-statistic: 182.9 on 2 and 97 DF,  p-value: < 2.2e-16
# Fitted value of the model at every observed point; used later for the
# droplines from the points to the surface
fitpoints <- predict(model)

# Regression plane: evaluate the model on a regular grid that spans the
# observed range (min to max) of both predictors
grid.lines <- 20  # Number of grid points along each axis
x.pred <- seq(from = min(x1), to = max(x1), length.out = grid.lines)
y.pred <- seq(from = min(x2), to = max(x2), length.out = grid.lines)  # x2 axis
xy <- expand.grid(x1 = x.pred, x2 = y.pred)  # Every (x1, x2) combination
# Reshape the predictions into a grid.lines x grid.lines matrix
z.pred <- matrix(predict(model, xy), grid.lines, grid.lines)

# 3D scatter plot of the observations (red points) with the regression plane
scatter3D(
  x1, x2, y,
  pch = 19, cex = 1,
  colvar = NULL, col = "red",
  theta = 30, phi = 5, bty = "b",
  xlab = "x1", ylab = "x2", zlab = "y",
  surf = list(
    x = x.pred, y = y.pred, z = z.pred,  # Grid axes and predicted heights
    facets = TRUE,
    fit = fitpoints,  # Fitted points for the droplines to the surface
    col = ramp.col(
      col = c("dodgerblue3", "seagreen2"),
      n = 300, alpha = 0.9
    ),
    border = "black"
  ),
  main = "3D Regression"
)