Working directory
# Report the current working directory
getwd()
# Switch to the course folder, but only when it exists on this machine:
# setwd() on a missing path stops the whole script with an error.
lecture_dir <- "D:\\Teaching_Clark\\GitRepo\\Spring2025\\geog-247\\docs\\Lectures\\Week01"
if (dir.exists(lecture_dir)) {
  setwd(lecture_dir)
} else {
  message(
    "Directory not found, keeping the current working directory: ",
    lecture_dir
  )
}
# Confirm which working directory is active now
getwd()
Terminate script
# Endless loop: `i` is never changed, so the exit condition is never met
# and "good" is printed until the run is interrupted by hand.
i <- 1
repeat {
  if (!(i > 0)) {
    break
  }
  print("good")
}
Get help for attached (loaded) libraries
# Look up a help topic: `?topic` is the shorthand and help("topic") the
# equivalent function call
?dplyr
help("dplyr")
# Search the help pages of all installed libraries: help.search() is the
# function call and `??topic` the shorthand
help.search("dplyr")
??dplyr
Variables
# Two numeric values to work with
x <- 10
y <- 10
# Results are stored under names that do not mask base R functions
# (a variable called `sum` would shadow base::sum)
total <- x + y # Add x and y
product <- x * y # Multiply x and y
print(x) # Output the value of x
print(total) # Output the sum
print(product) # Output the product
Changing Variable Values
# Assigning to an existing name replaces the value it held
x <- 15
print(x) # Shows the new value of x

# Basic value types
pi_value <- 3.14 # numeric
is_great <- TRUE # logical
name <- "R Programming" # character
List
# Build a named list whose elements have different types
my_list <- list(
  name = "John Doe",
  age = 30,
  scores = c(85, 90, 95),
  passed = TRUE
)
# Show the whole list
print(my_list)
# Extract a single element by position with [[ ]]
print(my_list[[1]]) # "John Doe"
# Extract a single element by name with [[ ]]
print(my_list[["age"]]) # 30
# Extract a single element by name with $
print(my_list$scores) # 85 90 95
Clean environment
# Remove every visible object from the current workspace; names that
# start with a dot are not listed by ls() here and therefore stay
rm(list = ls(all.names = FALSE))
Read csv
# Load parks_trees.csv (path is relative to the working directory)
data <- read.csv(file = "parks_trees.csv")
# Preview the first rows
head(x = data)
# Column types and a compact look at the contents
str(object = data)
# Per-column summary statistics
summary(object = data)
Display column names
# Column names of the data frame
names(data)
# Print the values of a single column
data$neighborhood
# Append a column that holds the same label in every row
data[["data_source"]] <- "Boston_GIS"
Download package
# 1. Install ggplot2 if it is missing, then attach it.
# requireNamespace() only checks that the package is available; the
# library() call afterwards attaches it in every case. With a bare
# `if (!require(...)) install.packages(...)` the package is installed
# but left unattached on that first run, so later ggplot() calls fail.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# 2. Manual download
# Packages that were removed from the CRAN repository
# (e.g., TexMix, rgdal) have to be downloaded manually.
Regression
# Step 1: Simulate a noisy linear series
set.seed(123) # Fixed seed so the random draws are reproducible
n <- 100 # Sample size
x <- seq_len(n) # Predictor: 1, 2, ..., n
y <- 3 * x + rnorm(n, sd = 50) # Slope 3 plus Gaussian noise (sd = 50)
# Step 2: Combine predictor and response in a data frame
data <- data.frame(x, y)
# Step 3: Fit y on x with a linear model and keep its R-squared
model <- lm(formula = y ~ x, data = data)
r_squared <- summary(model)[["r.squared"]]
# Step 4: Scatter plot with the fitted line and its 95% confidence band
ggplot(data, aes(x = x, y = y)) +
  geom_point(color = "blue", size = 2) + # Observed points
  # Stating the formula explicitly avoids the
  # "`geom_smooth()` using formula = 'y ~ x'" message
  geom_smooth(
    method = "lm", formula = y ~ x,
    color = "red", fill = "green", level = 0.95
  ) +
  # sprintf() gives a single space after "=" and always two decimals;
  # the label is placed 50 units below the largest y value
  annotate(
    "text",
    x = 10, y = max(data$y) - 50,
    label = sprintf("R-squared = %.2f", r_squared),
    size = 5, color = "darkred"
  ) +
  labs(
    title = "Linear Regression with Confidence Interval",
    x = "Independent Variable (X)",
    y = "Dependent Variable (Y)"
  ) +
  theme_minimal()

3D regression
# Attach ISLR and load its College data set into the workspace
library(ISLR)
data("College")
# Search the installed help pages for scatter3D
help.search("scatter3D")
# Attach plot3D
library(plot3D)
# Step 1: Generate synthetic data
set.seed(123) # For reproducibility
n <- 100 # Number of data points
x1 <- runif(n, 0, 10) # Random values for x1
x2 <- runif(n, 0, 10) # Random values for x2
# Linear relationship with noise: intercept 5, slopes 2 and 3, noise sd 5
y <- 5 + 2 * x1 + 3 * x2 + rnorm(n, mean = 0, sd = 5)
# Step 2: Create a data frame
data <- data.frame(x1 = x1, x2 = x2, y = y)
# Step 3: Fit a linear model on the data frame built above; passing
# `data` ties the fit to that table instead of to whichever x1/x2/y
# objects happen to exist in the workspace
model <- lm(y ~ x1 + x2, data = data)
summary(model)
##
## Call:
## lm(formula = y ~ x1 + x2, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.4970 -3.4107 -0.5429 2.8746 16.8313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.2960 1.4309 3.002 0.00341 **
## x1 2.0486 0.1728 11.853 < 2e-16 ***
## x2 2.9852 0.1867 15.986 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.882 on 97 degrees of freedom
## Multiple R-squared: 0.7904, Adjusted R-squared: 0.7861
## F-statistic: 182.9 on 2 and 97 DF, p-value: < 2.2e-16
# Evaluate the fitted model on a regular grid that spans the observed
# x1 and x2 ranges (min to max); the predictions form the regression plane
grid.lines <- 20
x.pred <- seq(from = min(x1), to = max(x1), length.out = grid.lines)
y.pred <- seq(from = min(x2), to = max(x2), length.out = grid.lines)
xy <- expand.grid(x1 = x.pred, x2 = y.pred)
z.pred <- matrix(
  data = predict(model, newdata = xy),
  nrow = grid.lines,
  ncol = grid.lines
)
# Fitted value for each observation, used for the droplines to the surface
fitpoints <- predict(model)
# 3D scatter plot of the observations together with the regression plane
scatter3D(
  x1, x2, y,
  pch = 19, cex = 1, colvar = NULL, col = "red",
  theta = 30, phi = 5, bty = "b",
  xlab = "x1", ylab = "x2", zlab = "y",
  surf = list(
    x = x.pred, y = y.pred, z = z.pred,
    facets = TRUE, fit = fitpoints,
    col = ramp.col(
      col = c("dodgerblue3", "seagreen2"),
      n = 300, alpha = 0.9
    ),
    border = "black"
  ),
  main = "3D Regression"
)
