Date: 2024-04-23
R
version: 3.5.0
*Corresponding author: matthew.malishev [at] gmail.com
This document can be found at https://github.com/darwinanddavis/UsefulCode
This document outlines some useful R
code for plotting,
cool functions, and other random tidbits.
Access structural attributes of unique classes, such as raster and ggmap (bbox).
# Normal example
<- data.frame(X = c(1:5), Y = c(6:10))
df str(df)
$X
df
# `attr` method
require(ggmap)
<- get_map("Atlanta", zoom = 12, source = "stamen",
map maptype = "toner-lines")
str(map)
attr(map, "bb")$ll.lat
Convert character to factor to numeric without conversion error
read.table(f, header = T, sep = ",", row.names = NULL,
stringsAsFactors = FALSE, strip.white = TRUE)
$V2 <- as.numeric(f$V2) f
See call options for class
methods(class = "estUDm")
Set dynamic input for variable / assign variable to char vector
<- function(shadedens) {
shadedens # set shade density to clumped (to match food) or
# sparse
if (shadedens == "Random") {
NLCommand("set Shade-density \"Random\" ")
else {
} NLCommand("set Shade-density \"Clumped\" ")
}
}shadedens("Clumped") # set clumped resources
Interactive network plots using d3
# Load package
install.packages("networkD3")
library(networkD3)
# Load energy projection data
<- "https://cdn.rawgit.com/christophergandrud/networkD3/master/JSONdata/energy.json"
URL <- jsonlite::fromJSON(URL)
Energy
# Now we have 2 data frames: a 'links' data frame
# with 3 columns (from, to, value), and a 'nodes'
# data frame that gives the name of each node.
head(Energy$links)
head(Energy$nodes)
# Thus we can plot it
sankeyNetwork(Links = Energy$links, Nodes = Energy$nodes,
Source = "source", Target = "target", Value = "value",
NodeID = "name", units = "TWh", fontSize = 12,
nodeWidth = 30)
`?`(sankeyNetwork)
Optimal empty data frame
<- data.frame(Date = as.Date(character()), X = numeric(),
df Y = integer(), stringsAsFactors = FALSE)
Add df cols with mutate
require(dplyr)
<- data.frame(a = rnorm(10), b = (1:20))
df %>% mutate(c = rnorm(20), b = b * 67) df
Change df
column names
colnames(data)[c(1,2,3)] <- c("TimeStamp","Lat","Long")
# tidyr
%>% rename("Updated_col" = "Current_col")
df %>% rename(.cols = 1:2, # specify columns
df "Updated_col" = "Current_col",
"Updated_col2" = 2)
Remove multiple columns from df
### Remove multiple NA columns
<- grep("NA", names(tt), ignore.case = F)
rm_cols colnames(df[, rm_cols])] <- list(NULL) df[,
Check number of characters in each column
sapply(meso1, function(x) sum(nchar(x)))
Create empty df with N cols
<- matrix(ncol = 1:500, nrow = 0) %>% data.frame()
df names(df) <- LETTERS[1:500]
Generic useful functions that I can’t place under any other headings here
# dput() for converting outputs such as copied text
# or data tables into vectors
<- "Some copied text or table from the internet"
xx dput(xx)
Round up integers to optimal rounded value
<- c(46, 11, 23)
nn round_any(nn, 10)
round_any(nn, 10, ceiling)
round_any(nn, 10, floor)
Get summary stats for dataset (means)
= aggregate(Cumulative_cercs ~ r * hb, data = df,
means FUN = mean)
Find maximum value in entire list
<- list(1:10, 100, rnorm(12))
master do.call(max, master)
Plot all elements in a list
<- list(sample(5, 1000, replace = T), rnorm(1000),
xx sample(50, 1000, replace = T))
plot(unlist(xx), type = "l")
Apply each row of df or vector to individual elements of a list
= data.frame(events = LETTERS[1:10], outs = 1:10)
df sapply(df$outs, list)
Append extra element onto existing list
<- sample(1000, 15) # random vector
rv <- sapply(rep(NA, 7), list) # list with 7 empty elements
listvec <- c(listvec, list(rv)) # append rv
listvec_final <- c(listvec, rv) # to append rv contents as separate elements, remove internal list listvec_final
Save loop output in master list
<- seq(0, 1, 0.5)
pars <- list()
master <- list()
t_list for (p in 1:length(pars)) {
for (t in 5) {
<- rnorm(1000 * t)
tt <- tt
t_list[t]
}length(master) + 1]] <- t_list # store in master list
master[[ }
Optimal way to save results to data frame in loop
require(dplyr)
<- sum # sum # choose mean or sum
fun <- list() # create first empty list
out_first <- list() # create second empty list
out_second
for (me in 1:10) {
= paste0(getwd(), "/", me, ".R") # get file handle
global_output_fh <- readRDS(global_output_fh) # read in file
output <- output[[1]] # get data
cercs # define function
= function(x) {
SEM sd(x)/sqrt(length(x))
}# create col name to pass to aggregate function
= list(Outs = cercs)
cerc_outs = aggregate(Outs ~ ., data = cerc_outs, FUN = fun)
outs = list(SEs = cercs)
cerc_se = aggregate(SEs ~ ., data = cerc_se, FUN = SEM)
se
# save to df by creating new df cols
$me <- me # create new col with iteration
outs<- outs # add first output to list
out_first[[me]] <- se # add second output to list
out_second[[me]] # end file read
}
# option 1
= do.call(rbind, out_first)
out_final # option 2
<- bind_rows(out_first) # make fresh df
out_final $Second <- bind_rows(out_second) # add second col out_final
High res maps
# https://hecate.hakai.org/rguide/mapping-in-r.html
require(maptools)
<- map_data("worldHires", c("Colombia", "Ecuador",
d "Peru", "Panama"))
# plot
ggplot() + geom_polygon(data = d, aes(x = long, y = lat,
group = group), fill = "black", col = "pink") +
# theme_tufte(ticks=F) +
theme_nothing() + coord_map("mercator", xlim = c(-75,
-81), ylim = c(-2, 8))
Read in KMZ/KML data (Google Maps data)
require(sf)
<- sf::st_read("ziggy_test.kml") zp
Display status message of progress
for (i in 1:10) {
Sys.sleep(0.2)
# comment to use appendLF=FALSE.
message(i, "\r", appendLF = FALSE) # appendLF = new line
flush.console()
}
Display popup progress bar
require(tcltk)
<- tkProgressBar("test progress bar", "Some information in %",
pb 0, 100, 50)
Sys.sleep(0.5)
<- c(0, sort(runif(20, 0, 100)), 100)
u for (i in u) {
Sys.sleep(0.1)
<- sprintf("%d%% done", round(i))
info setTkProgressBar(pb, i, sprintf("test (%s)", info),
info)
}Sys.sleep(5)
close(pb)
NAs
and NaNs
Replace NAs and NaNs with 0’s
is.na(df)] <- 0
df[is.nan(df)] <- 0 # good for matrices df[
Replace X values less than given value (V) with 0
$X[df$X < V] <- 0 df
Check for NAs
%>% sapply(is.na) %>% unique # check where NAs exist
df sapply(df, function(x) sum(is.na(x)))
%>% sapply(is.na) %>% sum # get total NAs df
Replace NaN
and Inf
values with
NA
$col1[which(!is.finite(df$col1))] <- NA df
Fill in missing data values in sequence with NA
# /Users/malishev/Documents/Manuscripts/Chapter4/Sims/Chapter4_figs.R
library(zoo)
<- data.frame(index = c(1:4, 6:10), data = c(1.5,
data 4.3, 5.6, 6.7, 7.1, 12.5, 14.5, 16.8, 3.4))
# you can create a series
<- zoo(data$data, data$index)
z # end extend it to the grid 1:10
<- merge(zoo(, 1:10), z)
z
# worked example fill in missing Tb values
<- zoo(minTb$Tick, minTb$Days)
minTb.d <- merge(zoo(NULL, 1:days), minTb.d) # make the minTb series match the temp series (117 days)
minTb.d <- as.numeric(minTb.d) # = time individuals reached VTMIN in ticks
minTb.d <- minTb.d - temp$Tick # get diff between starting time and time to reach VTMIN
minTb <- minTb/2 # convert ticks to minutes
minTb <- minTb/60 #convert to hours
minTb <- data.frame(Days = 1:days, Time = minTb)
minTb
# then fill in missing values
approx(minTb$Time, method = "linear")
Remove rows with NA
<- data[!is.na(data$X), ] data
Turn NULLs in list into NAs to get numeric values (fix for ‘cannot coerce double’ error)
<- lapply(hl_list, function(x) ifelse(x ==
hl_list "NULL", NA, x))
Turn NaN or NAs in list into 0s
# NaN
<- rapply(global_output, f = function(x) ifelse(is.nan(x),
global_output 0, x), how = "replace")
# NA
<- rapply(global_output, f = function(x) ifelse(is.na(x),
global_output 0, x), how = "replace")
Replace NaN with 0 using dplyr
$layer %>% head
site<- "day9_raster.Rda" %>% readRDS %>% mutate_at("layer",
site ~replace(., is.nan(.), 0))
Good NA exploration
sapply(df, function(x) sum(is.na(x)))
%>% sapply(is.na) %>% sum
df $col1[which(!is.finite(df$col1))] <- NA # Replace `NaN` and `Inf` values with `NA`
df<- data[!is.na(data$X), ] # rm all rows with nas
data <- rapply(global_output, f = function(x) ifelse(is.na(x),
global_output 0, x), how = "replace") # Turn NaN or NAs in list into 0s
Remove NA’s based on column value
require(tidyr)
%>% drop_na(c("var1", "var2")) df
rLandsat
Sourcing, requesting, and downloading NASA Landsat 8 satellite data.
Radix
Improved RMarkdown
output and interaction.
rpanel
Reference
guide
Create interactive GUI control toggles from R
. Like an
early Shiny.
Plot one plot window above and two below
layout(matrix(c(1, 1, 2, 3), 2, 2, byrow = TRUE))
Bookend axis ticks for plot E.g. at 0 and 100 when data is 1:99
axis(1, at = c(0, length(loco$X)), labels = c("", "")) # bookending axis tick marks
Optimal legend formatting for base
legend("right", legend = c("Small", "Intermediate",
"Large"), col = c(colfunc[colvec[1:3]]), bty = "n",
pch = 20, pt.cex = 1.5, cex = 0.7, y.intersp = 0.5,
xjust = 0.5, title = "Size class", title.adj = 0.3,
text.font = 2, trace = T, inset = 0.1)
Plot inset plot in current plot (https://stackoverflow.com/questions/17041246/how-to-add-an-inset-subplot-to-topright-of-an-r-plot)
# calculate position of inset
<- par("plt") # get plot window dims as fraction of current plot dims
plotdim = plotdim[2] - (plotdim[2] - plotdim[1]) * 0.5
xleft = plotdim[2] #
xright = plotdim[4] - (plotdim[4] - plotdim[3]) *
ybottom 0.5 #
= plotdim[4] #
ytop
# set position for plot inset
par(fig = c(xleft, xright, ybottom, ytop), mar = c(0,
0, 0, 0), new = TRUE)
boxplot(Eggs ~ Size, data = meso2, col = adjustcolor(colfunc[colvec[1:3]],
alpha = 0.5), notch = T, xlab = "Week", ylab = "Diameter (mm)",
xaxs = "i", yaxs = "i")
Interactive plots with rCharts (javascript and d3 viz)
http://ramnathv.github.io/rCharts/
require(devtools)
install_github("rCharts", "ramnathv")
Cluster plot
https://rpubs.com/dgrtwo/technology-clusters
library(readr)
library(dplyr)
library(igraph)
library(ggraph)
library(ggforce)
# This shared file contains the number of question
# that have each pair of tags This counts only
# questions that are not deleted and have a
# positive score
<- read_csv("http://varianceexplained.org/files/tag_pairs.csv.gz")
tag_pair_data
<- tag_pair_data %>% mutate(Fraction = Cooccur/Tag1Total) %>%
relationships filter(Fraction >= 0.35) %>% distinct(Tag1)
<- tag_pair_data %>% select(Tag1, Tag1Total) %>%
v distinct(Tag1) %>% filter(Tag1 %in% relationships$Tag1 |
%in% relationships$Tag2) %>% arrange(desc(Tag1Total))
Tag1
<- grid::arrow(length = grid::unit(0.08, "inches"),
a ends = "first", type = "closed")
set.seed(2016)
%>% graph_from_data_frame(vertices = v) %>%
relationships ggraph(layout = "fr") + geom_edge_link(aes(alpha = Fraction),
arrow = a) + geom_node_point(aes(size = Tag1Total),
color = "lightblue") + geom_node_text(aes(size = Tag1Total,
label = name), check_overlap = TRUE) + scale_size_continuous(range = c(2,
9)) + ggforce::theme_no_axes() + theme(legend.position = "none")
Define global plotting graphics function.
The plot_it.R
function is updated on the plot_it
Github page.
require(ggplot2)
require(ggthemes)
### set plotting params plotting function (plot for
### MS or not, set bg color, set color palette from
### RColorBrewer, set alpha value for transperancy)
<- function(manuscript, bg, cp1, cp2, alpha,
plot_it
family) {graphics.off()
if (manuscript == 0) {
if (bg == "black") {
<<- magma(200, 1) # plot window bg # USES <<- OPERATOR
colvec par(bg = colvec[1], col.axis = "white",
col.lab = "white", col.main = "white",
fg = "white", bty = "n", las = 1, mar = c(5,
6, 4, 2), family = family) #mono
= adjustcolor("purple", alpha = 0.5)
border else {
} <<- bpy.colors(200) # plot window bg # USES <<- OPERATOR
colvec par(bg = colvec[1], col.axis = "white",
col.lab = "white", col.main = "white",
fg = "white", bty = "n", las = 1, mar = c(5,
6, 4, 2), family = family)
= adjustcolor("blue", alpha = 0.5)
border
}else {
} # graphics.off()
par(bty = "n", las = 1, family = family)
<- "white"
colv
}# color palettes
# ifelse(manuscript==1,colvec<-adjustcolor(brewer.pal(9,cp1)[9],
# alpha = alpha),colvec <-
# adjustcolor(brewer.pal(9,cp1)[5], alpha = alpha))
# # fine tune plotting colors for plotting bg
# colfunc <<-
# colorRampPalette(brewer.pal(9,cp1),alpha=alpha)
<- brewer.pal.info[cp1, ]$maxcolors
cp1_info <- brewer.pal.info[cp2, ]$maxcolors
cp2_info <<- brewer.pal(cp1_info, cp1) # USES <<- OPERATOR
colv <<- brewer.pal(cp2_info, cp2) # USES <<- OPERATOR
colv2
}
# Setting ggplot theme graphics bg = colour to plot
# bg, family = font family
<- function(bg) {
plot_it_gg if (bg == "white") {
<- "white"
bg <- "black"
fg theme_tufte(base_family = "HersheySans") +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = bg,
colour = bg), plot.background = element_rect(fill = bg)) +
theme(axis.line = element_line(color = fg)) +
theme(axis.ticks = element_line(color = fg)) +
theme(plot.title = element_text(colour = fg)) +
theme(axis.title.x = element_text(colour = fg),
axis.title.y = element_text(colour = fg)) +
theme(axis.text.x = element_text(color = fg),
axis.text.y = element_text(color = fg)) +
theme(legend.key = element_rect(fill = bg)) +
theme(legend.title = element_text(colour = fg)) +
theme(legend.text = element_text(colour = fg))
}# end gg
}
### Set plotting function
require("RCurl")
<- getURL("https://raw.githubusercontent.com/darwinanddavis/plot_it/master/plot_it.R",
script ssl.verifypeer = FALSE)
eval(parse(text = script))
cat("plot_it( \n0 for presentation, 1 for manuscript, \nset colour for background, \nset colour palette. use 'display.brewer.all()', \nset alpha for colour transperancy, \nset font style \n)")
plot_it(0, "blue", "Spectral", "Greens", 1, "mono") # set col function params
plot_it_gg("white") # same as above
Make plot cycle on one page
plot(m_abundance$gam, pages = 1)
Get plot summaries and values from plot
plot.gam(m_abundance$gam, shade = T, pages = 1, seWithMean = T)[1] # everything
plot.gam(m_abundance$gam, shade = T, pages = 1, seWithMean = T)[1][[1]]$x #subset x
plot.gam(m_abundance$gam, shade = T, pages = 1, seWithMean = T)[1][[1]]$fit #get values to produce fit curve
Package for stock world maps
# worldmap
library(choroplethrMaps)
Circle packing, tree, dendogram, network plots
# dendogram tree nested bubble circle packing
# network
# https://www.r-graph-gallery.com/313-basic-circle-packing-with-several-levels/
# circle packing plot Libraries
<- c("ggraph", "igraph", "tidyverse", "DeducerSpatial",
p "Rcpp", "car")
install.packages(p, dependencies = T)
lapply(p, library, character.only = T)
# We need a data frame giving a hierarchical
# structure. Let's consider the flare dataset:
= flare$edges
edges # edges cols = character
# Usually we associate another dataset that give
# information about each node of the dataset:
= flare$vertices
vertices # vertices cols = character, numeric, character
# Create a subset of the dataset (I remove 1 level)
= flare$edges %>% filter(to %in% from) %>% droplevels()
edges = flare$vertices %>% filter(name %in% c(edges$from,
vertices $to)) %>% droplevels()
edges$size = runif(nrow(vertices))
vertices
# Then we have to make a 'graph' object using the
# igraph library:
<- graph_from_data_frame(edges, vertices = vertices)
mygraph
# circle packing
ggraph(mygraph, layout = "circlepack", weight = "size",
sort.by = NULL, direction = "out") + geom_node_circle(aes(fill = depth)) +
geom_node_text(aes(label = shortName, filter = leaf,
fill = depth, size = size)) + theme_void() +
# theme(legend.position='F') + #show legend
scale_fill_viridis(alpha = 0.5, direction = -1, option = "magma")
# scale_fill_distiller(palette = 'Blues')
# geom_node_label(aes(label=shortName, filter=leaf,
# size=size)) + # add text boxes
# circular dendo
str(mygraph)
ggraph(mygraph, layout = "dendrogram", circular = T) +
geom_edge_diagonal(flipped = F, label_colour = "black",
label_alpha = 1, angle_calc = "rot", force_flip = TRUE,
label_dodge = NULL, label_push = NULL, show.legend = NA) +
theme_void() + # theme(legend.position='none') +
scale_fill_distiller(palette = "Blues")
# tree map
ggraph(mygraph, "treemap", weight = "size") + geom_node_tile(aes(fill = depth),
size = 0.25) + theme_void() + theme(legend.position = "none")
# circular partition
ggraph(mygraph, "partition", circular = TRUE) + geom_node_arc_bar(aes(fill = depth),
size = 0.25) + theme_void() + theme(legend.position = "none")
# node
ggraph(mygraph) + geom_edge_link() + geom_node_point() +
theme_void() + theme(legend.position = "none")
Insert an animal silhouette into a plot
# 1. Get image from http://www.phylopic.org
library(png)
<- readPNG("thething.png")
ima plot(1:3, 1:3)
rasterImage(image = ima, xleft = 2, ybottom = 1.8,
xright = 2.7, ytop = 2.7)
Create an empty plot window
# 1
plot(0, type = "n", axes = FALSE, ann = FALSE)
# 2
plot(1, type = "n", xlab = "", ylab = "", xlim = c(0,
10), ylim = c(0, 10))
# 3
plot.new()
Set color gradient, palette for smoothing data points
require(RColorBrewer)
<- 0.8 # transparency (0 to 1 value)
alpha set.seed(5000)
<- rnorm(5000)
rr
# user defined gradient
<- colorRampPalette(c("steelblue", "lightblue",
col "orange", "red")) # set your own col gradient with as many colours as you want
<- col(length(rr))[as.numeric(cut(rr, breaks = length(rr)))] # define breaks in col gradient
colfunc plot(rr, col = colfunc, pch = 20)
# gradient from palette
display.brewer.all()
<- "Greens"
col <- colorRampPalette(brewer.pal(brewer.pal.info[col,
col $maxcolors, col)) # col gradient
]<- col(length(rr))[as.numeric(cut(rr, breaks = length(rr)))] # define breaks in col gradient
colfunc plot(rr, col = colfunc, pch = 20)
Add plot point every nth element
<- 3
n plot(runif(10, 0, 1), type = "o", pch = c(20, rep(NA,
n)))
Create function to make line as default type in plot
<- function(...) plot(..., type = "l")
lplot lplot(runif(200))
Stack dataframe columns automatically in plot
head(outplot)
# time N P S I 1 0.00 200.000000 200.0000 20.00000
# 2.000000 2 0.01 78.245140 177.1952 20.58217
# 2.067159 3 0.02 34.785145 168.9650 21.12174
# 2.136073
<- zoo(outplot)
dats plot(dats)
Make 3D scatterplot
require(scatterplot3d)
<- rnorm(1000)
xx <- runif(1000)
yy <- c(rep(1e-04, 500), rep(1, 500))
dens <- runif(3)
controls <- 1
add.control <- 1 * 10^-10 # 0 or 1*10^-10. value to knock out blanket of colour on plot surface
dens_val # linear model of r/ship between coords
<- lm(dens ~ xx + yy)
dens_lm
<- c(min(xx), max(xx))
xlim <- c(min(yy), max(yy))
ylim = c(min(dens), max(dens)) # set lims
zlim <- "Blues"
colv <- colorRampPalette(brewer.pal(brewer.pal.info[colv,
colvv $maxcolors, colv)) # col gradient
]<- colorRampPalette(c("steelblue", "lightblue",
colvv "orange", "red")) # set your own col gradient with as many colours as you want
# colvv<-colorRampPalette(magma(length(dens))) #
# set your own col gradient with as many colours as
# you want
# set col palette
<- colvv(length(dens))[as.numeric(cut(dens,
colfunc breaks = length(dens)))] # define breaks in col gradient
<- bpy.colors(1)
bg <- 0.8
alpha
# pdf(paste0(plot.dir,strat,'_',density,'_',stage,'_kudspdf.pdf'),width=8.27,height=11.69,paper='a4r')
# color=ifelse(col_heat==1, adjustcolor(colfunc,
# alpha=1),adjustcolor('lightgreen',alpha=0.2)),
scatterplot3d(x = xx, y = yy, z = dens, color = ifelse(dens <=
adjustcolor(ifelse(bg == bpy.colors(1),
dens_val, bpy.colors(1), "white"), alpha = 0.1), adjustcolor(colfunc,
alpha = alpha)), las = 1, pch = 15, type = "p",
lty.hplot = 1, xlim = xlim, ylim = ylim, zlim = zlim,
xlab = "X", ylab = "Y", zlab = "Density", main = "Main",
box = F, lty.axis = par(1), grid = F, col.grid = adjustcolor("gray",
1), lty.grid = par(3), axis = T)
# other plot options cex.symbols=dens*3,
# cex.symbols = ifelse(z<=0,0,0.5), highlight.3d=T,
# angle=70,
# append the below section starting at the '$' to
# the above closing bracket
# $plane3d(dens_lm, # add 3d linear model plane. #
# ??plane3d(Intercept, x.coef = NULL, y.coef =
# NULL, lty = 'dashed', lty.box = NULL, draw_lines
# = TRUE, draw_polygon = FALSE, polygon_args =
# list(border = NA, col = rgb(0,0,0,0.2))
# lty='dashed', lty.box = NULL, draw_lines = F,
# draw_polygon = T, polygon_args = list(border =
# NA, col = adjustcolor('light green',alpha=0.4)))
# add control dates
if (add.control == 1) {
par(new = T)
scatterplot3d(x = rep(0, length(controls)), y = controls,
z = rep(max(dens), length(controls)), color = "gray",
las = 1, pch = "", lty.hplot = 1, xlim = xlim,
ylim = ylim, zlim = zlim, xlab = "", ylab = "",
zlab = "", box = F, grid = F, cex.symbols = 2,
axis = F, type = "h")
}
Adding title from separate list to plot in loop
(ggplot
)
# plot all sim results in one window
<- list()
gspl <- c("cerc", "food", "juv", "adult", "infec",
ttl_list "infec (shed)", "host L", "parasite mass")
# choose sim to plot
<- global_detritus
global_sim_plot
for (g in 1:10) {
<- ggplot() + geom_line(data = y_m, aes(x = rep.int(1:n.ticks,
gspl[[g]] max(L1)), y = value, group = L1, colour = factor(L1)),
+ # scale_color_manual(values = viridis(length(mm)))
) # + linetype=y_m$L1) +
theme_tufte() + labs(title = ttl_list[g], x = "",
y = "") + if (g == length(global_sim_plot)) {
theme(legend.title = element_text(size = 0.2),
legend.text = element_text(size = 0.2)) +
theme(legend.position = "top")
labs(x = "Time")
else {
} theme(legend.position = "none")
}
}# + geom_text(x=,y=,label =
# max(value),check_overlap = TUE)
do.call(grid.arrange, gspl) # plot in one window
Using math expressions in plot labels
plot(rnorm(1000), xlab = expression(paste("X values"^2)),
ylab = expression(paste("Y values"^3, hat(beta))))
Adding faint gridlines to plot
# add gridlines
grid(nx = NA, ny = NULL)
Storing current par
variables for plotting
<- par(no.readonly = T) # store current par values og_pars
Clear graphics memory
dput(par(no.readonly = TRUE)) # reset graphical params
par()
Read in file manually
<- read.table(file.choose()) #read file manually
get.file.vol <- get.file.vol[1:100, 1] #get the volume v.file
Loop through files from dir and append to list
# option 1 reading in spdf (hrpath) files from
# drive
setwd("/Users/camel/Desktop/Matt2016/Manuscripts/MalishevBullKearney/Resubmission/2016/barcoo sims/barcooresults/hrpath_75")
<- list.files()
file.list <- as.list(rep(1, 100)) # empty list
hrs75 for (f in 1:100) {
load(file.list[f])
<- hrpath
hrs75[f]
}
# working version converting spdf into
# mcp(spdf,100,unout='m2)
<- list()
ghr for (i in hrs75[1:10]) {
<- mcp(i, 100, unout = "m2")
m <- c(ghr, m)
ghr
}
ghr
# option 2
<- getwd()
wd <- list() # create list
me_list for (me_day in c("A", "B", "C")) {
for (me_im in 1:5) {
<- readRDS(paste0(wd, resource_type, "_meday_",
mes "_meim", me_im, ".R")) # read .R files from dir
me_day, cat("\n", paste0(wd, resource_type, "_meday_",
"_meim", me_im, ".R"))
me_day, names(mes) <- c("cerc", "food", "juv", "adult",
"infected", "infected shedding", "mean host length",
"mean parasite mass", "summed host biomass",
"summed host eggs", "mean host eggs", "infected host length") # name all original R file list elements
<- mes$cerc # get cercs (as list) use mes$'cerc'[[1]] for numeric
mes names(mes) <- paste0(me_day, "_", me_im) # name list elements according to loop iterations
<- c(me_list, mes) # bind to master list
me_list
} }
Read in PDF files from online source in R and save to drive.
# from https://github.com/ropensci/pdftools
require(pdftools)
<- "https://raw.githubusercontent.com/darwinanddavis/499R/master/exp_pop_growth.pdf"
url <- "FOLDER ON YOUR COMPUTER WHERE YOU WANT THE FILE SAVED"
dir <- "NAME OF THE FILE"
f <- paste0(f, ".pdf")
f
# run all this
download.file(url, paste0(dir, "/", f), mode = "wb")
<- pdf_text(paste0(dir, "/", f))
txt
# first page text
<- 1 # enter the page number
page cat(txt[page])
<- pdf_toc(paste0(dir, "/", f))
toc
require(jsonlite)
# Show as JSON
::toJSON(toc, auto_unbox = TRUE, pretty = TRUE)
jsonlite
# show author, version, etc
<- pdf_info(f)
info
# renders pdf to bitmap array
<- pdf_render_page(f, page = 1)
bitmap
# save bitmap image
::writePNG(bitmap, "page.png")
png::writeJPEG(bitmap, "page.jpeg")
jpeg::write_webp(bitmap, "page.webp") webp
Read .txt files
readLines("search_terms.txt") # must have a blank line at end of file to avoid line read error
Load in data to avoid ‘magic number error’
# avoid load()
readRDS("path to file .R") # can use .R and .Rdata
source("path to file .R")
Access files anywhere without changing working dir
# https://github.com/jennybc/here_here
require(here)
getwd()
# '/Users/malishev/Documents/Data/gggmap'
<- here("here_test", "here_test.txt")
here_loc
here_loc# '/Users/malishev/Documents/Data/gggmap/here_test/here_test.txt'
readLines(here_loc) # access the file even though your working dir is up N levels from the file in your dir
regex
)Get just numbers or characters
<- "16-Feb-2018 20:08:04 PM"
vec <- gsub("[^[:digit:]]", "", vec)
vecN
vecprint(paste0("Just numbers: ", vecN))
<- gsub("[[:digit:]]", "", vec)
vecC
vecprint(paste0("Just characters: ", vecC))
# with tidyr. requires data frame
require(tidyr)
<- data.frame(N1 = c("APPLE348744", "BANANA77845",
df "OATS2647892", "EGG98586456"))
print("tidyr doesn't work with strings separated by spaces")
%>% separate(N1, into = c("text", "num"), sep = "(?<=[A-Za-z])(?=[0-9])") df
Insert or replace a character in a string at a specific location
require(stringi)
<- "ABCEF"
vec stri_sub(vec, 4, 2) <- "d"
print(paste0("Original: ABCEF"))
## [1] "Original: ABCEF"
print(paste0("New: ", vec))
## [1] "New: ABCdEF"
Testing regex expressions and their output
# Testing regex expressions and their output
# https://regex101.com/r/ksY7HU/2
Removing multiple cols from df using grep
<- c("dplyr", "purrr")
packages
<- "LEC100testrecords.txt"
fh <- read.delim(paste0(wd, "/", fh), header = T, sep = "\t")
tt
# Enter data column you want to search
<- "Title"
col2search <- c("evidence", "human", "africa")
keyterms
# 1. find key terms
<- tt[grep(keyterms, tt[, col2search], ignore.case = T),
final #
] length(final[, col2search]) # get number of results
# show raw outputs tt[final[, col2search], col2search]
Hide unwanted code output, such as inherent examples for functions
# ```{r, cache = TRUE, tidy = TRUE, lazy = TRUE,
# results='markup'}
Math notation in R Markdown
x=y \(x = y\)
x<y \(x < y\)
x>y \(x > y\)
x≤y \(x \le y\)
x≥y \(x \ge y\)
xn \(x^{n}\)
xn \(x_{n}\)
x⎯⎯⎯ \(\overline{x}\)
x̂ \(\hat{x}\)
x̃ \(\tilde{x}\)
ab \(\frac{a}{b}\)
∂f∂x \(\frac{a}{b}\)
∂f∂x \(\displaystyle
\frac{a}{b}\)
(nk) \(\binom{n}{k}\)
x1+x2+⋯+xn \(x_{1} + x_{2} + \cdots +
x_{n}\)
x1,x2,…,xn \(x_{1}, x_{2}, \dots,
x_{n}\)
x=⟨x1,x2,…,xn \(\mathbf{x} = \langle x_{1},
x_{2}, \dots, x_{n}\rangle\)
x∈A \(x \in A\)
|A| \(|A|\)
A⊂B \(x \subset B\)
A⊆B \(x \subseteq B\)
A∪B \(A \cup B\)
A∩B \(A \cap B\)
X∼𝖡𝗂𝗇𝗈𝗆(n,π) \(X \sim {\sf Binom}(n,
\pi)\)
P(X≤x)=𝚙𝚋𝚒𝚗𝚘𝚖(x,n,π) \(\mathrm{P}(X \le x)
= {\tt pbinom}(x, n, \pi)\)
P(A∣B) \(P(A \mid B)\)
P(A∣B) \(\mathrm{P}(A \mid B)\)
{1,2,3} \(\{1, 2, 3\}\)
sin(x) \(\sin(x)\)
log(x) \(\log(x)\)
∫ba \(\int_{a}^{b}\)
(∫baf(x)dx) \(\left(\int_{a}^{b} f(x) \;
dx\right)\)
[∫∞−∞f(x)dx] \(\left[\int_{\-infty}^{\infty}
f(x) \; dx\right]\)
F(x)|ba \(\left. F(x)
\right|_{a}^{b}\)
∑bx=af(x) \(\sum_{x = a}^{b}
f(x)\)
∏bx=af(x) \(\prod_{x = a}^{b}
f(x)\)
limx→∞f(x) \(\lim_{x \to \infty}
f(x)\)
limx→∞f(x) \(\displaystyle \lim_{x \to \infty}
f(x)\)
Greek Letters
αA \(\alpha A\)
νN $N $
βB \(\beta B\)
ξΞ \(\xi\Xi\)
γΓ \(\gamma \Gamma\)
oO \(o O\) (omicron)
δΔ \(\delta \Delta\)
πΠ \(\pi \Pi\)
ϵεE \(\epsilon \varepsilon E\)
ρϱP \(\rho\varrho P\)
ζZ \(\zeta Z \sigma \,\!\)
Σ \(\sigma \Sigma\)
ηH \(\eta H\)
τT \(\tau T\)
θϑΘ \(\theta \vartheta \Theta\)
υΥ \(\upsilon \Upsilon\)
ιI \(\iota I\)
ϕφΦ \(\phi \varphi \Phi\)
κK \(\kappa K\)
χX \(\chi X\)
λΛ \(\lambda \Lambda\)
ψΨ \(\psi \Psi\)
μM \(\mu M\)
ω Ω\(\omega \Omega\)
Select specific rows E.g. select rows of sfeed_move not in foodh
library(sqldf)
<- sqldf("SELECT * FROM sfeed_move EXCEPT SELECT * FROM foodh")
a1NotIna2_h <- sqldf("SELECT * FROM sfeed_move EXCEPT SELECT * FROM foodl")
a1NotIna2_l # select rows from sfeed_move that also appear in
# foodh
<- sqldf("SELECT * FROM sfeed_move INTERSECT SELECT * FROM foodh")
a1Ina2_h <- sqldf("SELECT * FROM sfeed_move INTERSECT SELECT * FROM foodl") a1Ina2_l
Count occurrences of values in data frame
table(unlist(df$X))
Remove a specific column from a data frame
within(df, rm("Col1"))
Scraping web tables
http://web.mit.edu/~r/current/arch/i386_linux26/lib/R/library/XML/html/readHTMLTable.html[http://web.mit.edu/~r/current/arch/i386_linux26/lib/R/library/XML/html/readHTMLTable.html]
library(XML)
readHTMLTable()
Scraping Twitter timelines
See complete example at http://varianceexplained.org/r/trump-tweets/
# https://cran.r-project.org/web/packages/twitteR/
library(dplyr)
library(purrr)
library(twitteR)