字符串
library(stringr)
# Sample sentence used by the stringr examples that follow.
x <- "hello world hello chongqing"
x
## [1] "hello world hello chongqing"
字符串长度:str_length()
# Count the characters in x; the 27 reported below includes the three spaces.
str_length(x)
## [1] 27
字符串拆分:str_split()
# Split x on single spaces; the printed result is a list holding one character vector.
str_split(x," ")
## [[1]]
## [1] "hello" "world" "hello" "chongqing"
# Confirm the container type returned by str_split().
class(str_split(x," "))
## [1] "list"
按位置提取字符:str_sub()
# Extract characters 7 through 11 of x (both end positions are included).
str_sub(x,7,11)
## [1] "world"
使用str_split函数拆分x,生成向量
# Split x on spaces and keep the single list element, giving a plain
# character vector of words.
x2 <- str_split(x, " ")[[1]]
x2
## [1] "hello" "world" "hello" "chongqing"
字符检测:str_detect()
# Flag the words that contain "w" anywhere.
str_detect(x2,"w")
## [1] FALSE TRUE FALSE FALSE
# Flag the words that begin with "h".
str_starts(x2,"h")
## [1] TRUE FALSE TRUE FALSE
# Flag the words that end with "g".
str_ends(x2,"g")
## [1] FALSE FALSE FALSE TRUE
字符替换
str_replace(x2,"g","G") # replaces only the first match in each string
## [1] "hello" "world" "hello" "chonGqing"
# Replace every match in each string.
str_replace_all(x2,"g","G")
## [1] "hello" "world" "hello" "chonGqinG"
字符删除:str_remove()/str_remove_all()
str_remove(x," ") # removes only the first match
## [1] "helloworld hello chongqing"
# Remove every match.
str_remove_all(x," ")
## [1] "helloworldhellochongqing"
数据框
# Build a small demo data frame `test` from the first and last three rows of iris.
a <- head(iris, n = 3)
b <- tail(iris, n = 3)
test <- rbind(a, b)
# Relabel the six rows 1..6.
rownames(test) <- c(1, 2, 3, 4, 5, 6)
test
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 6.5 3.0 5.2 2.0 virginica
## 5 6.2 3.4 5.4 2.3 virginica
## 6 5.9 3.0 5.1 1.8 virginica
# arrange(): sort the rows by a column (ascending, as the output below shows)
library(dplyr)
arrange(test, Sepal.Length)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 3 4.7 3.2 1.3 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 1 5.1 3.5 1.4 0.2 setosa
## 6 5.9 3.0 5.1 1.8 virginica
## 5 6.2 3.4 5.4 2.3 virginica
## 4 6.5 3.0 5.2 2.0 virginica
# arrange() with desc(): sort the rows in descending order
arrange(test,desc(Sepal.Length))
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 4 6.5 3.0 5.2 2.0 virginica
## 5 6.2 3.4 5.4 2.3 virginica
## 6 5.9 3.0 5.1 1.8 virginica
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
# Deduplicate: one row per distinct Species value; only that column is returned.
distinct(test, Species)
## Species
## 1 setosa
## 4 virginica
# With .keep_all = TRUE the other columns are kept as well.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
distinct(test, Species, .keep_all = TRUE)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 4 6.5 3.0 5.2 2.0 virginica
# Add a column holding Sepal.Length + Sepal.Width, then show the result.
test <- test %>%
  mutate(new_column = Sepal.Length + Sepal.Width)
test
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species new_column
## 1 5.1 3.5 1.4 0.2 setosa 8.6
## 2 4.9 3.0 1.4 0.2 setosa 7.9
## 3 4.7 3.2 1.3 0.2 setosa 7.9
## 4 6.5 3.0 5.2 2.0 virginica 9.5
## 5 6.2 3.4 5.4 2.3 virginica 9.6
## 6 5.9 3.0 5.1 1.8 virginica 8.9
# Add a second column with plain base R assignment: Sepal.Length * Sepal.Width.
test[["new2"]] <- test[["Sepal.Length"]] * test[["Sepal.Width"]]
# Selecting columns and rows of a data frame
# Print the built-in iris data set in full (150 rows) for reference.
iris
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# Drop a column, then convert to a matrix.
x1 <- select(iris, -5)   # remove column 5 (Species)
x2 <- as.matrix(x1)      # note: this reuses the name x2 from the string examples
x3 <- head(x2, n = 50)   # keep the first 50 rows
x3
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## [1,] 5.1 3.5 1.4 0.2
## [2,] 4.9 3.0 1.4 0.2
## [3,] 4.7 3.2 1.3 0.2
## [4,] 4.6 3.1 1.5 0.2
## [5,] 5.0 3.6 1.4 0.2
## [6,] 5.4 3.9 1.7 0.4
## [7,] 4.6 3.4 1.4 0.3
## [8,] 5.0 3.4 1.5 0.2
## [9,] 4.4 2.9 1.4 0.2
## [10,] 4.9 3.1 1.5 0.1
## [11,] 5.4 3.7 1.5 0.2
## [12,] 4.8 3.4 1.6 0.2
## [13,] 4.8 3.0 1.4 0.1
## [14,] 4.3 3.0 1.1 0.1
## [15,] 5.8 4.0 1.2 0.2
## [16,] 5.7 4.4 1.5 0.4
## [17,] 5.4 3.9 1.3 0.4
## [18,] 5.1 3.5 1.4 0.3
## [19,] 5.7 3.8 1.7 0.3
## [20,] 5.1 3.8 1.5 0.3
## [21,] 5.4 3.4 1.7 0.2
## [22,] 5.1 3.7 1.5 0.4
## [23,] 4.6 3.6 1.0 0.2
## [24,] 5.1 3.3 1.7 0.5
## [25,] 4.8 3.4 1.9 0.2
## [26,] 5.0 3.0 1.6 0.2
## [27,] 5.0 3.4 1.6 0.4
## [28,] 5.2 3.5 1.5 0.2
## [29,] 5.2 3.4 1.4 0.2
## [30,] 4.7 3.2 1.6 0.2
## [31,] 4.8 3.1 1.6 0.2
## [32,] 5.4 3.4 1.5 0.4
## [33,] 5.2 4.1 1.5 0.1
## [34,] 5.5 4.2 1.4 0.2
## [35,] 4.9 3.1 1.5 0.2
## [36,] 5.0 3.2 1.2 0.2
## [37,] 5.5 3.5 1.3 0.2
## [38,] 4.9 3.6 1.4 0.1
## [39,] 4.4 3.0 1.3 0.2
## [40,] 5.1 3.4 1.5 0.2
## [41,] 5.0 3.5 1.3 0.3
## [42,] 4.5 2.3 1.3 0.3
## [43,] 4.4 3.2 1.3 0.2
## [44,] 5.0 3.5 1.6 0.6
## [45,] 5.1 3.8 1.9 0.4
## [46,] 4.8 3.0 1.4 0.3
## [47,] 5.1 3.8 1.6 0.2
## [48,] 4.6 3.2 1.4 0.2
## [49,] 5.3 3.7 1.5 0.2
## [50,] 5.0 3.3 1.4 0.2
heatmap(x3)
# The same steps written as one pipeline:
# drop the column, convert to a matrix, take 50 rows, draw the heatmap.
iris %>%
  select(-5) %>%
  as.matrix() %>%
  head(50) %>%
  heatmap()

条件和循环
# if statement: the body runs only when the condition is TRUE
i <- 2
if (i > 0) print("i>0")
## [1] "i>0"
if (i < 0) print("i<0")
# if / else statement
s <- 1
# Test `s`, the variable assigned for this example.
if (s > 0) {
  print("+")
} else {
  print("-")
}
## [1] "+"
# Compact form of the same if / else using ifelse()
s <- 1
ifelse(s > 0, "+", "-")
## [1] "+"
# ifelse() combined with str_detect()
# Build the sample labels: "tumor"/"normal" alternate while the numbers run 1,2,3,1,2,3.
samples <- paste0(c("tumor", "normal"), rep(1:3, times = 2))
samples
## [1] "tumor1" "normal2" "tumor3" "normal1" "tumor2" "normal3"
# TRUE where the label contains "tumor"
k1 <- str_detect(samples, "tumor")
k1
## [1] TRUE FALSE TRUE FALSE TRUE FALSE
# Map the logical vector back to group names.
ifelse(k1, "tumor", "normal")
## [1] "tumor" "normal" "tumor" "normal" "tumor" "normal"
# Nested ifelse() covering three outcomes: positive, negative, zero
m <- 1
# The zero branch returns the string "0" so that every branch yields a
# character value; a bare 0 would give a numeric result when m is 0.
ifelse(m > 0, "+", ifelse(m < 0, "-", "0"))
## [1] "+"
# for loop: print each value from 1 to 4
for (i in seq_len(4)) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
#批量装包
#pks = c("rio","read.data")
#for (p in pks){
# if(!require(p,character.only = T))
# install.packages(p,ask = F, update = F)
#}
隐式循环
apply(x,MARGIN,FUN,…)
x 数据框/矩阵
MARGIN=1为行,2为列
FUN为函数
对x的每一行/列进行FUN函数
# First four rows and the four numeric columns of iris.
test3 <- iris[seq_len(4), seq_len(4)]
test3
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
# MARGIN = 1: apply sum() to each row.
apply(test3, MARGIN = 1, FUN = sum)
## 1 2 3 4
## 10.2 9.5 9.4 9.4
# MARGIN = 2: apply mean() to each column.
apply(test3, MARGIN = 2, FUN = mean)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 4.825 3.200 1.400 0.200
挑出30个数里最大的5个
# Draw 30 standard-normal values; no seed is set, so the numbers differ on every run.
mm <- rnorm(30)
mm
## [1] 0.038231251 1.614018170 0.609117309 -0.986239371 2.010533337 1.834587847
## [7] -0.556371445 0.551342940 -1.403249101 0.448909349 1.276030534 0.518258595
## [13] -1.371033118 0.214918283 -1.007146133 0.155630412 0.990492912 0.002897102
## [19] 0.241762625 -0.145003475 0.198665660 2.465514040 -1.042278503 0.255811504
## [25] 2.284997963 -1.132938482 0.804076360 0.288959444 -0.566542658 -0.710499513
# Sort ascending and keep the last five, i.e. the five largest values.
tail(sort(mm), n = 5)
## [1] 1.614018 1.834588 2.010533 2.284998 2.465514
挑出表达矩阵里方差最大的3个基因
# Build a 5 x 6 expression matrix of random normal values.
m1 <- matrix(rnorm(30), nrow = 5)
rownames(m1) <- paste0("gene", seq_len(5))
colnames(m1) <- paste0(rep(c("control", "treat"), each = 3), seq_len(3))
# Variance of each gene (one value per row).
v1 <- apply(m1, MARGIN = 1, FUN = var)
# Sort the per-gene variances in ascending order.
r1 <- sort(v1)
# Names of the three genes with the largest variance (smallest of the three first).
names(tail(r1, n = 3))
## [1] "gene5" "gene4" "gene3"
##两个数据框的连接
inner_join 交集
full_join 全连接

表达矩阵画箱线图
新建矩阵
# Simulate a 3 x 6 matrix of random normal values.
# NOTE: the variable is called `exp`, the same name as base R's exp() function.
exp <- matrix(rnorm(18), ncol = 6)
exp
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.7262405 -0.6788916 -1.0830389 -0.72391563 -1.693845 0.6249748
## [2,] 2.4111533 1.3142188 0.6307410 2.34536693 3.557996 1.3821617
## [3,] 0.3285533 -1.5975085 0.8196729 -0.04391899 -1.287655 -0.7410555
# Round every value to two decimals.
exp <- round(exp, digits = 2)
exp
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.73 -0.68 -1.08 -0.72 -1.69 0.62
## [2,] 2.41 1.31 0.63 2.35 3.56 1.38
## [3,] 0.33 -1.60 0.82 -0.04 -1.29 -0.74
# Label the rows gene1..gene3.
rownames(exp) <- paste0("gene", seq_len(3))
exp
## [,1] [,2] [,3] [,4] [,5] [,6]
## gene1 0.73 -0.68 -1.08 -0.72 -1.69 0.62
## gene2 2.41 1.31 0.63 2.35 3.56 1.38
## gene3 0.33 -1.60 0.82 -0.04 -1.29 -0.74
# Label the columns test1..test6.
colnames(exp) <- paste0("test", seq_len(6))
exp
## test1 test2 test3 test4 test5 test6
## gene1 0.73 -0.68 -1.08 -0.72 -1.69 0.62
## gene2 2.41 1.31 0.63 2.35 3.56 1.38
## gene3 0.33 -1.60 0.82 -0.04 -1.29 -0.74
# Add 1 to the first three columns.
exp[, 1:3] <- exp[, 1:3] + 1
exp
## test1 test2 test3 test4 test5 test6
## gene1 1.73 0.32 -0.08 -0.72 -1.69 0.62
## gene2 3.41 2.31 1.63 2.35 3.56 1.38
## gene3 1.33 -0.60 1.82 -0.04 -1.29 -0.74
画箱线图
library(tidyr)
library(tibble)
library(dplyr)
# Data preparation: one row per sample, one column per gene, plus a group label.
dat <- exp %>%
  t() %>%                                               # transpose
  as.data.frame() %>%                                   # convert to a data frame
  rownames_to_column() %>%                              # move row names into a column
  mutate(group = rep(c("control", "treat"), each = 3))  # add the group column
# Show the wide table before it is reshaped to long format.
dat
## rowname gene1 gene2 gene3 group
## 1 test1 1.73 3.41 1.33 control
## 2 test2 0.32 2.31 -0.60 control
## 3 test3 -0.08 1.63 1.82 control
## 4 test4 -0.72 2.35 -0.04 treat
## 5 test5 -1.69 3.56 -1.29 treat
## 6 test6 0.62 1.38 -0.74 treat
# Reshape to long format: gene names go to `gene`, their values to `count`.
pdat <- pivot_longer(
  dat,
  cols = starts_with("gene"),
  names_to = "gene",
  values_to = "count"
)
# Plotting
library(ggplot2)
p <- ggplot(pdat, aes(x = gene, y = count)) +
  # Without aes(color = group) both groups end up in a single box per gene.
  geom_boxplot(aes(color = group)) +
  theme_classic()
p

CSDN只支持上传Markdown格式,不支持RMarkdown
使用R语言自带的Save as MD,文件容易乱
须先保存RMarkdown文件,后运行代码导出Md格式文件
#knitr::knit(input = "./test.Rmd", output = "./test.md")
从生信技能树数据挖掘班搬运,感谢小洁老师和她的团队

4870

被折叠的 条评论
为什么被折叠?



