数据挖掘笔记-R语言基础-day6

字符串

# Load stringr and define the example string used in the snippets that follow
library(stringr)
x <- "hello world hello chongqing"
x
## [1] "hello world hello chongqing"

字符串长度:str_length()

# Number of characters in x; the three spaces are counted too
str_length(x)
## [1] 27

字符串拆分:str_split()

# Split x on single spaces; the result is a list holding one character vector
str_split(x, pattern = " ")
## [[1]]
## [1] "hello"     "world"     "hello"     "chongqing"
class(str_split(x, pattern = " "))
## [1] "list"

按位置提取字符:str_sub()

# Extract characters 7 through 11 (both ends inclusive)
str_sub(x, start = 7, end = 11)
## [1] "world"

使用str_split函数拆分x,生成向量

# Split x and take the first list element, so x2 is a plain character vector
x2 <- str_split(x, " ")[[1]]
x2
## [1] "hello"     "world"     "hello"     "chongqing"

字符检测:str_detect()

# Does each element contain "w"?
str_detect(x2, pattern = "w")
## [1] FALSE  TRUE FALSE FALSE
# Does each element start with "h"?
str_starts(x2, pattern = "h")
## [1]  TRUE FALSE  TRUE FALSE
# Does each element end with "g"?
str_ends(x2, pattern = "g")
## [1] FALSE FALSE FALSE  TRUE

字符替换

# str_replace() changes only the first match in each element
str_replace(x2, pattern = "g", replacement = "G")
## [1] "hello"     "world"     "hello"     "chonGqing"
# str_replace_all() changes every match
str_replace_all(x2, pattern = "g", replacement = "G")
## [1] "hello"     "world"     "hello"     "chonGqinG"

字符删除:str_remove()/str_remove_all()

# str_remove() deletes only the first match
str_remove(x, pattern = " ")
## [1] "helloworld hello chongqing"
# str_remove_all() deletes every match
str_remove_all(x, pattern = " ")
## [1] "helloworldhellochongqing"

数据框

# Build a small example data frame `test` from the first and last three rows of iris
a <- head(iris, 3)
b <- tail(iris, 3)
test <- rbind(a, b)
# Renumber the rows 1..6 (row names are stored as character strings)
rownames(test) <- as.character(1:6)
test
##   Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 1          5.1         3.5          1.4         0.2    setosa
## 2          4.9         3.0          1.4         0.2    setosa
## 3          4.7         3.2          1.3         0.2    setosa
## 4          6.5         3.0          5.2         2.0 virginica
## 5          6.2         3.4          5.4         2.3 virginica
## 6          5.9         3.0          5.1         1.8 virginica
# arrange(): sort rows by Sepal.Length, ascending
library(dplyr)
test %>% arrange(Sepal.Length)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 3          4.7         3.2          1.3         0.2    setosa
## 2          4.9         3.0          1.4         0.2    setosa
## 1          5.1         3.5          1.4         0.2    setosa
## 6          5.9         3.0          5.1         1.8 virginica
## 5          6.2         3.4          5.4         2.3 virginica
## 4          6.5         3.0          5.2         2.0 virginica
# arrange() with desc(): sort rows by Sepal.Length, descending
test %>% arrange(desc(Sepal.Length))
##   Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 4          6.5         3.0          5.2         2.0 virginica
## 5          6.2         3.4          5.4         2.3 virginica
## 6          5.9         3.0          5.1         1.8 virginica
## 1          5.1         3.5          1.4         0.2    setosa
## 2          4.9         3.0          1.4         0.2    setosa
## 3          4.7         3.2          1.3         0.2    setosa
# Remove duplicates
# Keep only the distinct values of Species
distinct(test, Species)
##     Species
## 1    setosa
## 4 virginica
# .keep_all = TRUE keeps every column of the first row seen for each Species
# (spelled out as TRUE because T is an ordinary variable that can be reassigned)
distinct(test, Species, .keep_all = TRUE)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 1          5.1         3.5          1.4         0.2    setosa
## 4          6.5         3.0          5.2         2.0 virginica
# Add a column with mutate(): new_column is Sepal.Length + Sepal.Width
test <- mutate(test, new_column = Sepal.Length + Sepal.Width)
test
##   Sepal.Length Sepal.Width Petal.Length Petal.Width   Species new_column
## 1          5.1         3.5          1.4         0.2    setosa        8.6
## 2          4.9         3.0          1.4         0.2    setosa        7.9
## 3          4.7         3.2          1.3         0.2    setosa        7.9
## 4          6.5         3.0          5.2         2.0 virginica        9.5
## 5          6.2         3.4          5.4         2.3 virginica        9.6
## 6          5.9         3.0          5.1         1.8 virginica        8.9
# Add a column by direct assignment: new2 is Sepal.Length * Sepal.Width
test[["new2"]] <- test[["Sepal.Length"]] * test[["Sepal.Width"]]
# Selecting columns and rows of a data frame
# Print the built-in iris data set in full (150 rows) before working on it
iris
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1            5.1         3.5          1.4         0.2     setosa
## 2            4.9         3.0          1.4         0.2     setosa
## 3            4.7         3.2          1.3         0.2     setosa
## 4            4.6         3.1          1.5         0.2     setosa
## 5            5.0         3.6          1.4         0.2     setosa
## 6            5.4         3.9          1.7         0.4     setosa
## 7            4.6         3.4          1.4         0.3     setosa
## 8            5.0         3.4          1.5         0.2     setosa
## 9            4.4         2.9          1.4         0.2     setosa
## 10           4.9         3.1          1.5         0.1     setosa
## 11           5.4         3.7          1.5         0.2     setosa
## 12           4.8         3.4          1.6         0.2     setosa
## 13           4.8         3.0          1.4         0.1     setosa
## 14           4.3         3.0          1.1         0.1     setosa
## 15           5.8         4.0          1.2         0.2     setosa
## 16           5.7         4.4          1.5         0.4     setosa
## 17           5.4         3.9          1.3         0.4     setosa
## 18           5.1         3.5          1.4         0.3     setosa
## 19           5.7         3.8          1.7         0.3     setosa
## 20           5.1         3.8          1.5         0.3     setosa
## 21           5.4         3.4          1.7         0.2     setosa
## 22           5.1         3.7          1.5         0.4     setosa
## 23           4.6         3.6          1.0         0.2     setosa
## 24           5.1         3.3          1.7         0.5     setosa
## 25           4.8         3.4          1.9         0.2     setosa
## 26           5.0         3.0          1.6         0.2     setosa
## 27           5.0         3.4          1.6         0.4     setosa
## 28           5.2         3.5          1.5         0.2     setosa
## 29           5.2         3.4          1.4         0.2     setosa
## 30           4.7         3.2          1.6         0.2     setosa
## 31           4.8         3.1          1.6         0.2     setosa
## 32           5.4         3.4          1.5         0.4     setosa
## 33           5.2         4.1          1.5         0.1     setosa
## 34           5.5         4.2          1.4         0.2     setosa
## 35           4.9         3.1          1.5         0.2     setosa
## 36           5.0         3.2          1.2         0.2     setosa
## 37           5.5         3.5          1.3         0.2     setosa
## 38           4.9         3.6          1.4         0.1     setosa
## 39           4.4         3.0          1.3         0.2     setosa
## 40           5.1         3.4          1.5         0.2     setosa
## 41           5.0         3.5          1.3         0.3     setosa
## 42           4.5         2.3          1.3         0.3     setosa
## 43           4.4         3.2          1.3         0.2     setosa
## 44           5.0         3.5          1.6         0.6     setosa
## 45           5.1         3.8          1.9         0.4     setosa
## 46           4.8         3.0          1.4         0.3     setosa
## 47           5.1         3.8          1.6         0.2     setosa
## 48           4.6         3.2          1.4         0.2     setosa
## 49           5.3         3.7          1.5         0.2     setosa
## 50           5.0         3.3          1.4         0.2     setosa
## 51           7.0         3.2          4.7         1.4 versicolor
## 52           6.4         3.2          4.5         1.5 versicolor
## 53           6.9         3.1          4.9         1.5 versicolor
## 54           5.5         2.3          4.0         1.3 versicolor
## 55           6.5         2.8          4.6         1.5 versicolor
## 56           5.7         2.8          4.5         1.3 versicolor
## 57           6.3         3.3          4.7         1.6 versicolor
## 58           4.9         2.4          3.3         1.0 versicolor
## 59           6.6         2.9          4.6         1.3 versicolor
## 60           5.2         2.7          3.9         1.4 versicolor
## 61           5.0         2.0          3.5         1.0 versicolor
## 62           5.9         3.0          4.2         1.5 versicolor
## 63           6.0         2.2          4.0         1.0 versicolor
## 64           6.1         2.9          4.7         1.4 versicolor
## 65           5.6         2.9          3.6         1.3 versicolor
## 66           6.7         3.1          4.4         1.4 versicolor
## 67           5.6         3.0          4.5         1.5 versicolor
## 68           5.8         2.7          4.1         1.0 versicolor
## 69           6.2         2.2          4.5         1.5 versicolor
## 70           5.6         2.5          3.9         1.1 versicolor
## 71           5.9         3.2          4.8         1.8 versicolor
## 72           6.1         2.8          4.0         1.3 versicolor
## 73           6.3         2.5          4.9         1.5 versicolor
## 74           6.1         2.8          4.7         1.2 versicolor
## 75           6.4         2.9          4.3         1.3 versicolor
## 76           6.6         3.0          4.4         1.4 versicolor
## 77           6.8         2.8          4.8         1.4 versicolor
## 78           6.7         3.0          5.0         1.7 versicolor
## 79           6.0         2.9          4.5         1.5 versicolor
## 80           5.7         2.6          3.5         1.0 versicolor
## 81           5.5         2.4          3.8         1.1 versicolor
## 82           5.5         2.4          3.7         1.0 versicolor
## 83           5.8         2.7          3.9         1.2 versicolor
## 84           6.0         2.7          5.1         1.6 versicolor
## 85           5.4         3.0          4.5         1.5 versicolor
## 86           6.0         3.4          4.5         1.6 versicolor
## 87           6.7         3.1          4.7         1.5 versicolor
## 88           6.3         2.3          4.4         1.3 versicolor
## 89           5.6         3.0          4.1         1.3 versicolor
## 90           5.5         2.5          4.0         1.3 versicolor
## 91           5.5         2.6          4.4         1.2 versicolor
## 92           6.1         3.0          4.6         1.4 versicolor
## 93           5.8         2.6          4.0         1.2 versicolor
## 94           5.0         2.3          3.3         1.0 versicolor
## 95           5.6         2.7          4.2         1.3 versicolor
## 96           5.7         3.0          4.2         1.2 versicolor
## 97           5.7         2.9          4.2         1.3 versicolor
## 98           6.2         2.9          4.3         1.3 versicolor
## 99           5.1         2.5          3.0         1.1 versicolor
## 100          5.7         2.8          4.1         1.3 versicolor
## 101          6.3         3.3          6.0         2.5  virginica
## 102          5.8         2.7          5.1         1.9  virginica
## 103          7.1         3.0          5.9         2.1  virginica
## 104          6.3         2.9          5.6         1.8  virginica
## 105          6.5         3.0          5.8         2.2  virginica
## 106          7.6         3.0          6.6         2.1  virginica
## 107          4.9         2.5          4.5         1.7  virginica
## 108          7.3         2.9          6.3         1.8  virginica
## 109          6.7         2.5          5.8         1.8  virginica
## 110          7.2         3.6          6.1         2.5  virginica
## 111          6.5         3.2          5.1         2.0  virginica
## 112          6.4         2.7          5.3         1.9  virginica
## 113          6.8         3.0          5.5         2.1  virginica
## 114          5.7         2.5          5.0         2.0  virginica
## 115          5.8         2.8          5.1         2.4  virginica
## 116          6.4         3.2          5.3         2.3  virginica
## 117          6.5         3.0          5.5         1.8  virginica
## 118          7.7         3.8          6.7         2.2  virginica
## 119          7.7         2.6          6.9         2.3  virginica
## 120          6.0         2.2          5.0         1.5  virginica
## 121          6.9         3.2          5.7         2.3  virginica
## 122          5.6         2.8          4.9         2.0  virginica
## 123          7.7         2.8          6.7         2.0  virginica
## 124          6.3         2.7          4.9         1.8  virginica
## 125          6.7         3.3          5.7         2.1  virginica
## 126          7.2         3.2          6.0         1.8  virginica
## 127          6.2         2.8          4.8         1.8  virginica
## 128          6.1         3.0          4.9         1.8  virginica
## 129          6.4         2.8          5.6         2.1  virginica
## 130          7.2         3.0          5.8         1.6  virginica
## 131          7.4         2.8          6.1         1.9  virginica
## 132          7.9         3.8          6.4         2.0  virginica
## 133          6.4         2.8          5.6         2.2  virginica
## 134          6.3         2.8          5.1         1.5  virginica
## 135          6.1         2.6          5.6         1.4  virginica
## 136          7.7         3.0          6.1         2.3  virginica
## 137          6.3         3.4          5.6         2.4  virginica
## 138          6.4         3.1          5.5         1.8  virginica
## 139          6.0         3.0          4.8         1.8  virginica
## 140          6.9         3.1          5.4         2.1  virginica
## 141          6.7         3.1          5.6         2.4  virginica
## 142          6.9         3.1          5.1         2.3  virginica
## 143          5.8         2.7          5.1         1.9  virginica
## 144          6.8         3.2          5.9         2.3  virginica
## 145          6.7         3.3          5.7         2.5  virginica
## 146          6.7         3.0          5.2         2.3  virginica
## 147          6.3         2.5          5.0         1.9  virginica
## 148          6.5         3.0          5.2         2.0  virginica
## 149          6.2         3.4          5.4         2.3  virginica
## 150          5.9         3.0          5.1         1.8  virginica
# Drop a column, then convert to a matrix
x1 <- select(iris, -5)  # drop column 5 (Species); the four measurement columns remain
x2 <- as.matrix(x1)
x3 <- head(x2, n = 50)  # first 50 rows
x3
##       Sepal.Length Sepal.Width Petal.Length Petal.Width
##  [1,]          5.1         3.5          1.4         0.2
##  [2,]          4.9         3.0          1.4         0.2
##  [3,]          4.7         3.2          1.3         0.2
##  [4,]          4.6         3.1          1.5         0.2
##  [5,]          5.0         3.6          1.4         0.2
##  [6,]          5.4         3.9          1.7         0.4
##  [7,]          4.6         3.4          1.4         0.3
##  [8,]          5.0         3.4          1.5         0.2
##  [9,]          4.4         2.9          1.4         0.2
## [10,]          4.9         3.1          1.5         0.1
## [11,]          5.4         3.7          1.5         0.2
## [12,]          4.8         3.4          1.6         0.2
## [13,]          4.8         3.0          1.4         0.1
## [14,]          4.3         3.0          1.1         0.1
## [15,]          5.8         4.0          1.2         0.2
## [16,]          5.7         4.4          1.5         0.4
## [17,]          5.4         3.9          1.3         0.4
## [18,]          5.1         3.5          1.4         0.3
## [19,]          5.7         3.8          1.7         0.3
## [20,]          5.1         3.8          1.5         0.3
## [21,]          5.4         3.4          1.7         0.2
## [22,]          5.1         3.7          1.5         0.4
## [23,]          4.6         3.6          1.0         0.2
## [24,]          5.1         3.3          1.7         0.5
## [25,]          4.8         3.4          1.9         0.2
## [26,]          5.0         3.0          1.6         0.2
## [27,]          5.0         3.4          1.6         0.4
## [28,]          5.2         3.5          1.5         0.2
## [29,]          5.2         3.4          1.4         0.2
## [30,]          4.7         3.2          1.6         0.2
## [31,]          4.8         3.1          1.6         0.2
## [32,]          5.4         3.4          1.5         0.4
## [33,]          5.2         4.1          1.5         0.1
## [34,]          5.5         4.2          1.4         0.2
## [35,]          4.9         3.1          1.5         0.2
## [36,]          5.0         3.2          1.2         0.2
## [37,]          5.5         3.5          1.3         0.2
## [38,]          4.9         3.6          1.4         0.1
## [39,]          4.4         3.0          1.3         0.2
## [40,]          5.1         3.4          1.5         0.2
## [41,]          5.0         3.5          1.3         0.3
## [42,]          4.5         2.3          1.3         0.3
## [43,]          4.4         3.2          1.3         0.2
## [44,]          5.0         3.5          1.6         0.6
## [45,]          5.1         3.8          1.9         0.4
## [46,]          4.8         3.0          1.4         0.3
## [47,]          5.1         3.8          1.6         0.2
## [48,]          4.6         3.2          1.4         0.2
## [49,]          5.3         3.7          1.5         0.2
## [50,]          5.0         3.3          1.4         0.2
# Heatmap of the 50-row matrix built above
heatmap(x3)
# The same steps as a single pipeline: drop column 5, convert to matrix,
# take the first 50 rows, draw the heatmap
iris %>%
  select(-5) %>%
  as.matrix() %>%
  head(n = 50) %>%
  heatmap()

在这里插入图片描述

条件和循环

# if statement: the body runs only when the condition is TRUE
i <- 2
if (i > 0) {
  print("i>0")
}
## [1] "i>0"
if (i < 0) {
  print("i<0")
}

# if / else statement
# Test the variable defined here (s); the original tested `i`, which is left
# over from the previous snippet, so `s` was never used.
s <- 1
if (s > 0) {
  print("+")
} else {
  print("-")
}
## [1] "+"
# Compact form of if / else using ifelse(test, yes, no)
s <- 1
ifelse(test = s > 0, yes = "+", no = "-")
## [1] "+"
# ifelse() combined with str_detect()
# Build the samples vector: the names recycle tumor/normal, the numbers run 1:3 twice
samples <- paste0(c("tumor", "normal"), rep(1:3, times = 2))
samples
## [1] "tumor1"  "normal2" "tumor3"  "normal1" "tumor2"  "normal3"
# TRUE where the sample name contains "tumor"
k1 <- str_detect(samples, pattern = "tumor")
k1
## [1]  TRUE FALSE  TRUE FALSE  TRUE FALSE
# Map the logical vector to a group label
ifelse(k1, yes = "tumor", no = "normal")
## [1] "tumor"  "normal" "tumor"  "normal" "tumor"  "normal"
# ifelse() with more than two outcomes: nest a second ifelse() in the `no` branch
# The fallback is the string "0" so that every branch returns a character value.
m <- 1
ifelse(m > 0, "+", ifelse(m < 0, "-", "0"))
## [1] "+"
# for loop: print each value from 1 to 4
for (i in seq_len(4)) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
# Batch-install packages (example kept commented out): install each package
# in pks only if require() fails to load it
# NOTE(review): `ask` and `update` look like BiocManager::install() arguments
# rather than install.packages() arguments — confirm before enabling this loop.
#pks = c("rio","read.data")
#for (p in pks){
 # if(!require(p,character.only = T))
   # install.packages(p,ask = F, update = F)
#}

隐式循环

apply(X, MARGIN, FUN, ...)
X 数据框/矩阵
MARGIN=1为行,2为列
FUN为函数
对X的每一行/列应用FUN函数

# Small 4 x 4 numeric example: first four rows and columns of iris
test3 <- iris[1:4, 1:4]
test3
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
# MARGIN = 1: apply sum() to each row
apply(test3, MARGIN = 1, FUN = sum)
##    1    2    3    4 
## 10.2  9.5  9.4  9.4
# MARGIN = 2: apply mean() to each column
apply(test3, MARGIN = 2, FUN = mean)
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##        4.825        3.200        1.400        0.200

挑出30个数里最大的5个

# Draw 30 standard-normal random values and show them
mm <- rnorm(30)
mm
##  [1]  0.038231251  1.614018170  0.609117309 -0.986239371  2.010533337  1.834587847
##  [7] -0.556371445  0.551342940 -1.403249101  0.448909349  1.276030534  0.518258595
## [13] -1.371033118  0.214918283 -1.007146133  0.155630412  0.990492912  0.002897102
## [19]  0.241762625 -0.145003475  0.198665660  2.465514040 -1.042278503  0.255811504
## [25]  2.284997963 -1.132938482  0.804076360  0.288959444 -0.566542658 -0.710499513
# Sort ascending and keep the last five, i.e. the five largest values
tail(sort(mm), n = 5)
## [1] 1.614018 1.834588 2.010533 2.284998 2.465514

挑出表达矩阵里方差最大的3个基因

# Build a 5 x 6 expression matrix of random values: genes in rows, samples in columns
m1 <- matrix(rnorm(30), nrow = 5)
rownames(m1) <- paste0("gene", 1:5)
colnames(m1) <- paste0(rep(c("control", "treat"), each = 3), 1:3)
# Variance of each gene (one value per row)
v1 <- apply(m1, MARGIN = 1, FUN = var)
# Sort the variances in ascending order
r1 <- sort(v1)
# Names of the three genes with the largest variance (last three after sorting)
names(tail(r1, n = 3))
## [1] "gene5" "gene4" "gene3"

##两个数据框的连接
inner_join 交集
full_join 全连接
在这里插入图片描述

表达矩阵画箱线图

新建矩阵

# 3 x 6 matrix of random values (18 draws arranged in 6 columns)
exp <- matrix(rnorm(18), ncol = 6)
exp
##           [,1]       [,2]       [,3]        [,4]      [,5]       [,6]
## [1,] 0.7262405 -0.6788916 -1.0830389 -0.72391563 -1.693845  0.6249748
## [2,] 2.4111533  1.3142188  0.6307410  2.34536693  3.557996  1.3821617
## [3,] 0.3285533 -1.5975085  0.8196729 -0.04391899 -1.287655 -0.7410555
# Round to two decimal places
exp <- round(exp, digits = 2)
exp
##      [,1]  [,2]  [,3]  [,4]  [,5]  [,6]
## [1,] 0.73 -0.68 -1.08 -0.72 -1.69  0.62
## [2,] 2.41  1.31  0.63  2.35  3.56  1.38
## [3,] 0.33 -1.60  0.82 -0.04 -1.29 -0.74
# Name the rows gene1..gene3
rownames(exp) <- paste0("gene", 1:3)
exp
##       [,1]  [,2]  [,3]  [,4]  [,5]  [,6]
## gene1 0.73 -0.68 -1.08 -0.72 -1.69  0.62
## gene2 2.41  1.31  0.63  2.35  3.56  1.38
## gene3 0.33 -1.60  0.82 -0.04 -1.29 -0.74
# Name the columns test1..test6
colnames(exp) <- paste0("test", 1:6)
exp
##       test1 test2 test3 test4 test5 test6
## gene1  0.73 -0.68 -1.08 -0.72 -1.69  0.62
## gene2  2.41  1.31  0.63  2.35  3.56  1.38
## gene3  0.33 -1.60  0.82 -0.04 -1.29 -0.74
# Shift the first three samples up by 1
exp[, 1:3] <- exp[, 1:3] + 1
exp
##       test1 test2 test3 test4 test5 test6
## gene1  1.73  0.32 -0.08 -0.72 -1.69  0.62
## gene2  3.41  2.31  1.63  2.35  3.56  1.38
## gene3  1.33 -0.60  1.82 -0.04 -1.29 -0.74

画箱线图

library(tidyr)
library(tibble)
library(dplyr)

# Prepare the data: one row per sample, one column per gene, plus a group label
dat <- exp %>%
  t() %>%                  # transpose so samples become rows
  as.data.frame() %>%      # matrix -> data frame
  rownames_to_column() %>% # move the sample names into a `rowname` column
  mutate(group = rep(c("control", "treat"), each = 3)) # first 3 samples control, last 3 treat

# Inspect the wide table
dat
##   rowname gene1 gene2 gene3   group
## 1   test1  1.73  3.41  1.33 control
## 2   test2  0.32  2.31 -0.60 control
## 3   test3 -0.08  1.63  1.82 control
## 4   test4 -0.72  2.35 -0.04   treat
## 5   test5 -1.69  3.56 -1.29   treat
## 6   test6  0.62  1.38 -0.74   treat
# Reshape to long format: one row per (sample, gene) pair
pdat <- pivot_longer(
  dat,
  cols = starts_with("gene"),
  names_to = "gene",
  values_to = "count"
)

# Plot: one box per gene and group
library(ggplot2)
p <- ggplot(pdat, aes(x = gene, y = count)) +
  # without aes(color = group) both groups would be pooled into a single box per gene
  geom_boxplot(aes(color = group)) +
  theme_classic()
p

在这里插入图片描述

CSDN只支持上传Markdown格式,不支持RMarkdown
使用R语言自带的Save as MD,文件容易乱
须先保存RMarkdown文件,后运行代码导出Md格式文件

#knitr::knit(input = "./test.Rmd", output = "./test.md")

从生信技能树数据挖掘班搬运,感谢小洁老师和她的团队

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值