# 内容概况:
# * 函数
# * 控制流:if for
# * 数据重构
# * 自编函数
# Directory that holds the CSV files read by the case studies below.
data_dir <- "D:\\Documents\\work\\R培训\\第三期"
# Switch directories only when the path exists: a hard-coded path that is
# missing on the current machine would otherwise abort a sourced run before
# reaching the sections that need no files at all.
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  warning("working directory not found: ", data_dir, call. = FALSE)
}
# ---------------- Functions ----------------
# 1. Math functions: abs() sqrt() exp() floor() ceiling() round(), trigonometry
abs(c(-9, -8, -7, -6, -5, 4, 3, 2, 1))
sqrt(c(1, 2, 4, 9, 16, 25))
exp(c(1, 2, 3, 4, 5, 6))
# floor(): the largest integer that is not greater than x
floor(c(2, 2.1, 2.5, 2.9, 3, 3.1))
# ceiling(): the smallest integer that is not less than x
ceiling(c(2, 2.1, 2.5, 2.9, 3, 3.1))
round(2.3456789, digits = 2)
sin(pi / 2)
# 2. Summary statistics: sum() mean() median() sd() var() range() min() max()
#    quantile()
sum(c(1, 2, 3, 4))
mean(c(1, 2, 3, 4))
median(c(1, 2, 3, 4, 10))
quantile(c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), probs = c(0.2, 0.8))
# 3. String functions
# 3.1 Counting: length() versus nchar()
samp <- c("a", "bc", "def", "ghij")
# length(): number of elements in the vector
length(samp)
# nchar(): number of characters in each element
nchar(samp)
# 3.2 Case conversion
tolower("abcd EFGH")
toupper("abcd EFGH")
# 3.3 Concatenation
paste("2015", "08", "07", sep = "-")
# paste0() joins its arguments with no separator
paste0("项目", 1:6)
# Case study: lookup on two key columns (a multi-column VLOOKUP)
case_paste1 <- read.csv("case_paste1.csv", stringsAsFactors = FALSE)
case_paste2 <- read.csv("case_paste2.csv", stringsAsFactors = FALSE)
# Glue the two key columns into one composite key per row, locate each
# case_paste1 key among the case_paste2 keys with match(), and pull the
# third column of case_paste2 from the matched rows.
case_paste1$rev <- case_paste2[
  match(
    paste0(case_paste1$出游月份, case_paste1$八大区),
    paste0(case_paste2$出游月份, case_paste2$八大区)
  ),
  3
]
# Variant that builds the keys with paste()'s default separator (a space):
# case_paste1$rev <- case_paste2[match(paste(case_paste1$出游月份, case_paste1$八大区), paste(case_paste2$出游月份, case_paste2$八大区)), 3]
# 3.4 Splitting strings
text <- "Hello world Hello china"
strsplit(text, split = " ")
strsplit("2015-08-07", split = "-")
# 3.5 Searching: grep() returns the positions of the elements that match
grep("a", c("java", "python", "tableau", "javascript"))
# "|" is regular-expression alternation: elements containing "l" or "y"
grep("l|y", c("java", "python", "tableau", "javascript"))
# Case study: cm
# Tag rows of case_grep by keyword: any row whose first column matches one of
# the alternatives in a pattern gets the corresponding label in column "BU".
case_grep <- read.csv("case_grep.csv", stringsAsFactors = FALSE)
pattern1 <- "韩国|日本|济州|泰国|清迈|澳大利亚|邮轮|签证|出境|塞班|普吉|巴厘岛|马尔代夫|马来西亚|澳洲|美国|美洲|长滩|大溪地|欧洲|英国|法国|德国|法意瑞|土耳其|迪拜|埃及|俄罗斯|意大利|澳门|台湾|香港|新加坡|南京银行|毛里求斯|柬埔寨|越南|阿联酋|斯里兰卡|首尔|斐济|曼谷|西班牙|葡萄牙|北欧|苏梅|关岛|希腊|宿雾|兰卡威|瑞士|芬兰|菲律宾|东京|新西兰|塞舌尔|加拿大|悉尼|冲绳|沙巴|老挝|帕劳|夏威夷|南非|文莱|尼泊尔|新马泰|芽庄|吴哥|伊朗|印度|清莱|澳新|德法|波兰|坦桑尼亚|肯尼亚|邮轮"
case_grep[grep(pattern1, case_grep[, 1]), "BU"] <- "出境游"
pattern2 <- "海南|三亚|海口|蜈支洲|鹿回头|亚龙湾|跟团|包团|发团|当地|长途|当地游|双飞"
# Applied second, so a row matching both patterns ends up with this label.
case_grep[grep(pattern2, case_grep[, 1]), "BU"] <- "国内游"
# grepl() reports the same matches as grep(), as one TRUE/FALSE per element
grepl("a", c("java", "python", "tableau", "javascript"))
# 3.6 Replacement: sub(pattern, replacement, x)
# sub() replaces only the first match in each element
sub("a", "bc", c("java", "python", "tableau", "javascript"))
# gsub() replaces every match
gsub("a", "bc", c("java", "python", "tableau", "javascript"))
# 3.7 Substrings: strtrim(x, width) and substr(x, start, stop)
data <- c("2016-08-01", "2016-08-02", "2016-08-03", "2016-08-04")
# Comparable to Excel's LEFT()
strtrim(data, 4)
# Comparable to Excel's MID()
substr(data, 9, 10)
# 3.8 Matching
# match() gives, for each element of the first vector, the position of its
# first occurrence in the second vector
match(c("a", "b", "c", "d"), c("b", "d", "c", "a"))
# 4. Applying functions over data: the apply family
# apply(X, MARGIN, FUN): MARGIN = 1 works on rows, MARGIN = 2 on columns
data <- matrix(1:9, nrow = 3)
apply(data, 1, mean)
# Same value as the first element of the row-wise result above
mean(data[1, ])
apply(data, 2, mean)
# sapply() versus lapply(): sapply() simplifies its result to a vector,
# lapply() always returns a list
sapply(data[1, ], function(x) x^2)
sapply(data, function(x) x^2)
# Usage with a user-defined function (bonus_cash_func). Neither object is
# defined in the visible part of this script, so the call runs only when both
# are available instead of stopping the script with "object not found".
if (exists("total_region_month_target") && exists("bonus_cash_func")) {
  sapply(total_region_month_target$sub_cash, bonus_cash_func)
}
lapply(data, function(x) x^2)
# ---------------- Control flow (if, for) ----------------
# 1. Conditionals
# ifelse(test, yes, no) is vectorised: it yields one result per element
x <- c(59, 300, 203, 50)
ifelse(x >= 100, "大", "小")
# if syntax:
#   1. if (condition) statement else statement
#   2. if () {} else if () {} else {}
m <- 500
# One-line form: if (condition) statement else statement
if (m >= 300) print("非常大") else print("大")
m <- 10
if (m >= 300) {
  print("非常大")
} else if (m >= 100) {
  print("大")
} else {
  print("小")
}
x <- c(59, 300, 203, 50, 120, 230)
# A single element (x[2]) is tested here. The original note on this example
# said it "raises an error" -- presumably that referred to passing the whole
# vector x as the condition; TODO confirm.
if (x[2] >= 100) {
  print("大")
} else {
  print("小")
}
# 2. Loops
# for: for (variable in sequence) { ... }
for (i in seq_len(10)) {
  print(i)
}
x <- c(59, 300, 203, 50, 120, 230, 456, 7889, 89)
# Classify every element in turn; if/else is used as an expression whose
# value is handed to print()
for (i in seq_along(x)) {
  print(if (x[i] >= 100) "大" else "小")
}
# while: repeats the body for as long as the condition holds
i <- 1
while (i < 8) {
  print(paste("星期", i, sep = "-"))
  i <- i + 1
  print(i)
}
# ---------------- Reshaping data ----------------
# 1. Transpose: t()
case_t <- read.csv("case_转置.csv", stringsAsFactors = FALSE)
case_t
t(case_t)
# 2. Melt: melt(data, id = <columns to keep fixed>)
library(reshape)
case_melt <- read.csv("case_整合.csv", stringsAsFactors = FALSE)
melt(case_melt, id = "出游月份")
# 3. Cast: cast(data, fixed_columns ~ spread_column)
case_cast <- read.csv("case_展开.csv", stringsAsFactors = FALSE)
cast(case_cast, 出游月份 ~ 八大区)
# ---------------- User-defined functions ----------------
# Syntax: myfunction <- function(arg1, arg2, ...) { body; return() }

# Apply the summary named by `fun` to `x`.
#
# x:   vector handed to the chosen summary function
# fun: one of "sum", "mean" or "median"
# Returns the computed value, or the string "error" for any other name.
myfunction <- function(x, fun) {
  if (fun == "sum") return(sum(x))
  if (fun == "mean") return(mean(x))
  if (fun == "median") return(median(x))
  "error"
}
myfunction(c(1, 2, 3, 4, 10), "sum")
myfunction(c(1, 2, 3, 4, 10), "mean")
myfunction(c(1, 2, 3, 4, 10), "median")
# An unsupported name falls through to "error"
myfunction(c(1, 2, 3, 4, 10), "round")
# Count the days from x to y (both inclusive) that are not a Saturday or a
# Sunday.
#
# x, y: dates in a form as.Date() accepts, e.g. "2016-08-01"
# Returns 1 when x and y are equal; otherwise the number of non-weekend days
# in the daily sequence from x to y.
diff_weekday <- function(x, y) {
  # Identical start and end always count as one day, whatever the weekday.
  if (x == y) {
    return(1)
  }
  days <- seq(as.Date(x), as.Date(y), by = "day")
  # POSIXlt's wday is numeric (0 = Sunday, 6 = Saturday), so the weekend test
  # does not depend on the session locale the way weekdays() names do.
  is_weekend <- as.POSIXlt(days)$wday %in% c(0L, 6L)
  sum(!is_weekend)
}
# seq(1, 9, 2)
diff_weekday("2016-08-01", "2016-08-01")
diff_weekday("2016-08-01", "2016-08-31")
# Step-by-step walkthrough of the weekday filter over August 2016
x <- "2016-08-01"
y <- "2016-08-31"
a <- seq(as.Date(x), as.Date(y), by = "day")
b <- c("星期六", "星期日")
# TRUE for each date whose weekday name differs from both entries of b
result <- sapply(a, function(m) all(weekdays(m) != b))
# Inspect one date (the sixth in the sequence) to see each intermediate value
m <- a[6]
weekdays(m)
weekdays(m) != b
all(weekdays(m) != b)
# Number of dates that pass the filter
sum(result)

1369

被折叠的 条评论
为什么被折叠?



