par(optionname=value, ...) 修改当前图形参数,会话结束前一直有效;par(no.readonly=TRUE) 返回当前可修改的图形参数列表,可先保存(opar<-par(no.readonly=TRUE)),绘图后用 par(opar) 恢复。
符号与线条:pch 绘制点时使用的符号;cex 符号大小;lty 线条类型;lwd 线条宽度
legend 图例标签
基本图形
条形图
barplot(height, width = 1, space = NULL,
beside为F,则为堆砌条形图,为T,则为分组条形图
棘状图
library(vcd)
attach(Arthritis)
counts<-table(Treatment,Improved)
spine(counts,main="Spinogram")
直方图
hist(mtcars$mpg,freq=F,breaks=12,col="red",xlab="Miles perGallon",main="Histogram,rug plot,density curve")
rug(jitter(mtcars$mpg))
lines(density(mtcars$mpg),col="blue",lwd=2)#density为核密度图
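| freq | logical; if TRUE, the histogram graphic is a representation of frequencies (counts); if FALSE, probability densities are plotted (so that the histogram has a total area of one) |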
核密度图
par(lwd=2)
library(sm)
attach(mtcars)
cyl.f<-factor(cyl,levels=c(4,6,8),labels=c("4cylinders","6cylinders","8cylinders"))#创建分组因子
sm.density.compare(mpg,cyl,xlab="miles per gallon")#绘制密度图
title(main="MPG Distribution by Car Cylinders")
colfill<-c(2:(1+length(levels(cyl.f))))
legend(locator(1),levels(cyl.f),fill=colfill)
detach(mtcars)
箱线图
boxplot(mpg~cyl,data=mtcars,main="Car Mileage Data",xlab="Numberof Cylinders",ylab="Miles perGallon")
mtcars$cyl.f<-factor(mtcars$cyl,levels=c(4,6,8),labels=c("4","6","8"))
mtcars$am.f<-factor(mtcars$am,levels=c(0,1),labels=c("auto","standard"))
boxplot(mpg~am.f*cyl.f,data=mtcars,varwidth=T,col=c("gold","green"),main="MPGDistribution by Auto Type",xlab="Auto Type")
点图
dotchart(x,labels=)
x<-mtcars[order(mtcars$mpg),]
x$cyl<-factor(x$cyl)
x$color[x$cyl==4]<-"red"
x$color[x$cyl==6]<-"blue"
x$color[x$cyl==8]<-"darkgreen"
dotchart(x$mpg,labels=row.names(x),cex=.7,groups=x$cyl,gcolor="black",color=x$color,pch=19,main="GasMileage for Car Models",xlab="Miles per Gallon")
scatterplot(weight~height,data=women,spread=F,lty.smooth=2,pch=19,main="WomenAge 30-39",xlab="Height",ylab="Weight")
scatterplotMatrix(x, var.labels=colnames(x),
legend.plot=length(levels(groups)) > 1,legend.pos=NULL, row1attop=TRUE, ...)
气泡图
symbols(x,y,circle=radius)
相关图
corrgram(x, type=NULL, order = FALSE, labels,panel=panel.shade,
library(corrgram)
corrgram(mtcars,order=T,lower.panel=panel.shade,upper.panel=panel.pie,text.panel=panel.txt,main="Correlogram of mtcars intercorrelations")
正斜杠为正相关,反斜杠为负相关。颜色越深,相关性越大。正相关从12点处开始顺时针填充饼图,负相关逆时针填充饼图。
马赛克图 library(vcd)
mosaic(Titanic,shade=T,legend=T)
mosaic(~Class+Sex+Age+Survived,data=Titanic,shade=T,legend=T) #the two are same | |
缺失值图library(mice)
md.pattern(sleep)
42 2 3 9 2 1 2 1
| |
library(VIM)
aggr(sleep,prop=F,numbers=T) 描述性统计 | |
|
|
aggregate(mtcars[vars],by=list(am=mtcars$am),mean)
1
2
aggregate()仅允许在每次调用中使用平均数、标准差这样的单返回值函数。
列联表:
library(vcd) #use data Arthritis
library(gmodels)
CrossTable(Arthritis$Treatment,Arthritis$Improved)
|-------------------------|
|
| Chi-square contribution |
|
|
|
|-------------------------|
Total Observations in Table:
Arthritis$Treatment|
--------------------|-----------|-----------|-----------|-----------|
--------------------|-----------|-----------|-----------|-----------|
--------------------|-----------|-----------|-----------|-----------|
--------------------|-----------|-----------|-----------|-----------|
OLS回归
lm(formula, data, subset, weights, na.action,
多元线性回归
检验变量相关性
states<-as.data.frame(state.x77[,c("Murder","Population","Illiteracy","Income","Frost")])
cor(states)
Murder
Population
Illiteracy
Income
Frost
library(car)
scatterplotMatrix(states,spread=F,lty.smooth=2,main="Scatter PlotMatrix")
简单线性回归
lm(formula,data=mydata)
For example:
fit<- lm(weight~height+I(height^2),data=women)
coef(fit)
261.87818358
回归诊断
confint(fit)
(Intercept) 206.97913605 316.77723111
height
I(height^2)
confint()默认给出95%的置信区间,即从2.5%到97.5%。如果某个系数的置信区间包含0,则在5%的显著性水平下不能认为该系数显著不为0,即该变量对因变量的影响不显著。
par(mfrow=c(2,2))
plot(fit)
向后回归
library(MASS)
fit1<-lm(Murder~Population+Illiteracy+Income+Frost,data=states)
stepAIC(fit1,direction="backward")
Start:
Murder ~ Population + Illiteracy + Income + Frost
-Frost
-Income
- Population
- Illiteracy
Step:
Murder ~ Population + Illiteracy + Income
-Income
- Population
- Illiteracy
Step:
Murder ~ Population + Illiteracy
- Population
- Illiteracy
Call:
lm(formula = Murder ~ Population + Illiteracy, data =states)
Coefficients:
(Intercept)
全子集回归
regsubsets(x=, data=, weights=NULL, nbest=1,nvmax=8, force.in=NULL, force.out=NULL, intercept=TRUE,method=c("exhaustive", "backward", "forward", "seqrep"),really.big=FALSE,...)
| x | design matrix or model formula for full model, or biglm object |
| data | Optional data frame |
| y | response vector |
| weights | weight vector |
| nbest | number of subsets of each size to record |
| nvmax | maximum size of subsets to examine |
| force.in | index to columns of design matrix that should be in all models |
| force.out | index to columns of design matrix that should be in no models |
| intercept | Add an intercept? |
| method | Use exhaustive search, forward selection, backward selection or sequential replacement to search. |
| really.big | Must be TRUE to perform exhaustive search on more than 50 variables. |
| object | regsubsets object |
| all.best | Show all the best subsets or just one of each size |
| matrix | Show a matrix of the variables in each model or just summary statistics |
| matrix.logical | With matrix=TRUE, show the matrix as logical TRUE/FALSE or string "*"/" " |
| df | Specify a number of degrees of freedom for the summary statistics. The default is n-1 |
| id | Which model or models (ordered as in the summary output) to return coefficients and variance matrix for |
| vcov | If TRUE, return the variance-covariance matrix as an attribute |
leaps<-regsubsets(Murder~Population+Illiteracy+Income+Frost,data=states,nbest=4)
plot(leaps,scale="adjr2")
adjr2为调整的R^2
广义线性模型
glm()函数
Logistic回归
用到的安装包“AER”
glm ( formula, data = mydata, family = binomial () ).其中formula为回归式,因变量为二元值。
泊松回归:
glm ( formula, data= mydata, family = poisson()).其中formula为回归式,因变量为计数值。
主成分分析:
用到的安装包“psych”
principal() 含多种方差旋转方法的主成分分析;fa()可用主轴、最小残差、最大似然估计的因子分析;fa.parallel() 含平行分析的碎石图;factor.plot()绘制因子分析或主成分分析的结果。
1. 判断主成分个数
fa.parallel(Harman23.cor$cov,n.obs=302,fa="pc",n.iter=100,show.legend=F,main="ScreePlot")
2. 提取主成分及主成分旋转
rc<-principal(Harman23.cor$cov,nfactors=2,rotate="varimax")#nfactors=2 means extracting two components
rc
Principal Components Analysis
Call: principal(r = Harman23.cor$cov, nfactors = 2, rotate = "varimax")
Standardized loadings (pattern matrix) based upon correlation matrix
height
arm.span
forearm
lower.leg
weight
bitro.diameter 0.19 0.84 0.74 0.261 1.1
chest.girth
chest.width
SSloadings
ProportionVar
CumulativeVar
两个主成分PC1,PC2,h2为成分公因子方差——主成分对每个变量的解释度,u2(1-h2)为无法解释的比例。ssloadings包含与主成分相关联的特征值;proportion var 是每个主成分对整个数据集的解释度;cumulativevar是主成分累计可解释度。
3. 获取主成分得分系数
round(unclass(rc$weights),2) #have already standardized
height
arm.span
forearm
lower.leg
weight
bitro.diameter -0.08
chest.girth
chest.width
这篇博客详细介绍了R语言中的各种图形绘制方法,包括条形图、棘状图、直方图、核密度图、箱线图、点图、气泡图、相关图以及OLS回归等。同时,讲解了如何进行多元线性回归分析,包括变量相关性检验、简单线性回归、回归诊断、向后回归和全子集回归。此外,还探讨了广义线性模型中的Logistic回归。内容深入浅出,结合实例展示,适合R语言初学者和进阶者学习。


1万+

被折叠的 条评论
为什么被折叠?



