1.data.frame() 2.head(),tail(),str() 3.names(),colnames() 4.访问数据框 [],$,subset() 5.新增列,删除列
#1.数据框的创建 data.frame()
> company <- c("A", "A", "B","C","D","C")
> cash_flow <- c(100, 200, 300,800,500,1000)
> year <- c(1, 3, 2,5,3,8)
>
> cash=data.frame(company, cash_flow, year)
> cash
  company cash_flow year
1 A 100 1
2 A 200 3
3 B 300 2
4 C 800 5
5 D 500 3
6 C 1000 8
>
> a=data.frame(company = c("A", "A", "B","C","D","C"), cash_flow = c(100, 200, 300,800,500,1000), year = c(1, 3, 2,5,3,8))
> a
  company cash_flow year
1 A 100 1
2 A 200 3
3 B 300 2
4 C 800 5
5 D 500 3
6 C 1000 8
> #2. head()和tail()函数访问数据的首末行
> #head() 返回数据框的前几行,默认为6行。若要更改行数,请使用 head(数据, n = ___)。
> #tail() 返回数据框的最后几行,默认为6行。若要更改行数,请使用 tail(数据, n = ___)。
> #str() 查看对象的结构。该函数会显示传入对象的数据类型(这里是 data.frame),并列出每个列变量及其数据类型。
> head(cash,2)
  company cash_flow year
1 A 100 1
2 A 200 3
> tail(cash,2)
  company cash_flow year
5 D 500 3
6 C 1000 8
> str(cash)
'data.frame': 6 obs. of 3 variables:
 $ company  : chr "A" "A" "B" "C" ...
 $ cash_flow: num 100 200 300 800 500 1000
 $ year     : num 1 3 2 5 3 8
>
> #3. 用 names() 函数和 colnames() 函数修改数据框的列名
> names(cash) <- c('comp','cash','yr')
> cash
  comp cash yr
1 A 100 1
2 A 200 3
3 B 300 2
4 C 800 5
5 D 500 3
6 C 1000 8
>
> names(cash)[1] <- c('company')
> cash
  company cash yr
1 A 100 1
2 A 200 3
3 B 300 2
4 C 800 5
5 D 500 3
6 C 1000 8
>
> colnames(cash)[1:2] <- c('co','ca')
> cash
  co ca yr
1 A 100 1
2 A 200 3
3 B 300 2
4 C 800 5
5 D 500 3
6 C 1000 8
>
> names(cash) <- c('company')
> # 注意:这里只提供了1个名称,其余两列的列名会变成 NA
> cash
  company NA NA
1 A 100 1
2 A 200 3
3 B 300 2
4 C 800 5
5 D 500 3
6 C 1000 8
>
>
> #4. 访问和子集化数据框
> #(1) [ ] 形式访问
> cash=data.frame(company = c("A", "A", "B","C","D","C"), cash_flow = c(100, 200, 300,800,500,1000), year = c(1, 3, 2,5,3,8))
> cash
  company cash_flow year
1 A 100 1
2 A 200 3
3 B 300 2
4 C 800 5
5 D 500 3
6 C 1000 8
> cash[1,]
  company cash_flow year
1 A 100 1
> cash[,1]
[1] "A" "A" "B" "C" "D" "C"
> cash[3,2]
[1] 300
> cash[5,'year']
[1] 3
>
>
> #(2)$ 形式访问
> cash$cash_flow
[1] 100 200 300 800 500 1000
> #它等价于
> cash[,"cash_flow"]
[1] 100 200 300 800 500 1000
> #(3) subset( )访问
> cash=data.frame(company = c("A", "A", "B","C","D","C"), cash_flow = c(100, 200, 300,800,500,1000), year = c(1, 3, 2,5,3,8))
> cash
  company cash_flow year
1 A 100 1
2 A 200 3
3 B 300 2
4 C 800 5
5 D 500 3
6 C 1000 8
> subset(cash, company =="A")
  company cash_flow year
1 A 100 1
2 A 200 3
>
>
> #5. 增加新列
> # step 1. 将现金流的四分之一作为新列添加到数据框
> cash$quarter_cash <- cash$cash_flow *1/4
> # step 2. 将年数的两倍作为新列添加到数据框
> cash$double_year <- cash$year *2
> cash
  company cash_flow year quarter_cash double_year
1 A 100 1 25 2
2 A 200 3 50 6
3 B 300 2 75 4
4 C 800 5 200 10
5 D 500 3 125 6
6 C 1000 8 250 16
>
> library(MASS)
> b<-biopsy #复制数据
> head(b)
  ID V1 V2 V3 V4 V5 V6 V7 V8 V9 class
1 1000025 5 1 1 1 2 1 3 1 1 benign
2 1002945 5 4 4 5 7 10 3 2 1 benign
3 1015425 3 1 1 1 2 2 3 1 1 benign
4 1016277 6 8 8 1 3 4 3 7 1 benign
5 1017023 4 1 1 3 2 1 3 1 1 benign
6 1017122 8 10 10 8 7 10 9 7 1 malignant
>
> b$classn[b$class=="benign"]<-0 #重新赋值
> b$classn[b$class=="malignant"]<-1
> b
  ID V1 V2 V3 V4 V5 V6 V7 V8 V9 class classn
1 1000025 5 1 1 1 2 1 3 1 1 benign 0
2 1002945 5 4 4 5 7 10 3 2 1 benign 0
3 1015425 3 1 1 1 2 2 3 1 1 benign 0
4 1016277 6 8 8 1 3 4 3 7 1 benign 0
5 1017023 4 1 1 3 2 1 3 1 1 benign 0
6 1017122 8 10 10 8 7 10 9 7 1 malignant 1
>
> #(3)可以通过指定NULL来删除列。运行代码来删除company。
> cash$company <- NULL
> cash
  cash_flow year
1 100 1
2 200 3
3 300 2
4 800 5
5 500 3
6 1000 8
>