当前位置: 代码迷 >> 综合 >> hive学习笔记——常用命令
  详细解决方案

hive学习笔记——常用命令

热度:63   发布时间:2023-11-21 13:49:29.0
show databases like '';数据库
show functions;方法
show tables;表
show create table demo;表结构
show partitions student2;表分区desc database extended hivedemo;数据库
desc demo;表结果
desc formatted demo;表use hivedemo;使用数据库drop table demotwo;删除表
drop database hivedemo cascade;彻底删除数据库truncate table demo;删除表数据insert into demo values(1,"zhangsan");插入数据
insert overwrite table demo values(3,"wangwu");插入数据并覆盖原数据
insert into table studentp1 partition(age,gender) select id,name,likes,address,age,gender from studentp;根据指定分区插入数据
insert overwrite table employee_id_buckets select * from employee_id;select * from demo;查看表所有数据
select current_database();查看当前使用的数据库
select * from employee where gender_age.age=27;数据类型struct
select * from employee where skill_score["DB"]=80;数据类型map
select * from employee where work_place[1]="Toronto";数据类型array
select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will';子查询select t.* from (select * from employee where gender_age.gender='Male') t where t.name='Will'
union all
select * from employee where gender_age.gender='Female';select * from employee_id_buckets tablesample(bucket 1 out of 4 on employee_id)s;
select * from employee_id_buckets tablesample(10 rows)s;
select * from employee_id_buckets tablesample(1M)s;
select * from employee_id_buckets tablesample(100 percent)s;with
t1 as (select * from employee where gender_age.gender='Male'),
t2 as (select * from t1 where t1.name='Will'),
t3 as (select * from employee where gender_age.gender='Female')
select * from t2 union all select * from t3;alter database demo2 set dbproperties('createtime'='20202020');
alter table demo2 set tbproperties('EXTERNAL'='TRUE');修改表为外部表
alter table demo2 set tbproperties('EXTERNAL'='FALSE');修改表为内部表
alter table demo2 rename to demotwo;表改名
alter table demotwo change name username string;更新列
alter table demotwo add columns(age int comment'user age', email string);加列
alter table demotwo replace columns(id int,username string);替换列
alter table student3 add partition(age=20,gender='male') partition(age=30,gender='male');加分区
alter table student3 drop partition(age=20,gender='male'),partition(age=30,gender='male');删除分区create database if not exists hivedemo location '';创建数据库
create table demo(id int, name string);创建表create table if not exists employee(
id int,
name string,
work_place array<string>,
gender_age struct<gender:string,age:int>,
skill_score map<string,int>,
depart_title map<string,array<string>>
)
comment 'this is an internal table'
partitioned by (age int,gender string)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n';创建内部表分区create external table if not exists employee_external(
name string,
work_place array<string>,
gender_age struct<gender:string,age:int>,
skill_score map<string,int>,
depart_title map<string,array<string>>
)
comment 'this is an internal table'
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by '\n'
location '/tmp/hivedata/employee';创建外部表create table employee_id_buckets(
name string,
employee_id int,
work_place array<string>,
gender_age struct<gender:string,age:int>,
skills_score map<string,int>,
depart_title map<string,array<string>>
)
clustered by(employee_id) into 2 buckets
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by '\n';创建内部表分桶create temporary table tmp_employee as select name,work_place from employee;使用查询,将结果存入临时表中create temporary table ctas_employee as
with
t1 as (select * from employee where gender_age.gender='Male'),
t2 as (select * from t1 where t1.name='Will'),
t3 as (select * from employee where gender_age.gender='Female')
select * from t2 union all select * from t3;使用ctas查询,将结果保存到临时表中load data local inpath '/opt/tmp/student.txt' into table student;插入本地数据
load data inpath '/tmp/student.txt' into table student;插入hdfs数据
load data local inpath '/opt/tmp/student.txt' overwrite into table student;插入本地数据并覆盖
load data local inpath '/opt/tmp/student.txt' into table student2 partition(age=20,gender='man');插入数据并分区set hive.exec.dynamic.partition=true;分区用
set hive.exec.dynamic.partition.mode=nonstrict;
set map.reduce.tasks=2;分桶用
set hive.enforce.bucketing=true;侧视图
将employee表中的work_place(array)炸裂
select explode(work_place) from employee;
select name,wp from employee lateral view explode(work_place) work_place as wp;
将employee表中的skill_score(map)炸裂
select explode(skill_score) from employee;
select name,skill,score from employee lateral view explode(skill_score) sk_sc as skill,score;
select name,wp,skill,score from employee 
lateral view explode(work_place) wo_pl as wp 
lateral view explode(skill_score) sk_sc as skill,score;WOrdCount案例,explode使用
select split(line,"\\s") from wordcount;
select w.word,count(1) num 
from(select explode(split(line,"\\s")) word from wordcount) w 
group by w.word order by num desc;
with
t1 as (select explode(split(line,"\\s")) word from wordcount)
select t1.word,count(1) num from t1 group by word order by num desc;case when then else end用法
将年龄分段
select name,age,case when age>30 then 'old' when age<=18 then 'young' else 'middle' end as tag from studentp;
将性别转数字
select name,gender,case when gender='boy' then 1 when gender='man' then 1 when gender='male' then 1 when gender='girl' then 0 when gender='woman' then 0 when gender='female' then 0 else -1 end as gendertag from studentp; create temporary table tmp_gender_tag as select name, gender,
case when gender='boy' then 1 else 0 end as boygender,
case when gender='girl' then 1 else 0 end as girlgender,
case when gender is null then 1 else 0 end as taijiangender
from studentp;select sum(boygender) as boysum,sum(girlgender) as girlsum,sum(taijiangender) as taijiansum from tmp_gender_tag;行转列
select gender,concat_ws(",",collect_set(name)) as stu_col from studentp group by gender;去重
select gender,concat_ws("&",collect_list(name)) as stu_col from studentp group by gender;不去重导出表数据到指定文件夹,必须要求导出文件夹为空文件夹,否则会报错
export table studentp to '/tmp/out1';导出表数据到本地文件夹,要确保目标文件夹存在,否则导出失败
hive -e "select * from hivedemo2.studentp1" >> /opt/tmp/out/student.txtorder by          全局排序
sort by           分区内部有序,整体无序
distribute by     mapTask分区   一般与sort by合作使用
cluster by        如果分区和排序字段相同并且升序,等同与distribute by age sort aby age ascselect * from studentp order by age;select * from studentp sort by age;create table dissortstudentp as select * from studentp distribute by id sort by age;insert overwrite table dissortstudentp select * from studentp distribute by age sort by age desc;create table dissorstudenttp3 as select * from studentp cluster by age;