HiveSql中的函数家族(一)

一.内置函数

1-1 日期类型操作

-- Current date.
SELECT `current_date`();

-- Current date and time.
SELECT `current_timestamp`();

-- Unix time (timestamp): seconds elapsed since 1970-01-01 00:00:00.
SELECT unix_timestamp();


-- Converting between date-time strings and Unix time.

-- Date-time string -> Unix seconds.
SELECT unix_timestamp('2023-10-01 15:30:28');

-- Unix seconds -> date-time string.
-- NOTE(review): this 11-digit sample lies roughly 390 years after 1970;
-- a 10-digit value was presumably intended — confirm.
SELECT from_unixtime(12390886789);

-- Extracting individual parts of a date-time value.
SELECT year('2023-10-01 15:30:28');
SELECT month('2023-10-01 15:30:28');
SELECT day('2023-10-01 15:30:28');

-- dayofmonth: day within the month.
SELECT dayofmonth('2023-10-12 15:30:28');

-- dayofweek: day of the week as a number (Hive documents 1 = Sunday).
SELECT dayofweek('2023-10-12 15:30:28');

-- Time-of-day parts.
SELECT hour('2023-10-12 15:30:28');
SELECT minute('2023-10-12 15:30:28');
SELECT second('2023-10-12 15:30:28');

-- Date arithmetic: shift a date by a number of days.
SELECT date_add('2023-10-12 15:30:28', 5);   -- five days later
SELECT date_add('2023-10-12 15:30:28', -5);  -- five days earlier

-- Number of days between the current date and 2023-10-12.
SELECT datediff(`current_date`(), '2023-10-12');

1-2 类型转化

-- When a column's type does not suit a calculation, it can be converted.

-- Implicit conversion: Hive converts the operands on its own before computing.
SELECT '123' + '456';

-- Explicit conversion with cast.
SELECT CAST('123' AS INT) + CAST('456' AS INT);

SELECT * FROM itcast.tb_hero;
DESC itcast.tb_hero;

-- The cast applies only while the expression is evaluated;
-- the column's declared type is not changed.
SELECT CAST(blood AS BIGINT)
FROM itcast.tb_hero;

1-3 字符串数据转json，array操作

json字符串操作
- 数据是一个 "{key:value}" 格式
- 使用方法取值value

-- Single-column table; the queries in this section read json_field as a JSON string.
CREATE TABLE tb_order_detail (
    json_field STRING
);


SELECT * FROM tb_order_detail;

-- Pull values out of the JSON string stored in the column, looking them up by key.
-- Approach 1: get_json_object returns a single value per call,
-- but its path can keep descending into nested data.
SELECT
    get_json_object(json_field, '$.orderid')  AS orderid,
    get_json_object(json_field, '$.goods[0]') AS good1,  -- array element access
    get_json_object(json_field, '$.goods[1]') AS good2
FROM tb_order_detail;

-- Approach 2: json_tuple extracts several keys in one call,
-- but it cannot descend into nested data.
-- The statement is now terminated with a semicolon so it can be followed by
-- further statements in a script.
SELECT
    json_tuple(json_field, 'orderid', 'total_price', 'total_num', 'goods')
        AS (orderid, total_price, total_num, goods)
FROM tb_order_detail;

二、DQL的查询计算

2-1 单表查询计算

2.2.1.where 的条件过滤

格式：

select 字段1,字段2,字段3,常量值,内置函数计算 from tb where 过滤条件

(1).比较大小
- 字段 = 数值判断字段和数值是否相等
- 字段 > 数值
- 字段 < 数值
- 字段 >= 数值
- 字段 <= 数值
- 字段 != 数值

-- Comparison operators.

-- Students older than 19.
SELECT *
FROM tb_stu
WHERE age > 19;

-- Female students.
SELECT *
FROM tb_stu
WHERE gender = '女';

-- Students whose class is not IS.
SELECT *
FROM tb_stu
WHERE cls != 'IS';

(2).判断空值
- 字段 is null 字段为空
- 字段 is not null

-- NULL checks.
-- Insert a row with NULL in the second position
-- (presumably the name column — confirm against the tb_stu definition).
INSERT INTO tb_stu VALUES (9023, NULL, '男', 20, 'MA');

SELECT *
FROM tb_stu
WHERE name IS NOT NULL;

SELECT *
FROM tb_stu
WHERE name IS NULL;

-- Filtering out empty strings also filters out NULL rows.
SELECT *
FROM tb_stu
WHERE name != '';

-- Matching the empty string does not return NULL rows.
SELECT *
FROM tb_stu
WHERE name = '';

(3).范围判断
- 字段 between 数值1 and 数值2
  - 等价于：字段 >= 数值1 and 字段 <= 数值2（两端的值都包含在内）
- 字段 in (数值1,数值2....) 字段的值等于任意一个值就返回结果

-- Range check on age.
SELECT *
FROM tb_stu
WHERE age BETWEEN 20 AND 25;

-- Age equals any value in the list.
SELECT *
FROM tb_stu
WHERE age IN (19, 22);

-- Age equals none of the values in the list.
SELECT *
FROM tb_stu
WHERE age NOT IN (19, 22);

(4).模糊查询

字段 like '模式'：模式中 % 匹配任意多个字符，_ 匹配任意单个字符

字段 rlike '正则表达式'

-- Student table that includes an email column; fields in a row are comma-delimited.
CREATE TABLE tb_stu2 (
    id     INT,
    name   STRING,
    gender STRING,
    age    INT,
    cls    STRING,
    email  STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

SELECT * FROM tb_stu2;

-- Fuzzy matching with like.

-- Names starting with 刘; % stands for any number of characters.
SELECT *
FROM tb_stu
WHERE name LIKE '刘%';

-- Names starting with 刘 that are exactly two characters long.
SELECT *
FROM tb_stu
WHERE name LIKE '刘_';

-- Names starting with 刘 that are exactly three characters long.
SELECT *
FROM tb_stu
WHERE name LIKE '刘__';

-- Regular-expression matching with rlike.
-- A pattern uses special symbols to stand for different kinds of characters:
--   \\d  a digit
--   \\w  a word character (letter, digit or underscore)
--   \\s  a whitespace character
SELECT * FROM tb_stu2;

-- ^ anchors the pattern to the start of the value.

-- Emails that start with a digit.
SELECT *
FROM tb_stu2
WHERE email RLIKE '^\\d';

-- Emails that start with a word character.
SELECT *
FROM tb_stu2
WHERE email RLIKE '^\\w';

-- Emails that start with a non-whitespace character
-- (uppercase \\S is the negation of \\s).
SELECT *
FROM tb_stu2
WHERE email RLIKE '^\\S';

-- Sample pattern for a whole email address (not used by a query here):
-- ^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$

-- The part of the address after '@'.
SELECT
    email,
    split(email, '@')[1]
FROM tb_stu2;

-- The first dot-separated piece of that part; the dot is escaped in the separator pattern.
SELECT
    email,
    split(split(email, '@')[1], '\\.')[0]
FROM tb_stu2;

(5).与或非

条件1 and 条件2 and 条件3 ... 多个条件都成立，返回对应的行数据

条件1 or 条件2 or 条件3 ... 多个条件满足任意一个，返回对应的行数据

-- AND: every condition must hold.
SELECT * FROM tb_stu;

-- Male students whose class is IS.
SELECT *
FROM tb_stu
WHERE gender = '男'
  AND cls = 'IS';

-- Students who are male or whose class is IS.
SELECT *
FROM tb_stu
WHERE gender = '男'
   OR cls = 'IS';

2.2.2.聚合计算 sum，count

SELECT * FROM tb_stu;

-- Total of all ages.
SELECT sum(age)
FROM tb_stu2;

-- Number of rows that have a name.
SELECT count(*)
FROM tb_stu
WHERE name IS NOT NULL;

-- Average age.
SELECT avg(age)
FROM tb_stu2;

-- Largest and smallest age.
SELECT max(age)
FROM tb_stu;

SELECT min(age)
FROM tb_stu;

2.2.3.分组聚合 group by

-- Sum of ages per gender, showing only the totals.
SELECT sum(age)
FROM tb_stu
GROUP BY gender;

-- Same aggregation with the grouping column included in the output.
SELECT
    sum(age),
    gender
FROM tb_stu
GROUP BY gender;

2.2.4.分组后过滤 having

-- Keep only the gender groups whose summed age exceeds 200.
SELECT
    sum(age),
    gender
FROM tb_stu
GROUP BY gender
HAVING sum(age) > 200;

注意：分组后，select 中只能出现分组字段和聚合函数的计算结果，不能直接出现非分组字段

2.2.5.排序

order by 全局排序

-- Ascending (smallest first) is the default.
SELECT *
FROM tb_stu
ORDER BY age;

-- Descending (largest first).
SELECT *
FROM tb_stu
ORDER BY age DESC;

2.2.6.分页 limit

-- Pagination.

-- The first 5 rows.
SELECT *
FROM tb_stu
LIMIT 5;

-- Two-argument form: offset, then row count.
-- For page m with n rows per page, write (m-1)*n, n; 10,5 is therefore page 3 at 5 rows per page.
SELECT *
FROM tb_stu
LIMIT 10, 5;

2-2 多表关联查询

join的列关联

内关联

找关联字段相同的数据

左关联

展示保留左边表的所有数据，右边表有相同数据显示，没有相同数据则为null

右关联

展示保留右边表的所有数据，左边表有相同数据显示，没有相同数据则为null

-- Table 1: employees. Fields in a row are comma-delimited.
CREATE TABLE employee (
    id     INT,
    name   STRING,
    deg    STRING,
    salary INT,
    dept   STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Table 2: employee home addresses. Fields in a row are comma-delimited.
CREATE TABLE employee_address (
    id     INT,
    hno    STRING,
    street STRING,
    city   STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Table 3: employee contact details. Fields in a row are comma-delimited.
CREATE TABLE employee_connection (
    id    INT,
    phno  STRING,
    email STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Inner join: the extra predicate in ON is used like a WHERE condition to filter rows.
SELECT *
FROM employee t1
INNER JOIN employee_address t2
    ON t1.id = t2.id
   AND t1.salary > 30000;

-- Left join.
SELECT *
FROM employee t1
LEFT JOIN employee_address t2
    ON t1.id = t2.id;

-- Right join.
SELECT *
FROM employee t1
RIGHT JOIN employee_address t2
    ON t1.id = t2.id;

-- Comma-separated tables plus a WHERE equality give the same effect as an inner join.
SELECT *
FROM employee, employee_address
WHERE employee.id = employee_address.id;

union的行关联

将select查询计算后的结果表合并

-- union: stack the result rows of two queries into one result.
SELECT
    'tb_stu',
    count(*)
FROM tb_stu
WHERE name IS NOT NULL
UNION
SELECT
    'tb_stu2',
    count(*)
FROM tb_stu2
WHERE name IS NOT NULL;

-- union all: duplicate rows are kept.
SELECT
    id,
    name
FROM tb_stu
UNION ALL
SELECT
    id,
    name
FROM tb_stu2;

本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若转载，请注明出处：http://www.mfbz.cn/a/555948.html

如若内容造成侵权/违法违规/事实不符，请联系我们进行投诉反馈qq邮箱809451989@qq.com，一经查实，立即删除！