AcWing 22. Hive SQL

历史记录

清除记录

猜你想搜

AcWing热点
App
登录/注册
        AcWing 22. Hive SQL
          
        原题链接
          
        简单
    
    作者：
    
        
         
        只吃虾仁大雪菜
    
    , 
    2022-08-22 16:42:51
    
    , 
    
        所有人可见
    
    , 
    阅读 392

                                    
    
    

    1
    

    
        
    
    

    
    

    
        
            1
        
    

                                

            Hive SQL

窗口函数
    -- 串口函数的执行是在group by之后
    -- group by之后    select之前


    -- 1. 结合聚合函数使用  给聚合函数指定聚合的范围  
        -- count(name) over()  别名         类似于打标记 1,2,3,4,5,6,7
            count(distinct name) over()     去重之后为总人数(有时候去重也可以通过group by实现)
            sum(cost) over(partition by substr(orderdate,1,7)) 别名
            sum(cost) over(partition by name, substr(orderdate,1,7)) 别名
        -- 累计消费额(滑动窗口)       order by 默认为滑动窗口  自上而下
        sum(cost) over(partition by name order by orderdate rows between unbounded preceding and current row)
        -- 最近三次的 累计 消费
        sum(cost) over(partition by name order by orderdate rows between 2 preceding and current row)
        -- 明细及每个人最近三笔消费的最大值
        max(cost) over(partition by name order by orderdate rows between 2 preceding and current row)



    -- 2. 只能结合开窗使用的函数
        -- 1.排名函数(rank/dense_rank/row_number/percent_rank)
            rank() over(partition by substr(orderdate,1,7) order by cost desc)
                -- 1, 1, 3, 4, 5, 5, 7
            dense_rank()
                -- 1, 1, 2, 3, 4, 4, 5
            row_number()
                -- 1, 2, 3, 4, 5, 6
            percent_rank()
                -- 0, 0 , 0.4, 0.6, 0.8, 1

        -- 2. 某列的下一行/上一行     lag/lead
            -- 距离上一次消费过了多少天
            lag(orderdate,1,'没有消费') over(partition by name order by orderdate) 别名
            datediff(orderdate, lag(orderdate,1,'没有消费') over(partition by name order by orderdate))

        -- 3.first_value/last_value  返回滑动窗口的第一行/最后一行  若加了参数turn则返回第一个不为null的数字
            first_value(orderdate) over(partition by name order by orderdate rows between 2 preceding and current row)
            first_value(orderdate,ture) over()
            -- 最近一笔消费大于30元的日期，没有显示null
            last_value(if(cost>30),orderdate,true) over(partition by name order by orderdate)




聚合函数：跳过null    多行聚合到一行    配合group by 使用
    -- 结合 group by 使用

    -- 1. count()/  sum()/  avg()/  min()/  max() 
        max(if(course_id='01',score,null))

    -- 2.collect_list(name) / collect_set()  将name放到一个数组(集合)中

    -- 3.explode(array(1,2,3))  炸裂函数 一行 --> 多行   将数组(一行数据)炸开成元素(多行数据)
        [a] [1, 2]  -->  [a][1]
                         [a][2]
        lateral view explode(friends) 别名 as 列名
        炸数组集合



基本函数：

    字符串类：
    -- upper()/lower()/length()   转大小写长度    trim() 去除两边空格
    -- substr("abc",1,3)  下标从1开始  
    -- replace('yxc','y','Y') 替换    regexp_replace
    -- split("a:b:b",':')    将字符串切成数组
    -- nvl(job,'无业')       若job为空替换成 无业 
    -- coalesce(job,sal,0)   可以写多列返回第一个非空值
    -- concat('a','b','c')   拼接字符
        concat_ws(',',array('a','b','c'))   用什么字符串拼接字符
    -- get_json_object(,$name)

    日期类：
    -- timestamp('2020-08-20')  date('2020-0820')
    -- unix_timestamp(date('2020-0820'))   将日期类型转化成bigint类型 多少秒
    -- from_unixtime(213123132,'yyyy-MM-dd HH:mm:ss')  将秒数转化成date类型
    -- current_timestamp  返回当前时间戳
    -- year(current_timestamp)  / month(current_timestamp) / day(current_timestamp)
    -- datediff('2022-08-20','2020-10-20')  第一个时间减去第二个时间
    -- date_add(current_date(),1)   时间加1天
    -- date_format(current_date(),'MM/dd/yy')

    -- 根据日期计算年龄的时候需要用if判断是否过完生日

    流程控制函数：
    -- if(a>5, 1, 0)  单分支 如果a>5返回1否则0   sum(if(a>5, 1, 0))
    -- case substr(name, 1, 1)
            when '张' then 'xxxx'
            when '王' then 'yyyy'
            else 'zzzz'
            end
    -- array(1,2,3,4)       map('a',1,'b',2,'c',3)
    -- array_contatins(array(1,2,3,4), 1)
        map_keys(map('a',1,'b',2,'c',3))
    -- named_struct()
    -- str_to_map()



分组：
    -- 1.和聚合函数并列的列一定要放到group by中
        select deptno,
                ename,
                avg(sal) 别名
        from emp e
        group by deptno, ename

    -- 2.分组条件过滤 having  
        -- 注意：where发生在map读数据阶段不符合要求的数据放弃读入，而having发生于reduce之后
    -- 平均工资高于2000块的部门
        select deptno
        from emp e
        group by deptno
        having avg(sal) > 2000


连接： 
    -- 1.join()/  left join()/  right join()      左右连接表
    -- 三表查询 
        from ename, dname, loc_name
        from emp e 
                join dept d on e.deptno = d.deptno
                join location l on d.loc = l.loc

    -- 2.union  / union all(重复的值也保留)        上下连接表


排序：
    -- 1.order by   /   desc  /  limit 1 2

注意：
    -- where 判断的可以直接join  如果进行了group by having 之后 需要作为子查询进行join

开本地模式：set hive.exec.mode.local.auto=ture;
习题：
    -- 1.查询同姓,并统计同姓
        select substr(stu_name, 1, 1)
                count(stu_id) 别名1
        from student_info 别名2
        group by substr(stu_name, 1, 1)
    -- 2.显示学生语数英成绩，没成绩输出0,成绩降序显示
        -- 原表是一个学生一科的成绩，所得的是一个学生的三门成绩在一行  粒度发生变化需要group by
        course_info    score_info

        select stu_id,
                max(if(course_id='01',score,null)) yuwen,
                max(if(course_id='02',score,null)) shuxue,
                max(if(course_id='03',score,null)) yingyu,
                count(course_id) cnt,
                avg(score) avg_score
        from score_info si
        group by stu_id

    -- 3.所有课程成绩均小于60分学号，姓名    --> 最大成绩小于60
        select si.stu_id,si.stu_name, sum(if(score<60,1,0)) sc, count(course_id) cnt
        from student_info si join score_info sc on si.stu_id = sc.stu_id
        group by stu_id,stu_name
        having cnt = sc

        select si.stu_id,si.stu_name
        from student_info si join score_info sc on si.stu_id = sc.stu_id
        group by stu_id,stu_name
        having max(score)<60

    -- 4.两门以上不及格学号及平均成绩
        select stu_id,
                avg(score) avg_score
        from score_info si
        group by stu_id
        having sum(if(score<60,1,0))>=2
        having count(if(score<60,course_id,null))>=2

    -- 5.李体音老师教所有课的同学的学号姓名

        select t1.stu_id,
                t1.stu_name
        from student_info t1
        where course_id in
        (select course_id
        from teacher_info ti join course_info ci on ti.tea_id = ci.tea_id
        where tea_name = '李体音')
        join student_info t2 on t1.stu_id = t2.stu_id
        group by stu_id, stu_name
        having count(t1.course_id)=2


    -- 6.学李体音老师任意一门课程学号姓名
    -- 7.没学过李体音老师任意一门课程学号姓名 --> not in ; stu_id学过任意一门课程
    -- 8.至少有一门课与学号为'001'学生所选课相同的学号姓名
    select si.stu_id,
            si.stu_name
    from student_info si
    join score_info sc on si.stu_id = sc.stu_id
    where course_id in (
        select course_id
        from score_info
        where stu_id = '001'
    )
    group by stu_id,stu_name

        
0 评论
        
    

                    
                

                
            

    
    
App 内打开
AcWing 22. Hive SQL 原题链接 简单

Hive SQL

0 评论

AcWing 22. Hive SQL 原题链接简单