多参表达式Hive UDF

支持的操作符

  • ''(空串):跳过,即该列不参与筛选
  • =:等于
  • !=:不等于
  • range:区间内,range[n,m]表示 between n and m
  • nrange:区间外,即not between and
  • in:集合内,in(n,m,j,k)表示 in ('n','m','j','k')
  • nin:集合外,即not in
  • regex:正则表达式,regex(^a.*)表示以字母a开头

输入与输出

  • 输入:参数个数成双。每一对参数中,参数1为任意类型的筛选列,参数2为字符串类型的条件列。
  • 输出:true or false
select
    -- equals the empty string
    multi_exp('', '=') as t1,
    -- empty condition: the comparison is skipped
    multi_exp('abc', '') as t2,
    -- numbers are converted to string before comparing
    multi_exp(123, '=123') as t3,
    -- string type
    multi_exp('abc', '=abc') as t4,
    multi_exp('abc', '!=aaa') as t5,
    multi_exp('23', 'range[1,100]') as t6,
    multi_exp('23', 'nrange[1,100]') as t7,
    multi_exp('abc', 'in(abc,aaa,bbb,ccc)') as t8,
    multi_exp('abc', 'nin(abc,aaa,bbb,ccc)') as t9,
    multi_exp('abc', 'regex(^a.+)') as t10,
    -- several (value, condition) pairs in one call; all of them must hold
    multi_exp('abc', '=abc', 'abc', '!=aaa', '23', 'range[1,100]', 'abc', 'in(abc,aaa,bbb,ccc)', 'abc', 'regex(^a.+)') as t11,
    -- map type
    multi_exp(str_to_map('k1:v1,k2:v2,k3:v3', ',', ':'), '{"k1":"=v1","k2":"=v2"}') as t12,  -- k1=v1 and k2=v2: true
    multi_exp(str_to_map('k1:v1,k2:v2,k3:v3', ',', ':'), '{"k1":"=v1","k2":"=v3"}') as t13,  -- k1=v1 but k2!=v3: false
    -- JSON-formatted string
    multi_exp('{"k1":"v1", "k2":"v2", "k3":"v3"}', '{"k1":"=v1","k2":"=v2"}') as t14,  -- k1=v1 and k2=v2: true
    multi_exp('{"k1":"v1", "k2":"v2", "k3":"v3"}', '{"k1":"=v1","k3":"=v2"}') as t15   -- k1=v1 but k3!=v2: false
;

源码

import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.hive.ql.exec.UDF;import java.util.*;
import java.util.regex.Pattern;public class MultiExpUDF extends UDF {private static final String IGNORE = "";private static final String OPERATOR_EQ = "=";private static final String OPERATOR_NE = "!=";private static final String OPERATOR_RANGE = "range";private static final String OPERATOR_NRANGE = "nrange";private static final String OPERATOR_IN = "in";private static final String OPERATOR_NIN = "nin";private static final String OPERATOR_REGEX = "regex";public boolean evaluate(Object... args) {if (args.length % 2 != 0) {return false;}for (int i = 0; i < args.length; i += 2) {Object originalValue = args[i];Object conditionValue = args[i + 1];// 明细列处理 空值转空串originalValue = originalValue == null ? "" : originalValue;// 跳过指定条件keyif (IGNORE.equals(conditionValue)) {continue;}try {if (originalValue instanceof Map) {@SuppressWarnings("unchecked")Map<String, String> originalMap = (Map<String, String>) originalValue;String conditionStr = conditionValue.toString();if (!conditionStr.startsWith("{") || !conditionStr.endsWith("}")) {return false;}JSONObject conditionJson = JSONObject.parseObject(conditionStr);Set<String> keys = conditionJson.keySet();for (String key : keys) {String condition = conditionJson.getString(key);String original = originalMap.get(key);// 处理map中key为null或空的情况original = original == null ? "" : original;if (compares(original, condition)) {return false;}}} else {String originalStr = originalValue.toString();try {JSONObject originalJson = JSONObject.parseObject(originalStr);String conditionStr = conditionValue.toString();if (conditionStr.startsWith("{") && conditionStr.endsWith("}")) {JSONObject conditionJson = JSONObject.parseObject(conditionStr);Set<String> keys = conditionJson.keySet();for (String key : keys) {String condition = conditionJson.getString(key);String original = originalJson.getString(key);// 处理json中key为null或空的情况original = original == null ? 
"" : original;if (compares(original, condition)) {return false;}}} else {if (compares(originalStr, conditionStr)) {return false;}}} catch (Exception e) {if (compares(originalStr, conditionValue.toString())) {return false;}}}} catch (Exception e) {return false;}}return true;}private boolean compares(String original, String condition) {if (condition.startsWith(OPERATOR_EQ)) {String expected = condition.substring(1);if (expected.isEmpty()) {return !original.isEmpty();}return !original.equals(expected);} else if (condition.startsWith(OPERATOR_NE)) {String expected = condition.substring(2);if (expected.isEmpty()) {return original.isEmpty();}return original.equals(expected);} else if (condition.startsWith(OPERATOR_RANGE)) {String rangeStr = condition.substring(OPERATOR_RANGE.length() + 1, condition.length() - 1);String[] range = rangeStr.split(",");if (range.length == 2) {try {Comparable<?> originalVal = getComparable(original);Comparable<?> lower = getComparable(range[0]);Comparable<?> upper = getComparable(range[1]);if (originalVal.getClass().equals(lower.getClass()) && originalVal.getClass().equals(upper.getClass())) {@SuppressWarnings("unchecked")int lowerCompare = ((Comparable<Object>) originalVal).compareTo(lower);@SuppressWarnings("unchecked")int upperCompare = ((Comparable<Object>) originalVal).compareTo(upper);return lowerCompare < 0 || upperCompare > 0;}} catch (Exception e) {return true;}}return true;} else if (condition.startsWith(OPERATOR_NRANGE)) {String rangeStr = condition.substring(OPERATOR_NRANGE.length() + 1, condition.length() - 1);String[] range = rangeStr.split(",");if (range.length == 2) {try {Comparable<?> originalVal = getComparable(original);Comparable<?> lower = getComparable(range[0]);Comparable<?> upper = getComparable(range[1]);if (originalVal.getClass().equals(lower.getClass()) && originalVal.getClass().equals(upper.getClass())) {@SuppressWarnings("unchecked")int lowerCompare = ((Comparable<Object>) 
originalVal).compareTo(lower);@SuppressWarnings("unchecked")int upperCompare = ((Comparable<Object>) originalVal).compareTo(upper);return lowerCompare >= 0 && upperCompare <= 0;}} catch (Exception e) {return true;}}return true;} else if (condition.startsWith(OPERATOR_IN)) {String inStr = condition.substring(OPERATOR_IN.length() + 1, condition.length() - 1);List<String> values = Arrays.asList(inStr.split(","));return !values.contains(original);} else if (condition.startsWith(OPERATOR_NIN)) {String ninStr = condition.substring(OPERATOR_NIN.length() + 1, condition.length() - 1);List<String> values = Arrays.asList(ninStr.split(","));return values.contains(original);} else if (condition.startsWith(OPERATOR_REGEX)) {String patternStr = condition.substring(OPERATOR_REGEX.length() + 1, condition.length() - 1);Pattern pattern = Pattern.compile(patternStr);return !pattern.matcher(original).find();}return true;}private Comparable<?> getComparable(String value) {try {return Integer.parseInt(value);} catch (NumberFormatException e) {try {return Double.parseDouble(value);} catch (NumberFormatException ex) {return value;}}}
//public static void main(String[] args) {MultiExpUDF udf = new MultiExpUDF();// 测试 普通字符串boolean result1 = udf.evaluate( "1", null);System.out.println("非 map 或 json 字符串测试结果: " + result1);}
//        // 测试 含正则
//        boolean result2 = udf.evaluate("abc", "=abc", "aabcdef", "regex(cd.*)");
//        System.out.println("含正则输入测试结果: " + result2);
//
//        // 测试 map 类型
//        Map<String, String> map = new HashMap<>();
//        map.put("key1", "abc");
//        map.put("key2", "efg");
//        map.put("key3", "ab");
//        boolean result3 = udf.evaluate(map, "{\"key1\":\"=abc\",\"key2\":\"!=bcd\",\"key3\":\"in(ab,b)\"}");
//        System.out.println("map 类型测试结果: " + result3);
//
//        // 测试 json 格式字符串测试
//        String jsonStr = "{\"key1\":\"abc\",\"key2\":\"efg\",\"key3\":\"5\"}";
//        boolean result4 = udf.evaluate(jsonStr, "{\"key1\":\"=abc\",\"key2\":\"!=bcd\",\"key3\":\"range[5,10]\"}");
//        System.out.println("json 格式字符串测试结果: " + result4);
//
//        // 测试 map 中 key 为空的情况
//        Map<String, String> map2 = new HashMap<>();
//        map2.put("key1", "abc");
//        map2.put("key2", null);
//        boolean result6 = udf.evaluate(map2, "{\"key1\":\"=abc\",\"key2\":\"=\"}");
//        System.out.println("测试 map 中 key 为空的情况: " + result6);
//
//        // 测试 json 中 key 为空的情况
//        String jsonStr2 = "{\"key1\":\"abc\",\"key2\":null}";
//        boolean result7 = udf.evaluate(jsonStr2, "{\"key1\":\"=abc\",\"key3\":\"=\"}");
//        System.out.println("测试 json 中 key 为空的情况: " + result7);
//
//        // 测试 not in 功能
//        boolean result8 = udf.evaluate("abc1", "nin(abc,def,ghi)");
//        System.out.println("测试 not in 功能结果: " + result8);
//
//        // 测试 not between 功能
//        boolean result9 = udf.evaluate("3", "nrange[4,6]");
//        System.out.println("测试 not between 功能结果: " + result9);
//    }
}

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mfbz.cn/a/438.html

如若内容造成侵权/违法违规/事实不符,请联系我们进行投诉反馈qq邮箱809451989@qq.com,一经查实,立即删除!

相关文章

Flink读取Kafka写入Paimon

Flink SQL -- 1）注册 Paimon 源 CREATE CATALOG paimon_hive WITH(type paimon,warehouse hdfs://xxxxx/paimon,metastore hive,hive-conf-dir /xxxxx/conf,uri thrift://域名1:9083,thrift://域名2:9083);-- 2）声明 Kafka 源 create table kafkaS…

【开源工具】:基于PyQt5的智能网络驱动器映射工具开发全流程(附源码)

🔗 【开源工具】：基于PyQt5的智能网络驱动器映射工具开发全流程 🌈 个人主页：创客白泽 - CSDN博客 🔥 系列专栏：🐍《Python开源项目实战》 💡 热爱不止于代码，热情源自每…

MySQL 索引学习笔记

1.二叉树，红黑树，B 树，B+树 二叉树：就是每个节点最多只能有两个子节点的树； 红黑树：就是自平衡二叉搜索树，红黑树通过以下五个规则构建： 1.节点只能是红色或黑色； 2.根…

嵌入式通信模块实战新范式:基于虚拟仿真平台的NB-IoT核心技能训练——零硬件损耗的全栈式实验方案,重构物联网通信教学逻辑

在万物智联时代，NB-IoT通信模块已成为低功耗广域网的基石。BC260Y作为行业主流模组，其AT指令控制与网络诊断能力是嵌入式开发者的必备技能。传统教学受限于硬件采购成本、设备管理难度及实验风险，难以开展规模化训练。嵌入式仿真实验教学平台…

docker compose的变量使用说明

澄清一下 x-shared-env 和 &shared-api-worker-env 的作用范围&#xff1a; 核心概念&#xff1a;Docker Compose 配置 vs 容器环境 x-shared-env: &shared-api-worker-env 是 Docker Compose 配置的一部分 这些定义仅在 Docker Compose 解析 YAML 文件时 有效它们定义…

美团完整面经

面试岗位 面试的岗位 - 2025春季校招 【转正实习】软件服务工程师-后端方向&#xff08;成都 - 软硬件服务-SaaS事业部&#xff09; 一面&#xff08;业务初试 - 30min&#xff09; 问题 自我介绍 Java基础 HashMap底层用的数据结构是什么&#xff1f;是线程安全的吗&…

JAVA毕业设计227—基于SpringBoot+hadoop+spark+Vue的大数据房屋维修系统(源代码+数据库)

毕设所有选题： https://blog.csdn.net/2303_76227485/article/details/131104075 基于SpringBoot+hadoop+spark+Vue的大数据房屋维修系统(源代码+数据库)227 一、系统介绍 本项目前后端分离，分为业主、维修人员、管理员三种角色 1、业主： 登…

uniapp 页面栈一定深度后,回首页导航到新页面的解决方案

uniapp 页面栈一定深度后&#xff0c;回首页导航到新页面的解决方案 uniapp 页面导航解决方案 在 uniapp 中&#xff0c;要实现先弹出页面栈回到首页&#xff0c;然后再跳转到指定页面。 /*** description 后台选择链接专用跳转*/ interface Link {path: string;name?: stri…

java实现Google邮箱SMTP协议

一、开通Google的SMTP协议 在谷歌邮箱中开启IMAP访问 到google的设置中开启两步验证功能 在到 创建和管理应用专用密码 二、java中实现 引入maven <!--邮件--><dependency><groupId>com.sun.mail</groupId><artifactId>javax.mail</artif…

【2025最新】Adobe Illustrator下载保姆级安装教程(附官方下载链接)

文章目录 Adobe Illustrator 2024新功能介绍如何提高Adobe Illustrator的运行效率 Adobe Illustrator 这款神器相信不用我多介绍了吧&#xff0c;设计师们的得力助手&#xff01;最新的2025版据说功能和体验都提升了不少。这篇呢&#xff0c;算是我个人整理的一个超详细adobe i…

2025.06.11【Ribo-seq】|根据注释文件获取外显子及ORF序列

文章目录 一、准备材料二、提取外显子区间为BED文件1. 提取GTF中exon为BED 三、用bedtools提取外显子fasta四、后续拼接外显子为ORF序列五、流程总结 一、准备材料 基因组fasta&#xff08;如&#xff1a;genome.fa&#xff09;RiboCode生成的GTF文件&#xff08;如&#xff1…

python第48天打卡

知识点回顾&#xff1a; 随机张量的生成&#xff1a;torch.randn函数卷积和池化的计算公式&#xff08;可以不掌握&#xff0c;会自动计算的&#xff09;pytorch的广播机制&#xff1a;加法和乘法的广播机制 ps&#xff1a;numpy运算也有类似的广播机制&#xff0c;基本一致 作…