Java判别编码是utf-8或gbk

本文提供了Java代码示例,用于判断URL中经过百分号转义的字符串采用的是UTF-8编码还是GBK编码。

示例代码如下:

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.Text;

public class TravelLabel {
	// Detection rule: the whole string must consist of one or more units, each being
	// either a single char in 0x00-0x7f, or a char in 0xe0-0xef followed by exactly
	// two chars in 0x80-0xbf. Other lead values (e.g. 0xc2-0xdf, 0xf0 and above)
	// are not accepted by this pattern.
	protected static String encodeReg = "^(?:[\\x00-\\x7f]|[\\xe0-\\xef][\\x80-\\xbf]{2})+$";
	// The following three methods are used to decide whether a URL's encoding is UTF-8 or GBK.
	/**
	 * Tests whether a percent-escaped string looks like UTF-8 according to
	 * {@code encodeReg}.
	 *
	 * <p>The input is first passed through {@link #unescape(String)} so that every
	 * escape becomes a single char, and the entire result must then match the
	 * pattern held in {@code encodeReg}. Because that pattern requires at least
	 * one unit, an empty string yields {@code false}.
	 *
	 * @param string the (possibly percent-escaped) text to classify
	 * @return {@code true} if the unescaped text matches {@code encodeReg} in full,
	 *         {@code false} otherwise
	 */
	public static Boolean isUTF8(String string) {
		// Compile before unescaping to keep the original evaluation order.
		Pattern utf8Shape = Pattern.compile(encodeReg);
		return utf8Shape.matcher(unescape(string)).matches();
	}

	/**
	 * Reports whether {@code string} survives an encode/decode round trip through GBK.
	 *
	 * <p>The text is encoded to GBK bytes and decoded again with the same charset.
	 * If the result equals the input, every character was representable in GBK.
	 * In practice the JDK substitutes a replacement byte for characters it cannot
	 * encode, so such input no longer compares equal after the round trip.
	 *
	 * <p>Both directions must name "GBK" explicitly. Decoding with the platform
	 * default charset would make the answer depend on the JVM's configuration
	 * (with a UTF-8 default, any non-ASCII GBK text would be rejected).
	 *
	 * @param string the text to check; must not be {@code null}
	 * @return {@code true} if the GBK round trip reproduces {@code string} exactly
	 * @throws UnsupportedEncodingException if the runtime does not provide the GBK charset
	 * @throws NullPointerException if {@code string} is {@code null}
	 */
	public static boolean isGBK(String string) throws UnsupportedEncodingException {
		byte[] gbkBytes = string.getBytes("GBK");
		// Decode with the same charset used for encoding, never the platform default.
		return string.equals(new String(gbkBytes, "GBK"));
	}

	/**
	 * Decodes {@code %XX} and {@code %uXXXX} escapes, in the manner of JavaScript's
	 * {@code unescape}.
	 *
	 * <p>Each {@code %XX} (two hex digits) or {@code %uXXXX} (four hex digits)
	 * sequence is replaced by the single char with that numeric value; all other
	 * text is copied unchanged. A {@code %} that is not followed by a complete,
	 * valid hex sequence (for example a trailing {@code %}, {@code %4}, {@code %zz}
	 * or {@code %u12}) is kept literally instead of raising an exception.
	 *
	 * @param src the text to decode; must not be {@code null}
	 * @return the decoded text
	 * @throws NullPointerException if {@code src} is {@code null}
	 */
	public static String unescape(String src) {
		final int length = src.length();
		StringBuilder decoded = new StringBuilder(length);
		int pos = 0;
		while (pos < length) {
			int percent = src.indexOf('%', pos);
			if (percent < 0) {
				// No further escapes: copy the remainder and finish.
				decoded.append(src, pos, length);
				break;
			}
			// Copy the literal run that precedes the '%'.
			decoded.append(src, pos, percent);

			int codeUnit;
			int consumed;
			if (percent + 1 < length && src.charAt(percent + 1) == 'u') {
				codeUnit = hexValueAt(src, percent + 2, 4);
				consumed = 6;
			} else {
				codeUnit = hexValueAt(src, percent + 1, 2);
				consumed = 3;
			}

			if (codeUnit < 0) {
				// Malformed or truncated escape: keep the '%' and rescan after it.
				decoded.append('%');
				pos = percent + 1;
			} else {
				decoded.append((char) codeUnit);
				pos = percent + consumed;
			}
		}
		return decoded.toString();
	}

	/**
	 * Parses {@code count} hex digits of {@code src} starting at {@code start}.
	 *
	 * @return the parsed value, or {@code -1} if the range runs past the end of
	 *         {@code src} or contains a char that is not a hex digit
	 */
	private static int hexValueAt(String src, int start, int count) {
		if (start + count > src.length()) {
			return -1;
		}
		int value = 0;
		for (int i = start; i < start + count; i++) {
			int digit = Character.digit(src.charAt(i), 16);
			if (digit < 0) {
				return -1;
			}
			value = (value << 4) | digit;
		}
		return value;
	}
	
	// NOTE(review): the enclosing method's signature and the declarations of
	// sef, host, url, list and travelRes are not present in this listing.
	// Presumably sef maps a host to a comma-separated list of regexes, list is
	// a List<String> and travelRes a String accumulator - confirm against the
	// full source.
	try {
			// Split the entry looked up under host into individual regex sources.
			String arr[] = sef.get(host).split(",");
			for (int i = 0; i < arr.length; i++) {
				Pattern pa = Pattern.compile(arr[i]);
				Matcher m = pa.matcher(url);
				// Keep capture group 1 of the first match of each pattern in url.
				if (m.find()) {
					list.add(m.group(1));
				}
			}
			// Decode every captured value and append it to travelRes; all but the
			// last element get a trailing comma.
			for (int i = 0; i < list.size(); i++) {
				String str = list.get(i);
				if (i < list.size() - 1) {
					// UTF-8 is tested first; GBK is only tried when that check fails.
					if (isUTF8(str)) {
						travelRes = travelRes+URLDecoder.decode(str, "UTF-8").toString() + ",";
					} else if (isGBK(str)) {
						travelRes = travelRes+URLDecoder.decode(str, "GBK").toString() + ",";
					}
					// A value that passes neither check is skipped.
				} else {
					// Last element: same decoding, but no separator is appended.
					if (isUTF8(str)) {
						travelRes = travelRes+URLDecoder.decode(str, "UTF-8").toString() ;
					} else if (isGBK(str)) {
						travelRes = travelRes+URLDecoder.decode(str, "GBK").toString() ;
					}
				}
			}
		} catch (UnsupportedEncodingException e) {
			// Swallowed: execution continues at the return below with whatever
			// travelRes holds at this point.

		} catch (IllegalArgumentException e2) {
			// Swallowed as above.
		} catch (StringIndexOutOfBoundsException e3) {
			// Swallowed as above.

		} catch (NullPointerException e4) {
			// Swallowed as above (e.g. when sef.get(host) returns null).

		}
		return travelRes;
	}
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值