1200字范文,内容丰富有趣,写作的好帮手!
1200字范文 > 判断文件编码格式是UTF-8还是GBK

判断文件编码格式是UTF-8还是GBK

时间:2019-09-19 13:37:43

相关推荐

判断文件编码格式是UTF-8还是GBK

编码格式判断封装

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Heuristic byte-level checks for whether a file's content is valid GBK/GB2312 or valid UTF-8.
 *
 * <p>Both checks only validate byte sequences. A file containing nothing but 7-bit ASCII bytes
 * (including an empty file) satisfies both {@link #isGbk(File)} and {@link #isUtf8(File)}.
 */
public class Demo {

    /**
     * Checks whether every byte sequence in the file is valid GBK (which includes GB2312).
     *
     * <p>Bytes below {@code 0x80} are accepted as single-byte characters. Any other byte must
     * start a two-byte sequence that falls in one of these ranges:
     * {@code A1A1-A9FE}, {@code B0A1-F7FE}, {@code 8140-A0FE}, {@code AA40-FEA0},
     * {@code A840-A9A0} (a trail byte of {@code 0x7F} is never valid).
     *
     * @param file the file to inspect; must not be {@code null}
     * @return {@code true} if the whole content is valid GBK/GB2312; {@code false} if an invalid
     *     or truncated sequence is found, or if the file cannot be read
     */
    public static Boolean isGbk(File file) {
        byte[] buffer = readByteArrayData(file);
        if (buffer == null) {
            // The file could not be read, so nothing can be claimed about its encoding.
            return false;
        }
        int end = buffer.length;
        for (int i = 0; i < end; i++) {
            int lead = Byte.toUnsignedInt(buffer[i]);
            if (lead < 0x80) {
                continue; // single-byte ASCII
            }
            // A non-ASCII byte must be followed by a trail byte forming a valid pair.
            if (i + 1 >= end || !isGbkPair(lead, Byte.toUnsignedInt(buffer[i + 1]))) {
                return false;
            }
            i++; // skip the trail byte just validated
        }
        return true;
    }

    /**
     * Tells whether {@code lead}/{@code trail} form a two-byte sequence inside one of the
     * accepted GBK ranges. The pair is tested against every range, because several ranges share
     * the same lead bytes and differ only in the trail byte.
     */
    private static boolean isGbkPair(int lead, int trail) {
        boolean highTrail = trail >= 0xA1 && trail <= 0xFE;
        boolean lowTrail = trail >= 0x40 && trail <= 0xA0 && trail != 0x7F;
        if (lead >= 0x81 && lead <= 0xA0) {
            return lowTrail || highTrail; // 8140-A0FE
        }
        if (lead >= 0xA1 && lead <= 0xA7) {
            return highTrail; // A1A1-A7FE
        }
        if (lead == 0xA8 || lead == 0xA9) {
            return lowTrail || highTrail; // A840-A9A0 and A8A1-A9FE
        }
        if (lead >= 0xAA && lead <= 0xAF) {
            return lowTrail; // AA40-AFA0
        }
        if (lead >= 0xB0 && lead <= 0xF7) {
            return lowTrail || highTrail; // B040-F7A0 and B0A1-F7FE
        }
        if (lead >= 0xF8 && lead <= 0xFE) {
            return lowTrail; // F840-FEA0
        }
        return false; // 0x80 and 0xFF never start a valid pair
    }

    /**
     * Reads the entire content of a file into a byte array.
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and signalled to the caller by
     * a {@code null} return value; no exception is thrown for them.
     *
     * @param file the file to read; must not be {@code null}
     * @return the file's bytes, or {@code null} if the file does not exist or cannot be read
     *     completely
     */
    public static byte[] readByteArrayData(File file) {
        // try-with-resources closes the stream on every path, including read failures.
        try (InputStream in = new FileInputStream(file)) {
            ByteArrayOutputStream output = new ByteArrayOutputStream();
            byte[] chunk = new byte[1024 * 4];
            int len;
            while ((len = in.read(chunk)) != -1) {
                output.write(chunk, 0, len);
            }
            return output.toByteArray();
        } catch (IOException e) {
            // Covers FileNotFoundException too. Partial data is deliberately discarded:
            // judging an encoding from a truncated read would be misleading.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Checks whether the file's content is well-formed UTF-8.
     *
     * <p>Beyond the lead/continuation bit patterns, this rejects overlong encodings, UTF-16
     * surrogate code points and code points above U+10FFFF.
     *
     * @param file the file to inspect; must not be {@code null}
     * @return {@code true} if the whole content is well-formed UTF-8; {@code false} if an
     *     ill-formed or truncated sequence is found, or if the file cannot be read
     */
    public static Boolean isUtf8(File file) {
        byte[] buffer = readByteArrayData(file);
        if (buffer == null) {
            // The file could not be read, so nothing can be claimed about its encoding.
            return false;
        }
        int end = buffer.length;
        int i = 0;
        while (i < end) {
            int lead = Byte.toUnsignedInt(buffer[i]);
            int length = utf8SequenceLength(lead);
            if (length == 0 || i + length > end) {
                return false; // invalid lead byte or sequence cut off by end of data
            }
            if (length > 1) {
                // The second byte has a lead-dependent range; the rest are plain continuations.
                if (!isValidUtf8SecondByte(lead, Byte.toUnsignedInt(buffer[i + 1]))) {
                    return false;
                }
                for (int k = 2; k < length; k++) {
                    if ((buffer[i + k] & 0xC0) != 0x80) {
                        return false;
                    }
                }
            }
            i += length;
        }
        return true;
    }

    /**
     * Returns the total sequence length announced by a UTF-8 lead byte, or {@code 0} if the byte
     * cannot start a well-formed sequence (continuation byte, {@code C0}/{@code C1}, or
     * {@code F5} and above).
     */
    private static int utf8SequenceLength(int lead) {
        if (lead < 0x80) {
            return 1; // 0xxxxxxx
        }
        if (lead >= 0xC2 && lead <= 0xDF) {
            return 2; // 110xxxxx, excluding overlong C0/C1
        }
        if (lead >= 0xE0 && lead <= 0xEF) {
            return 3; // 1110xxxx
        }
        if (lead >= 0xF0 && lead <= 0xF4) {
            return 4; // 11110xxx, capped at U+10FFFF
        }
        return 0;
    }

    /**
     * Validates the byte following a multi-byte lead. Most leads accept any continuation byte
     * ({@code 80-BF}); a few narrow the range to exclude overlong forms, surrogates and values
     * above U+10FFFF.
     */
    private static boolean isValidUtf8SecondByte(int lead, int second) {
        int low = 0x80;
        int high = 0xBF;
        switch (lead) {
            case 0xE0:
                low = 0xA0; // below this would be an overlong 3-byte form
                break;
            case 0xED:
                high = 0x9F; // above this would encode a UTF-16 surrogate
                break;
            case 0xF0:
                low = 0x90; // below this would be an overlong 4-byte form
                break;
            case 0xF4:
                high = 0x8F; // above this would exceed U+10FFFF
                break;
            default:
                break;
        }
        return second >= low && second <= high;
    }
}

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。