将汉字转换为URL编码(UTF-8 百分号编码,如 %E4%BD%A0)以及反向解码

  1. 下载文档前请自行甄别文档内容的完整性,平台不提供额外的编辑、内容补充、找答案等附加服务。
  2. "仅部分预览"的文档,不可在线预览的部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
  3. 如文档侵犯您的权益,请联系客服反馈,我们会尽快为您处理(人工客服工作时间:9:00-18:30)。

/**
 * Converts between text and its UTF-8 percent-encoded form (for example
 * the character U+4F60 and the string {@code %E4%BD%A0}).
 *
 * <p>This is not a general-purpose URL encoder: {@link #toUtf8String(String)}
 * only escapes characters above U+00FF and leaves everything else (including
 * spaces, {@code '%'} and {@code '+'}) untouched. Because
 * {@link #unescape(String)} interprets {@code '+'} and valid {@code %XX}
 * sequences, a round trip is only lossless for input that contains neither.
 */
public class URLtoUTF8 {

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX = "0123456789ABCDEF".toCharArray();

    /** Charset used for both encoding and decoding. */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Percent-encodes every character above U+00FF as its UTF-8 byte sequence,
     * producing output of the form {@code %E4%BD%A0}.
     *
     * <p>Characters in the range U+0000..U+00FF are copied unchanged. The input
     * is processed by code point, so a supplementary character (a surrogate
     * pair such as an emoji) is encoded as a single four-byte sequence.
     *
     * @param s the text to encode
     * @return the text with all characters above U+00FF percent-encoded
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String toUtf8String(String s) {
        StringBuilder out = new StringBuilder(s.length());
        int i = 0;
        while (i < s.length()) {
            // Read a full code point so surrogate pairs stay together.
            int codePoint = s.codePointAt(i);
            i += Character.charCount(codePoint);

            if (codePoint <= 0xFF) {
                out.append((char) codePoint);
                continue;
            }

            byte[] utf8 = new String(Character.toChars(codePoint)).getBytes(UTF_8);
            for (byte b : utf8) {
                int unsigned = b & 0xFF; // bytes are signed in Java
                out.append('%')
                   .append(HEX[unsigned >>> 4])
                   .append(HEX[unsigned & 0x0F]);
            }
        }
        return out.toString();
    }

    /**
     * Decodes a percent-encoded string: each run of {@code %XX} escapes is
     * interpreted as UTF-8 bytes and converted back to characters, and
     * {@code '+'} is converted to a space.
     *
     * <p>Characters that are not part of an escape are copied unchanged.
     * A {@code '%'} that is not followed by two hexadecimal digits is treated
     * as a literal {@code '%'} instead of failing. Escaped byte runs that are
     * not well-formed UTF-8 are replaced with U+FFFD by the standard decoder.
     *
     * @param s the percent-encoded text
     * @return the decoded text
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String unescape(String s) {
        int length = s.length();
        StringBuilder out = new StringBuilder(length);

        // Every escaped byte consumes three input chars, so length / 3 is an
        // upper bound on the number of bytes that can ever be pending.
        byte[] pending = new byte[length / 3];
        int pendingCount = 0;

        int i = 0;
        while (i < length) {
            char c = s.charAt(i);

            if (c == '%' && i + 2 < length) {
                int high = hexValue(s.charAt(i + 1));
                int low = hexValue(s.charAt(i + 2));
                if (high >= 0 && low >= 0) {
                    pending[pendingCount++] = (byte) ((high << 4) | low);
                    i += 3;
                    continue;
                }
            }

            // A literal character ends the current run of escaped bytes;
            // decode that run before appending the literal.
            if (pendingCount > 0) {
                out.append(new String(pending, 0, pendingCount, UTF_8));
                pendingCount = 0;
            }
            out.append(c == '+' ? ' ' : c);
            i++;
        }

        if (pendingCount > 0) {
            out.append(new String(pending, 0, pendingCount, UTF_8));
        }
        return out.toString();
    }

    /**
     * Returns the value of an ASCII hexadecimal digit, or -1 if {@code c} is
     * not one. Only ASCII digits are accepted (unlike {@code Character.digit},
     * which also accepts other Unicode digits).
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /** Small demonstration: encodes one CJK character and decodes a sample. */
    public static void main(String[] args) {
        // The escape below denotes the single character U+4F60.
        System.out.println(URLtoUTF8.toUtf8String("\u4F60"));
        System.out.println(URLtoUTF8.unescape("%E4%BD%A0%20%E5%A5%BD"));
    }
}

相关文档
最新文档