Oracle 中文记录 及 乱码 判断 说明 .(转)
一. 判断记录中是否有中文
这个可以使用Length 和 Lengthb 函数。 这个在blog里有说明:
Oracle Length 和 Lengthb 函数说明
这种方法使用与字符集为GBK的情况,其他情况不使用。在GBK 字符集下,中文的length 是1个字符,而lengthb 是2个byte。
示例:
select * from t where length(c1) != lengthb(c1);
二. 判断记录中是否有乱码
这里需要使用asciistr 函数,ASCIISTR 的官网说明:
ASCIISTR takesas its argument a string, or an expression that resolves to a string, in anycharacter set and returns an ASCII version of the string in the databasecharacter set. Non-ASCII characters are converted to the form. \xxxx, where xxxx representsa UTF-16 code unit.
ASCIISTR函数会把非ASCII 的字符转换成\xxxx 的格式,xxxx 是UTF-16的code unit。
示例:
SQL> select asciistr('/\/Davecome from 安?庆?') as str from dual;
/\005C/Dave come from \5B89?\5E86?
这里的’安’ 被转换成了\5B89,‘庆’被转换成了\5E86。
这里要注意一个特殊字符“\”,当它出现的时候转换后的码为“\005C”。
当然,我们也可以使用UNISTR函数,把asciistr 的结果反转回来,如:
SQL>select UNISTR('\5E86') from dual;
庆
那么当我们的记录中存在中文乱码时,
select UNISTR('\FFFD') from dual;
?
注意一点:
执行以上SQL 不要在sqlplus 里执行,sqlplus 受本地环境影响。到第三方的工具(PL/SQL DEV 或者Toad)里测试。
那么当我们的中文记录变成乱码后,那么转成asciistr的值就会包含2种特殊符号: ? 和 \FFFD 对应的问号。我们只需要匹配这2种符号,就可以判断记录里是否有乱码了。
HelloJin 同学提供的判断乱码的存储过程:
view plaincopy to clipboardprint?
01.
02.CREATE OR REPLACE PROCEDURE MessyCode (pv_tbname IN VARCHAR2,
03. pv_owner IN VARCHAR2)
04.IS
05. num NUMBER;
06. p_flag NUMBER;
07. p_flag2 NUMBER;
08. p_flagw1 NUMBER;
09. p_flagw2 NUMBER;
10. p_col VARCHAR2 (30);
11. p_str VARCHAR2 (1000);
12. p_tbname VARCHAR2 (30);
13. p_owner VARCHAR2 (30);
14.BEGIN
15. p_tbname := pv_tbname;
16. p_owner := pv_owner;
17. p_flagw1 := 0;
18. p_flagw2 := 0;
19.
20. -- p_tbname:='LANK_F1';
21. -- p_owner:='SYS';
22. --dbms_output.put_line(p_tbname||p_owner);
23. SELECT MAX (COLUMN_ID)
24. INTO p_flag
25. FROM dba_tab_columns
26. WHERE table_name = p_tbname AND wner = p_owner;
27.
28. -- dbms_output.put_line('Total:'||p_flag);
29. EXECUTE IMMEDIATE 'alter session enable parallel dml';
30.
31. WHILE p_flag > 0
32. LOOP
33. SELECT column_name
34. INTO p_col
35. FROM dba_tab_columns
36. WHERE table_name = p_tbname AND COLUMN_ID = p_flag AND wner = p_owner;
37.
38.--用来判断是否有中文,并且表的记录数小于<10000
39. EXECUTE IMMEDIATE
40. 'select sum(length('
41. || p_col
42. || ')) , sum(lengthb('
43. || p_col
44. || ')) from '
45. || p_owner
46. || '.'
47. || p_tbname
48. || ' where rownum<10000'
49. INTO p_flagw1, p_flagw2;
50.
51.--如果有中文,在判断乱码,这里启用了8个并行查询
52. -- dbms_output.put_line(p_str);
53. IF p_flagw1 <> p_flagw2
54. THEN
55. EXECUTE IMMEDIATE
56. 'select count('
59. || p_col
60. || ') from '
61. || p_owner
62. || '.'
63. || p_tbname
64. || ' where asciistr('
65. || p_col
66. || ') like ''%??%'' or asciistr('
67. || p_col
68. || ') like ''%\FFFD%'' '
69. INTO p_flag2;
70.
71. IF p_flag2 > 0
72. THEN
73. DBMS_OUTPUT.put_line (
74. p_owner
75. || '.'
76. || p_tbname
77. || ' Have:'
78. || p_flag
79. || ','
80. || p_col);
81.
82. INSERT INTO resout
83. VALUES (p_owner,
84. p_tbname,
85. p_flag,
86. p_col);
87.
88. COMMIT;
89. END IF;
90. END IF;
91.
92. --init
93. p_flag2 := 0;
94. p_flag := p_flag - 1;
95. END LOOP;
96.
97. INSERT INTO table_temp
98. VALUES (p_owner, p_tbname);
99.
100. COMMIT;
101.END MessyCode;
CREATE OR REPLACE PROCEDURE MessyCode (pv_tbname IN VARCHAR2,
pv_owner IN VARCHAR2)
IS
num NUMBER;
p_flag NUMBER;
p_flag2 NUMBER;
p_flagw1 NUMBER;
p_flagw2 NUMBER;
p_col VARCHAR2 (30);
p_str VARCHAR2 (1000);
p_tbname VARCHAR2 (30);
p_owner VARCHAR2 (30);
BEGIN
p_tbname := pv_tbname;
p_owner := pv_owner;
p_flagw1 := 0;
p_flagw2 := 0;
-- p_tbname:='LANK_F1';
-- p_owner:='SYS';
--dbms_output.put_line(p_tbname||p_owner);
SELECT MAX (COLUMN_ID)
INTO p_flag
FROM dba_tab_columns
WHERE table_name = p_tbname AND wner = p_owner;
-- dbms_output.put_line('Total:'||p_flag);
EXECUTE IMMEDIATE 'alter session enable parallel dml';
WHILE p_flag > 0
LOOP
SELECT column_name
INTO p_col
FROM dba_tab_columns
WHERE table_name = p_tbname AND COLUMN_ID = p_flag AND wner = p_owner;
--用来判断是否有中文,并且表的记录数小于<10000
EXECUTE IMMEDIATE
'select sum(length('
|| p_col
|| ')) , sum(lengthb('
|| p_col
|| ')) from '
|| p_owner
|| '.'
|| p_tbname
|| ' where rownum<10000'
INTO p_flagw1, p_flagw2;
--如果有中文,在判断乱码,这里启用了8个并行查询
-- dbms_output.put_line(p_str);
IF p_flagw1 <> p_flagw2
THEN
EXECUTE IMMEDIATE
'select count('
|| p_col
|| ') from '
|| p_owner
|| '.'
|| p_tbname
|| ' where asciistr('
|| p_col
|| ') like ''%??%'' or asciistr('
|| p_col
|| ') like ''%\FFFD%'' '
INTO p_flag2;
IF p_flag2 > 0
THEN
DBMS_OUTPUT.put_line (
p_owner
|| '.'
|| p_tbname
|| ' Have:'
|| p_flag
|| ','
|| p_col);
INSERT INTO resout
VALUES (p_owner,
p_tbname,
p_flag,
p_col);
COMMIT;
END IF;
END IF;
--init
p_flag2 := 0;
p_flag := p_flag - 1;
END LOOP;
INSERT INTO table_temp
VALUES (p_owner, p_tbname);
COMMIT;
END MessyCode;
该存储过程由HelloJin同学编写,感谢他的辛勤劳动。
这里的核心语句是:
view plaincopy to clipboardprint?
01.|| ' where asciistr('
02.|| p_col
03.|| ') like ''%??%'' or asciistr('
04.|| p_col
05.|| ') like ''%\FFFD%'' '
06.INTO p_flag2;
|| ' where asciistr('
|| p_col
|| ') like ''%??%'' or asciistr('
|| p_col
|| ') like ''%\FFFD%'' '
INTO p_flag2;
Oracle Parallel Execution(并行执行)
如果是因为复制导致的乱码,可以和源库进行匹配,然后使用replace函数替换掉乱码。
下面的SQL 可以处理库之间复制导致的乱码:
view plaincopy to clipboardprint?
01.
02.DECLARE
03. exec_block VARCHAR2 (32767);
04. module_block VARCHAR2 (32767)
05. := '
06. declare cur_ucs sys_refcursor;
07. type type_tab_ucs1 is table of $$1.$$2.$$3%type;
08. v_tab_ucs1 type_tab_ucs1;
09. type type_tab_pk1 is table of $$1.$$2.$$4%type;
10. v_tab_pk1 type_tab_pk1;
11.
12.begin
13. open cur_ucs for select $$3,$$4 from ;
14. loop
15. fetch cur_ucs bulk collect into v_tab_ucs1, v_tab_pk1 limit 10000;
16. exit when v_tab_ucs1.count = 0;
17.
18. forall i in 1 .. v_tab_ucs1.count
19. update $$1.$$2
20. set $$1.$$2.$$3 = v_tab_ucs1(i)
21. where $$1.$$2.$$4 = v_tab_pk1(i);
22. commit;
23.end loop;
24.close cur_ucs;
25.end; ';
26.BEGIN
27. FOR i IN (SELECT table_name, col_name FROM tab_col)
28. LOOP
29. exec_block := REPLACE (module_block, '$$1', i.owner);
30. exec_block := REPLACE (module_block, '$$2', i.table_name);
31.
32. EXECUTE IMMEDIATE exec_block;
33.
34. INSERT INTO recordupdate
35. VALUES (i.owner, i.table_name, i.col_name);
36. END LOOP;
37.END;
DECLARE
exec_block VARCHAR2 (32767);
module_block VARCHAR2 (32767)
:= '
declare cur_ucs sys_refcursor;
type type_tab_ucs1 is table of $$1.$$2.$$3%type;
v_tab_ucs1 type_tab_ucs1;
type type_tab_pk1 is table of $$1.$$2.$$4%type;
v_tab_pk1 type_tab_pk1;
begin
open cur_ucs for select $$3,$$4 from ;
loop
fetch cur_ucs bulk collect into v_tab_ucs1, v_tab_pk1 limit 10000;
exit when v_tab_ucs1.count = 0;
forall i in 1 .. v_tab_ucs1.count
update $$1.$$2
set $$1.$$2.$$3 = v_tab_ucs1(i)
where $$1.$$2.$$4 = v_tab_pk1(i);
commit;
end loop;
close cur_ucs;
end; ';
BEGIN
FOR i IN (SELECT table_name, col_name FROM tab_col)
LOOP
exec_block := REPLACE (module_block, '$$1', i.owner);
exec_block := REPLACE (module_block, '$$2', i.table_name);
EXECUTE IMMEDIATE exec_block;
INSERT INTO recordupdate
VALUES (i.owner, i.table_name, i.col_name);
END LOOP;
END;
该代码同样由HelloJin 同学提供。
其他文档:
Common Character Sets Found in InternetEmails [ID 477967.1]