Filename | /usr/local/lib/perl5/site_perl/Mail/SpamAssassin/Locales.pm |
Statements | Executed 12 statements in 1.15ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 43µs | 53µs | BEGIN@20 | Mail::SpamAssassin::Locales::
1 | 1 | 1 | 33µs | 64µs | BEGIN@21 | Mail::SpamAssassin::Locales::
1 | 1 | 1 | 28µs | 87µs | BEGIN@23 | Mail::SpamAssassin::Locales::
1 | 1 | 1 | 23µs | 110µs | BEGIN@25 | Mail::SpamAssassin::Locales::
1 | 1 | 1 | 19µs | 32µs | BEGIN@22 | Mail::SpamAssassin::Locales::
0 | 0 | 0 | 0s | 0s | is_charset_ok_for_locales | Mail::SpamAssassin::Locales::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # <@LICENSE> | ||||
2 | # Licensed to the Apache Software Foundation (ASF) under one or more | ||||
3 | # contributor license agreements. See the NOTICE file distributed with | ||||
4 | # this work for additional information regarding copyright ownership. | ||||
5 | # The ASF licenses this file to you under the Apache License, Version 2.0 | ||||
6 | # (the "License"); you may not use this file except in compliance with | ||||
7 | # the License. You may obtain a copy of the License at: | ||||
8 | # | ||||
9 | # http://www.apache.org/licenses/LICENSE-2.0 | ||||
10 | # | ||||
11 | # Unless required by applicable law or agreed to in writing, software | ||||
12 | # distributed under the License is distributed on an "AS IS" BASIS, | ||||
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
14 | # See the License for the specific language governing permissions and | ||||
15 | # limitations under the License. | ||||
16 | # </@LICENSE> | ||||
17 | |||||
18 | package Mail::SpamAssassin::Locales; | ||||
19 | |||||
20 | 2 | 78µs | 2 | 64µs | # spent 53µs (43+11) within Mail::SpamAssassin::Locales::BEGIN@20 which was called:
# once (43µs+11µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 20 # spent 53µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@20
# spent 11µs making 1 call to strict::import |
21 | 2 | 61µs | 2 | 95µs | # spent 64µs (33+31) within Mail::SpamAssassin::Locales::BEGIN@21 which was called:
# once (33µs+31µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 21 # spent 64µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@21
# spent 31µs making 1 call to warnings::import |
22 | 2 | 82µs | 2 | 45µs | # spent 32µs (19+13) within Mail::SpamAssassin::Locales::BEGIN@22 which was called:
# once (19µs+13µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 22 # spent 32µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@22
# spent 13µs making 1 call to bytes::import |
23 | 2 | 75µs | 2 | 146µs | # spent 87µs (28+59) within Mail::SpamAssassin::Locales::BEGIN@23 which was called:
# once (28µs+59µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 23 # spent 87µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@23
# spent 59µs making 1 call to re::import |
24 | |||||
25 | 1 | 2µs | # spent 110µs (23+87) within Mail::SpamAssassin::Locales::BEGIN@25 which was called:
# once (23µs+87µs) by Mail::SpamAssassin::Plugin::HTMLEval::BEGIN@26 at line 27 | ||
26 | %charsets_for_locale | ||||
27 | 1 | 817µs | 2 | 197µs | }; # spent 110µs making 1 call to Mail::SpamAssassin::Locales::BEGIN@25
# spent 87µs making 1 call to vars::import |
28 | |||||
29 | ########################################################################### | ||||
30 | |||||
31 | # A mapping of known country codes to frequent charsets used therein. | ||||
32 | # note that the ISO and CP charsets will already have been permitted, | ||||
33 | # so only "unusual" charsets should be listed here. | ||||
34 | # | ||||
35 | # Country codes should be lowercase, charsets uppercase. | ||||
36 | # | ||||
37 | # A good listing is in /usr/share/config/charsets from KDE 2.2.1 | ||||
38 | # | ||||
39 | 1 | 18µs | %charsets_for_locale = ( | ||
40 | |||||
41 | # Japanese: Peter Evans writes: iso-2022-jp = rfc approved, rfc 1468, created | ||||
42 | # by Jun Murai in 1993 back when he didnt have white hair! rfc approved. | ||||
43 | # (rfc 2237) <-- by M$. | ||||
44 | 'ja' => 'EUCJP JISX020119760 JISX020819830 JISX020819900 JISX020819970 '. | ||||
45 | 'JISX021219900 JISX021320001 JISX021320002 SHIFT_JIS SHIFTJIS '. | ||||
46 | 'ISO2022JP SJIS JIS7 JISX0201 JISX0208 JISX0212', | ||||
47 | |||||
48 | # Korea | ||||
49 | 'ko' => 'EUCKR KSC56011987', | ||||
50 | |||||
51 | # Cyrillic: Andrew Vasilyev notes CP866 is common (bug 2278) | ||||
52 | 'ru' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
53 | 'ka' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
54 | 'tg' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
55 | 'be' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
56 | 'uk' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
57 | 'bg' => 'KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866', | ||||
58 | |||||
59 | # Thai | ||||
60 | 'th' => 'TIS620', | ||||
61 | |||||
62 | # Chinese (simplified and traditional). Peter Evans writes: new government | ||||
63 | # mandated chinese encoding = gb18030, chinese mail is supposed to be | ||||
64 | # iso-2022-cn (rfc 1922?) | ||||
65 | 'zh' => 'GB1988 GB2312 GB231219800 GB18030 GBK BIG5HKSCS BIG5 EUCTW ISO2022CN', | ||||
66 | |||||
67 | # Chinese Traditional charsets only | ||||
68 | 'zh.big5' => 'BIG5HKSCS BIG5 EUCTW', | ||||
69 | |||||
70 | # Chinese Simplified charsets only | ||||
71 | 'zh.gb2312' => 'GB1988 GB2312 GB231219800 GB18030 GBK ISO2022CN', | ||||
72 | ); | ||||
73 | |||||
74 | ########################################################################### | ||||
75 | |||||
76 | sub is_charset_ok_for_locales { | ||||
77 | my ($cs, @locales) = @_; | ||||
78 | |||||
79 | $cs = uc $cs; $cs =~ s/[^A-Z0-9]//g; | ||||
80 | $cs =~ s/^3D//gs; # broken by quoted-printable | ||||
81 | $cs =~ s/:.*$//gs; # trim off multiple charsets, just use 1st | ||||
82 | |||||
83 | study $cs; # study is a no-op since perl 5.16.0, eliminating related bugs | ||||
84 | #warn "JMD $cs"; | ||||
85 | |||||
86 | # always OK (the net speaks mostly roman charsets) | ||||
87 | return 1 if ($cs eq 'USASCII'); | ||||
88 | return 1 if ($cs =~ /^ISO8859/); | ||||
89 | return 1 if ($cs =~ /^ISO10646/); | ||||
90 | return 1 if ($cs =~ /^UTF/); | ||||
91 | return 1 if ($cs =~ /^UCS/); | ||||
92 | return 1 if ($cs =~ /^CP125/); | ||||
93 | return 1 if ($cs =~ /^WINDOWS/); # argh, Windows | ||||
94 | return 1 if ($cs eq 'IBM852'); | ||||
95 | return 1 if ($cs =~ /^UNICODE11UTF[78]/); # wtf? never heard of it | ||||
96 | return 1 if ($cs eq 'XUNKNOWN'); # added by sendmail when converting to 8bit | ||||
97 | return 1 if ($cs eq 'ISO'); # Magellan, sending as 'charset=iso 8859-15'. grr | ||||
98 | |||||
99 | foreach my $locale (@locales) { | ||||
100 | if (!defined($locale) || $locale eq 'C') { $locale = 'en'; } | ||||
101 | $locale =~ s/^([a-z][a-z]).*$/$1/; # zh_TW... => zh | ||||
102 | |||||
103 | my $ok_for_loc = $charsets_for_locale{$locale}; | ||||
104 | next if (!defined $ok_for_loc); | ||||
105 | |||||
106 | if ($ok_for_loc =~ /(?:^| )\Q${cs}\E(?:$| )/) { | ||||
107 | return 1; | ||||
108 | } | ||||
109 | } | ||||
110 | |||||
111 | return 0; | ||||
112 | } | ||||
113 | |||||
114 | 1 | 18µs | 1; |