2010年9月30日木曜日

[Perl] Encode::encode_utf8 utf8::downgrade Encode::_utf8_off のUTF8フラグ操作

Encode::encode_utf8、utf8::downgrade、Encode::_utf8_off の動作差異を確認したメモ。
これらを踏まえて、sv.cを見てみる。

(テスト1)
utf8::upgradeでUTF8フラグを立てて、
Encode::encode_utf8 utf8::downgrade Encode::_utf8_offで消す。
# Test 1: turn the UTF8 flag on with utf8::upgrade, then compare three ways of
# turning it off (Encode::encode_utf8, utf8::downgrade, Encode::_utf8_off).
use strict;
use warnings;
use Encode;
use Devel::Peek;
use Perl6::Say;

# This literal is compiled before `use utf8` takes effect, so it is held as
# the raw source bytes with no UTF8 flag (presumably 15 bytes, i.e. the file
# is saved as UTF-8 -- confirm against the Dump output).
my $str = 'あいうえお';
use utf8;
# In-place upgrade: every existing byte is treated as one Latin-1 character
# and re-encoded as UTF-8, so the buffer grows and the UTF8 flag is set.
utf8::upgrade( $str );
# Produces a new byte string (UTF8 flag off) from the characters of $str.
my $encode_utf8 = Encode::encode_utf8($str);
# Operate on copies so that $str itself stays flagged for the Dump below.
my $utf8_downgrade = $str;
# The second argument is FAIL_OK: when true, a string that cannot be
# downgraded is left unchanged and false is returned instead of dying.
utf8::downgrade( $utf8_downgrade, 1 );
my $utf8_off = $str;
# Clears only the UTF8 flag; the underlying bytes are not converted.
Encode::_utf8_off( $utf8_off );

# Print each variant, then dump its internals (FLAGS, PV, CUR, LEN).
say "str:$str";
Dump $str;
say "encode_utf8:$encode_utf8";
Dump $encode_utf8;
say "utf8_downgrade:$utf8_downgrade";
Dump $utf8_downgrade;
say "utf8_off:$utf8_off";
Dump $utf8_off;

(結果1)
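文字化け:Encode::encode_utf8 と Encode::_utf8_off で文字化け発生(utf8::upgrade が元のバイト列を Latin-1 として解釈して UTF-8 化するため、15 バイトが 30 バイトの二重エンコードになる。この 2 つはその 30 バイトをそのまま残し、utf8::downgrade だけが元の 15 バイトに戻す)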
UTF8フラグ:全て取れている
str:あいうえお
SV = PV(0x9f49b00) at 0x9f495dc
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8)
PV = 0x9f64800 "\303\243\302\201\302\202\303\243\302\201\302\204\303\243\302\201\302\206\303\243\302\201\302\210\303\243\302\201\302\212"\0 [UTF8 "\x{e3}\x{81}\x{82}\x{e3}\x{81}\x{84}\x{e3}\x{81}\x{86}\x{e3}\x{81}\x{88}\x{e3}\x{81}\x{8a}"]
CUR = 30
LEN = 31
encode_utf8:あいうえお
SV = PV(0x9f49b9c) at 0xa0477dc
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK)
PV = 0x9f64750 "\303\243\302\201\302\202\303\243\302\201\302\204\303\243\302\201\302\206\303\243\302\201\302\210\303\243\302\201\302\212"\0
CUR = 30
LEN = 32
utf8_downgrade:あいうえお
SV = PV(0x9f49b6c) at 0x9f495d0
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK)
PV = 0x9f64778 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0
CUR = 15
LEN = 32
utf8_off:あいうえお
SV = PV(0x9f49ae8) at 0x9f495a0
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK)
PV = 0x9f647a0 "\303\243\302\201\302\202\303\243\302\201\302\204\303\243\302\201\302\206\303\243\302\201\302\210\303\243\302\201\302\212"\0
CUR = 30
LEN = 32

(テスト2)
Encode::decode_utf8でUTF8フラグを立てて、
Encode::encode_utf8 utf8::downgrade Encode::_utf8_offで消す。
# Test 2: turn the UTF8 flag on with Encode::decode_utf8, then compare three
# ways of turning it off (Encode::encode_utf8, utf8::downgrade,
# Encode::_utf8_off).
use strict;
use warnings;
use Encode;
use Devel::Peek;
use Perl6::Say;

# This literal is compiled before `use utf8` takes effect, so it is held as
# the raw source bytes with no UTF8 flag (presumably UTF-8 encoded source --
# confirm against the Dump output).
my $str = 'あいうえお';
use utf8;
# Decode the UTF-8 bytes into a character string; the result carries the
# UTF8 flag.
$str = Encode::decode_utf8( $str );
# Produces a new byte string (UTF8 flag off) from the characters of $str.
my $encode_utf8 = Encode::encode_utf8($str);
# Operate on copies so that $str itself stays flagged for the Dump below.
my $utf8_downgrade = $str;
# The second argument is FAIL_OK: when true, a string that cannot be
# downgraded (it holds characters above 0xFF here) is left unchanged and
# false is returned instead of dying.
utf8::downgrade( $utf8_downgrade, 1 );
my $utf8_off = $str;
# Clears only the UTF8 flag; the underlying bytes are not converted.
Encode::_utf8_off( $utf8_off );

# Print each variant, then dump its internals (FLAGS, PV, CUR, LEN).
say "str:$str";
Dump $str;
say "encode_utf8:$encode_utf8";
Dump $encode_utf8;
say "utf8_downgrade:$utf8_downgrade";
Dump $utf8_downgrade;
say "utf8_off:$utf8_off";
Dump $utf8_off;

(結果2)
文字化け:なし
UTF8フラグ:utf8::downgradeだけ取れない(U+00FF を超える文字は 8 ビット表現に戻せないため downgrade は失敗し、第 2 引数 FAIL_OK=1 により die せず文字列はそのまま残る)
str:あいうえお
SV = PV(0x9c12b00) at 0x9c125dc
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8)
PV = 0x9cfd518 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0 [UTF8 "\x{3042}\x{3044}\x{3046}\x{3048}\x{304a}"]
CUR = 15
LEN = 16
encode_utf8:あいうえお
SV = PV(0x9cb22b0) at 0x9d107dc
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK)
PV = 0x9ce24f0 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0
CUR = 15
LEN = 16
utf8_downgrade:あいうえお
SV = PV(0x9c13214) at 0x9c125d0
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8)
PV = 0x9cca968 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0 [UTF8 "\x{3042}\x{3044}\x{3046}\x{3048}\x{304a}"]
CUR = 15
LEN = 16
utf8_off:あいうえお
SV = PV(0x9cb2274) at 0x9c125a0
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK)
PV = 0x9c2d750 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0
CUR = 15
LEN = 16

(テスト3)
Encode::_utf8_onでUTF8フラグを立てて、
Encode::encode_utf8 utf8::downgrade Encode::_utf8_offで消す。
# Test 3: turn the UTF8 flag on with Encode::_utf8_on, then compare three
# ways of turning it off (Encode::encode_utf8, utf8::downgrade,
# Encode::_utf8_off).
use strict;
use warnings;
use Encode;
use Devel::Peek;
use Perl6::Say;

# This literal is compiled before `use utf8` takes effect, so it is held as
# the raw source bytes with no UTF8 flag (presumably UTF-8 encoded source --
# confirm against the Dump output).
my $str = 'あいうえお';
use utf8;
# Sets only the UTF8 flag on the existing buffer; the bytes themselves are
# not converted.
Encode::_utf8_on( $str );
# Produces a new byte string (UTF8 flag off) from the characters of $str.
my $encode_utf8 = Encode::encode_utf8($str);
# Operate on copies so that $str itself stays flagged for the Dump below.
my $utf8_downgrade = $str;
# The second argument is FAIL_OK: when true, a string that cannot be
# downgraded (it holds characters above 0xFF here) is left unchanged and
# false is returned instead of dying.
utf8::downgrade( $utf8_downgrade, 1 );
my $utf8_off = $str;
# Clears only the UTF8 flag; the underlying bytes are not converted.
Encode::_utf8_off( $utf8_off );

# Print each variant, then dump its internals (FLAGS, PV, CUR, LEN).
say "str:$str";
Dump $str;
say "encode_utf8:$encode_utf8";
Dump $encode_utf8;
say "utf8_downgrade:$utf8_downgrade";
Dump $utf8_downgrade;
say "utf8_off:$utf8_off";
Dump $utf8_off;

(結果3)
文字化け:なし
UTF8フラグ:utf8::downgradeだけ取れない(U+00FF を超える文字は 8 ビット表現に戻せないため downgrade は失敗し、第 2 引数 FAIL_OK=1 により die せず文字列はそのまま残る)
str:あいうえお
SV = PV(0x8228b00) at 0x82285dc
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8)
PV = 0x823e088 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0 [UTF8 "\x{3042}\x{3044}\x{3046}\x{3048}\x{304a}"]
CUR = 15
LEN = 16
encode_utf8:あいうえお
SV = PV(0x8228b9c) at 0x83267dc
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK)
PV = 0x8245868 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0
CUR = 15
LEN = 16
utf8_downgrade:あいうえお
SV = PV(0x8228b6c) at 0x82285d0
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8)
PV = 0x8260018 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0 [UTF8 "\x{3042}\x{3044}\x{3046}\x{3048}\x{304a}"]
CUR = 15
LEN = 16
utf8_off:あいうえお
SV = PV(0x8228ae8) at 0x82285a0
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK)
PV = 0x82f84f0 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0
CUR = 15
LEN = 16

(テスト4)
use utf8でUTF8フラグを立てて、
Encode::encode_utf8 utf8::downgrade Encode::_utf8_offで消す。
# Test 4: let `use utf8` put the UTF8 flag on a string literal, then compare
# three ways of turning it off (Encode::encode_utf8, utf8::downgrade,
# Encode::_utf8_off).
use strict;
use warnings;
use Encode;
use Devel::Peek;
use Perl6::Say;

# Unlike the other tests, `use utf8` comes before the literal, so the literal
# is parsed as UTF-8 text and already carries the UTF8 flag.
use utf8;
my $str = 'あいうえお';
# Produces a new byte string (UTF8 flag off) from the characters of $str.
my $encode_utf8 = Encode::encode_utf8($str);
# Operate on copies so that $str itself stays flagged for the Dump below.
my $utf8_downgrade = $str;
# The second argument is FAIL_OK: when true, a string that cannot be
# downgraded (it holds characters above 0xFF here) is left unchanged and
# false is returned instead of dying.
utf8::downgrade( $utf8_downgrade, 1 );
my $utf8_off = $str;
# Clears only the UTF8 flag; the underlying bytes are not converted.
Encode::_utf8_off( $utf8_off );

# Print each variant, then dump its internals (FLAGS, PV, CUR, LEN).
say "str:$str";
Dump $str;
say "encode_utf8:$encode_utf8";
Dump $encode_utf8;
say "utf8_downgrade:$utf8_downgrade";
Dump $utf8_downgrade;
say "utf8_off:$utf8_off";
Dump $utf8_off;

(結果4)
文字化け:なし
UTF8フラグ:utf8::downgradeだけ取れない(U+00FF を超える文字は 8 ビット表現に戻せないため downgrade は失敗し、第 2 引数 FAIL_OK=1 により die せず文字列はそのまま残る)
str:あいうえお
SV = PV(0x9ddab00) at 0x9dda5e8
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8)
PV = 0x9df0088 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0 [UTF8 "\x{3042}\x{3044}\x{3046}\x{3048}\x{304a}"]
CUR = 15
LEN = 16
encode_utf8:あいうえお
SV = PV(0x9ddab9c) at 0x9dda5a0
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK)
PV = 0x9e8fe50 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0
CUR = 15
LEN = 16
utf8_downgrade:あいうえお
SV = PV(0x9ddab6c) at 0x9dda5f4
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK,UTF8)
PV = 0x9df7f90 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0 [UTF8 "\x{3042}\x{3044}\x{3046}\x{3048}\x{304a}"]
CUR = 15
LEN = 16
utf8_off:あいうえお
SV = PV(0x9ddaae8) at 0x9dda5d0
REFCNT = 1
FLAGS = (PADBUSY,PADMY,POK,pPOK)
PV = 0x9eaa4f0 "\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"\0
CUR = 15
LEN = 16

0 件のコメント: