#!/usr/bin/perl -- -*-Perl-*-
#
# Reformat tagged article records into a "<TAG>value" text layout.
#
# Input (files named on the command line, or STDIN) is read line by line.
# Each line is expected to look like
#
#     <delim>TAG<delim>content
#
# where <delim> is a backslash, either full-width (U+FF3C) or ASCII, and TAG
# is a two-character code that may be written in full-width letters.  A line
# whose tag is "ID" starts a new record; every other recognised line is
# appended to the current record.  Records are printed by output().
#
# NOTE(review): this script was written for jperl, which handled a legacy
# Japanese encoding natively.  This version runs on stock Perl and treats
# input and output as UTF-8.  If the data is EUC-JP or Shift_JIS, change the
# layer in the "use open" line below accordingly -- confirm against the data.

use strict;
use warnings;
use utf8;
use open qw(:std :encoding(UTF-8));

# Display name for each two-digit "AD" code (looked up after the code has
# been converted to half-width digits).
my %ad = (
    '01' => '1面',
    '02' => '2面',
    '03' => '3面',
    '04' => '解説',
    '05' => '社説',
    '07' => '国際',
    '08' => '経済',
    '10' => '特集',
    '12' => '総合',
    '13' => '家庭',
    '14' => '文化',
    '15' => '読書',
    '16' => '科学',
    '18' => '芸能',
    '35' => 'スポーツ',
    '41' => '社会',
);

# The record currently being accumulated: tag => array ref of values, in
# input order.
my %keyword;

# Field delimiter: full-width or ASCII backslash.
my $DELIM     = qr/[\\\x{FF3C}]/;
my $NOT_DELIM = qr/[^\\\x{FF3C}]/;

# zen2han($text)
#
# Return a copy of $text with full-width ("zenkaku") characters replaced by
# their half-width ASCII equivalents:
#   * U+3000 (ideographic space)      -> ' '
#   * U+FF01..U+FF5E (full-width ASCII block) -> '!'..'~'
#   * the punctuation variants that stood in for ASCII in the original
#     lookup list: U+201D -> ", U+2019 -> ', U+2018 -> `, U+FFE5 -> \,
#     U+FFE3 -> ~, U+2212 -> -
# All other characters are left untouched.  The argument is not modified
# (the original edited the caller's variable in place).  undef is returned
# unchanged.
sub zen2han {
    my ($text) = @_;
    return $text unless defined $text;

    # Both lists have 101 entries: space, the 94-character range, then the
    # six punctuation variants in the same order on each side.
    $text =~ tr{\x{3000}\x{FF01}-\x{FF5E}\x{201D}\x{2019}\x{2018}\x{FFE5}\x{FFE3}\x{2212}}
               { !-~"'`\\~\-};

    return $text;
}

# transfer($key, $context)
#
# Convert the raw content of one field to its output form, depending on the
# (already half-width) tag $key:
#   ID, C0, AF -> content converted to half-width
#   AE         -> '有' if the content is "Y" (half- or full-width), else '無'
#   S1         -> the last "全...文字" phrase in the content, half-width;
#                 undef if the content has no such phrase
#   AD         -> display name from %ad; undef for an unknown code
#   any other  -> content unchanged
sub transfer {
    my ( $key, $context ) = @_;

    if ( $key eq 'ID' || $key eq 'C0' || $key eq 'AF' ) {
        return zen2han($context);
    }

    if ( $key eq 'AE' ) {
        return zen2han($context) eq 'Y' ? '有' : '無';
    }

    if ( $key eq 'S1' ) {
        # Match the argument itself.  The original matched the global $_,
        # which only worked because the caller left the line there.
        # The leading greedy .* selects the last occurrence, as before.
        my ($size) = $context =~ /.*(全.*文字)/;
        return zen2han($size);
    }

    if ( $key eq 'AD' ) {
        return $ad{ zen2han($context) };
    }

    return $context;
}

# _values($record, $key)
#
# Return the list of values stored for $key in $record (empty list if the
# key is absent), with undefined values replaced by ''.
sub _values {
    my ( $record, $key ) = @_;
    return map { defined $_ ? $_ : '' } @{ $record->{$key} || [] };
}

# format_record($record)
#
# Build the text for one record (hash ref of tag => array ref):
#   * a blank line,
#   * "<TAG>first-value" for ID, C0, AD, AE, AF, T1, S1,
#   * for S2 and T2: "<TAG>", then all values one per line, then a blank line,
#   * "<TAG>" followed by all values joined by spaces for KA, AA, KB, AB,
#   * a closing blank line.
# Missing fields are printed with an empty value.
sub format_record {
    my ($record) = @_;

    my $text = "\n";

    for my $key (qw(ID C0 AD AE AF T1 S1)) {
        my ($first) = _values( $record, $key );
        $first = '' unless defined $first;
        $text .= "<$key>$first\n";
    }

    for my $key (qw(S2 T2)) {
        $text .= "<$key>\n" . join( "\n", _values( $record, $key ) ) . "\n\n";
    }

    for my $key (qw(KA AA KB AB)) {
        $text .= "<$key>" . join( ' ', _values( $record, $key ) ) . "\n";
    }

    $text .= "\n";
    return $text;
}

# output()
#
# Print the record currently held in %keyword to STDOUT.
sub output {
    print format_record( \%keyword );
    return;
}

# main()
#
# Read all input, group lines into records at each "ID" line, and print
# every record.
sub main {
    my $ids_seen = 0;

    while ( my $line = <> ) {
        chomp $line;

        # The tag runs up to the *next* delimiter, so a delimiter character
        # inside the content can no longer be swallowed into the tag.
        # Lines that do not have the expected shape are skipped rather than
        # stored under an empty key.
        my ( $tag, $context ) = $line =~ /$DELIM($NOT_DELIM*)$DELIM(.*)/
            or next;

        my $key  = zen2han($tag);
        my $data = transfer( $key, $context );

        if ( $key eq 'ID' ) {
            if ( $ids_seen >= 1 ) {
                # NOTE(review): an extra blank line precedes every flushed
                # record except the first one and the final one printed at
                # end of input.  This reproduces the original spacing
                # exactly; it looks accidental but downstream consumers may
                # depend on it.
                print "\n" if $ids_seen >= 2;
                output();
                %keyword = ();
            }
            $ids_seen++;
        }

        push @{ $keyword{$key} }, $data;
    }

    # Flush the last record, but do not emit an empty one when the input
    # contained no usable lines.
    output() if %keyword;

    return;
}

# Run only when executed as a script, so the subs can be loaded and tested
# without consuming input.
main() unless caller;