#!/usr/local/bin/jperl
# --------------------------------------------------------
# Convert the NE-tagged Kyoto University corpus to SGML format.
#
# Input (files named on the command line, or STDIN):
#   "# A-ID:<digits>"  starts a new document with that id
#   "#..."             any other comment line is ignored
#   anything else      is a body line of the current document
#
# Output (STDOUT): one record per document that has a non-zero id
# and at least one body line.
#
# NOTE(review): the record wrapper strings below contain only
# newlines and the document id -- no SGML tags are present. They
# are reproduced exactly as found; confirm whether markup such as
# opening/closing tags was lost before this file was archived.
# --------------------------------------------------------
use strict;
use warnings;

# format_document($docid, @lines)
#   Returns the output record for one document as a single string:
#   a header containing the id, each body line followed by a newline,
#   and a trailer.
sub format_document {
    my ($docid, @lines) = @_;

    return join '',
        "\n\n$docid\n\n",
        ( map { "$_\n" } @lines ),
        "\n\n\n";
}

# convert_corpus($in_fh, $out_fh)
#   Reads corpus lines from $in_fh and prints one record per document
#   to $out_fh. Returns nothing.
sub convert_corpus {
    my ($in_fh, $out_fh) = @_;

    my $docid = 0;    # 0 means "no document header seen yet"
    my @lines;        # body lines collected for the current document

    # Emit the current document, if there is one worth emitting.
    my $flush = sub {
        print {$out_fh} format_document($docid, @lines)
            if $docid != 0 && @lines;
    };

    while ( my $line = <$in_fh> ) {
        # chomp (not chop): a final line without a trailing newline
        # must not lose its last character.
        chomp $line;

        if ( $line =~ /^# A-ID:(\d+)/ ) {
            my $newdocid = $1;
            $flush->();           # finish the previous document
            $docid = $newdocid;
            @lines = ();          # lines seen before the first header are dropped
        }
        elsif ( $line =~ /^#.*/ ) {
            # Other comment lines carry no body text; skip them.
        }
        else {
            push @lines, $line;
        }
    }

    # The last document has no following header to trigger its output,
    # so flush it explicitly at end of input.
    $flush->();

    return;
}

# Run as a script: read from the files in @ARGV (or STDIN) exactly as
# the diamond operator would, and write to STDOUT.
convert_corpus( \*ARGV, \*STDOUT ) unless caller;