Diff for /charlint/charlint.pl between versions 1.21 and 1.22

version 1.21, 2001/10/03 06:40:15 | version 1.22, 2002/04/03 08:23:42
Line 1 Line 1
 #!/usr/bin/perl  #!/usr/bin/perl
   
 # Copyright notice:  # Copyright notice:
 #    (c) Copyright Keio University 1999, 2000  #    (c) Copyright Keio University 1999-2002
 #    This software is made available under the terms of the  #    This software is made available under the terms of the
 #    W3C Software Licence available at  #    W3C Software Licence available at
 #    http://www.w3.org/Consortium/Legal/copyright-software.  #    http://www.w3.org/Consortium/Legal/copyright-software.
Line 16 Line 16
 #                   Kai Henningsen for finally getting me to clean up  #                   Kai Henningsen for finally getting me to clean up
 #                                      for 'use strict' and 'perl -w'  #                                      for 'use strict' and 'perl -w'
   
 # Authors:  # Author:
 # MJD Martin J. Du"rst, duerst@w3.org  # MJD Martin J. Du"rst, duerst@w3.org
   
 my $version = 'Version 0.49';  my $version = 'Version 0.50';
   
 # History:  # History:
   # 2002/04/03: 0.50, updated for 3.2.0; added -F951; added -c         MJD
 # 2001/10/03: 0.49, code cleanup for use strict and -w               MJD  # 2001/10/03: 0.49, code cleanup for use strict and -w               MJD
 # 2001/04/01: 0.48, updated for 3.1.0 (final)                        MJD  # 2001/04/01: 0.48, updated for 3.1.0 (final)                        MJD
 # 2001/03/07: 0.47, YOD WITH HIRIQ corrigendum                       MJD  # 2001/03/07: 0.47, YOD WITH HIRIQ corrigendum                       MJD
Line 52  use Storable; Line 53  use Storable;
 # Global variables (options and data arrays)  # Global variables (options and data arrays)
 use vars qw($OPTB $OPTC $OPTD $OPTE $OPTK  use vars qw($OPTB $OPTC $OPTD $OPTE $OPTK
         $OPTN $OPTP $OPTS $OPTU $OPTX $OPTYWH          $OPTN $OPTP $OPTS $OPTU $OPTX $OPTYWH
         $OPTb $OPTd $OPTf $OPTh $OPTn $OPTo          $OPTb $OPTc $OPTd $OPTf $OPTF951 $OPTh
         $OPTq $OPTs $OPTv $OPTx          $OPTn $OPTo $OPTq $OPTs $OPTv $OPTx
         %CombClass %CompCano %DecoCano %DecoCanoData          %CombClass %CompCano %DecoCano %DecoCanoData
         %DecoCanoRest %DecoKompData %DecoKompKind %exists);          %DecoCanoRest %DecoKompData %DecoKompKind %exists);
   
Line 400  sub ReadCharacterDataFile { Line 401  sub ReadCharacterDataFile {
     close (BASE);      close (BASE);
     print STDERR "Finished reading character database.\n" if (!$OPTq);      print STDERR "Finished reading character database.\n" if (!$OPTq);
   
       if ($OPTF951) {
           $DecoCanoData{"\xEF\xA5\x91"} =
               $DecoKompData{"\xEF\xA5\x91"} = "\xE9\x9B\xBB";
       }
     %DecoCanoRest = %DecoCano = %DecoCanoData;    # keep original data as is, and      %DecoCanoRest = %DecoCano = %DecoCanoData;    # keep original data as is, and
                                                   # copy to restrict for composition                                                    # copy to restrict for composition
   
Line 607  sub ReadCharacterDataFile { Line 612  sub ReadCharacterDataFile {
         'FB4D',  # HEBREW LETTER KAF WITH RAFE          'FB4D',  # HEBREW LETTER KAF WITH RAFE
         'FB4E',  # HEBREW LETTER PE WITH RAFE          'FB4E',  # HEBREW LETTER PE WITH RAFE
         ## post composition exclusion          ## post composition exclusion
           '2ADC',  #  FORKING
         '1D15E', # MUSICAL SYMBOL HALF NOTE          '1D15E', # MUSICAL SYMBOL HALF NOTE
                 '1D15F', # MUSICAL SYMBOL QUARTER NOTE          '1D15F', # MUSICAL SYMBOL QUARTER NOTE
                 '1D160', # MUSICAL SYMBOL EIGHTH NOTE          '1D160', # MUSICAL SYMBOL EIGHTH NOTE
                 '1D161', # MUSICAL SYMBOL SIXTEENTH NOTE          '1D161', # MUSICAL SYMBOL SIXTEENTH NOTE
                 '1D162', # MUSICAL SYMBOL THIRTY-SECOND NOTE          '1D162', # MUSICAL SYMBOL THIRTY-SECOND NOTE
                 '1D163', # MUSICAL SYMBOL SIXTY-FOURTH NOTE          '1D163', # MUSICAL SYMBOL SIXTY-FOURTH NOTE
                 '1D164', # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE          '1D164', # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
                 '1D1BB', # MUSICAL SYMBOL MINIMA          '1D1BB', # MUSICAL SYMBOL MINIMA
                 '1D1BC', # MUSICAL SYMBOL MINIMA BLACK          '1D1BC', # MUSICAL SYMBOL MINIMA BLACK
                 '1D1BD', # MUSICAL SYMBOL SEMIMINIMA WHITE          '1D1BD', # MUSICAL SYMBOL SEMIMINIMA WHITE
                 '1D1BE', # MUSICAL SYMBOL SEMIMINIMA BLACK          '1D1BE', # MUSICAL SYMBOL SEMIMINIMA BLACK
                 '1D1BF', # MUSICAL SYMBOL FUSA WHITE          '1D1BF', # MUSICAL SYMBOL FUSA WHITE
                 '1D1C0'  # MUSICAL SYMBOL FUSA BLACK          '1D1C0'  # MUSICAL SYMBOL FUSA BLACK
     );      );
           
     if (!$OPTYWH) {      if (!$OPTYWH) {
Line 743  Available options: Line 749  Available options:
 (options prefixed by # are currently not available)  (options prefixed by # are currently not available)
 -b: Remove initial 'Byte Order Mark'  -b: Remove initial 'Byte Order Mark'
 -B: Suppress warning about initial 'Byte Order Mark'  -B: Suppress warning about initial 'Byte Order Mark'
   -c: Detect non-normalized data (but do not normalize)
 -C: Do not normalize  -C: Do not normalize
 -d: Debug: Thoroughly check character data table input  -d: Debug: Thoroughly check character data table input
 -D: Leave after reading in character data  -D: Leave after reading in character data
 -e: # remove undefined codepoints  -e: # remove undefined codepoints
 -E: Do not warn about undefined codepoints  -E: Do not warn about undefined codepoints
 -f file: Read data from file (no default anymore)  -f file: Read data from file (no default anymore)
          (please use newest V3.0 datafiles)           (please use newest V3.2.0 datafiles)
   -F951: Use old (wrong) mapping for U+F951 (use this option
             if you really need 3.1.0 behaviour)
 -h: Prints out this short description  -h: Prints out this short description
 -k: # Warn about compatibility codepoints  -k: # Warn about compatibility codepoints
 -K: Normalize out (i.e. decompose) compatibility codepoints  -K: Normalize out (i.e. decompose) compatibility codepoints
Line 768  Available options: Line 777  Available options:
 -x: Do decomposition only  -x: Do decomposition only
 -X: Don't do decomposition (assume input is decomposed)  -X: Don't do decomposition (assume input is decomposed)
 -YWH: Treat YOD WITH HIRIQ as precomposed (use this option  -YWH: Treat YOD WITH HIRIQ as precomposed (use this option
           if you really need pre-corrigendum behaviour)            if you really need 3.0.0 behaviour)
   
 EOF  EOF
 # end of raw in-place text  # end of raw in-place text
   
 # ideas for more options:  # ideas for more options:
 # * don't normalize, just check  
 # * allow to do kompatibility processing by category  # * allow to do kompatibility processing by category
 # * warn/remove plane 14 language tag codes and other crap  # * warn/remove plane 14 language tag codes and other crap
 # * convert crap to what it's supposed to be (difficult)  # * convert crap to what it's supposed to be (difficult)
Line 797  sub initialize { Line 805  sub initialize {
                 $_ = shift(@ARGV);                  $_ = shift(@ARGV);
                 $OPTb= 1, next OPTIONS  if /^-b$/;                  $OPTb= 1, next OPTIONS  if /^-b$/;
                 $OPTB= 1, next OPTIONS  if /^-B$/;                  $OPTB= 1, next OPTIONS  if /^-B$/;
                   $OPTc= 1, next OPTIONS  if /^-c$/;
                 $OPTC= 1, next OPTIONS  if /^-C$/;                  $OPTC= 1, next OPTIONS  if /^-C$/;
                 $OPTd= 1, next OPTIONS  if /^-d$/;                  $OPTd= 1, next OPTIONS  if /^-d$/;
                 $OPTD= 1, next OPTIONS  if /^-D$/;                  $OPTD= 1, next OPTIONS  if /^-D$/;
Line 808  sub initialize { Line 817  sub initialize {
                                 " Maybe not what you intend.\n" if ($dataFile =~ /^-.$/ && !$OPTq);                                  " Maybe not what you intend.\n" if ($dataFile =~ /^-.$/ && !$OPTq);
                         next OPTIONS;                          next OPTIONS;
                 }                  }
                   $OPTF951= 1, next OPTIONS  if /^-F951$/;
                 $OPTh= 1, next OPTIONS  if /^-h$/;                  $OPTh= 1, next OPTIONS  if /^-h$/;
                 $OPTK= 1, next OPTIONS  if /^-K$/;                  $OPTK= 1, next OPTIONS  if /^-K$/;
                 $OPTn= 1, next OPTIONS  if /^-n$/;                  $OPTn= 1, next OPTIONS  if /^-n$/;
Line 924  while (<>) { Line 934  while (<>) {
     }      }
   
     my @line = splitutf8($_);      my @line = splitutf8($_);
       my @lineoriginal = @line;
     my @line2 = ();      my @line2 = ();
   
     if (!$OPTC) {      if (!$OPTC) {
Line 973  while (<>) { Line 984  while (<>) {
             }              }
             $#line = $targetPos-1;              $#line = $targetPos-1;
         } # end of recomposition          } # end of recomposition
     } #if (!OPTC)          if ($OPTc && join("",@line) ne join("",@lineoriginal)) {
               die "Line $line: Non-normalized data.\nGiving up!\n";
           }
       } #if (!$OPTC)
   
     printOPT (join "", @line);      printOPT (join "", @line);
   

Legend (left column is v.1.21, right column is v.1.22):
  - Removed from v.1.21
  - changed lines
  - Added in v.1.22


Webmaster