|
|
- #!/usr/bin/perl -w
-
- use warnings;
- use strict;
-
- # Bucket counts for the four generated hash tables. Each count is a power of
- # two because the codepoint hash is reduced with "& (count - 1)".
- my $HASHBUCKETS1_16 = 1 << 8;  # 256: 16-bit codepoints that fold to one codepoint
- my $HASHBUCKETS1_32 = 1 << 4;  # 16: codepoints above 0xFFFF (single-codepoint folds)
- my $HASHBUCKETS2_16 = 1 << 4;  # 16: 16-bit codepoints that fold to two codepoints
- my $HASHBUCKETS3_16 = 1 << 2;  # 4: 16-bit codepoints that fold to three codepoints
-
- # Emit the fixed preamble of the generated C header: the license banner, the
- # include guard, and the struct types used by the tables printed afterwards.
- # The heredoc is single-quoted so the C text is never subject to Perl
- # interpolation. The explanatory comment names the four tables exactly as
- # they are emitted (case_fold_hash1_16, 1_32, 2_16 and 3_16).
- print <<'__EOF__';
- /*
- * This file is part of PhysicsFS (https://icculus.org/physfs/)
- *
- * This data generated by physfs/extras/makecasefoldhashtable.pl ...
- * Do not manually edit this file!
- *
- * Please see the file LICENSE.txt in the source's root directory.
- */
-
- #ifndef _INCLUDE_PHYSFS_CASEFOLDING_H_
- #define _INCLUDE_PHYSFS_CASEFOLDING_H_
-
- #ifndef __PHYSICSFS_INTERNAL__
- #error Do not include this header from your applications.
- #endif
-
- /* We build four simple hashmaps here, each mapping a Unicode codepoint to
- one, two, or three lowercase codepoints. To retrieve this info: look at
- case_fold_hash1_16, case_fold_hash2_16, and case_fold_hash3_16, where the
- first number is how many codepoints the fold produces. Most foldable
- codepoints fold to one, a few dozen fold to two, and a handful fold to three.
- If the codepoint isn't in any of these hashes, it doesn't fold (no separate
- upper and lowercase).
-
- Almost all these codepoints fit into 16 bits, so we hash them as such to save
- memory. If a codepoint is > 0xFFFF, it lives in a separate hash,
- case_fold_hash1_32, since there are (currently) only about 120 of them and
- (currently) all of them map to a single lowercase codepoint. */
-
- typedef struct CaseFoldMapping1_32
- {
- PHYSFS_uint32 from;
- PHYSFS_uint32 to0;
- } CaseFoldMapping1_32;
-
- typedef struct CaseFoldMapping1_16
- {
- PHYSFS_uint16 from;
- PHYSFS_uint16 to0;
- } CaseFoldMapping1_16;
-
- typedef struct CaseFoldMapping2_16
- {
- PHYSFS_uint16 from;
- PHYSFS_uint16 to0;
- PHYSFS_uint16 to1;
- } CaseFoldMapping2_16;
-
- typedef struct CaseFoldMapping3_16
- {
- PHYSFS_uint16 from;
- PHYSFS_uint16 to0;
- PHYSFS_uint16 to1;
- PHYSFS_uint16 to2;
- } CaseFoldMapping3_16;
-
- typedef struct CaseFoldHashBucket1_16
- {
- const CaseFoldMapping1_16 *list;
- const PHYSFS_uint8 count;
- } CaseFoldHashBucket1_16;
-
- typedef struct CaseFoldHashBucket1_32
- {
- const CaseFoldMapping1_32 *list;
- const PHYSFS_uint8 count;
- } CaseFoldHashBucket1_32;
-
- typedef struct CaseFoldHashBucket2_16
- {
- const CaseFoldMapping2_16 *list;
- const PHYSFS_uint8 count;
- } CaseFoldHashBucket2_16;
-
- typedef struct CaseFoldHashBucket3_16
- {
- const CaseFoldMapping3_16 *list;
- const PHYSFS_uint8 count;
- } CaseFoldHashBucket3_16;
-
- __EOF__
-
-
- # One string per hash bucket for each table. Every bucket starts out empty
- # and later accumulates the C initializer lines of the codepoints that hash
- # to it.
- my @foldPairs1_16 = ('') x $HASHBUCKETS1_16;
- my @foldPairs2_16 = ('') x $HASHBUCKETS2_16;
- my @foldPairs3_16 = ('') x $HASHBUCKETS3_16;
- my @foldPairs1_32 = ('') x $HASHBUCKETS1_32;
-
- # Read 'casefolding.txt' from the current directory and append one C
- # initializer line per foldable codepoint to the matching bucket string in
- # @foldPairs1_16, @foldPairs1_32, @foldPairs2_16 or @foldPairs3_16.
- #
- # Only lines with status 'C' or 'F' are used. Codepoints below 128 are
- # skipped. Dies if a mapping has more than three codepoints, cannot be
- # parsed, folds a codepoint above 0xFFFF to several codepoints, or would put
- # a value wider than 16 bits into one of the 16-bit tables.
- open(my $fh, '<', 'casefolding.txt') or die("failed to open casefolding.txt: $!\n");
- while (my $line = <$fh>) {
-     chomp($line);
-
-     # strip comments from textfile...
-     $line =~ s/\#.*\Z//;
-
-     # strip whitespace...
-     $line =~ s/\A\s+//;
-     $line =~ s/\s+\Z//;
-
-     # Expect "<code>; <status>; <mapping>;" -- anything else is skipped.
-     next if $line !~ /\A([a-fA-F0-9]+)\;\s*(.)\;\s*(.+)\;/;
-     my ($code, $status, $mapping) = ($1, $2, $3);
-     #print("// code '$code' status '$status' mapping '$mapping'\n");
-
-     next if ($status ne 'C') and ($status ne 'F');
-
-     # Peel up to three hex codepoints off the front of the mapping; anything
-     # left over afterwards means the mapping is longer than we support.
-     my @maps;
-     while ((@maps < 3) and ($mapping =~ s/\A([a-fA-F0-9]+)\s*//)) {
-         push(@maps, $1);
-     }
-     die("mapping space too small for '$code'\n") if ($mapping ne '');
-     die("problem parsing mapping for '$code'\n") if (not @maps);
-
-     my $hexxed = hex($code);
-
-     # Just ignore these, we'll handle the low-ASCII ones ourselves.
-     next if $hexxed < 128;
-
-     my ($map1, $map2, $map3) = @maps;
-
-     # Every table uses the same mix; only the bucket mask differs.
-     my $mixed = $hexxed ^ ($hexxed >> 8);
-     #print("// hexxed '$hexxed' mixed '$mixed'\n");
-
-     if ($hexxed > 0xFFFF) {
-         # We just need to add the 32-bit 2 and/or 3 codepoint maps if this die()'s here.
-         die("Uhoh, a codepoint > 0xFFFF that folds to multiple codepoints! Fixme.") if defined($map2);
-         $foldPairs1_32[$mixed & ($HASHBUCKETS1_32-1)] .= " { 0x$code, 0x$map1 },\n";
-         next;
-     }
-
-     # The remaining tables store PHYSFS_uint16 values, so refuse a fold
-     # target that would be silently truncated in the generated C.
-     for my $target (@maps) {
-         die("codepoint '$code' folds to '$target', which doesn't fit in 16 bits\n") if hex($target) > 0xFFFF;
-     }
-
-     if (not defined($map2)) {
-         $foldPairs1_16[$mixed & ($HASHBUCKETS1_16-1)] .= " { 0x$code, 0x$map1 },\n";
-     } elsif (not defined($map3)) {
-         $foldPairs2_16[$mixed & ($HASHBUCKETS2_16-1)] .= " { 0x$code, 0x$map1, 0x$map2 },\n";
-     } else {
-         $foldPairs3_16[$mixed & ($HASHBUCKETS3_16-1)] .= " { 0x$code, 0x$map1, 0x$map2, 0x$map3 },\n";
-     }
- }
- close($fh);
-
- # Emit one static C array per non-empty bucket, for each of the four tables.
- # Each spec is [ type/symbol suffix, bucket count, reference to bucket strings ].
- for my $spec (
-     [ '1_16', $HASHBUCKETS1_16, \@foldPairs1_16 ],
-     [ '1_32', $HASHBUCKETS1_32, \@foldPairs1_32 ],
-     [ '2_16', $HASHBUCKETS2_16, \@foldPairs2_16 ],
-     [ '3_16', $HASHBUCKETS3_16, \@foldPairs3_16 ],
- ) {
-     my ($suffix, $buckets, $pairs) = @{$spec};
-     for my $i (0 .. $buckets - 1) {
-         # Drop the final ",\n" in place so the last initializer has no
-         # trailing comma.
-         $pairs->[$i] =~ s/,\n\Z//;
-         my $str = $pairs->[$i];
-         next if $str eq '';
-
-         # Bucket index zero-padded to three digits. sprintf keeps every
-         # digit, so an index of 1000 or more cannot collide with the symbol
-         # of a lower bucket.
-         my $sym = sprintf('case_fold%s_%03d', $suffix, $i);
-         print("static const CaseFoldMapping${suffix} ${sym}[] = {\n$str\n};\n\n");
-     }
- }
-
- # Emit the four bucket tables. Every bucket becomes one { list, count } entry:
- # empty buckets get { NULL, 0 }, the others point at their per-bucket array
- # and let __PHYSFS_ARRAYLEN compute the element count.
- # Each spec is [ type/symbol suffix, bucket count, reference to bucket strings ].
- for my $spec (
-     [ '1_16', $HASHBUCKETS1_16, \@foldPairs1_16 ],
-     [ '1_32', $HASHBUCKETS1_32, \@foldPairs1_32 ],
-     [ '2_16', $HASHBUCKETS2_16, \@foldPairs2_16 ],
-     [ '3_16', $HASHBUCKETS3_16, \@foldPairs3_16 ],
- ) {
-     my ($suffix, $buckets, $pairs) = @{$spec};
-     my $table = "case_fold_hash$suffix";
-
-     print("static const CaseFoldHashBucket${suffix} ${table}[] = {\n");
-     for my $i (0 .. $buckets - 1) {
-         if ($pairs->[$i] eq '') {
-             print(" { NULL, 0 },\n");
-         } else {
-             # Bucket index zero-padded to three digits. sprintf keeps every
-             # digit, so an index of 1000 or more cannot collide with the
-             # symbol of a lower bucket.
-             my $sym = sprintf('case_fold%s_%03d', $suffix, $i);
-             print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
-         }
-     }
-     print("};\n\n");
- }
-
- print <<__EOF__;  # trailer of the generated header: closes the _INCLUDE_PHYSFS_CASEFOLDING_H_ include guard
-
- #endif /* _INCLUDE_PHYSFS_CASEFOLDING_H_ */
-
- /* end of physfs_casefolding.h ... */
-
- __EOF__
-
- exit 0;  # exit with status 0 once the whole header has been printed
-
- # end of makecasefoldhashtable.pl ...
-
|