add NFC_Compose mapping, which is generated arithmetically from NFC_Decompose
author Roker <roker@pep-project.org>
Wed, 18 Apr 2018 17:35:27 +0200
changeset 484 d42beebf05ca
parent 483 ba5bb187bf26
child 485 69eccc396726
add NFC_Compose mapping, which is generated arithmetically from NFC_Decompose
server/nfc.cc
server/nfc_sets.cc
server/nfc_sets.hh
server/scripts/gen_sets.sh
     1.1 --- a/server/nfc.cc	Mon Apr 16 23:36:22 2018 +0200
     1.2 +++ b/server/nfc.cc	Wed Apr 18 17:35:27 2018 +0200
     1.3 @@ -371,3 +371,19 @@
     1.4  	// TODO:
     1.5  	throw std::logic_error("NFC normalization is necessary, but unimplemented. Sorry.");
     1.6  }
     1.7 +
     1.8 +
     1.9 +// Builds the composition mapping by inverting NFC_Decompose (decomposition pair -> composed codepoint); used only to initialize the NFC Compose mapping:
    1.10 +std::map< std::pair<unsigned, unsigned>, unsigned> generate_nfc_compose()
    1.11 +{
    1.12 +	std::map< std::pair<unsigned, unsigned>, unsigned> m; // key: decomposition pair, value: the codepoint that decomposes to it
    1.13 +	for(const auto& decomp : NFC_Decompose) // NOTE(review): no composition-exclusion filtering is visible here -- confirm the Unicode TR-15 exclusions are handled elsewhere
    1.14 +	{
    1.15 +		if(decomp.second.second >= 0) // second member is -1 for singleton decompositions, which have no pair to compose from: skip them
    1.16 +		{
    1.17 +			m[ decomp.second ] = decomp.first; // the pair<int,int> value is converted to the pair<unsigned,unsigned> key; a repeated pair would be overwritten by the later entry
    1.18 +		}
    1.19 +	}
    1.20 +	
    1.21 +	return m;
    1.22 +}
     2.1 --- a/server/nfc_sets.cc	Mon Apr 16 23:36:22 2018 +0200
     2.2 +++ b/server/nfc_sets.cc	Wed Apr 18 17:35:27 2018 +0200
     2.3 @@ -3013,3 +3013,5 @@
     2.4  {0x2FA1D, {0x2A600, -1}},
     2.5  };
     2.6  
     2.7 +std::map< std::pair<unsigned, unsigned>, unsigned> generate_nfc_compose();
     2.8 +const std::map< std::pair<unsigned, unsigned>, unsigned> NFC_Compose = generate_nfc_compose();
     3.1 --- a/server/nfc_sets.hh	Mon Apr 16 23:36:22 2018 +0200
     3.2 +++ b/server/nfc_sets.hh	Wed Apr 18 17:35:27 2018 +0200
     3.3 @@ -8,6 +8,8 @@
     3.4  // from Unicode's DerivedNormalizationProps.txt and UnicodeData.txt.
     3.5  // see scripts/ subdirectory
     3.6  
     3.7 +// TODO: (maybe) Replace these containers with flat_map or sorted arrays, which might be faster. But benchmark first!
     3.8 +
     3.9  // Contains all codepoints with NFC_No property.
    3.10  extern const std::set<unsigned> NFC_No;
    3.11  
    3.12 @@ -20,4 +22,7 @@
    3.13  // Contains the canonical decomposing pairs. second member might be -1 for single decompositions.
    3.14  extern const std::map<unsigned, std::pair<int,int>> NFC_Decompose;
    3.15  
    3.16 +// Canonical composition mapping (pair of codepoints -> composed codepoint), except for compositions excluded according to Unicode TR-15.
    3.17 +extern const std::map< std::pair<unsigned, unsigned>, unsigned> NFC_Compose;
    3.18 +
    3.19  #endif // NFC_SETS_HH
     4.1 --- a/server/scripts/gen_sets.sh	Mon Apr 16 23:36:22 2018 +0200
     4.2 +++ b/server/scripts/gen_sets.sh	Wed Apr 18 17:35:27 2018 +0200
     4.3 @@ -93,4 +93,7 @@
     4.4  
     4.5  echo -en '};\n\n'
     4.6  
     4.7 +echo 'std::map< std::pair<unsigned, unsigned>, unsigned> generate_nfc_compose();' # emit a forward declaration of the generator function
     4.8 +echo -en 'const std::map< std::pair<unsigned, unsigned>, unsigned> NFC_Compose = generate_nfc_compose();\n\n' # emit the NFC_Compose definition, initialized by calling that function
     4.9 +
    4.10  # end of file