Merge pull request #155 from github/no-apple

Deprecate Apple Palette and upgrade to Emoji 11.
This commit is contained in:
Mislav Marohnić
2019-05-05 01:47:18 +02:00
committed by GitHub
10 changed files with 19685 additions and 13156 deletions

5
.gitignore vendored
View File

@@ -3,7 +3,6 @@
.bundle
.ruby-version
Gemfile.lock
db/emoji-test.txt
db/ucd.nounihan.grouped.xml
images/unicode/*.png
vendor/
vendor/*
!vendor/unicode-emoji-test.txt

View File

@@ -10,9 +10,7 @@ end
namespace :db do
desc %(Generate Emoji data files needed for development)
task :generate => [
'db/Category-Emoji.json',
'db/ucd.nounihan.grouped.xml',
'db/emoji-test.txt',
'vendor/unicode-emoji-test.txt',
]
desc %(Dump a list of supported Emoji with Unicode descriptions and aliases)
@@ -21,22 +19,8 @@ namespace :db do
end
end
task 'db/Category-Emoji.json' do |t|
system 'plutil', '-convert', 'json', '-r',
'/System/Library/Input Methods/CharacterPalette.app/Contents/Resources/Category-Emoji.plist',
'-o', t.name
end
file 'db/ucd.nounihan.grouped.xml' do
Dir.chdir('db') do
system 'curl', '-fsSLO', 'http://www.unicode.org/Public/9.0.0/ucdxml/ucd.nounihan.grouped.zip'
system 'unzip', '-q', 'ucd.nounihan.grouped.zip'
rm 'ucd.nounihan.grouped.zip'
end
end
file 'db/emoji-test.txt' do |t|
system 'curl', '-fsSL', 'http://unicode.org/Public/emoji/4.0/emoji-test.txt', '-o', t.name
file 'vendor/unicode-emoji-test.txt' do |t|
system 'curl', '-fsSL', 'http://unicode.org/Public/emoji/11.0/emoji-test.txt', '-o', t.name
end
directory 'images/unicode' do

View File

@@ -1,71 +0,0 @@
{
"EmojiDataArray" : [
{
"CVDataTitle" : "EmojiCategory-People",
"CVCategoryImage" : "Emoji-HumanImage",
"CVCategoryData" : {
"CVSkipNullGlyphs" : false,
"Data" : "😀,😃,😄,😁,😆,😅,😂,🤣,☺️,😊,😇,🙂,🙃,😉,😌,😍,😘,😗,😙,😚,😋,😜,😝,😛,🤑,🤗,🤓,😎,🤡,🤠,😏,😒,😞,😔,😟,😕,🙁,☹️,😣,😖,😫,😩,😤,😠,😡,😶,😐,😑,😯,😦,😧,😮,😲,😵,😳,😱,😨,😰,😢,😥,🤤,😭,😓,😪,😴,🙄,🤔,🤥,😬,🤐,🤢,🤧,😷,🤒,🤕,😈,👿,👹,👺,💩,👻,💀,☠️,👽,👾,🤖,🎃,😺,😸,😹,😻,😼,😽,🙀,😿,😾,👐,🙌,👏,🙏,🤝,👍,👎,👊,✊,🤛,🤜,🤞,✌️,🤘,👌,👈,👉,👆,👇,☝️,✋,🤚,🖐,🖖,👋,🤙,💪,🖕,✍️,🤳,💅,💍,💄,💋,👄,👅,👂,👃,👣,👁,👀,🗣,👤,👥,👶,👦,👧,👨,👩,👱‍♀️,👱,👴,👵,👲,👳‍♀️,👳,👮‍♀️,👮,👷‍♀️,👷,💂‍♀️,💂,🕵️‍♀️,🕵️,👩‍⚕️,👨‍⚕️,👩‍🌾,👨‍🌾,👩‍🍳,👨‍🍳,👩‍🎓,👨‍🎓,👩‍🎤,👨‍🎤,👩‍🏫,👨‍🏫,👩‍🏭,👨‍🏭,👩‍💻,👨‍💻,👩‍💼,👨‍💼,👩‍🔧,👨‍🔧,👩‍🔬,👨‍🔬,👩‍🎨,👨‍🎨,👩‍🚒,👨‍🚒,👩‍✈️,👨‍✈️,👩‍🚀,👨‍🚀,👩‍⚖️,👨‍⚖️,🤶,🎅,👸,🤴,👰,🤵,👼,🤰,🙇‍♀️,🙇,💁,💁‍♂️,🙅,🙅‍♂️,🙆,🙆‍♂️,🙋,🙋‍♂️,🤦‍♀️,🤦‍♂️,🤷‍♀️,🤷‍♂️,🙎,🙎‍♂️,🙍,🙍‍♂️,💇,💇‍♂️,💆,💆‍♂️,🕴,💃,🕺,👯,👯‍♂️,🚶‍♀️,🚶,🏃‍♀️,🏃,👫,👭,👬,💑,👩‍❤️‍👩,👨‍❤️‍👨,💏,👩‍❤️‍💋‍👩,👨‍❤️‍💋‍👨,👪,👨‍👩‍👧,👨‍👩‍👧‍👦,👨‍👩‍👦‍👦,👨‍👩‍👧‍👧,👩‍👩‍👦,👩‍👩‍👧,👩‍👩‍👧‍👦,👩‍👩‍👦‍👦,👩‍👩‍👧‍👧,👨‍👨‍👦,👨‍👨‍👧,👨‍👨‍👧‍👦,👨‍👨‍👦‍👦,👨‍👨‍👧‍👧,👩‍👦,👩‍👧,👩‍👧‍👦,👩‍👦‍👦,👩‍👧‍👧,👨‍👦,👨‍👧,👨‍👧‍👦,👨‍👦‍👦,👨‍👧‍👧,👚,👕,👖,👔,👗,👙,👘,👠,👡,👢,👞,👟,👒,🎩,🎓,👑,⛑,🎒,👝,👛,👜,💼,👓,🕶,🌂,☂️"
}
},
{
"CVDataTitle" : "EmojiCategory-Nature",
"CVCategoryImage" : "Emoji-NatureImage",
"CVCategoryData" : {
"CVSkipNullGlyphs" : false,
"Data" : "🐶,🐱,🐭,🐹,🐰,🦊,🐻,🐼,🐨,🐯,🦁,🐮,🐷,🐽,🐸,🐵,🙈,🙉,🙊,🐒,🐔,🐧,🐦,🐤,🐣,🐥,🦆,🦅,🦉,🦇,🐺,🐗,🐴,🦄,🐝,🐛,🦋,🐌,🐚,🐞,🐜,🕷,🕸,🐢,🐍,🦎,🦂,🦀,🦑,🐙,🦐,🐠,🐟,🐡,🐬,🦈,🐳,🐋,🐊,🐆,🐅,🐃,🐂,🐄,🦌,🐪,🐫,🐘,🦏,🦍,🐎,🐖,🐐,🐏,🐑,🐕,🐩,🐈,🐓,🦃,🕊,🐇,🐁,🐀,🐿,🐾,🐉,🐲,🌵,🎄,🌲,🌳,🌴,🌱,🌿,☘️,🍀,🎍,🎋,🍃,🍂,🍁,🍄,🌾,💐,🌷,🌹,🥀,🌻,🌼,🌸,🌺,🌎,🌍,🌏,🌕,🌖,🌗,🌘,🌑,🌒,🌓,🌔,🌚,🌝,🌞,🌛,🌜,🌙,💫,⭐️,🌟,✨,⚡️,🔥,💥,☄️,☀️,🌤,⛅️,🌥,🌦,🌈,☁️,🌧,⛈,🌩,🌨,☃️,⛄️,❄️,🌬,💨,🌪,🌫,🌊,💧,💦,☔️"
}
},
{
"CVDataTitle" : "EmojiCategory-Foods",
"CVCategoryImage" : "Emoji-FoodsImage",
"CVCategoryData" : {
"CVSkipNullGlyphs" : false,
"Data" : "🍏,🍎,🍐,🍊,🍋,🍌,🍉,🍇,🍓,🍈,🍒,🍑,🍍,🥝,🥑,🍅,🍆,🥒,🥕,🌽,🌶,🥔,🍠,🌰,🥜,🍯,🥐,🍞,🥖,🧀,🥚,🍳,🥓,🥞,🍤,🍗,🍖,🍕,🌭,🍔,🍟,🥙,🌮,🌯,🥗,🥘,🍝,🍜,🍲,🍥,🍣,🍱,🍛,🍚,🍙,🍘,🍢,🍡,🍧,🍨,🍦,🍰,🎂,🍮,🍭,🍬,🍫,🍿,🍩,🍪,🥛,🍼,☕️,🍵,🍶,🍺,🍻,🥂,🍷,🥃,🍸,🍹,🍾,🥄,🍴,🍽"
}
},
{
"CVDataTitle" : "EmojiCategory-Activity",
"CVCategoryImage" : "Emoji-ActivityImage",
"CVCategoryData" : {
"CVSkipNullGlyphs" : false,
"Data" : "⚽️,🏀,🏈,⚾️,🎾,🏐,🏉,🎱,🏓,🏸,🥅,🏒,🏑,🏏,⛳️,🏹,🎣,🥊,🥋,⛸,🎿,⛷,🏂,🏋️‍♀️,🏋️,🤺,🤼‍♀️,🤼‍♂️,🤸‍♀️,🤸‍♂️,⛹️‍♀️,⛹️,🤾‍♀️,🤾‍♂️,🏌️‍♀️,🏌️,🏄‍♀️,🏄,🏊‍♀️,🏊,🤽‍♀️,🤽‍♂️,🚣‍♀️,🚣,🏇,🚴‍♀️,🚴,🚵‍♀️,🚵,🎽,🏅,🎖,🥇,🥈,🥉,🏆,🏵,🎗,🎫,🎟,🎪,🤹‍♀️,🤹‍♂️,🎭,🎨,🎬,🎤,🎧,🎼,🎹,🥁,🎷,🎺,🎸,🎻,🎲,🎯,🎳,🎮,🎰"
}
},
{
"CVDataTitle" : "EmojiCategory-Places",
"CVCategoryImage" : "Emoji-PlacesImage",
"CVCategoryData" : {
"CVSkipNullGlyphs" : false,
"Data" : "🚗,🚕,🚙,🚌,🚎,🏎,🚓,🚑,🚒,🚐,🚚,🚛,🚜,🛴,🚲,🛵,🏍,🚨,🚔,🚍,🚘,🚖,🚡,🚠,🚟,🚃,🚋,🚞,🚝,🚄,🚅,🚈,🚂,🚆,🚇,🚊,🚉,🚁,🛩,✈️,🛫,🛬,🚀,🛰,💺,🛶,⛵️,🛥,🚤,🛳,⛴,🚢,⚓️,🚧,⛽️,🚏,🚦,🚥,🗺,🗿,🗽,⛲️,🗼,🏰,🏯,🏟,🎡,🎢,🎠,⛱,🏖,🏝,⛰,🏔,🗻,🌋,🏜,🏕,⛺️,🛤,🛣,🏗,🏭,🏠,🏡,🏘,🏚,🏢,🏬,🏣,🏤,🏥,🏦,🏨,🏪,🏫,🏩,💒,🏛,⛪️,🕌,🕍,🕋,⛩,🗾,🎑,🏞,🌅,🌄,🌠,🎇,🎆,🌇,🌆,🏙,🌃,🌌,🌉,🌁"
}
},
{
"CVDataTitle" : "EmojiCategory-Objects",
"CVCategoryImage" : "Emoji-ObjectsImage",
"CVCategoryData" : {
"CVSkipNullGlyphs" : false,
"Data" : "⌚️,📱,📲,💻,⌨️,🖥,🖨,🖱,🖲,🕹,🗜,💽,💾,💿,📀,📼,📷,📸,📹,🎥,📽,🎞,📞,☎️,📟,📠,📺,📻,🎙,🎚,🎛,⏱,⏲,⏰,🕰,⌛️,⏳,📡,🔋,🔌,💡,🔦,🕯,🗑,🛢,💸,💵,💴,💶,💷,💰,💳,💎,⚖️,🔧,🔨,⚒,🛠,⛏,🔩,⚙️,⛓,🔫,💣,🔪,🗡,⚔️,🛡,🚬,⚰️,⚱️,🏺,🔮,📿,💈,⚗️,🔭,🔬,🕳,💊,💉,🌡,🚽,🚰,🚿,🛁,🛀,🛎,🔑,🗝,🚪,🛋,🛏,🛌,🖼,🛍,🛒,🎁,🎈,🎏,🎀,🎊,🎉,🎎,🏮,🎐,✉️,📩,📨,📧,💌,📥,📤,📦,🏷,📪,📫,📬,📭,📮,📯,📜,📃,📄,📑,📊,📈,📉,🗒,🗓,📆,📅,📇,🗃,🗳,🗄,📋,📁,📂,🗂,🗞,📰,📓,📔,📒,📕,📗,📘,📙,📚,📖,🔖,🔗,📎,🖇,📐,📏,📌,📍,✂️,🖊,🖋,✒️,🖌,🖍,📝,✏️,🔍,🔎,🔏,🔐,🔒,🔓"
}
},
{
"CVDataTitle" : "EmojiCategory-Symbols",
"CVCategoryImage" : "Emoji-SymbolImage",
"CVCategoryData" : {
"CVSkipNullGlyphs" : false,
"Data" : "❤️,💛,💚,💙,💜,🖤,💔,❣️,💕,💞,💓,💗,💖,💘,💝,💟,☮️,✝️,☪️,🕉,☸️,✡️,🔯,🕎,☯️,☦️,🛐,⛎,♈️,♉️,♊️,♋️,♌️,♍️,♎️,♏️,♐️,♑️,♒️,♓️,🆔,⚛️,🉑,☢️,☣️,📴,📳,🈶,🈚️,🈸,🈺,🈷️,✴️,🆚,💮,🉐,㊙️,㊗️,🈴,🈵,🈹,🈲,🅰️,🅱️,🆎,🆑,🅾️,🆘,❌,⭕️,🛑,⛔️,📛,🚫,💯,💢,♨️,🚷,🚯,🚳,🚱,🔞,📵,🚭,❗️,❕,❓,❔,‼️,⁉️,🔅,🔆,〽️,⚠️,🚸,🔱,⚜️,🔰,♻️,✅,🈯️,💹,❇️,✳️,❎,🌐,💠,Ⓜ️,🌀,💤,🏧,🚾,♿️,🅿️,🈳,🈂️,🛂,🛃,🛄,🛅,🚹,🚺,🚼,🚻,🚮,🎦,📶,🈁,🔣,,🔤,🔡,🔠,🆖,🆗,🆙,🆒,🆕,🆓,0⃣,1⃣,2⃣,3⃣,4⃣,5⃣,6⃣,7⃣,8⃣,9⃣,🔟,🔢,#️⃣,*️⃣,▶️,⏸,⏯,⏹,⏺,⏭,⏮,⏩,⏪,⏫,⏬,◀️,🔼,🔽,➡️,⬅️,⬆️,⬇️,↗️,↘️,↙️,↖️,↕️,↔️,↪️,↩️,⤴️,⤵️,🔀,🔁,🔂,🔄,🔃,🎵,🎶,,,➗,✖️,💲,💱,™️,©️,®️,〰️,➰,➿,🔚,🔙,🔛,🔝,🔜,✔️,☑️,🔘,⚪️,⚫️,🔴,🔵,🔺,🔻,🔸,🔹,🔶,🔷,🔳,🔲,▪️,▫️,◾️,◽️,◼️,◻️,⬛️,⬜️,🔈,🔇,🔉,🔊,🔔,🔕,📣,📢,👁‍🗨,💬,💭,🗯,♠️,♣️,♥️,♦️,🃏,🎴,🀄️,🕐,🕑,🕒,🕓,🕔,🕕,🕖,🕗,🕘,🕙,🕚,🕛,🕜,🕝,🕞,🕟,🕠,🕡,🕢,🕣,🕤,🕥,🕦,🕧"
}
},
{
"CVDataTitle" : "EmojiCategory-Flags",
"CVCategoryImage" : "Emoji-FlagsImage",
"CVCategoryData" : {
"CVSkipNullGlyphs" : false,
"Data" : "🏳️,🏴,🏁,🚩,🏳️‍🌈,🇦🇫,🇦🇽,🇦🇱,🇩🇿,🇦🇸,🇦🇩,🇦🇴,🇦🇮,🇦🇶,🇦🇬,🇦🇷,🇦🇲,🇦🇼,🇦🇺,🇦🇹,🇦🇿,🇧🇸,🇧🇭,🇧🇩,🇧🇧,🇧🇾,🇧🇪,🇧🇿,🇧🇯,🇧🇲,🇧🇹,🇧🇴,🇧🇶,🇧🇦,🇧🇼,🇧🇷,🇮🇴,🇻🇬,🇧🇳,🇧🇬,🇧🇫,🇧🇮,🇨🇻,🇰🇭,🇨🇲,🇨🇦,🇮🇨,🇰🇾,🇨🇫,🇹🇩,🇨🇱,🇨🇳,🇨🇽,🇨🇨,🇨🇴,🇰🇲,🇨🇬,🇨🇩,🇨🇰,🇨🇷,🇨🇮,🇭🇷,🇨🇺,🇨🇼,🇨🇾,🇨🇿,🇩🇰,🇩🇯,🇩🇲,🇩🇴,🇪🇨,🇪🇬,🇸🇻,🇬🇶,🇪🇷,🇪🇪,🇪🇹,🇪🇺,🇫🇰,🇫🇴,🇫🇯,🇫🇮,🇫🇷,🇬🇫,🇵🇫,🇹🇫,🇬🇦,🇬🇲,🇬🇪,🇩🇪,🇬🇭,🇬🇮,🇬🇷,🇬🇱,🇬🇩,🇬🇵,🇬🇺,🇬🇹,🇬🇬,🇬🇳,🇬🇼,🇬🇾,🇭🇹,🇭🇳,🇭🇰,🇭🇺,🇮🇸,🇮🇳,🇮🇩,🇮🇷,🇮🇶,🇮🇪,🇮🇲,🇮🇱,🇮🇹,🇯🇲,🇯🇵,🎌,🇯🇪,🇯🇴,🇰🇿,🇰🇪,🇰🇮,🇽🇰,🇰🇼,🇰🇬,🇱🇦,🇱🇻,🇱🇧,🇱🇸,🇱🇷,🇱🇾,🇱🇮,🇱🇹,🇱🇺,🇲🇴,🇲🇰,🇲🇬,🇲🇼,🇲🇾,🇲🇻,🇲🇱,🇲🇹,🇲🇭,🇲🇶,🇲🇷,🇲🇺,🇾🇹,🇲🇽,🇫🇲,🇲🇩,🇲🇨,🇲🇳,🇲🇪,🇲🇸,🇲🇦,🇲🇿,🇲🇲,🇳🇦,🇳🇷,🇳🇵,🇳🇱,🇳🇨,🇳🇿,🇳🇮,🇳🇪,🇳🇬,🇳🇺,🇳🇫,🇲🇵,🇰🇵,🇳🇴,🇴🇲,🇵🇰,🇵🇼,🇵🇸,🇵🇦,🇵🇬,🇵🇾,🇵🇪,🇵🇭,🇵🇳,🇵🇱,🇵🇹,🇵🇷,🇶🇦,🇷🇪,🇷🇴,🇷🇺,🇷🇼,🇧🇱,🇸🇭,🇰🇳,🇱🇨,🇵🇲,🇻🇨,🇼🇸,🇸🇲,🇸🇹,🇸🇦,🇸🇳,🇷🇸,🇸🇨,🇸🇱,🇸🇬,🇸🇽,🇸🇰,🇸🇮,🇸🇧,🇸🇴,🇿🇦,🇬🇸,🇰🇷,🇸🇸,🇪🇸,🇱🇰,🇸🇩,🇸🇷,🇸🇿,🇸🇪,🇨🇭,🇸🇾,🇹🇼,🇹🇯,🇹🇿,🇹🇭,🇹🇱,🇹🇬,🇹🇰,🇹🇴,🇹🇹,🇹🇳,🇹🇷,🇹🇲,🇹🇨,🇹🇻,🇺🇬,🇺🇦,🇦🇪,🇬🇧,🇺🇸,🇻🇮,🇺🇾,🇺🇿,🇻🇺,🇻🇦,🇻🇪,🇻🇳,🇼🇫,🇪🇭,🇾🇪,🇿🇲,🇿🇼"
}
}
],
"CVViewFontList" : [
"AppleColorEmoji"
]
}

View File

@@ -1,128 +1,44 @@
# frozen_string_literal: true
require 'emoji'
require 'json'
require 'rexml/document'
class UnicodeCharacter
attr_reader :code, :description, :version, :aliases
class CharListener
CHAR_TAG = "char".freeze
def self.parse(io, &block)
REXML::Document.parse_stream(io, self.new(&block))
end
def initialize(&block)
@callback = block
end
def tag_start(name, attributes)
if CHAR_TAG == name
@callback.call(
attributes.fetch("cp") { return },
attributes.fetch("na") { return },
attributes.fetch("age", nil),
)
end
end
def method_missing(*) end
end
def self.index
return @index if defined? @index
@index = {}
File.open(File.expand_path('../ucd.nounihan.grouped.xml', __FILE__)) do |source|
CharListener.parse(source) do |char, desc, age|
uc = UnicodeCharacter.new(char, desc, age)
@index[uc.code] = uc
end
end
@index
end
def self.fetch(code)
code = code.to_s(16).rjust(4, '0') if code.is_a?(Integer)
self.index.fetch(code)
end
def initialize(code, description, version)
@code = code.downcase
@description = description.downcase
@version = version
@aliases = []
@references = []
end
def add_alias(string)
@aliases.concat string.split(/\s*,\s*/)
end
def add_reference(code)
@references << code.downcase
end
end
unless $stdin.tty?
codepoints = STDIN.read.chomp.codepoints.map { |code|
UnicodeCharacter.fetch(code)
}
codepoints.each do |char|
printf "%5s: %s", char.code.upcase, char.description
printf " (%s)", char.version if char.version
puts
end
exit
end
trap(:PIPE) { abort }
normalize = -> (raw) {
raw.sub(Emoji::VARIATION_SELECTOR_16, '')
}
emojidesc = {}
File.open(File.expand_path('../emoji-test.txt', __FILE__)) do |file|
file.each do |line|
next if line =~ /^(#|$)/
line = line.chomp.split('# ', 2)[1]
emoji, description = line.split(' ', 2)
emojidesc[normalize.(emoji)] = description
end
end
require_relative './emoji-test-parser'
items = []
for category, emojis in Emoji.apple_palette
for raw in emojis
emoji = Emoji.find_by_unicode(raw)
unicode_version = emoji ? emoji.unicode_version : ''
ios_version = emoji ? emoji.ios_version : ''
_, categories = EmojiTestParser.parse(File.expand_path("../../vendor/unicode-emoji-test.txt", __FILE__))
seen_existing = {}
unless raw.include?(Emoji::ZERO_WIDTH_JOINER)
uchar = UnicodeCharacter.fetch(raw.codepoints[0])
unicode_version = uchar.version unless uchar.version.nil?
for category in categories
for sub_category in category[:emoji]
for emoji_item in sub_category[:emoji]
raw = emoji_item[:sequences][0]
existing_emoji = Emoji.find_by_unicode(raw) || Emoji.find_by_unicode("#{raw}\u{fe0f}")
existing_emoji = nil if seen_existing.key?(existing_emoji)
seen_existing[existing_emoji] = true
output_item = {
emoji: raw,
description: emoji_item[:description],
category: category[:name],
}
if existing_emoji
output_item.update(
aliases: existing_emoji.aliases,
tags: existing_emoji.tags,
unicode_version: existing_emoji.unicode_version,
ios_version: existing_emoji.ios_version,
)
else
output_item.update(
aliases: [emoji_item[:description].gsub(/\W+/, '_').downcase],
tags: [],
unicode_version: "11.0",
ios_version: "12.1",
)
end
output_item[:skin_tones] = true if emoji_item[:skin_tones]
items << output_item
end
description = emojidesc.fetch(normalize.(raw))
if unicode_version == ''
warn "#{description} (#{raw}) doesn't have Unicode version"
end
if ios_version == ''
ios_version = '10.2'
end
items << {
emoji: raw,
description: description,
category: category,
aliases: emoji ? emoji.aliases : [description.gsub(/\W+/, '_').downcase],
tags: emoji ? emoji.tags : [],
unicode_version: unicode_version,
ios_version: ios_version,
}
end
end
@@ -133,6 +49,8 @@ for emoji in Emoji.all.select(&:custom?)
}
end
trap(:PIPE) { abort }
puts JSON.pretty_generate(items)
.gsub("\n\n", "\n")
.gsub(/,\n( +)/) { "\n%s, " % $1[2..-1] }

121
db/emoji-test-parser.rb Normal file
View File

@@ -0,0 +1,121 @@
# frozen_string_literal: true
# Parser for Unicode `emoji-test.txt`-style data files.
#
# The input is read line by line:
#
# * `# group: NAME` starts a new top-level category;
# * `# subgroup: NAME` starts a new sub-category inside the current category;
# * any other line starting with `#`, and any blank line, is ignored;
# * every remaining line is a data row of the form
#   `CODEPOINTS ; status # GLYPH description`.
#
# Data rows that differ only by VARIATION SELECTOR-16 or by a skin tone
# modifier are folded into a single item keyed by the normalized sequence.
module EmojiTestParser
  VARIATION_SELECTOR_16 = "\u{fe0f}"

  SKIN_TONES = [
    "\u{1F3FB}", # light skin tone
    "\u{1F3FC}", # medium-light skin tone
    "\u{1F3FD}", # medium skin tone
    "\u{1F3FE}", # medium-dark skin tone
    "\u{1F3FF}", # dark skin tone
  ]

  HAIR_MODIFIERS = [
    "\u{1F9B0}", # red-haired
    "\u{1F9B1}", # curly-haired
    "\u{1F9B2}", # bald
    "\u{1F9B3}", # white-haired
  ]

  module_function

  # Opens +filename+ as UTF-8 and parses it with +parse_file+.
  #
  # Returns the same two-element array as +parse_file+.
  def parse(filename)
    File.open(filename, "r:UTF-8") { |file| parse_file(file) }
  end

  # Parses an IO-like object that responds to +each+ (yielding lines).
  #
  # Returns +[emoji_map, categories]+ where:
  #
  # * +emoji_map+ maps a normalized sequence (VARIATION SELECTOR-16 and skin
  #   tone modifiers stripped) to its item hash
  #   (+:sequences+, +:description+, and +:skin_tones+ when applicable);
  # * +categories+ is an array of +{name:, emoji: [sub_category, ...]}+,
  #   each sub-category being +{name:, emoji: [item, ...]}+ holding the very
  #   same item hashes as +emoji_map+.
  #
  # Any error raised while handling a line is re-raised after the offending
  # line has been printed to stderr.
  def parse_file(io)
    groups = []
    items_by_key = {}
    group = nil
    subgroup = nil
    skin_tone_pattern = Regexp.union(SKIN_TONES)

    io.each do |line|
      begin
        if line.start_with?("# group: ")
          group = { name: line.split(":", 2).last.strip, emoji: [] }
          groups << group
          # A new group invalidates the sub-group of the previous one.
          subgroup = nil
          next
        end

        if line.start_with?("# subgroup: ")
          subgroup = { name: line.split(":", 2).last.strip, emoji: [] }
          group[:emoji] << subgroup
          next
        end

        # Remaining comments and blank lines carry no data.
        next if line.start_with?("#") || line.strip.empty?

        fields, comment = line.split("#", 2)
        # The comment reads "GLYPH description"; drop the leading glyph.
        description = comment.strip.split(" ", 2)[1]
        hex_codes = fields.split(";", 2).first
        sequence = hex_codes.strip.split.map(&:hex).pack("U*")

        # Bare hair-style components are not emoji on their own.
        next if HAIR_MODIFIERS.include?(sequence)

        key = sequence
          .gsub(VARIATION_SELECTOR_16, "")
          .gsub(skin_tone_pattern, "")
        item = items_by_key[key]

        # Skin tone rows only flag the base emoji; they are never listed
        # as separate sequences.
        if description.end_with?(" skin tone")
          item[:skin_tones] = true if item
          next
        end

        if item
          item[:sequences] << sequence
        else
          item = { sequences: [sequence], description: description }
          items_by_key[key] = item
          subgroup[:emoji] << item
        end
      rescue
        warn "line: %p" % line
        raise
      end
    end

    [items_by_key, groups]
  end
end
# Command-line entry point, only active when this file is executed directly.
#
# Prints the parsed emoji categories to stdout, either as pretty-printed JSON
# (default) or as a simple HTML listing when `--html` is the first argument.
if $0 == __FILE__
  html_output = false
  if ARGV[0] == "--html"
    ARGV.shift
    html_output = true
  end

  # `EmojiTestParser.parse` requires the data file path; calling it without an
  # argument raised ArgumentError. Use the vendored copy of emoji-test.txt
  # that sits next to this script's directory.
  source_file = File.expand_path("../../vendor/unicode-emoji-test.txt", __FILE__)
  _, categories = EmojiTestParser.parse(source_file)

  # Exit quietly when the reader of our output goes away (e.g. `| head`).
  trap(:PIPE) { abort }

  if html_output
    puts "<!doctype html>"
    puts "<meta charset=utf-8>"
    categories.each do |category|
      puts "<h2>#{category[:name]}</h2>"
      category[:emoji].each do |sub_category|
        puts "<h3>#{sub_category[:name]}</h3>"
        puts "<ol>"
        sub_category[:emoji].each do |char|
          puts "<li>"
          # One <span> per known sequence; the title shows its codepoints.
          char[:sequences].each do |sequence|
            codepoints = sequence.unpack("U*").map { |c| c.to_s(16).upcase }.join(" ")
            printf '<span class=emoji title="%s">%s</span> ', codepoints, sequence
          end
          puts "#{char[:description]}</li>"
        end
        puts "</ol>"
      end
    end
  else
    require "json"
    puts JSON.pretty_generate(categories)
  end
end

View File

File diff suppressed because it is too large Load Diff

View File

@@ -16,7 +16,6 @@ Gem::Specification.new do |s|
"README.md",
"bin/gemoji",
"images/*.png",
"db/Category-Emoji.json",
"db/emoji.json",
"lib/**/*.rb",
]

View File

@@ -11,10 +11,6 @@ module Emoji
File.expand_path('../../db/emoji.json', __FILE__)
end
def apple_palette_file
File.expand_path('../../db/Category-Emoji.json', __FILE__)
end
def images_path
File.expand_path("../../images", __FILE__)
end
@@ -26,17 +22,6 @@ module Emoji
@all
end
def apple_palette
return @apple_palette if defined? @apple_palette
data = File.open(apple_palette_file, 'r:UTF-8') { |f| JSON.parse(f.read) }
@apple_palette = data.fetch('EmojiDataArray').each_with_object({}) do |group, all|
title = group.fetch('CVDataTitle').split('-', 2)[1]
all[title] = group.fetch('CVCategoryData').fetch('Data').split(',').map do |raw|
TEXT_GLYPHS.include?(raw) ? raw + VARIATION_SELECTOR_16 : raw
end
end
end
# Public: Initialize an Emoji::Character instance and yield it to the block.
# The character is added to the `Emoji.all` set.
def create(name)
@@ -75,48 +60,60 @@ module Emoji
private
VARIATION_SELECTOR_16 = "\u{fe0f}".freeze
ZERO_WIDTH_JOINER = "\u{200d}".freeze
FEMALE_SYMBOL = "\u{2640}".freeze
MALE_SYMBOL = "\u{2642}".freeze
# Chars from Apple's palette which must have VARIATION_SELECTOR_16 to render:
TEXT_GLYPHS = ["🈷", "🈂", "🅰", "🅱", "🅾", "©", "®", "", ""].freeze
# Characters which must have VARIATION_SELECTOR_16 to render as color emoji:
TEXT_GLYPHS = [
"\u{1f237}", # Japanese “monthly amount” button
"\u{1f202}", # Japanese “service charge” button
"\u{1f170}", # A button (blood type)
"\u{1f171}", # B button (blood type)
"\u{1f17e}", # O button (blood type)
"\u{00a9}", # copyright
"\u{00ae}", # registered
"\u{2122}", # trade mark
"\u{3030}", # wavy dash
].freeze
private_constant :VARIATION_SELECTOR_16, :ZERO_WIDTH_JOINER,
:FEMALE_SYMBOL, :MALE_SYMBOL, :TEXT_GLYPHS
private_constant :VARIATION_SELECTOR_16, :TEXT_GLYPHS
def parse_data_file
data = File.open(data_file, 'r:UTF-8') { |file| JSON.parse(file.read) }
data.each do |raw_emoji|
self.create(nil) do |emoji|
raw_emoji.fetch('aliases').each { |name| emoji.add_alias(name) }
if raw = raw_emoji['emoji']
unicodes = [raw, raw.sub(VARIATION_SELECTOR_16, '') + VARIATION_SELECTOR_16].uniq
unicodes.each { |uni| emoji.add_unicode_alias(uni) }
end
raw_emoji.fetch('tags').each { |tag| emoji.add_tag(tag) }
data = File.open(data_file, 'r:UTF-8') do |file|
JSON.parse(file.read, symbolize_names: true)
end
emoji.category = raw_emoji['category']
emoji.description = raw_emoji['description']
emoji.unicode_version = raw_emoji['unicode_version']
emoji.ios_version = raw_emoji['ios_version']
append_unicode = lambda do |emoji, raw|
unless TEXT_GLYPHS.include?(raw) || emoji.unicode_aliases.include?(raw)
emoji.add_unicode_alias(raw)
end
end
# Add an explicit gendered variant to emoji that historically imply a gender
data.each do |raw_emoji|
raw = raw_emoji['emoji']
next unless raw
no_gender = raw.sub(/(#{VARIATION_SELECTOR_16})?#{ZERO_WIDTH_JOINER}(#{FEMALE_SYMBOL}|#{MALE_SYMBOL})/o, '')
next unless $2
emoji = find_by_unicode(no_gender)
next unless emoji
edit_emoji(emoji) do
emoji.add_unicode_alias(
$2 == FEMALE_SYMBOL ?
raw.sub(FEMALE_SYMBOL, MALE_SYMBOL) :
raw.sub(MALE_SYMBOL, FEMALE_SYMBOL)
)
self.create(nil) do |emoji|
raw_emoji.fetch(:aliases).each { |name| emoji.add_alias(name) }
if raw = raw_emoji[:emoji]
append_unicode.call(emoji, raw)
start_pos = 0
while found_index = raw.index(VARIATION_SELECTOR_16, start_pos)
# register every variant where one VARIATION_SELECTOR_16 is removed
raw_alternate = raw.dup
raw_alternate[found_index] = ""
append_unicode.call(emoji, raw_alternate)
start_pos = found_index + 1
end
if start_pos > 0
# register a variant with all VARIATION_SELECTOR_16 removed
append_unicode.call(emoji, raw.gsub(VARIATION_SELECTOR_16, ""))
else
# register a variant where VARIATION_SELECTOR_16 is added
append_unicode.call(emoji, "#{raw}#{VARIATION_SELECTOR_16}")
end
end
raw_emoji.fetch(:tags).each { |tag| emoji.add_tag(tag) }
emoji.category = raw_emoji[:category]
emoji.description = raw_emoji[:description]
emoji.unicode_version = raw_emoji[:unicode_version]
emoji.ios_version = raw_emoji[:ios_version]
end
end
end

View File

@@ -1,4 +1,5 @@
require 'test_helper'
require_relative '../db/emoji-test-parser'
class EmojiTest < TestCase
test "fetching all emoji" do
@@ -21,7 +22,8 @@ class EmojiTest < TestCase
end
test "finding emoji by unicode" do
assert_equal "\u{1f604}", Emoji.find_by_unicode("\u{1f604}").raw
emoji = Emoji.find_by_unicode("\u{1f604}") # grinning face with smiling eyes
assert_equal "\u{1f604}", emoji.raw
end
test "finding nonexistent emoji by unicode returns nil" do
@@ -29,23 +31,13 @@ class EmojiTest < TestCase
end
test "unicode_aliases" do
emoji = Emoji.find_by_unicode("\u{2728}")
assert_equal ["\u{2728}", "\u{2728}\u{fe0f}"], emoji.unicode_aliases
end
test "unicode_aliases includes alternate position of VARIATION_SELECTOR_16" do
emoji = Emoji.find_by_unicode("\u{0031}\u{fe0f}\u{20e3}")
assert_equal ["\u{0031}\u{fe0f}\u{20e3}", "\u{0031}\u{20e3}\u{fe0f}"], emoji.unicode_aliases
emoji = Emoji.find_by_unicode("\u{2728}") # sparkles
assert_equal ["2728", "2728-fe0f"], emoji.unicode_aliases.map { |u| Emoji::Character.hex_inspect(u) }
end
test "unicode_aliases doesn't necessarily include form without VARIATION_SELECTOR_16" do
emoji = Emoji.find_by_unicode("\u{00a9}\u{fe0f}")
assert_equal ["\u{00a9}\u{fe0f}"], emoji.unicode_aliases
end
test "unicode_aliases for emoji that have gender variant include form with explicit gender" do
male_spy = Emoji.find_by_unicode("\u{1f575}\u{fe0f}")
assert_equal male_spy, Emoji.find_by_unicode("\u{1f575}\u{fe0f}\u{200d}\u{2642}\u{fe0f}")
emoji = Emoji.find_by_unicode("\u{00a9}\u{fe0f}") # copyright symbol
assert_equal ["00a9-fe0f"], emoji.unicode_aliases.map { |u| Emoji::Character.hex_inspect(u) }
end
test "emojis have tags" do
@@ -55,37 +47,68 @@ class EmojiTest < TestCase
assert emoji.tags.include?('pleased')
end
GENDER_EXCEPTIONS = [
"man_with_gua_pi_mao",
"woman_with_headscarf",
"man_in_tuxedo",
"pregnant_woman",
"isle_of_man",
"blonde_woman",
/^couple(kiss)?_/,
/^family_/,
]
test "emojis have valid names" do
aliases = Emoji.all.flat_map(&:aliases)
gender_mismatch = []
to_another_gender = lambda do |name|
case name
when *GENDER_EXCEPTIONS then name
else
name.sub(/(?<=^|_)(?:wo)?man(?=_|$)/) do |match|
match == "woman" ? "man" : "woman"
end
end
end
invalid = []
alias_count = Hash.new(0)
aliases.each do |name|
alias_count[name] += 1
invalid << name if name !~ /\A[\w+-]+\Z/
another_gender = to_another_gender.call(name)
gender_mismatch << another_gender unless aliases.include?(another_gender)
end
duplicates = alias_count.select { |_, count| count > 1 }.keys
assert_equal [], invalid, "some emoji have invalid names"
assert_equal [], duplicates, "some emoji aliases have duplicates"
assert_equal [], gender_mismatch, "missing gender variants"
end
test "missing or incorrect unicodes" do
source_unicode_emoji = Emoji.apple_palette.values.flatten
missing = source_unicode_emoji - Emoji.all.flat_map(&:unicode_aliases)
emoji_map, _ = EmojiTestParser.parse(File.expand_path("../../vendor/unicode-emoji-test.txt", __FILE__))
source_unicode_emoji = emoji_map.values
supported_sequences = Emoji.all.flat_map(&:unicode_aliases)
text_glyphs = Emoji.const_get(:TEXT_GLYPHS)
missing = 0
message = "Missing or incorrect unicodes:\n"
missing.each do |raw|
message << "#{raw} (#{Emoji::Character.hex_inspect(raw)})"
codepoint = raw.codepoints[0]
if candidate = Emoji.all.detect { |e| !e.custom? && e.raw.codepoints[0] == codepoint }
message << " - might be #{candidate.raw} (#{candidate.hex_inspect}) named #{candidate.name}"
source_unicode_emoji.each do |emoji|
emoji[:sequences].each do |raw|
next if text_glyphs.include?(raw) || Emoji.find_by_unicode(raw)
message << "%s (%s)" % [Emoji::Character.hex_inspect(raw), emoji[:description]]
if found = Emoji.find_by_unicode(raw.gsub("\u{fe0f}", ""))
message << " - could be %s (:%s:)" % [found.hex_inspect, found.name]
end
message << "\n"
missing += 1
end
message << "\n"
end
assert_equal 0, missing.size, message
assert_equal 0, missing, message
end
test "emoji have category" do
@@ -93,7 +116,7 @@ class EmojiTest < TestCase
assert_equal [], missing.map(&:name), "some emoji don't have a category"
emoji = Emoji.find_by_alias('family_man_woman_girl')
assert_equal 'People', emoji.category
assert_equal 'Smileys & People', emoji.category
end
test "emoji have description" do

3793
vendor/unicode-emoji-test.txt vendored Normal file
View File

File diff suppressed because it is too large Load Diff