Finalize new extractor script mapping glyphs to emoji

This commit is contained in:
Mislav Marohnić
2016-09-29 15:16:55 -04:00
parent 72e9749225
commit d0d9227065
4 changed files with 69 additions and 12 deletions

1
.gitignore vendored
View File

@@ -2,4 +2,5 @@
.bundle
.ruby-version
db/NamesList.txt
images/emoji/unicode
vendor/

View File

@@ -32,7 +32,7 @@ namespace :images do
desc %(Extract PNG images from Apple's "Apple Color Emoji.ttf" font)
task :extract do
require 'emoji/extractor'
gem_dir = File.dirname(File.realpath(__FILE__))
Emoji::Extractor.new(64, "#{gem_dir}/images/emoji/unicode").extract!
images_path = File.expand_path("../images/emoji", __FILE__)
Emoji::Extractor.new(64, images_path).extract!
end
end

View File

@@ -1,8 +1,8 @@
Gem::Specification.new do |s|
s.name = "gemoji"
s.version = "2.1.0"
s.summary = "Emoji conversion and image assets"
s.description = "Image assets and character information for emoji."
s.summary = "Emoji library"
s.description = "Character information and metadata for standard and custom emoji."
s.required_ruby_version = '> 1.9'
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
s.files = Dir[
"README.md",
"images/**/*.png",
"images/emoji/*.png",
"db/Category-Emoji.json",
"db/emoji.json",
"lib/**/*.rb",

View File

@@ -1,4 +1,5 @@
require 'emoji'
require 'fileutils'
module Emoji
class Extractor
@@ -11,25 +12,80 @@ module Emoji
@images_path = images_path
end
def extract!
# Iterate over the glyph bitmaps stored in the font file at EMOJI_TTF.
#
# Without a block, returns an Enumerator for this same method. With a block,
# opens the font in binary mode, positions the file at the first offset
# reported by parse_ttc, reads the table directory and glyph index, and hands
# the block on to each_glyph_bitmap.
#
# The block presumably receives (glyph_name, type, binread), as in the
# block signatures visible nearby — confirm against each_glyph_bitmap.
def each(&block)
return to_enum(__method__) unless block_given?
File.open(EMOJI_TTF, 'rb') do |file|
font_offsets = parse_ttc(file)
# Only the first font offset is used.
file.pos = font_offsets[0]
tables = parse_tables(file)
glyph_index = extract_glyph_index(file, tables)
# NOTE(review): the next six lines look like the earlier inline body
# (filter on glyph name, write "<glyph_name>.<type>" under images_path) shown
# next to the forwarding call that follows them — confirm which version is
# current before relying on this span.
each_glyph_bitmap(file, tables, glyph_index) do |glyph_name, type, binread|
if glyph_name =~ /^u[0-9A-F]{4}/ && glyph_name !~ /\.[1-5]($|\.)/
File.open("#{images_path}/#{glyph_name}.#{type}", 'wb') do |png|
png.write binread.call
end
end
# Forward the caller's block unchanged to the bitmap iterator.
each_glyph_bitmap(file, tables, glyph_index, &block)
end
end
# Write one image file per glyph that maps to a known emoji.
#
# Glyphs for which glyph_name_to_emoji returns nothing are skipped. The target
# path is images_path joined with the emoji's image_filename; missing parent
# directories are created before the bitmap bytes are written.
def extract!
  each do |glyph_name, _type, binread|
    emoji = glyph_name_to_emoji(glyph_name)
    next unless emoji

    image_filename = "#{images_path}/#{emoji.image_filename}"
    FileUtils.mkdir_p(File.dirname(image_filename))
    # binread is only invoked for glyphs that are actually written.
    File.open(image_filename, 'wb') do |png|
      png.write(binread.call)
    end
  end
end
private
# Gender suffix letters used in glyph names, mapped to the sign character
# appended to the base emoji.
GENDER_MAP = {
  "M" => "\u{2642}", # male sign
  "W" => "\u{2640}", # female sign
}.freeze

# Member letters used in family/couple/kiss glyph names, mapped to the
# individual person emoji they stand for.
FAMILY_MAP = {
  "B" => "\u{1f466}", # boy
  "G" => "\u{1f467}", # girl
  "M" => "\u{1f468}", # man
  "W" => "\u{1f469}", # woman
}.freeze

# Hex code points (as they appear in glyph names) of the group emoji whose
# glyph names carry a member-letter suffix.
FAMILY = "1F46A".freeze
COUPLE = "1F491".freeze
KISS = "1F48F".freeze
# Resolve a font glyph name to the matching emoji record.
#
# glyph_name - String glyph name such as "u1F600", "u1F468_u1F4BB",
#              "u1F3C3.0.W" or "u1F46A.MWG".
#
# Returns whatever Emoji.find_by_unicode returns for the first candidate
# sequence it recognizes, or nil when the glyph is skipped or nothing matches.
#
# Glyph names containing a ".1" to ".5" segment are skipped (presumably the
# skin tone variants — confirm against the font's naming scheme).
def glyph_name_to_emoji(glyph_name)
  return if glyph_name =~ /\.[1-5]($|\.)/

  zwj = Emoji::ZERO_WIDTH_JOINER
  v16 = Emoji::VARIATION_SELECTOR_16

  group = /^u(#{FAMILY}|#{COUPLE}|#{KISS})\.([#{FAMILY_MAP.keys.join('')}]+)$/.match(glyph_name)
  if group
    base = group[1]
    members = group[2]
    # The default member combination maps to the single group code point
    # instead of a joined sequence of people.
    default_members = base == FAMILY ? "MWB" : "WM"
    if members == default_members
      raw = [base.hex].pack('U')
    else
      middle =
        case base
        when COUPLE then "#{zwj}\u{2764}#{v16}#{zwj}" # heavy black heart
        when KISS then "#{zwj}\u{2764}#{v16}#{zwj}\u{1F48B}#{zwj}" # heart + kiss mark
        else zwj
        end
      raw = members.each_char.map { |letter| FAMILY_MAP.fetch(letter) }.join(middle)
    end
    candidates = [raw]
  else
    # "uXXXX" segments become characters; segments joined with "_" are
    # glued together with a zero width joiner.
    raw = glyph_name.gsub(/(^|_)u([0-9A-F]+)/) do
      lead = Regexp.last_match(1)
      (lead.empty? ? lead : zwj) + [Regexp.last_match(2).hex].pack('U')
    end
    raw.sub!(/\.0\b/, '')
    raw.sub!(/\.(#{GENDER_MAP.keys.join('|')})$/) { v16 + zwj + GENDER_MAP.fetch(Regexp.last_match(1)) }

    # Try the literal sequence first, then progressively looser spellings:
    # without the variation selector, without joiners, and each of those with
    # a trailing variation selector.
    candidates = [raw]
    candidates << raw.sub(v16, '') if raw.include?(v16)
    candidates << raw.gsub(zwj, '') if raw.include?(zwj)
    candidates.concat(candidates.map { |candidate| candidate + v16 })
    # Dropping repeats keeps first-occurrence order, so the winner is unchanged.
    candidates.uniq!
  end

  # Stop at the first hit instead of looking up every remaining candidate.
  candidates.each do |candidate|
    emoji = Emoji.find_by_unicode(candidate)
    return emoji unless emoji.nil?
  end
  nil
end
# https://www.microsoft.com/typography/otspec/otff.htm
def parse_ttc(io)
header_name = io.read(4).unpack('a*')[0]