2017-04-23 20:31:08 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import sys
|
2020-01-24 04:18:14 +01:00
|
|
|
import re
|
2022-04-21 19:25:39 +02:00
|
|
|
from unidecode import unidecode
|
2017-04-23 20:31:08 +02:00
|
|
|
from jinja2 import Template
|
|
|
|
|
|
|
|
|
|
|
|
class Emoji:
    """A single emoji record handed to the code-generation templates.

    Attributes:
        code: the input code points rendered as concatenated backslash-U
            escapes, each left-padded with zeros to eight characters.
        shortname: stored exactly as passed in.
        unicodename: stored exactly as passed in.
    """

    def __init__(self, code, shortname, unicodename):
        # `code` is split on single spaces after trimming; each piece becomes
        # one escape (presumably a hex code point such as "1F600" — the
        # pieces are not validated here).
        pieces = code.strip().split(' ')
        escapes = ['\\U' + '{:0>8}'.format(piece) for piece in pieces]
        self.code = ''.join(escapes)
        self.shortname = shortname
        self.unicodename = unicodename
|
2017-04-23 20:31:08 +02:00
|
|
|
|
2024-05-01 19:38:29 +02:00
|
|
|
def generate_provider_class(**kwargs):
    """Print the C++ header that declares ``emoji::Provider`` to stdout.

    Every keyword argument must be a sized collection; only the sum of their
    lengths is used, as the size of the declared ``std::array``.
    """
    total_entries = sum(len(entries) for entries in kwargs.values())
    # NOTE(review): leading whitespace inside this template could not be
    # recovered from the reviewed copy of the file; it only affects the
    # formatting of the generated header.
    header_template = Template('''\
// SPDX-FileCopyrightText: Nheko Contributors
//
// SPDX-License-Identifier: GPL-3.0-or-later

// DO NOT EDIT Provider.h DIRECTLY! EDIT IT IN scripts/emoji_codegen.py AND RUN scripts/codegen.sh!

#pragma once
#include <array>
#include "Emoji.h"

namespace emoji {
class Provider
{
public:
// all emoji for QML purposes
static const std::array<Emoji, {{ entrycount }}> emoji;
};
} // namespace emoji
''')
    print(header_template.render(entrycount=total_entries))
|
2020-05-13 06:35:26 +02:00
|
|
|
def generate_qml_list(**kwargs):
    """Print the C++ definition of ``emoji::Provider::emoji`` to stdout.

    Each keyword argument maps a category name to an iterable of objects
    exposing ``code``, ``shortname`` and ``unicodename``. The capitalised
    category name is emitted both as a comment and as the
    ``emoji::Emoji::Category`` enumerator of every entry in that category.

    The definition is emitted as ``const`` so that it matches the
    ``static const std::array<...> emoji;`` declaration produced by
    generate_provider_class(); a definition without ``const`` has a different
    type from the declaration and does not compile.
    """
    entrycount = sum(len(entries) for entries in kwargs.values())
    # NOTE(review): leading whitespace inside this template could not be
    # recovered from the reviewed copy of the file; it only affects the
    # formatting of the generated source.
    tmpl = Template('''
const std::array<Emoji, {{ entrycount }} > emoji::Provider::emoji = {
{%- for c in kwargs.items() %}
// {{ c[0].capitalize() }}
{%- for e in c[1] %}
Emoji{std::u16string_view(u"{{ e.code }}"), std::u16string_view(u"{{ e.shortname }}"), std::u16string_view(u"{{ e.unicodename }}"), emoji::Emoji::Category::{{ c[0].capitalize() }}},
{%- endfor %}
{%- endfor %}
};
''')
    # The template iterates `kwargs.items()` itself, so the mapping is passed
    # through under the name `kwargs`.
    print(tmpl.render(kwargs=kwargs, entrycount=entrycount))
|
2024-05-01 19:38:29 +02:00
|
|
|
def usage():
    """Write the command-line synopsis to standard output."""
    synopsis = 'usage: emoji_codegen.py {impl|header} /path/to/emoji-test /path/to/shortcodes.txt'
    print(synopsis)
|
2017-04-23 20:31:08 +02:00
|
|
|
if __name__ == '__main__':
    # Command line: <mode> <emoji-test file> <shortcode file>.
    if len(sys.argv) < 4:
        usage()
        sys.exit(1)

    mode = sys.argv[1]
    if mode not in ('impl', 'header'):
        usage()
        sys.exit(1)
    filename = sys.argv[2]
    shortcodefilename = sys.argv[3]

    people = []
    nature = []
    food = []
    activity = []
    travel = []
    objects = []
    symbols = []
    flags = []

    # Maps the "# group:" names found in the emoji-test file onto the output
    # lists. Several groups deliberately share one list.
    categories = {
        'Smileys & Emotion': people,
        'People & Body': people,
        'Animals & Nature': nature,
        'Food & Drink': food,
        'Travel & Places': travel,
        'Activities': activity,
        'Objects': objects,
        'Symbols': symbols,
        'Flags': flags,
        'Component': symbols,
    }

    # (suffix, replacement prefix) pairs, applied in this order to a
    # lower-cased emoji name. More than one rule may fire for the same name.
    suffix_rules = (
        (' (blood type)', ''),
        (': red hair', 'red_haired_'),
        (': curly hair', 'curly_haired_'),
        (': white hair', 'white_haired_'),
        (': bald', 'bald_'),
        (': beard', 'bearded_'),
        (' face', ''),
        (' button', ''),
        (' banknote', ''),
    )
    flag_prefix = 'flag: '

    # Explicit shortcodes, one "<key>:<shortcode>" pair per line. The key is
    # looked up below with the code-point column of the emoji-test file.
    shortcodeDict = {}
    with open(shortcodefilename, 'r', encoding="utf8") as shortcodefile:
        for line in shortcodefile:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line used to crash the unpacking.
                continue
            longname, shortname = line.split(':')
            shortcodeDict[longname] = shortname

    current_category = ''
    with open(filename, 'r', encoding="utf8") as emojifile:
        for line in emojifile:
            if line.startswith('# group:'):
                current_category = line.split(':', 1)[1].strip()

            if line.startswith('#'):
                continue

            # Data lines look like "<code points> ; <qualification> # <text>".
            # Anything else (including blank lines) does not split into
            # exactly three segments and is ignored.
            segments = re.split(r'\s+[#;] ', line.strip())
            if len(segments) != 3:
                continue

            code, qualification, charAndName = segments

            # skip unqualified versions of same unicode
            if qualification != 'fully-qualified':
                continue

            described = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName)
            if described is None:
                raise ValueError('unexpected emoji description {!r} in {}'.format(
                    charAndName, filename))
            name = described.group(2)

            # TODO: Handle skintone modifiers in a sane way. Until then the
            # skin tone variants are kept like any other entry.
            if code in shortcodeDict:
                shortname = shortcodeDict[code]
            else:
                # NOTE(review): the normalisation below is assumed to apply
                # only to names without an explicit shortcode; the reviewed
                # copy of this file had lost its indentation — confirm.
                shortname = name.lower()
                for suffix, prefix in suffix_rules:
                    if shortname.endswith(suffix):
                        shortname = prefix + shortname[:-len(suffix)]

                # "flag: france" -> "france flag". The whole prefix is
                # removed, including the space after the colon.
                if shortname.startswith(flag_prefix):
                    shortname = shortname[len(flag_prefix):] + " flag"
                shortname = shortname.replace("u.s.", "us")
                shortname = shortname.replace("&", "and")

                shortname = shortname.replace("-", "_")
                shortname = re.sub(r'\W', '_', shortname)
                # Drop underscores at both ends. The previous greedy regex
                # only ever removed the leading ones.
                shortname = shortname.strip('_')
                shortname = re.sub(r'_{2,}', '_', shortname)
                shortname = unidecode(shortname)

            categories[current_category].append(Emoji(code, shortname, name))

    # Use xclip to pipe the output to clipboard.
    # e.g ./emoji_codegen.py impl /path/to/emoji-test /path/to/shortcodes.txt | xclip -sel clip
    # alternatively - delete the var from src/emoji/Provider.cpp, and do ./codegen.sh emojis shortcodes >> ../src/emoji/Provider.cpp
    func = generate_qml_list if mode == 'impl' else generate_provider_class
    func(people=people, nature=nature, food=food, activity=activity, travel=travel, objects=objects, symbols=symbols, flags=flags)
|