Source code for fontbakery.profiles.shaping

# Copyright 2020 Google Sans Authors
# Copyright 2021 Simon Cozens

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
from difflib import ndiff
from pathlib import Path
from os.path import basename

from fontTools.unicodedata import ot_tag_to_script

from fontbakery.callable import check
from fontbakery.status import FAIL, PASS, SKIP, WARN
from fontbakery.fonts_profile import profile_factory
from fontbakery.message import Message
from fontbakery.section import Section
from fontbakery.utils import exit_with_install_instructions


shaping_basedir = Path("qa", "shaping_tests")

profile_imports = ((".", ("shared_conditions",)),)
profile = profile_factory(default_section=Section("Shaping Checks"))

SHAPING_PROFILE_CHECKS = [
    "com.google.fonts/check/shaping/regression",
    "com.google.fonts/check/shaping/forbidden",
    "com.google.fonts/check/shaping/collides",
    "com.google.fonts/check/dotted_circle",
    "com.google.fonts/check/soft_dotted",
]


[docs]def fix_svg(svg): svg = svg.replace("<svg", '<svg style="height:100px;margin:10px;"') svg = svg.replace("\n", " ") return svg
[docs]def create_report_item( vharfbuzz, message, text=None, buf1=None, buf2=None, note=None, extra_data=None, ): from vharfbuzz import FakeBuffer message = f"* {message}" if text: message += f": {text}" if note: message += f" ({note})" if extra_data: message += f"\n\n {extra_data}" message += "\n\n" serialized_buf1 = None serialized_buf2 = None if buf2: if isinstance(buf2, FakeBuffer): try: serialized_buf2 = vharfbuzz.serialize_buf(buf2) except Exception: # This may fail if the glyphs are not found in the font serialized_buf2 = None buf2 = None # Don't try to draw it either else: serialized_buf2 = buf2 message += f" Expected: {serialized_buf2}\n" if buf1: serialized_buf1 = vharfbuzz.serialize_buf( buf1, glyphsonly=(buf2 and isinstance(buf2, str)) ) message += f" Got : {serialized_buf1}\n" # Report a diff table if serialized_buf1 and serialized_buf2: diff = list(ndiff([serialized_buf1], [serialized_buf2])) if diff and diff[-1][0] == "?": message += f" {diff[-1][1:]}\n" # Now draw it as SVG if buf1: message += f" Got: {fix_svg(vharfbuzz.buf_to_svg(buf1))}" if buf2 and isinstance(buf2, FakeBuffer): try: message += f" Expected: {fix_svg(vharfbuzz.buf_to_svg(buf2))}" except KeyError: pass return message
[docs]def get_from_test_with_default(test, configuration, el, default=None): defaults = configuration.get("defaults", {}) return test.get(el, defaults.get(el, default))
[docs]def get_shaping_parameters(test, configuration): params = {} for el in ["script", "language", "direction", "features", "shaper"]: params[el] = get_from_test_with_default(test, configuration, el) params["variations"] = get_from_test_with_default( test, configuration, "variations", {} ) return params
# This is a very generic "do something with shaping" test runner. # It'll be given concrete meaning later.
[docs]def run_a_set_of_shaping_tests( config, ttFont, run_a_test, test_filter, generate_report, preparation=None ): try: from vharfbuzz import Vharfbuzz filename = Path(ttFont.reader.file.name) vharfbuzz = Vharfbuzz(filename) except ImportError: exit_with_install_instructions() shaping_file_found = False ran_a_test = False extra_data = None if "com.google.fonts/check/shaping" not in config: yield SKIP, "Shaping test directory not defined in configuration file" return shaping_basedir = config["com.google.fonts/check/shaping"].get("test_directory") if not shaping_basedir: yield SKIP, "Shaping test directory not defined in configuration file" return for shaping_file in Path(shaping_basedir).glob("*.json"): shaping_file_found = True try: shaping_input_doc = json.loads(shaping_file.read_text(encoding="utf-8")) except Exception as e: yield FAIL, Message( "shaping-invalid-json", f"{shaping_file}: Invalid JSON: {e}." ) return configuration = shaping_input_doc.get("configuration", {}) try: shaping_tests = shaping_input_doc["tests"] except KeyError: yield FAIL, Message( "shaping-missing-tests", f"{shaping_file}: JSON file must have a 'tests' key.", ) return if preparation: extra_data = preparation(ttFont, configuration) failed_shaping_tests = [] for test in shaping_tests: if not test_filter(test, configuration): continue if "input" not in test: yield FAIL, Message( "shaping-missing-input", f"{shaping_file}: test is missing an input key.", ) return exclude_fonts = test.get("exclude", []) if basename(filename) in exclude_fonts: continue only_fonts = test.get("only") if only_fonts and basename(filename) not in only_fonts: continue run_a_test( filename, vharfbuzz, test, configuration, failed_shaping_tests, extra_data, ) ran_a_test = True if ran_a_test: if not failed_shaping_tests: yield PASS, f"{shaping_file}: No regression detected" else: yield from generate_report( vharfbuzz, shaping_file, failed_shaping_tests ) if not shaping_file_found: yield SKIP, "No test files found." if not ran_a_test: yield SKIP, "No applicable tests ran."
[docs]@check( id="com.google.fonts/check/shaping/regression", rationale=""" Fonts with complex layout rules can benefit from regression tests to ensure that the rules are behaving as designed. This checks runs a shaping test suite and compares expected shaping against actual shaping, reporting any differences. Shaping test suites should be written by the font engineer and referenced in the FontBakery configuration file. For more information about write shaping test files and how to configure FontBakery to read the shaping test suites, see https://simoncozens.github.io/tdd-for-otl/ """, proposal="https://github.com/fonttools/fontbakery/pull/3223", ) def com_google_fonts_check_shaping_regression(config, ttFont): """Check that texts shape as per expectation""" yield from run_a_set_of_shaping_tests( config, ttFont, run_shaping_regression, lambda test, configuration: "expectation" in test, generate_shaping_regression_report, )
[docs]def run_shaping_regression( filename, vharfbuzz, test, configuration, failed_shaping_tests, extra_data ): shaping_text = test["input"] parameters = get_shaping_parameters(test, configuration) output_buf = vharfbuzz.shape(shaping_text, parameters) expectation = test["expectation"] if isinstance(expectation, dict): expectation = expectation.get(filename.name, expectation["default"]) output_serialized = vharfbuzz.serialize_buf( output_buf, glyphsonly="+" not in expectation ) if output_serialized != expectation: failed_shaping_tests.append((test, expectation, output_buf, output_serialized))
[docs]def generate_shaping_regression_report(vharfbuzz, shaping_file, failed_shaping_tests): report_items = [] for test, expected, output_buf, output_serialized in failed_shaping_tests: extra_data = { k: test[k] for k in ["script", "language", "direction", "features", "variations"] if k in test } # Make HTML report here. if "=" in expected: buf2 = vharfbuzz.buf_from_string(expected) else: buf2 = expected report_item = create_report_item( vharfbuzz, "Shaping did not match", text=test["input"], buf1=output_buf, buf2=buf2, note=test.get("note"), extra_data=extra_data, ) report_items.append(report_item) header = f"{shaping_file}: Expected and actual shaping not matching" yield FAIL, Message("shaping-regression", header + "\n" + "\n".join(report_items))
[docs]@check( id="com.google.fonts/check/shaping/forbidden", rationale=""" Fonts with complex layout rules can benefit from regression tests to ensure that the rules are behaving as designed. This checks runs a shaping test suite and reports if any glyphs are generated in the shaping which should not be produced. (For example, .notdef glyphs, visible viramas, etc.) Shaping test suites should be written by the font engineer and referenced in the FontBakery configuration file. For more information about write shaping test files and how to configure FontBakery to read the shaping test suites, see https://simoncozens.github.io/tdd-for-otl/ """, proposal="https://github.com/fonttools/fontbakery/pull/3223", ) def com_google_fonts_check_shaping_forbidden(config, ttFont): """Check that no forbidden glyphs are found while shaping""" yield from run_a_set_of_shaping_tests( config, ttFont, run_forbidden_glyph_test, lambda test, configuration: "forbidden_glyphs" in configuration, forbidden_glyph_test_results, )
[docs]def run_forbidden_glyph_test( filename, vharfbuzz, test, configuration, failed_shaping_tests, extra_data ): from stringbrewer import StringBrewer is_stringbrewer = ( get_from_test_with_default(test, configuration, "input_type", "string") == "pattern" ) parameters = get_shaping_parameters(test, configuration) forbidden_glyphs = configuration["forbidden_glyphs"] if is_stringbrewer: sb = StringBrewer( recipe=test["input"], ingredients=configuration["ingredients"] ) strings = sb.generate_all() else: strings = [test["input"]] for shaping_text in strings: output_buf = vharfbuzz.shape(shaping_text, parameters) output_serialized = vharfbuzz.serialize_buf(output_buf, glyphsonly=True) glyph_names = output_serialized.split("|") for forbidden in forbidden_glyphs: if forbidden in glyph_names: failed_shaping_tests.append((shaping_text, output_buf, forbidden))
[docs]def forbidden_glyph_test_results(vharfbuzz, shaping_file, failed_shaping_tests): report_items = [] for shaping_text, buf, forbidden in failed_shaping_tests: msg = f"{shaping_text} produced '{forbidden}'" report_items.append( create_report_item(vharfbuzz, msg, text=shaping_text, buf1=buf) ) header = f"{shaping_file}: Forbidden glyphs found while shaping" yield FAIL, Message("shaping-forbidden", header + ".\n" + "\n".join(report_items))
[docs]@check( id="com.google.fonts/check/shaping/collides", rationale=""" Fonts with complex layout rules can benefit from regression tests to ensure that the rules are behaving as designed. This checks runs a shaping test suite and reports instances where the glyphs collide in unexpected ways. Shaping test suites should be written by the font engineer and referenced in the FontBakery configuration file. For more information about write shaping test files and how to configure FontBakery to read the shaping test suites, see https://simoncozens.github.io/tdd-for-otl/ """, proposal="https://github.com/fonttools/fontbakery/pull/3223", ) def com_google_fonts_check_shaping_collides(config, ttFont): """Check that no collisions are found while shaping""" yield from run_a_set_of_shaping_tests( config, ttFont, run_collides_glyph_test, lambda test, configuration: "collidoscope" in test or "collidoscope" in configuration, collides_glyph_test_results, setup_glyph_collides, )
[docs]def setup_glyph_collides(ttFont, configuration): try: from collidoscope import Collidoscope except ImportError: exit_with_install_instructions() filename = Path(ttFont.reader.file.name) collidoscope_configuration = configuration.get("collidoscope") if not collidoscope_configuration: return { "bases": True, "marks": True, "faraway": True, "adjacent_clusters": True, } col = Collidoscope( filename, collidoscope_configuration, direction=configuration.get("direction", "LTR"), ) return {"collidoscope": col}
[docs]def run_collides_glyph_test( filename, vharfbuzz, test, configuration, failed_shaping_tests, extra_data ): try: from stringbrewer import StringBrewer except ImportError: exit_with_install_instructions() col = extra_data["collidoscope"] is_stringbrewer = ( get_from_test_with_default(test, configuration, "input_type", "string") == "pattern" ) parameters = get_shaping_parameters(test, configuration) allowed_collisions = get_from_test_with_default( test, configuration, "allowedcollisions", [] ) if is_stringbrewer: sb = StringBrewer( recipe=test["input"], ingredients=configuration["ingredients"] ) strings = sb.generate_all() else: strings = [test["input"]] for shaping_text in strings: output_buf = vharfbuzz.shape(shaping_text, parameters) glyphs = col.get_glyphs(shaping_text, buf=output_buf) collisions = col.has_collisions(glyphs) bumps = [f"{c.glyph1}/{c.glyph2}" for c in collisions] bumps = [b for b in bumps if b not in allowed_collisions] if bumps: draw = fix_svg(col.draw_overlaps(glyphs, collisions)) failed_shaping_tests.append((shaping_text, bumps, draw, output_buf))
[docs]def collides_glyph_test_results(vharfbuzz, shaping_file, failed_shaping_tests): report_items = [] seen_bumps = {} for shaping_text, bumps, draw, buf in failed_shaping_tests: # Make HTML report here. if tuple(bumps) in seen_bumps: continue seen_bumps[tuple(bumps)] = True report_item = create_report_item( vharfbuzz, f"{',' .join(bumps)} collision found in" f" e.g. <span class='tf'>{shaping_text}</span> <div>{draw}</div>", buf1=buf, ) report_items.append(report_item) header = ( f"{shaping_file}: {len(failed_shaping_tests)} collisions found while shaping" ) yield FAIL, Message("shaping-collides", header + ".\n" + "\n".join(report_items))
[docs]def is_complex_shaper_font(ttFont): try: from ufo2ft.constants import INDIC_SCRIPTS, USE_SCRIPTS except ImportError: exit_with_install_instructions() for table in ["GSUB", "GPOS"]: if table not in ttFont: continue if not ttFont[table].table.ScriptList: continue for rec in ttFont[table].table.ScriptList.ScriptRecord: script = ot_tag_to_script(rec.ScriptTag) if script in USE_SCRIPTS or script in INDIC_SCRIPTS: return True if script in ["Khmr", "Mymr", "Hang"]: return True return False
[docs]@check( id="com.google.fonts/check/dotted_circle", conditions=["is_ttf"], severity=3, rationale=""" The dotted circle character (U+25CC) is inserted by shaping engines before mark glyphs which do not have an associated base, especially in the context of broken syllabic clusters. For fonts containing combining marks, it is recommended that the dotted circle character be included so that these isolated marks can be displayed properly; for fonts supporting complex scripts, this should be considered mandatory. Additionally, when a dotted circle glyph is present, it should be able to display all marks correctly, meaning that it should contain anchors for all attaching marks. """, proposal="https://github.com/fonttools/fontbakery/issues/3600", ) def com_google_fonts_check_dotted_circle(ttFont, config): """Ensure dotted circle glyph is present and can attach marks.""" from fontbakery.utils import bullet_list, iterate_lookup_list_with_extensions mark_glyphs = [] if ( "GDEF" in ttFont and hasattr(ttFont["GDEF"].table, "GlyphClassDef") and hasattr(ttFont["GDEF"].table.GlyphClassDef, "classDefs") ): mark_glyphs = [ k for k, v in ttFont["GDEF"].table.GlyphClassDef.classDefs.items() if v == 3 ] # Only check for encoded mark_glyphs = set(mark_glyphs) & set(ttFont.getBestCmap().values()) nonspacing_mark_glyphs = [g for g in mark_glyphs if ttFont["hmtx"][g][0] == 0] if not nonspacing_mark_glyphs: yield SKIP, "Font has no nonspacing mark glyphs." return if 0x25CC not in ttFont.getBestCmap(): # How bad is this? if is_complex_shaper_font(ttFont): yield FAIL, Message( "missing-dotted-circle-complex", "No dotted circle glyph present and font uses a complex shaper", ) else: yield WARN, Message( "missing-dotted-circle", "No dotted circle glyph present" ) return # Check they all attach to dotted circle # if they attach to something else dotted_circle = ttFont.getBestCmap()[0x25CC] attachments = {dotted_circle: []} does_attach = {} def find_mark_base(lookup, attachments): if lookup.LookupType == 4: # Assume all-to-all for st in lookup.SubTable: for base in st.BaseCoverage.glyphs: for mark in st.MarkCoverage.glyphs: attachments.setdefault(base, []).append(mark) does_attach[mark] = True iterate_lookup_list_with_extensions(ttFont, "GPOS", find_mark_base, attachments) unattached = [] for g in nonspacing_mark_glyphs: if g in does_attach and g not in attachments[dotted_circle]: unattached.append(g) if unattached: yield FAIL, Message( "unattached-dotted-circle-marks", "The following glyphs could not be attached to the dotted circle glyph:\n\n" f"{bullet_list(config, sorted(unattached))}", ) else: yield PASS, "All marks were anchored to dotted circle"
[docs]@check( id="com.google.fonts/check/soft_dotted", severity=3, rationale=""" An accent placed on characters with a "soft dot", like i or j, causes the dot to disappear. An explicit dot above can be added where required. See "Diacritics on i and j" in Section 7.1, "Latin" in The Unicode Standard. Characters with the Soft_Dotted property are listed in https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt See also: https://googlefonts.github.io/gf-guide/diacritics.html#soft-dotted-glyphs """, proposal="https://github.com/fonttools/fontbakery/issues/4059", ) def com_google_fonts_check_soft_dotted(ttFont): """Ensure soft_dotted characters lose their dot when combined with marks that replace the dot.""" try: from vharfbuzz import Vharfbuzz except ImportError: exit_with_install_instructions() import itertools from beziers.path import BezierPath from fontTools import unicodedata cmap = ttFont["cmap"].getBestCmap() # Soft dotted strings know to be used in orthographies. ortho_soft_dotted_strings = set( "i̋ i̍ i᷆ i᷇ i̓ i̊ i̐ ɨ́ ɨ̀ ɨ̂ ɨ̋ ɨ̏ ɨ̌ ɨ̄ ɨ̃ ɨ̈ ɨ̧́ ɨ̧̀ ɨ̧̂ ɨ̧̌ ɨ̱́ ɨ̱̀ ɨ̱̈ " "į́ į̀ į̂ į̄ į̄́ į̄̀ į̄̂ į̄̌ į̃ į̌ ị́ ị̀ ị̂ ị̄ ị̃ ḭ́ ḭ̀ ḭ̄ j́ j̀ j̄ j̑ j̃ " "j̈ і́".split() ) # Characters with Soft_Dotted property in Unicode. soft_dotted_chars = set( ord(c) for c in "iⅈ𝐢𝑖𝒊𝒾𝓲𝔦𝕚𝖎𝗂𝗶𝘪𝙞𝚒ⁱᵢįịḭɨᶤ𝼚ᶖjⅉ𝐣𝑗𝒋𝒿𝓳𝔧𝕛𝖏𝗃𝗷𝘫𝙟𝚓ʲⱼɉʝᶨϳіј" ) & set(cmap.keys()) # Only check above marks used with Latin, Greek, Cyrillic scripts. mark_above_chars = set( ( c for c in cmap.keys() if unicodedata.combining(chr(c)) == 230 and unicodedata.block(chr(c)).startswith( ("Combining Diacritical Marks", "Cyrillic") ) ) ) # Only check non above marks used with Latin, Grek, Cyrillic scripts # that are reordered before the above marks mark_non_above_chars = set( c for c in cmap.keys() if unicodedata.combining(chr(c)) < 230 and unicodedata.block(chr(c)).startswith("Combining Diacritical Marks") ) # Skip when no characters to test with if not soft_dotted_chars or not mark_above_chars: yield SKIP, "Font has no soft dotted characters or no mark above characters." return # Collect outlines to skip fonts where i and dotlessi are the same, # or i and I are the same. outlines_dict = { codepoint: BezierPath.fromFonttoolsGlyph(ttFont, glyphname) for codepoint, glyphname in cmap.items() if codepoint in [ord("i"), ord("I"), ord("ı")] } unclear = False if ord("i") in cmap.keys() and ord("I") in cmap.keys(): if len(outlines_dict[ord("i")]) == len(outlines_dict[ord("I")]): unclear = True if not unclear and ord("i") in cmap.keys() and ord("ı") in cmap.keys(): if len(outlines_dict[ord("i")]) == len(outlines_dict[ord("ı")]): unclear = True if unclear: yield SKIP, ( "It is not clear if the soft dotted characters have glyphs with dots." ) return # Use harfbuzz to check if soft dotted glyphs are substituted filename = ttFont.reader.file.name vharfbuzz = Vharfbuzz(filename) fail_unchanged_strings = [] warn_unchanged_strings = [] for sequence in sorted( itertools.product( soft_dotted_chars, # add "" to add cases without non above marks mark_non_above_chars.union(set((0,))), mark_above_chars, ) ): soft, non_above, above = sequence if non_above: unchanged = f"{cmap[soft]}|{cmap[non_above]}|{cmap[above]}" text = f"{chr(soft)}{chr(non_above)}{chr(above)}" else: unchanged = f"{cmap[soft]}|{cmap[above]}" text = f"{chr(soft)}{chr(above)}" # Only check a few strings that we WARN about. if text not in ortho_soft_dotted_strings and len(warn_unchanged_strings) >= 20: continue buf = vharfbuzz.shape(text) output = vharfbuzz.serialize_buf(buf, glyphsonly=True) if output == unchanged: if text in ortho_soft_dotted_strings: fail_unchanged_strings.append(text) else: warn_unchanged_strings.append(text) message = "" if fail_unchanged_strings: fail_unchanged_strings = " ".join(fail_unchanged_strings) message += ( f"The dot of soft dotted characters used in orthographies" f" _must_ disappear in the following strings: {fail_unchanged_strings}" ) if warn_unchanged_strings: warn_unchanged_strings = " ".join(warn_unchanged_strings) if message: message += "\n\n" message += ( f"The dot of soft dotted characters _should_ disappear in" f" other cases, for example: {warn_unchanged_strings}" ) # Calculate font's affected languages for additional information if fail_unchanged_strings or warn_unchanged_strings: from shaperglot.checker import Checker from shaperglot.languages import Languages, gflangs languages = Languages() # Find all affected languages ortho_soft_dotted_langs = set() for c in ortho_soft_dotted_strings: for lang in gflangs: if ( c in gflangs[lang].exemplar_chars.base or c in gflangs[lang].exemplar_chars.auxiliary ): ortho_soft_dotted_langs.add(lang) if ortho_soft_dotted_langs: affected_languages = [] unaffected_languages = [] languages = Languages() checker = Checker(ttFont.reader.file.name) for lang in ortho_soft_dotted_langs: reporter = checker.check(languages[lang]) string = ( f"{gflangs[lang].name} ({gflangs[lang].script}, " f"{'{:,.0f}'.format(gflangs[lang].population)} speakers)" ) if reporter.is_success: affected_languages.append(string) else: unaffected_languages.append(string) if affected_languages: affected_languages = ", ".join(affected_languages) message += ( f"\n\nYour font fully covers the following languages that require" f" the soft-dotted feature: {affected_languages}. " ) if unaffected_languages: unaffected_languages = ", ".join(unaffected_languages) message += ( f"\n\nYour font does *not* cover the following languages that" f" require the soft-dotted feature: {unaffected_languages}." ) if fail_unchanged_strings or warn_unchanged_strings: yield WARN, Message("soft-dotted", message) else: yield PASS, ( "All soft dotted characters seem to lose their dot when combined with" " a mark above." )
profile.auto_register(globals()) profile.test_expected_checks(SHAPING_PROFILE_CHECKS, exclusive=True)