/* T1Subset: A library for subsetting PostScript Type 1 fonts Copyright (c) 2020 by Peter Frane Jr. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . The author may be contacted via the e-mail address pfranejr@hotmail.com */ #pragma once #include #include #include #include #include #include #include #include #include #include "t1-encoding.h" using namespace std; #define MAX_NAME_LEN 127 #define MAX_LINE_BUFFER 512 #define MAX_GLYPH_COUNT 256 typedef unsigned char byte_t; const unsigned short key_eexec = 55665, key_charstring = 4330; const uint16_t C1 = 52845, C2 = 22719; class t1subset { char m_line_buffer[MAX_LINE_BUFFER + 1]{ 0 }; FILE* m_input_file{ nullptr }; FILE* m_output_file{ nullptr }; bool m_char_subset[MAX_GLYPH_COUNT]{ false }; int32_t m_bin_data_offset{ 0 }; vector m_bin_data; byte_t m_discard_bytes[4]{ 0 }; int32_t m_lenIV{ 0 }; vector m_glyph_list; void clear() { if (m_input_file) { fclose(m_input_file); } if (m_output_file && m_output_file != stdout) { fclose(m_output_file); } m_input_file = m_output_file = nullptr; } void file_size_check() { long curpos, file_size, binary_data_offset = 0; fread(&binary_data_offset, sizeof(binary_data_offset), 1, m_input_file); curpos = ftell(m_input_file); fseek(m_input_file, 0, SEEK_END); file_size = ftell(m_input_file); if (file_size <= 512) // trailer size is at least 512 bytes { throw runtime_error("File is not a valid '.pfb' file. File size can't be less than 512 bytes"); } else if (binary_data_offset >= file_size) { throw runtime_error("File is not a valid '.pfb' file. Binary data missing"); } fseek(m_input_file, curpos, SEEK_SET); } void check_file_type() { byte_t signature[6] = { 0 }; char buf[20]{ 0 }; fread(signature, 2, 1, m_input_file); if (128 == signature[0] && 1 == signature[1]) { // check the file size to ensure we have enough data and to avoid checking for EOF all over the place file_size_check(); // write a temporary signature fwrite(signature, sizeof(signature), 1, m_output_file); } else { throw runtime_error("File is not a '.pfb' file"); } fgets(m_line_buffer, MAX_LINE_BUFFER, m_input_file); if ('%' == *m_line_buffer) { const char sgn1[] = "%!PS-AdobeFont"; const char sgn2[] = "%!FontType1"; if ((strncmp(m_line_buffer, sgn1, sizeof(sgn1) - 1) == 0) || (strncmp(m_line_buffer, sgn2, sizeof(sgn2) - 1) == 0)) { fputs(m_line_buffer, m_output_file); } else { throw runtime_error("Unknown file type"); } } else { throw runtime_error("File is not a '.pfb' file"); } } void load_file(const char* font_name) { fopen_s(&m_input_file, font_name, "rb"); if (!m_input_file) { throw runtime_error("Unable to open input font file"); } } void create_output_file(const char* output_filename) { if (!output_filename) { m_output_file = stdout; } else { fopen_s(&m_output_file, output_filename, "wb"); if (!m_output_file) { throw runtime_error("Unable to create the output file"); } } } void precondition(const char* font_name, const byte_t* char_subset, byte_t char_subset_count, const char* output_filename) { if (!font_name) { throw runtime_error("Please indicate the filename of the input font"); } else if (!char_subset) { throw runtime_error("Please indicate the characters/glyphs to subset"); } load_file(font_name); create_output_file(output_filename); check_file_type(); for (short i = 0; i < char_subset_count; ++i) { byte_t ch = char_subset[i]; m_char_subset[ch] = true; } } bool read_char(byte_t& ch) { int c = fgetc(m_input_file); ch = (byte_t)c; return c != EOF; } void write_comment() { fputc('%', m_output_file); if (fgets(m_line_buffer, MAX_LINE_BUFFER, m_input_file)) { fputs(m_line_buffer, m_output_file); } } bool read_name(char* name, short len) { return fscanf_s(m_input_file, "%s", name, len) == 1; } void find_encoding() { byte_t ch; while (read_char(ch)) { if ('%' == ch) { write_comment(); } else if ('/' == ch) { m_line_buffer[0] = ch; read_name(&m_line_buffer[1], MAX_NAME_LEN); fputs(m_line_buffer, m_output_file); if (strcmp(m_line_buffer, "/Encoding") == 0) { fputc(' ', m_output_file); return; } } else { fputc(ch, m_output_file); } } throw runtime_error("Unable to find the /Encoding part"); } void read_encoding_name(const char* name) { const char** enc = nullptr; if (strcmp(name, "StandardEncoding") == 0) { enc = StandardEncoding; } else if (strcmp(name, "WinAnsiEncoding") == 0) { enc = WinAnsiEncoding; } else if (strcmp(name, "MacRomanEncoding") == 0) { enc = MacRomanEncoding; } else { // m_line_buffer == name; don't use it char msg[MAX_LINE_BUFFER]; sprintf_s(msg, MAX_LINE_BUFFER, "Unsupported encoding: %s", name); throw runtime_error(msg); } // skip the 'def' after the encoding name read_name(m_line_buffer, MAX_NAME_LEN); fputs("256 array\n 0 1 255 { 1 index exch / .notdef put} for\n", m_output_file); if (enc) { string name(128, ' '); name[0] = '/'; for (uint16_t i = 0; i < MAX_GLYPH_COUNT; ++i) { if (m_char_subset[i] && enc[i]) { sprintf_s(m_line_buffer, sizeof(m_line_buffer), "dup %u /%s put\n", i, enc[i]); fputs(m_line_buffer, m_output_file); name.replace(1, string::npos, enc[i]); m_glyph_list.push_back(name); } } } fputs("readonly def\n", m_output_file); } void find_for_operator() { byte_t ch; while (read_char(ch)) { // find 'for' //" 256 array\n 0 1 255 { 1 index exch / .notdef put} for" if ('}' == ch) { fputc(ch, m_output_file); read_name(m_line_buffer, MAX_NAME_LEN); if (strcmp(m_line_buffer, "for") == 0) { fputs(" for\n", m_output_file); return; } } // unlikely else if ('%' == ch) { write_comment(); } else { fputc(ch, m_output_file); } } throw runtime_error("Unable to find the 'for' operator"); } void read_encoding_table() { find_for_operator(); while (read_name(m_line_buffer, MAX_NAME_LEN)) { if (strcmp(m_line_buffer, "dup") == 0) { char glyph_name[MAX_NAME_LEN + 1]{ 0 }; char opr_name[MAX_NAME_LEN + 1]{ 0 }; int index; if (fscanf_s(m_input_file, " %d %s %s", &index, glyph_name, MAX_NAME_LEN, opr_name, MAX_NAME_LEN) == 3) { if (index < 0 || index > 255) { sprintf_s(m_line_buffer, MAX_LINE_BUFFER, "Index out of range: %d", index); throw runtime_error(m_line_buffer); } else if (glyph_name[0] != '/') { sprintf_s(m_line_buffer, MAX_LINE_BUFFER, "Glyph name must begin with '/': %s", glyph_name); throw runtime_error(m_line_buffer); } else if (strcmp(opr_name, "put") != 0) { sprintf_s(m_line_buffer, MAX_LINE_BUFFER, "Operator 'put' expected after the glyph name '%s'; operator found: '%s'", glyph_name, opr_name); throw runtime_error(m_line_buffer); } if (m_char_subset[index]) { fprintf(m_output_file, "dup %d %s put\n", index, glyph_name); m_glyph_list.push_back(string(glyph_name)); } } else { throw runtime_error("Expected to read a glyph index and its name here"); } } else if (strcmp(m_line_buffer, "readonly") == 0) { fputs(m_line_buffer, m_output_file); return; } else { throw runtime_error("Expected to read either the 'dup' or the 'readonly' operator here"); } } throw runtime_error("Unexpected end of file"); } void get_encoding_type() { read_name(m_line_buffer, MAX_NAME_LEN); if (isdigit((byte_t)*m_line_buffer)) { fputs(m_line_buffer, m_output_file); read_encoding_table(); } else if (isalpha((byte_t)*m_line_buffer)) { read_encoding_name(m_line_buffer); } else { throw runtime_error("A number or the encoding name is expected after /Encoding"); } } void update_offset() { int32_t new_offset; // update the offset at the header m_bin_data_offset = (int32_t)ftell(m_output_file); fseek(m_output_file, 2, SEEK_SET); // offset is relative to the header, not to the start of the file new_offset = m_bin_data_offset - 6; fwrite(&new_offset, sizeof(new_offset), 1, m_output_file); fseek(m_output_file, m_bin_data_offset, SEEK_SET); } void goto_binary_data() { byte_t ch; while (read_char(ch)) { if (128 == ch) { ungetc(ch, m_input_file); update_offset(); return; } else { fputc(ch, m_output_file); } } throw runtime_error("Unexpected end of file"); } void write_trailer() { byte_t hdr2[6] = { 128, 2, 0, 0, 0, 0 }; long curpos = ftell(m_output_file); size_t bin_data_size = curpos - m_bin_data_offset - sizeof(hdr2); // size of the encrypted data size_t* size = (size_t*)&hdr2[2]; *size = bin_data_size; // write the rest of the non-encrypted data in the input file while (!feof(m_input_file)) { size_t size = fread(m_line_buffer, 1, sizeof(m_line_buffer), m_input_file); if (size > 0) { fwrite(m_line_buffer, size, 1, m_output_file); } } // go to the start of the binary data fseek(m_output_file, m_bin_data_offset, SEEK_SET); //update the size of header 2 fwrite(hdr2, sizeof(hdr2), 1, m_output_file); } char* find_name(const string& name, size_t offset) { auto it = std::search(m_bin_data.begin() + offset, m_bin_data.end(), std::boyer_moore_searcher(name.begin(), name.end())); if (it != m_bin_data.end()) { char* data = (char*)m_bin_data.data(); int64_t offset = it - m_bin_data.begin(); return (data + offset); } else { return nullptr; } } void read_lenIV() { char* p = find_name(string("/lenIV"), 0); if (p) { m_lenIV = atoi(p + 6); // 6 = length of '/lenIV' } else { m_lenIV = 4; } } byte_t decrypt(byte_t cipher, uint16_t& key) { if (m_lenIV < 0) { return cipher; } else { byte_t plain = cipher ^ (key >> 8); key = ((cipher + key) * C1 + C2); return plain; } } byte_t encrypt(byte_t plain, uint16_t& key) { byte_t cipher = plain ^ (key >> 8); key = (cipher + key) * C1 + C2; return cipher; } void decrypt_binary_data() { byte_t hdr2[6] = { 0, 0, 0, 0, 0, 0 }; fread(hdr2, sizeof(hdr2), 1, m_input_file); if (hdr2[1] != 2) { sprintf_s(m_line_buffer, MAX_LINE_BUFFER, "Invalid byte in the secondary header: %d. Expected is '2'", hdr2[1]); throw runtime_error(m_line_buffer); } else { uint16_t key = key_eexec; size_t data_size; byte_t* data; byte_t ch; data_size = *((size_t*)&hdr2[2]); if (0 == data_size) { throw runtime_error("Size of encrypted data is 0"); } fwrite(hdr2, sizeof(hdr2), 1, m_output_file); for (int i = 0; i < 4; ++i) { read_char(ch); m_discard_bytes[i] = ch; // discard the result of these 4 bytes decrypt(ch, key); } // subtract the 4 bytes discarded data_size -= 4; m_bin_data.resize(data_size); data = (byte_t*)m_bin_data.data(); fread(data, data_size, 1, m_input_file); for (size_t i = 0; i < data_size; ++i) { data[i] = decrypt(data[i], key); } read_lenIV(); } } void encrypt_data(char* start, size_t len, uint16_t& key) { for (size_t i = 0; i < len; ++i) { byte_t ch = (char)encrypt((byte_t)start[i], key); start[i] = (char)ch; } fwrite(start, len, 1, m_output_file); } char* skip_glyph(const char* start, const char* end) { char* p = (char*)start; int len; char* endp; if (*p != '/') return nullptr; while (p < end) { if (isspace((byte_t)*p)) break; ++p; } len = strtol(p, &endp, 10); p = endp; while (*p && isspace((byte_t)*p)) ++p; // skip 'RD' or -| p += 2; while (*p && isspace((byte_t)*p)) ++p; p += len; return strchr(p, '\n') + 1; } char* write_glyph_data(const string& name, const char* start_data, const char* data_end, const char* curpos, uint16_t& key) { size_t len = name.size(); char* endp, * p = (char*)curpos; while (p) { p = find_name(name, p - start_data); if (p) { endp = skip_glyph(p, data_end); if (isspace((byte_t)p[len])) // ensure this is an exact match { encrypt_data(p, endp - p, key); return endp; } // skip this glyph and repeat the search p = endp; } else { break; } } return nullptr; } void find_end_of_charstring(const char* curpos, const char* data_start, const char* data_end, uint16_t& key) { size_t offset = curpos - data_start; char* p;// = (char*)curpos; do { p = find_name(string("end"), offset); if (p) { char* start = p; p += 3; while (*p && isspace((byte_t)*p)) ++p; if (strncmp(p, "end", 3) == 0) { encrypt_data(start, data_end - start, key); m_bin_data.clear(); return; } offset = p - data_start; } else { throw runtime_error("Unable to find the text 'end end' in the /CharString data"); } } while (p < data_end); throw runtime_error("Unable to find the text 'end end' in the /CharString data"); } void remove_glyphs() { char* p = find_name(string("/.notdef"), 0); if (p) { char* data = m_bin_data.data(); size_t size = m_bin_data.size(); char* data_end = data + size; char* endp, * end_of_notdef; uint16_t key = 55665; // encrypt the 4 random bytes encrypt_data((char*)m_discard_bytes, sizeof(m_discard_bytes), key); // skip .notdef end_of_notdef = endp = skip_glyph(p, data_end); // encrypt from the start of the encrypted data to the end of .notdef encrypt_data(data, end_of_notdef - data, key); p = end_of_notdef; for (const string& name : m_glyph_list) { // start from the current position p = write_glyph_data(name, data, data_end, p, key); if (!p) { // start from the beginning p = write_glyph_data(name, data, data_end, end_of_notdef, key); // if still null if (!p) { // restart from the beginning, for the next glyph p = end_of_notdef; } } } m_glyph_list.clear(); find_end_of_charstring(endp, data, data_end, key); } else { throw runtime_error("Unable to locate /CharString"); } } void do_subsetting() { find_encoding(); get_encoding_type(); goto_binary_data(); decrypt_binary_data(); remove_glyphs(); write_trailer(); } public: t1subset() : m_bin_data(), m_glyph_list() { } ~t1subset() {} bool subset_font(const char* font_name, const byte_t* char_subset, byte_t char_subset_count, const char* output_filename, string& error) { bool result = true; try { precondition(font_name, char_subset, char_subset_count, output_filename); do_subsetting(); } catch (const exception& ex) { error = ex.what(); result = false; } clear(); return result; } };