/*
    T1Subset: A library for subsetting PostScript Type 1 fonts
    Copyright (c) 2020 by Peter Frane Jr.
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see .
    The author may be contacted via the e-mail address pfranejr@hotmail.com
*/
#pragma once
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include "t1-encoding.h"
using namespace std;
#define MAX_NAME_LEN    127
#define MAX_LINE_BUFFER 512
#define MAX_GLYPH_COUNT 256
typedef unsigned char byte_t;
const unsigned short key_eexec = 55665, key_charstring = 4330;
const uint16_t C1 = 52845, C2 = 22719;
class t1subset
{
	char m_line_buffer[MAX_LINE_BUFFER + 1]{ 0 };
	FILE* m_input_file{ nullptr };
	FILE* m_output_file{ nullptr };
	bool m_char_subset[MAX_GLYPH_COUNT]{ false };
	int32_t m_bin_data_offset{ 0 };
	vector m_bin_data;
	byte_t m_discard_bytes[4]{ 0 };
	int32_t m_lenIV{ 0 };
	vector m_glyph_list;
	void clear()
	{
		if (m_input_file)
		{
			fclose(m_input_file);
		}
		if (m_output_file && m_output_file != stdout)
		{
			fclose(m_output_file);
		}
		m_input_file = m_output_file = nullptr;
	}
	void file_size_check()
	{
		long curpos, file_size, binary_data_offset = 0;
		fread(&binary_data_offset, sizeof(binary_data_offset), 1, m_input_file);
		curpos = ftell(m_input_file);
		fseek(m_input_file, 0, SEEK_END);
		file_size = ftell(m_input_file);
		if (file_size <= 512) // trailer size is at least 512 bytes
		{
			throw runtime_error("File is not a valid '.pfb' file. File size can't be less than 512 bytes");
		}
		else if (binary_data_offset >= file_size)
		{
			throw runtime_error("File is not a valid '.pfb' file. Binary data missing");
		}
		fseek(m_input_file, curpos, SEEK_SET);
	}
	void check_file_type()
	{
		byte_t signature[6] = { 0 };
		char buf[20]{ 0 };
		fread(signature, 2, 1, m_input_file);
		if (128 == signature[0] && 1 == signature[1])
		{
			// check the file size to ensure we have enough data and to avoid checking for EOF all over the place
			file_size_check();
			// write a temporary signature
			fwrite(signature, sizeof(signature), 1, m_output_file);
		}
		else
		{
			throw runtime_error("File is not a '.pfb' file");
		}
		fgets(m_line_buffer, MAX_LINE_BUFFER, m_input_file);
		if ('%' == *m_line_buffer)
		{
			const char sgn1[] = "%!PS-AdobeFont";
			const char sgn2[] = "%!FontType1";
			if ((strncmp(m_line_buffer, sgn1, sizeof(sgn1) - 1) == 0) || (strncmp(m_line_buffer, sgn2, sizeof(sgn2) - 1) == 0))
			{
				fputs(m_line_buffer, m_output_file);
			}
			else
			{
				throw runtime_error("Unknown file type");
			}
		}
		else
		{
			throw runtime_error("File is not a '.pfb' file");
		}
	}
	void load_file(const char* font_name)
	{
		fopen_s(&m_input_file, font_name, "rb");
		if (!m_input_file)
		{
			throw runtime_error("Unable to open input font file");
		}
	}
	void create_output_file(const char* output_filename)
	{
		if (!output_filename)
		{
			m_output_file = stdout;
		}
		else
		{
			fopen_s(&m_output_file, output_filename, "wb");
			if (!m_output_file)
			{
				throw runtime_error("Unable to create the output file");
			}
		}
	}
	void precondition(const char* font_name, const byte_t* char_subset, byte_t char_subset_count, const char* output_filename)
	{
		if (!font_name)
		{
			throw runtime_error("Please indicate the filename of the input font");
		}
		else if (!char_subset)
		{
			throw runtime_error("Please indicate the characters/glyphs to subset");
		}
		load_file(font_name);
		create_output_file(output_filename);
		check_file_type();
		for (short i = 0; i < char_subset_count; ++i)
		{
			byte_t ch = char_subset[i];
			m_char_subset[ch] = true;
		}
	}
	bool read_char(byte_t& ch)
	{
		int c = fgetc(m_input_file);
		ch = (byte_t)c;
		return c != EOF;
	}
	void write_comment()
	{
		fputc('%', m_output_file);
		if (fgets(m_line_buffer, MAX_LINE_BUFFER, m_input_file))
		{
			fputs(m_line_buffer, m_output_file);
		}
	}
	bool read_name(char* name, short len)
	{
		return fscanf_s(m_input_file, "%s", name, len) == 1;
	}
	void find_encoding()
	{
		byte_t ch;
		while (read_char(ch))
		{
			if ('%' == ch)
			{
				write_comment();
			}
			else if ('/' == ch)
			{
				m_line_buffer[0] = ch;
				read_name(&m_line_buffer[1], MAX_NAME_LEN);
				fputs(m_line_buffer, m_output_file);
				if (strcmp(m_line_buffer, "/Encoding") == 0)
				{
					fputc(' ', m_output_file);
					return;
				}
			}
			else
			{
				fputc(ch, m_output_file);
			}
		}
		throw runtime_error("Unable to find the /Encoding part");
	}
	void read_encoding_name(const char* name)
	{
		const char** enc = nullptr;
		if (strcmp(name, "StandardEncoding") == 0)
		{
			enc = StandardEncoding;
		}
		else if (strcmp(name, "WinAnsiEncoding") == 0)
		{
			enc = WinAnsiEncoding;
		}
		else if (strcmp(name, "MacRomanEncoding") == 0)
		{
			enc = MacRomanEncoding;
		}
		else
		{
			// m_line_buffer == name; don't use it
			char msg[MAX_LINE_BUFFER];
			sprintf_s(msg, MAX_LINE_BUFFER, "Unsupported encoding: %s", name);
			throw runtime_error(msg);
		}
		// skip the 'def' after the encoding name
		read_name(m_line_buffer, MAX_NAME_LEN);
		fputs("256 array\n 0 1 255 { 1 index exch / .notdef put} for\n", m_output_file);
		if (enc)
		{
			string name(128, ' ');
			name[0] = '/';
			for (uint16_t i = 0; i < MAX_GLYPH_COUNT; ++i)
			{
				if (m_char_subset[i] && enc[i])
				{
					sprintf_s(m_line_buffer, sizeof(m_line_buffer), "dup %u /%s put\n", i, enc[i]);
					fputs(m_line_buffer, m_output_file);
					name.replace(1, string::npos, enc[i]);
					m_glyph_list.push_back(name);
				}
			}
		}
		fputs("readonly def\n", m_output_file);
	}
	void find_for_operator()
	{
		byte_t ch;
		while (read_char(ch))
		{
			// find 'for'
			//" 256 array\n 0 1 255 { 1 index exch / .notdef put} for"
			if ('}' == ch)
			{
				fputc(ch, m_output_file);
				read_name(m_line_buffer, MAX_NAME_LEN);
				if (strcmp(m_line_buffer, "for") == 0)
				{
					fputs(" for\n", m_output_file);
					return;
				}
			}
			// unlikely
			else if ('%' == ch)
			{
				write_comment();
			}
			else
			{
				fputc(ch, m_output_file);
			}
		}
		throw runtime_error("Unable to find the 'for' operator");
	}
	void read_encoding_table()
	{
		find_for_operator();
		while (read_name(m_line_buffer, MAX_NAME_LEN))
		{
			if (strcmp(m_line_buffer, "dup") == 0)
			{
				char glyph_name[MAX_NAME_LEN + 1]{ 0 };
				char opr_name[MAX_NAME_LEN + 1]{ 0 };
				int index;
				if (fscanf_s(m_input_file, " %d %s %s", &index, glyph_name, MAX_NAME_LEN, opr_name, MAX_NAME_LEN) == 3)
				{
					if (index < 0 || index > 255)
					{
						sprintf_s(m_line_buffer, MAX_LINE_BUFFER, "Index out of range: %d", index);
						throw runtime_error(m_line_buffer);
					}
					else if (glyph_name[0] != '/')
					{
						sprintf_s(m_line_buffer, MAX_LINE_BUFFER, "Glyph name must begin with '/': %s", glyph_name);
						throw runtime_error(m_line_buffer);
					}
					else if (strcmp(opr_name, "put") != 0)
					{
						sprintf_s(m_line_buffer, MAX_LINE_BUFFER, "Operator 'put' expected after the glyph name '%s'; operator found: '%s'", glyph_name, opr_name);
						throw runtime_error(m_line_buffer);
					}
					if (m_char_subset[index])
					{
						fprintf(m_output_file, "dup %d %s put\n", index, glyph_name);
						m_glyph_list.push_back(string(glyph_name));
					}
				}
				else
				{
					throw runtime_error("Expected to read a glyph index and its name here");
				}
			}
			else if (strcmp(m_line_buffer, "readonly") == 0)
			{
				fputs(m_line_buffer, m_output_file);
				return;
			}
			else
			{
				throw runtime_error("Expected to read either the 'dup' or the 'readonly' operator here");
			}
		}
		throw runtime_error("Unexpected end of file");
	}
	void get_encoding_type()
	{
		read_name(m_line_buffer, MAX_NAME_LEN);
		if (isdigit((byte_t)*m_line_buffer))
		{
			fputs(m_line_buffer, m_output_file);
			read_encoding_table();
		}
		else if (isalpha((byte_t)*m_line_buffer))
		{
			read_encoding_name(m_line_buffer);
		}
		else
		{
			throw runtime_error("A number or the encoding name is expected after /Encoding");
		}
	}
	void update_offset()
	{
		int32_t new_offset;
		// update the offset at the header
		m_bin_data_offset = (int32_t)ftell(m_output_file);
		fseek(m_output_file, 2, SEEK_SET);
		// offset is relative to the header, not to the start of the file
		new_offset = m_bin_data_offset - 6;
		fwrite(&new_offset, sizeof(new_offset), 1, m_output_file);
		fseek(m_output_file, m_bin_data_offset, SEEK_SET);
	}
	void goto_binary_data()
	{
		byte_t ch;
		while (read_char(ch))
		{
			if (128 == ch)
			{
				ungetc(ch, m_input_file);
				update_offset();
				return;
			}
			else
			{
				fputc(ch, m_output_file);
			}
		}
		throw runtime_error("Unexpected end of file");
	}
	void write_trailer()
	{
		byte_t hdr2[6] = { 128, 2, 0, 0, 0, 0 };
		long curpos = ftell(m_output_file);
		size_t bin_data_size = curpos - m_bin_data_offset - sizeof(hdr2); // size of the encrypted data
		size_t* size = (size_t*)&hdr2[2];
		*size = bin_data_size;
		// write the rest of the non-encrypted data in the input file
		while (!feof(m_input_file))
		{
			size_t size = fread(m_line_buffer, 1, sizeof(m_line_buffer), m_input_file);
			if (size > 0)
			{
				fwrite(m_line_buffer, size, 1, m_output_file);
			}
		}
		// go to the start of the binary data 
		fseek(m_output_file, m_bin_data_offset, SEEK_SET);
		//update the size of header 2
		fwrite(hdr2, sizeof(hdr2), 1, m_output_file);
	}
	char* find_name(const string& name, size_t offset)
	{
		auto it = std::search(m_bin_data.begin() + offset, m_bin_data.end(), std::boyer_moore_searcher(name.begin(), name.end()));
		if (it != m_bin_data.end())
		{
			char* data = (char*)m_bin_data.data();
			int64_t offset = it - m_bin_data.begin();
			return (data + offset);
		}
		else
		{
			return nullptr;
		}
	}
	void read_lenIV()
	{
		char* p = find_name(string("/lenIV"), 0);
		if (p)
		{
			m_lenIV = atoi(p + 6); // 6 = length of '/lenIV'
		}
		else
		{
			m_lenIV = 4;
		}
	}
	byte_t decrypt(byte_t cipher, uint16_t& key)
	{
		if (m_lenIV < 0)
		{
			return cipher;
		}
		else
		{
			byte_t plain = cipher ^ (key >> 8);
			key = ((cipher + key) * C1 + C2);
			return plain;
		}
	}
	byte_t encrypt(byte_t plain, uint16_t& key)
	{
		byte_t cipher = plain ^ (key >> 8);
		key = (cipher + key) * C1 + C2;
		return cipher;
	}
	void decrypt_binary_data()
	{
		byte_t hdr2[6] = { 0, 0, 0, 0, 0, 0 };
		fread(hdr2, sizeof(hdr2), 1, m_input_file);
		if (hdr2[1] != 2)
		{
			sprintf_s(m_line_buffer, MAX_LINE_BUFFER, "Invalid byte in the secondary header: %d. Expected is '2'", hdr2[1]);
			throw runtime_error(m_line_buffer);
		}
		else
		{
			uint16_t key = key_eexec;
			size_t data_size;
			byte_t* data;
			byte_t ch;
			data_size = *((size_t*)&hdr2[2]);
			if (0 == data_size)
			{
				throw runtime_error("Size of encrypted data is 0");
			}
			fwrite(hdr2, sizeof(hdr2), 1, m_output_file);
			for (int i = 0; i < 4; ++i)
			{
				read_char(ch);
				m_discard_bytes[i] = ch;
				// discard the result of these 4 bytes
				decrypt(ch, key);
			}
			// subtract the 4 bytes discarded
			data_size -= 4;
			m_bin_data.resize(data_size);
			data = (byte_t*)m_bin_data.data();
			fread(data, data_size, 1, m_input_file);
			for (size_t i = 0; i < data_size; ++i)
			{
				data[i] = decrypt(data[i], key);
			}
			read_lenIV();
		}
	}
	void encrypt_data(char* start, size_t len, uint16_t& key)
	{
		for (size_t i = 0; i < len; ++i)
		{
			byte_t ch = (char)encrypt((byte_t)start[i], key);
			start[i] = (char)ch;
		}
		fwrite(start, len, 1, m_output_file);
	}
	char* skip_glyph(const char* start, const char* end)
	{
		char* p = (char*)start;
		int len;
		char* endp;
		if (*p != '/')
			return nullptr;
		while (p < end)
		{
			if (isspace((byte_t)*p))
				break;
			++p;
		}
		len = strtol(p, &endp, 10);
		p = endp;
		while (*p && isspace((byte_t)*p))
			++p;
		// skip 'RD' or -|
		p += 2;
		while (*p && isspace((byte_t)*p))
			++p;
		p += len;
		return strchr(p, '\n') + 1;
	}
	char* write_glyph_data(const string& name, const char* start_data, const char* data_end, const char* curpos, uint16_t& key)
	{
		size_t len = name.size();
		char* endp, * p = (char*)curpos;
		while (p)
		{
			p = find_name(name, p - start_data);
			if (p)
			{
				endp = skip_glyph(p, data_end);
				if (isspace((byte_t)p[len])) // ensure this is an exact match
				{
					encrypt_data(p, endp - p, key);
					return endp;
				}
				// skip this glyph and repeat the search
				p = endp;
			}
			else
			{
				break;
			}
		}
		return nullptr;
	}
	void find_end_of_charstring(const char* curpos, const char* data_start, const char* data_end, uint16_t& key)
	{
		size_t offset = curpos - data_start;
		char* p;// = (char*)curpos;
		do
		{
			p = find_name(string("end"), offset);
			if (p)
			{
				char* start = p;
				p += 3;
				while (*p && isspace((byte_t)*p))
					++p;
				if (strncmp(p, "end", 3) == 0)
				{
					encrypt_data(start, data_end - start, key);
					m_bin_data.clear();
					return;
				}
				offset = p - data_start;
			}
			else
			{
				throw runtime_error("Unable to find the text 'end end' in the /CharString data");
			}
		} while (p < data_end);
		throw runtime_error("Unable to find the text 'end end' in the /CharString data");
	}
	void remove_glyphs()
	{
		char* p = find_name(string("/.notdef"), 0);
		if (p)
		{
			char* data = m_bin_data.data();
			size_t size = m_bin_data.size();
			char* data_end = data + size;
			char* endp, * end_of_notdef;
			uint16_t key = 55665;
			// encrypt the 4 random bytes
			encrypt_data((char*)m_discard_bytes, sizeof(m_discard_bytes), key);
			// skip .notdef
			end_of_notdef = endp = skip_glyph(p, data_end);
			// encrypt from the start of the encrypted data to the end of .notdef
			encrypt_data(data, end_of_notdef - data, key);
			p = end_of_notdef;
			for (const string& name : m_glyph_list)
			{
				// start from the current position
				p = write_glyph_data(name, data, data_end, p, key);
				if (!p)
				{
					// start from the beginning
					p = write_glyph_data(name, data, data_end, end_of_notdef, key);
					// if still null
					if (!p)
					{
						// restart from the beginning, for the next glyph
						p = end_of_notdef;
					}
				}
			}
			m_glyph_list.clear();
			find_end_of_charstring(endp, data, data_end, key);
		}
		else
		{
			throw runtime_error("Unable to locate /CharString");
		}
	}
	void do_subsetting()
	{
		find_encoding();
		get_encoding_type();
		goto_binary_data();
		decrypt_binary_data();
		remove_glyphs();
		write_trailer();
	}
public:
	t1subset() : m_bin_data(), m_glyph_list()
	{
	}
	~t1subset() {}
	bool subset_font(const char* font_name, const byte_t* char_subset, byte_t char_subset_count,
		const char* output_filename, string& error)
	{
		bool result = true;
		try
		{
			precondition(font_name, char_subset, char_subset_count, output_filename);
			do_subsetting();
		}
		catch (const exception& ex)
		{
			error = ex.what();
			result = false;
		}
		clear();
		return result;
	}
};