/* #module IdxSpell "3-001" *********************************************************************** * * * The software was developed at the Monsanto Company and is provided * * "as-is". Monsanto Company and the auther disclaim all warranties * * on the software, including without limitation, all implied warran- * * ties of merchantabilitiy and fitness. * * * * This software does not contain any technical data or information * * that is proprietary in nature. It may be copied, modified, and * * distributed on a non-profit basis and with the inclusion of this * * notice. * * * *********************************************************************** */ /* * Module Name: IdxSpell * * Author: R L Aurbach CR&DS MIS Group 07-May-1986 * * Function: * Build the Spell String for an index term, properly handling LaTeX * syntax and commands. * * Modification History: * * Version Initials Date Description * ------------------------------------------------------------------------ * 1-001 RLA 07-May-1986 Original Code * 1-002 RLA 17-Mar-1987 Fix \verb error * 1-003 RLA 06-Apr-1987 Fix error processing emphasis * 2-004 RLA 16-Apr-1987 Honor the \verb environment in spelling * 2-005 RLA 20-Apr-1987 Enhance at-sign string * 3-001 F.H. 
17-May-1991 converted to portable C
 */

/*
 * Module IdxSpell - Module-Wide Data Description Section
 *
 * Include Files:
 *
 * NOTE(review): in this copy of the file the header names had been stripped
 * from the #include lines (everything between '<' and '>' was missing).  The
 * names below are reconstructed from what the module calls (strlen, strncpy,
 * strcpy, strncmp, isalpha) and from the F_OK / access() remark; confirm
 * them against the original distribution.
 */
#ifdef MSDOS
#include <stdlib.h>
#include <io.h>
#define F_OK            0       /* access(): File exists */
#else
#include <sys/file.h>
extern char *sprintf();
#endif
#include <string.h>
#include <ctype.h>
#include "IdxDef.h"

/*
 * Module Definitions:
 */
#define     TRUE    1
#define     FALSE   0

/*
 * Global Declarations:
 */

/*
 * Static Declarations:
 */
#ifdef MSDOS
void idx_build_spell_string(char *desc);
void idx_replace_string(char *string, int start, int length, char *replace);
void idx_collapse_string(char *string);
void idx_process_accent(char *string);
void idx_process_groups(char *string);
int idx_process_verb(char *string);
void idx_process_backslash(char *string);
void idx_process_emphasis(char *string);
int idx_is_verbatim(char *desc, int *ctx, int *index);
void strupcase(char *in, char *out);
void strprefix(char *str, char prefix);
int strposition(char *str, char *e_array, int st);
int chrposition(char str, char *e_array);
#else
void idx_build_spell_string();
void idx_replace_string();
void idx_collapse_string();
void idx_process_accent();
void idx_process_groups();
int idx_process_verb();
void idx_process_backslash();
void idx_process_emphasis();
int idx_is_verbatim();
void strupcase();
void strprefix();
int strposition();
int chrposition();
#endif

static char *accent_set = "`'^\"~=.uvHtcdb";

#define emphasis_array_count 18
static char *emphasis_array[emphasis_array_count] = {
    "\\rm", "\\em", "\\bf", "\\it", "\\sf", "\\sl", "\\sc", "\\tt",
    "\\normalsize", "\\small", "\\tiny", "\\footnotesize",
    "\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge", "\\cal"};

static char at_sign = '@';
static char *verb = "\\verb";

/*
 * External References:
 */

/*
 * Functions Called:
 */

/*
 * Function Idx_Build_Spell_String - Documentation Section
 *
 * Discussion:
 *      Create a string which can be used to put the token in the right place
 *      in a list of tokens.
The string will be UPPERCASE, will exclude * internal LaTeX commands which are included for emphasis, and will have * whitespace collapsed into single spaces. * * Calling Synopsis: * Call Idx_Build_Spell_String (token, desc) * * Inputs: * token -> is the token to be processed. ASCIZ string passed by * reference. * * Outputs: * desc -> is the "Spell String", properly formulated. Passed by * dynamic string descriptor. * * Return Value: * none * * Global Data: * none * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * none * * Error Conditions: * none * * Algorithm: * A. Copy the token into a dynamic string. * B. Process Accent commands in the string. * C. Process emphasis commands in the string. * D. Remove grouping characters in the string. * E. Remove backslashes in the string. * F. Process \verb commands in the string. * G. Clean up the string. * 1. Convert it to upper case. * 2. Collapse the string -- i.e., remove non-essential whitespace. * 3. If the string begins with a \, remove it. * 4. Make sure that all strings beginning with symbols are in the * right place. 
*
* Special Notes:
*      none
*/

/*
 * Function Idx_Build_Spell_String - Code Section
 *
 * The spell string is built in place: desc holds the index term on entry
 * and the finished spell string on return.  (The documentation above still
 * mentions a separate "token" argument; this version takes only desc.)
 */

void idx_build_spell_string (desc)
char *desc;
{
    /* Strip the LaTeX markup, one kind of construct per pass. */
    idx_process_accent(desc);           /* accent commands            */
    idx_process_emphasis(desc);         /* font and size commands     */
    idx_process_groups(desc);           /* unquoted '{', '}' and '$'  */
    idx_process_backslash(desc);        /* remaining backslashes      */

    /* Unwrap \verb commands one at a time until none is left. */
    for (;;) {
        if (!idx_process_verb(desc))
            break;
    }

    /* Normalise what is left: upper case, single spaces, no edge blanks. */
    strupcase(desc, desc);
    idx_collapse_string(desc);

    /* A backslash that survived at the front is dropped. */
    if (*desc == '\\')
        idx_replace_string(desc, 1, 1, (char *)0);

    /*
     * A first character that sorts after 'Z' gets the at-sign put in front
     * of it.  strprefix() lengthens the string by one character, so desc
     * must have room for it.
     */
    if (*desc > 'Z')
        strprefix(desc, at_sign);
}

/*
 * Function Idx_Replace_String - Documentation Section
 *
 * Discussion:
 *      This function replaces a specified substring in a dynamic string with
 *      a specified replacement string.
 *
 * Calling Synopsis:
 *      Call Idx_Replace_String (string, start, length, replace)
 *
 * Inputs:
 *      string      ->  is the string to be updated.  The string is a dynamic
 *                      string, passed by descriptor.
 *
 *      start       ->  is the starting position in the string.  The first
 *                      position in the string is position 1.  Integer passed
 *                      by value.
 *
 *      length      ->  is the number of characters in the substring to be
 *                      replaced.  Integer passed by value.
 *
 *      replace     ->  is the replacement text.  ASCIZ string passed by
 *                      reference.
 *
 * Outputs:
 *      string      ->  is the updated string.  It is a dynamic string, passed
 *                      by descriptor.
 *
 * Return Value:
 *      none
 *
 * Global Data:
 *      none
 *
 * Files Used:
 *      none
 *
 * Assumed Entry State:
 *      none
 *
 * Normal Exit State:
 *      none
 *
 * Error Conditions:
 *      none
 *
 * Algorithm:
 *      A. Copy the characters up to the first character in the substring to
 *         be replaced into a temporary string.
 *      B. Copy the replacement string into the temporary string.
 *      C. Copy the remainder of the initial string into the temporary string.
 *      D. Update the initial string.
*
* Special Notes:
*      It is assumed that the updated string will be no more than 132
*      characters long.  A longer result is cut off at 132 characters
*      instead of being written past the end of the work buffer.
*/

/*
 * Function Idx_Replace_String - Code Section
 *
 * string  : NUL-terminated text, rebuilt in place.  The caller's buffer must
 *           be able to hold the result (at most 132 characters plus NUL).
 * start   : 1-based position of the first character to replace.  A value
 *           below 1 is treated as 1; a position more than one past the end
 *           of the string leaves the string unchanged.
 * length  : number of characters to replace.  The part of the range that
 *           lies beyond the end of the string is ignored.
 * replace : replacement text, or a null pointer to simply delete the range.
 *           It may point into string itself, because all pieces are copied
 *           into the work buffer before string is rewritten.
 */

void idx_replace_string (string, start, length, replace)
char    *string;
int     start;
int     length;
char    *replace;
{
    char    temp[133];                      /* work buffer for the result    */
    int     limit = (int)sizeof(temp) - 1;  /* most characters temp can hold */
    int     str_len;                        /* length of the input string    */
    int     head;                           /* characters kept before range  */
    int     tail;                           /* index of first char after it  */
    int     new_len;                        /* characters stored in temp     */
    int     n;                              /* size of the piece being added */

    str_len = strlen(string);

    head = (start > 1) ? start - 1 : 0;
    if (head > str_len)
        return;                 /* nothing at that position to replace */

    /* Keep the start of the remainder inside the string. */
    tail = start + length - 1;
    if (tail < head)
        tail = head;
    if (tail > str_len)
        tail = str_len;

    /* Piece 1: everything in front of the replaced range. */
    new_len = (head > limit) ? limit : head;
    if (new_len > 0)
        (void)strncpy(temp, string, new_len);

    /* Piece 2: the replacement text, if any. */
    if (replace != (char *)0) {
        n = strlen(replace);
        if (n > limit - new_len)
            n = limit - new_len;
        if (n > 0) {
            (void)strncpy(&temp[new_len], replace, n);
            new_len += n;
        }
    }

    /* Piece 3: whatever followed the replaced range. */
    n = str_len - tail;
    if (n > limit - new_len)
        n = limit - new_len;
    if (n > 0) {
        (void)strncpy(&temp[new_len], &string[tail], n);
        new_len += n;
    }

    /* strncpy never added a terminator above, so add it here. */
    temp[new_len] = '\0';
    (void)strcpy(string, temp);
}

/*
 * Function Idx_Collapse_String - Documentation Section
 *
 * Discussion:
 *      Replace the string with a new string which has all extraneous
 *      whitespace removed.  That is, there is no whitespace at the beginning
 *      or end of the string and every internal occurrence of whitespace is
 *      collapsed to a single space character.
 *
 * Calling Synopsis:
 *      Call Idx_Collapse_String (string)
 *
 * Inputs:
 *      string      ->  is the input string.  It is a dynamic string, passed by
 *                      descriptor.
 *
 * Outputs:
 *      string      ->  is the output string.  It is a dynamic string, passed by
 *                      descriptor.
 *
 * Return Value:
 *      none
 *
 * Global Data:
 *      none
 *
 * Files Used:
 *      none
 *
 * Assumed Entry State:
 *      none
 *
 * Normal Exit State:
 *      none
 *
 * Error Conditions:
 *      none
 *
 * Algorithm:
 *      A. For all characters in the input string,
 *         1. If the character is whitespace,
 *            a. If no characters have been transferred,
 *               1. Ignore it.
 *            b. If the previous character transferred was a space,
 *               1. Ignore it.
 *            c. Else,
 *               1. Copy a space to the output string.
 *         2. Else,
 *            a.
Copy the character to the output string.
*      B. If the last character in the output string is a space,
*         1. Remove it.
*      C. Update the dynamic string.
*
* Special Notes:
*      The string is collapsed in place, so there is no limit on its
*      length (earlier versions assumed at most 132 characters).
*/

/*
 * Function Idx_Collapse_String - Code Section
 *
 * NOTE(review): the loop of this routine was missing from this copy of the
 * file and has been rebuilt from the algorithm documented above.  The
 * original whitespace test is not known; isspace() is assumed - confirm
 * against the original distribution.
 */

void idx_collapse_string (string)
char *string;
{
    char    copy;       /* Working character                   */
    int     src;        /* Index of the character being read   */
    int     dst;        /* Index of the next character written */

    /*
     * The output never runs ahead of the input (dst <= src), so the result
     * can be written over the start of the same buffer.
     */
    dst = 0;
    for (src = 0; string[src] != '\0'; src++) {
        copy = string[src];
        if (isspace((unsigned char)copy)) {
            if (dst == 0)
                continue;               /* leading whitespace     */
            if (string[dst-1] == ' ')
                continue;               /* already wrote a space  */
            copy = ' ';                 /* any whitespace -> ' '  */
        }
        string[dst++] = copy;
    }

    /* At most one space can be left at the end; drop it. */
    if ((dst > 0) && (string[dst-1] == ' '))
        dst--;
    string[dst] = '\0';
}

/*
 * Function Idx_Process_Accents - Documentation Section
 *
 * Discussion:
 *      This routine processes the spell string to remove all LaTeX commands
 *      which generate accents, without changing the spelling.  For example,
 *      the string "se\~{n}or" is translated to "senor".  Without special
 *      accent processing, the spell-string algorithm would produce "se n or",
 *      which might not appear in the proper place in the alphabetical listing.
 *
 * Calling Synopsis:
 *      call Idx_Process_Accents (string)
 *
 * Inputs:
 *      string      ->  is the spell-string to be processed.  It is a dynamic
 *                      string, passed by descriptor.
 *
 * Outputs:
 *      string      ->  is the resultant spell-string.  It is a dynamic string,
 *                      passed by descriptor.
 *
 * Return Value:
 *      none
 *
 * Global Data:
 *      none
 *
 * Files Used:
 *      none
 *
 * Assumed Entry State:
 *      none
 *
 * Normal Exit State:
 *      none
 *
 * Error Conditions:
 *      none
 *
 * Algorithm:
 *      A. Search the string for an accent pattern:
 *         1. The first character in the pattern is "\".
 *         2. The next character in the pattern is one of the characters in
 *            the accent_set.
 *         3. The next character in the pattern is "{".
 *      B. Mark the pattern and find the terminating "}".
 *      C. Replace the pattern with the string located within the braces.
* * Special Notes: * none */ /* * Function Idx_Process_Accent - Code Section */ void idx_process_accent(string) char *string; { /* * Local Declarations */ char accent_str = '\0'; int start; /* Start of string to replace */ char *replace; /* Replacement string */ int i; /* Loop index */ int ctx = 0; /* context variable */ /* * Module Body */ for (i = 0; i < strlen(string); i++) { if (string[i] != '\\') continue; start = ++i; accent_str = string[i]; if (chrposition(accent_str, accent_set) == 0) continue; if (string[++i] != '{') continue; if (idx_is_verbatim(string, &ctx, &i)) continue; replace = &string[++i]; while (i < strlen(string)) { if (string[i++] != '}') continue; string[i-1] = '\0'; break; } idx_replace_string (string, start, i-start+1, replace); } } /* * Function Idx_Process_Groups - Documentation Section * * Discussion: * Process the characters which delimit groups ('{', '}', and '$') in the * spell string. These characters are removed unless they are quoted * (e.g., "{" is removed but "\{" is not). * * Calling Synopsis: * Call Idx_Process_Groups (String) * * Inputs: * string -> is the spell string. It is a dynamic string passed by * descriptor. * * Outputs: * string -> is the spell string. It is a dynamic string passed by * descriptor. * * Return Value: * none * * Global Data: * none * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * returns with spell string possibly modified. * * Error Conditions: * none * * Algorithm: * A. For all characters in the spell string, * 1. If the character is '{', '}', or '$' and the preceeding * character is not a '\', * a. Remove the character. 
* * Special Notes: * none */ /* * Function Idx_Process_Groups - Code Section */ void idx_process_groups(string) char *string; { /* * Local Declarations */ int i; /* Loop index */ int ctx = 0; /* Context variable */ /* * Module Body */ for (i = 0; i < strlen(string); i++) { if ((string[i] == '{')||(string[i] == '}')||(string[i] == '$')) { if ((i == 0) || (string[i-1] != '\\')) { if (idx_is_verbatim(string, &ctx, &i)) continue; idx_replace_string(string, i+1, 1, (char *)0); i--; } } } } /* * Function Idx_Process_Verb - Documentation Section * * Discussion: * Process LaTeX \verb and \verb* commands in the spell string. This is * done by removing the \verb command from the spell string. * * Calling Synopsis: * status = Idx_Process_Verb (string) * * Inputs: * string -> Spell string. A dynamic string passed by descriptor. * * Outputs: * string -> Spell string. A dynamic string passed by descriptor. * * Return Value: * status -> is a boolean integer passed by value. It indicates * whether or not a \verb command was found in the spell * string. * * Global Data: * none * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * status = TRUE A \verb command was processed in the string. * status = FALSE No \verb command was processed. * * Error Conditions: * none * * Algorithm: * A. Search the string for a \verb command: * 1. \verb [optionally followed by a *] * 2. Next character is not alphabetic * B. Mark the pattern and find the terminating character. * C. Replace the pattern with its argument. 
* * Special Notes: * none */ /* * Function Idx_Process_Verb - Code Section */ int idx_process_verb(string) char *string; { /* * Local Declarations */ int i; /* Loop index */ int start; /* Start of pattern */ char delim; /* Delimiter character */ char *replace; /* Pointer to replacement string */ /* * Module Body */ if ((strlen(string)) < 7) return (FALSE); for (i = 0; i < (strlen(string)-7); i++) { if (strncmp(&string[i], "\\verb", 5) != 0) continue; start = i + 1; i += 5; if (string[i] == '*') i++; delim = string[i++]; if (isalpha(delim) != 0) continue; replace = &string[i]; while (i < strlen(string)) { if (string[i++] != delim) continue; if (i < (strlen(string)-1)) { i = strlen(string); string[i-1] = delim; string[i] = '\0'; i++; } else string[i-1] = '\0'; break; } idx_replace_string (string, start, i-start+1, replace); return (TRUE); } return (FALSE); } /* * Function Idx_Process_Backslash - Documentation Section * * Discussion: * Remove all '\' characters. * * Calling Synopsis: * Call Idx_Process_Backslash (string) * * Inputs: * string -> Spell String. Dynamic string passed by descriptor. * * Outputs: * string -> Spell String. Dynamic string passed by descriptor. * * Return Value: * none * * Global Data: * none * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * return * * Error Conditions: * none * * Algorithm: * A. For all characters in the spell string, * 1. If the character is a '\', * a. Remove it. 
* * Special Notes: * none */ /* * Function Idx_Process_Backslash - Code Section */ void idx_process_backslash(string) char *string; { /* * Local Declarations */ int i; /* Loop index */ int ctx = 0; /* Context variable */ /* * Module Body */ for (i = 0; i < strlen(string); i++) { if (string[i] == '\\') { if (idx_is_verbatim(string, &ctx, &i)) continue; idx_replace_string(string, i+1, 1, (char *)0); i--; } } } /* * Function Idx_Process_Emphasis - Documentation Section * * Discussion: * Remove the normal LaTeX emphasis strings (\rm, \em, \bf, \it, \sf, * \sl, \tt, \normalsize, \small, \tiny, \footnotesize, \large, \Large, * \LARGE, \huge, \Huge, \cal). * * Calling Synopsis: * call Idx_Process_Emphasis (string) * * Inputs: * string -> Spell string. Dynamic string passed by descriptor. * * Outputs:, * string -> Spell string. Dynamic string passed by descriptor. * * Return Value: * none * * Global Data: * none * * Files Used: * none * * Assumed Entry State: * nones * * Normal Exit State: * none * * Error Conditions: * none * * Algorithm: * A. For each of the listed special commands, * 1. For each occurence of the string which is not part of a word, * a. Remove it. 
* * Special Notes: * none */ /* * Function Idx_Process_Emphasis - Code Sections */ void idx_process_emphasis(string) char *string; { /* * Local Declarations */ int i; /* emphasis_array index */ int start; /* starting position */ int length; /* length of substring */ int ctx; /* context variable */ int index; /* string index */ int rpl_start; /* start of string to replace */ /* * Module Body */ for (i = 0; i < emphasis_array_count; i++) { ctx = 0; start = 0; while ((start = strposition(string, emphasis_array[i], start)) != -1) { rpl_start = start+1; index = start; length = strlen(emphasis_array[i]); start += length; /*@@@ if (((start) <= strlen(string)) && (isalpha(string[start-1]) != 0)) continue;*/ if (idx_is_verbatim(string, &ctx, &index)) continue; idx_replace_string(string, rpl_start, length, (char *)0); } } } /* * Function Idx_Is_Verbatim - Documentation Sectione * * Discussion: * Deterimine if the current string index is within a \verb or \verb* * range. * * Calling Synopsis: * boolean = Idx_Is_Verbatim (desc, ctx, index) * * Inputs: * desc -> is the character string descriptor being processed. * * ctx -> is a context variable, passed by ref. Internally, * it is the current starting value for the search. * * index -> is the string index being tested. * * Outputs: * ctx -> is the value of the first character to follow the * \verb or \verb* string. * * index -> is the new value of the string index. If the index) * is not in a verbatim, then the value is unchanged. * If it is in the string, then the index is updated to * the end of the verbatim range. * * Return Value: * boolean -> is a boolean value. * * Global Data: * none * * Files Used: * none * * Assumed Entry State: * none * * Normal Exit State: * boolean = TRUE Input index is within the range of a verbatim. * Index is updated. Ctx may be updated. * boolean = FALSE Input string is not within the range of a verbatim. * Index and Ctx are unchanged. * * Error Conditions: * none * * Algorithm: * A. 
/* (Step A, continued from the documentation comment above.)
 *         Scan the string from its beginning for the next \verb command.
 *         1. If there is none, or index lies before it, return FALSE.
 *      B. Read the delimiter that follows \verb (or \verb*).  If it is
 *         alphabetic or missing, this is not a \verb command (it is
 *         something else, such as \verbatim); resume the scan after it.
 *      C. Find the closing delimiter.  The range ends one character past
 *         it, or past the end of the string if it is never closed.
 *      D. If index lies inside the range,
 *         1. ctx = first character after the range.
 *         2. Index = last character of the range.
 *         3. Return TRUE.
 *      E. Else,
 *         1. Resume the scan after the range.
 *
 * Special Notes:
 *      The value passed in through ctx is no longer used as the starting
 *      point of the search.  Callers edit the string between calls, which
 *      makes a remembered position unreliable, and the earlier scheme
 *      answered FALSE for an index inside a range it had already located
 *      on a previous call.  ctx is still accepted, and is still set when
 *      TRUE is returned, so existing callers need no change.
 */

/*
 * TRUE and FALSE are defined at the top of the module; the guards only keep
 * this section usable on its own.
 */
#ifndef TRUE
#define     TRUE    1
#endif
#ifndef FALSE
#define     FALSE   0
#endif

int strposition();      /* defined below */

/*
 * Function Idx_Is_Verbatim - Code Section
 */

int idx_is_verbatim(desc, ctx, index)
char    *desc;
int     *ctx;
int     *index;
{
    int     len;        /* length of the string                         */
    int     pos;        /* where the search for \verb resumes           */
    int     start;      /* index of the backslash of the \verb found    */
    int     i;          /* working index                                */
    int     end;        /* index of the first character after the range */
    char    delim;      /* delimiter character                          */

    len = strlen(desc);
    pos = 0;
    while ((start = strposition(desc, "\\verb", pos)) != -1) {

        /* Every later \verb starts even further right. */
        if (*index < start)
            break;

        i = start + 5;
        if (desc[i] == '*')
            i++;
        delim = desc[i];

        /* "\verbatim" and the like, or nothing at all after \verb. */
        if ((delim == '\0') || (isalpha((unsigned char)delim) != 0)) {
            pos = i;
            continue;
        }

        /* Advance to the closing delimiter; stop at the terminator if the
           command is never closed. */
        while (i < len)
            if (desc[++i] == delim) break;
        end = i + 1;

        if (*index < end) {
            *ctx = end;
            *index = end - 1;
            return (TRUE);
        }
        pos = end;
    }
    return (FALSE);
}

/*
 * strposition - find a substring.
 *
 * Returns the index (counted from 0) of the first occurrence of e_array in
 * str at or after index st, or -1 if there is none.  An occurrence that
 * ends exactly at the end of str is found as well; the earlier loop bound
 * stopped one position short of it.
 */
int strposition(str, e_array, st)
char    *str;
char    *e_array;
int     st;
{
    int     i;
    int     elen;       /* length of the substring           */
    int     last;       /* last index at which it could start */

    elen = strlen(e_array);
    last = (int)strlen(str) - elen;
    if (st < 0)
        st = 0;
    for (i = st; i <= last; i++)
        if (strncmp(&str[i], e_array, elen) == 0)
            return (i);
    return (-1);
}

/*
 * chrposition - test whether a character belongs to a set.
 *
 * Returns 0 when str occurs in e_array and 1 when it does not.  Note the
 * sense of the result: despite the name, no position is returned, and 0
 * means "found".  This behaviour is kept as it was.
 */
int chrposition(str, e_array)
char    str;
char    *e_array;
{
    int     i;

    for (i = 0; e_array[i] != '\0'; i++)
        if (e_array[i] == str)
            return (0);
    return (1);
}

/*
 * strupcase - convert the letters 'a'..'z' to upper case.
 *
 * Every character of in is written to out, converted if it is a lower-case
 * letter, and out is terminated.  in and out may be the same string.
 * NOTE(review): the loop header was missing from this copy of the file and
 * has been restored; the earlier code stored only the converted letters,
 * which gives the same result when in and out are the same string.
 */
void strupcase(in, out)
char    *in, *out;
{
    int     i;

    for (i = 0; in[i] != '\0'; i++) {
        if ((in[i] >= 'a') && (in[i] <= 'z'))
            out[i] = in[i] - 32;
        else
            out[i] = in[i];
    }
    out[i] = '\0';
}

/*
 * strprefix - insert one character in front of a string.
 *
 * The string grows by one character in place, so the buffer behind str must
 * have room for it.  The earlier loop lost the second character of the
 * string and read and wrote beyond the terminator.
 */
void strprefix(str, prefix)
char    *str;
char    prefix;
{
    int     i;

    /* Shift everything, terminator included, one place to the right,
       working from the end so nothing is overwritten before it is moved. */
    for (i = strlen(str); i >= 0; i--)
        str[i+1] = str[i];
    str[0] = prefix;
}