/* This program will solve cryptograms using the database of words ** in DICT_DIR. ** ** NOTE: The dictionary must be formatted in a specific way. The words ** be separated into different files based on length. One letter words ** would go in "len01", 11-letter words in "len11", etc. Each line in ** the file should contain a word. This is so that the program doesn't ** have to read in the entire (potentially huge) dictionary; only the word ** lengths appearing in the cipher need be read in. ** ** wart@ugcs.caltech.edu */ #include #include #include #include #include #include /* Global variables */ FILE *ofptr; Word wordlist[MAXWORDS]; Key keylist[MAXKEYS]; char cipher[MAXCIPHERLENGTH]; char *common_words[MAX_COMMON_WORDS]; int lines_in_cipher = 0, lhist[26], num_ulets=0; int num_words, num_keys=0, vflag=FALSE, Dflag = FALSE, rflag = FALSE; int threshhold=0, vlevel=0; int min_badwords=1, num_common_words=0; void locate_keyword(char *word); char temp_word[MAXLENGTH]; void print_help(void); int main(int argc, char **argv) { FILE *fptr, *dfptr; Word *tword; char filename[MAXLENGTH]; int i, j; int aflag, pflag, dflag, qflag, sflag, kflag; int pword_index = 0; Key key; aflag = pflag = dflag = qflag = sflag = kflag = FALSE; threshhold = DEF_THRESH; /* Make sure that we have a dictionary. */ dfptr = getdict("test"); if(dfptr == NULL){ printf("I'm sorry, but I can't find the dictionary. Please\n"); printf("check that it is online and in the proper location:\n"); printf("%s\n", DICT_DIR); exit(0); } *filename = (char)NULL; /* Deal with the arguments. */ ++argv, --argc; while(argc){ if(**argv != '-'){ printf("Unknown option %s\n", *argv); exit(1); } else{ switch(*++*argv){ /* Set the threshhold for the number of "bad" words allowed. */ case 'h': print_help(); exit(1); case 't': ++argv, --argc; strcpy(temp_word, *argv); --argc, ++argv; sscanf(temp_word, "%d", &threshhold); break; /* What file is the cipher being held captive in? */ case 'f': ++argv, --argc; strcpy(filename, *argv); ++argv, --argc; break; /* Show the depth of the recursion during the solving. This has ** only been handy for determing effective sorting criteria. */ case 'D': ++argv, --argc; Dflag = TRUE; break; /* Don't solve, just check if the word is in the dictionary. */ case 'q': ++argv, --argc; query_word(*argv); ++argv, --argc; qflag = TRUE; break; /* Add the word to the dictionary */ case 'a': ++argv, --argc; add_word(*argv); ++argv, --argc; aflag = TRUE; break; /* Show statistics of the cipher. word order, number of ** dictionary matches, etc. */ case 's': ++argv, --argc; sflag = TRUE; break; /* Verbose flag. This way you can see how quickly it is solving ** the cipher. */ case 'v': ++argv, --argc; strcpy(temp_word, *argv); --argc, ++argv; sscanf(temp_word, "%d", &vlevel); vflag = TRUE; break; /* Remove a word from the dictionary */ case 'd': ++argv, --argc; delete_word(*argv); ++argv, --argc; dflag = TRUE; break; /* Print all possible keywords that can be formed from the ** string of letters/numbers */ case 'k': ++argv, --argc; locate_keyword(*argv); ++argv, --argc; kflag = TRUE; break; /* Print all dictionary matches to a given word. Use with the ** 's' flag to determine the number for that word. */ case 'r': ++argv, --argc; rflag = TRUE; break; case 'p': ++argv, --argc; strcpy(temp_word, *argv); ++argv, --argc; sscanf(temp_word, "%d", &pword_index); pflag = TRUE; break; } } } /* Make sure that we have a cipher. */ if(!(*filename)){ if(!aflag && !dflag && !qflag && !kflag){ printf("What file would you like to use? "); scanf("%s", filename); } else{ /* Exit silently... */ exit(0); } } /* Remove the output file if it exists. */ (void) remove(OFILE); fptr = fopen(filename, "r"); if(!fptr){ printf("Bad cipher filename.\n"); exit(1); } /*Initialize the variables */ num_words = 0; for(i = 0; i < 128; i++){ key.ct[i] = (char) NULL; key.pt[i] = (char) NULL; key.hist[i] = 0; } key.ct[(int)'\n'] = ' '; for(i = 0; i < MAXWORDS; i++){ tword = wordlist+i; tword->word[0] = (char)NULL; tword->length = 0; tword->mult = 0; tword->dict = NULL; tword->dictsize = 0; tword->hvalue = 0; } /* Read in the cipher as an array of lines. We'll use this later ** on when we print the cipher. */ i = 0; while(!feof(fptr)){ cipher[i++] = (char)fgetc(fptr); /* * fgets(cipher[i], MAXLENGTH, fptr); */ } cipher[i-1] = (char)NULL; rewind(fptr); /* Read in the cipher as an array of word structures. Ignore words ** that are one letter long, and those over the max word length. */ while(!feof(fptr)){ /* Get the next word in the cipher */ get_next_word(fptr, temp_word); /* If the word isn't in the array, then add it, otherwise increase ** the multiplicity of the word in the array. */ if(*temp_word && strlen(temp_word) > 1 && strlen(temp_word) <= MAXWORDLEN){ for(i = 0; i < num_words && (strcmp(temp_word, wordlist[i].word) != 0); i++); if(i == num_words){ tword = wordlist+num_words; strcpy(tword->word, temp_word); tword->mult++; tword->length = (int)strlen(temp_word); for(j=0; j < tword->length; j++) { /* Add the letters to the histogram */ key.hist[(int)(temp_word[j])]++; } num_words++; } /* Otherwise just increase the multiplicity of the word. */ else wordlist[i].mult++; } } (void) fclose(fptr); /* * Set the current number of bad words to the number of words * in the cipher */ min_badwords = num_words; /* count the number of unique letters in the histogram */ for(i = 'a'; i <= 'z'; i++) if(key.hist[i]) num_ulets++; /* Check for a non-valid threshhold */ if((threshhold > num_words) ) threshhold = DEF_THRESH; /* Loop through the wordlist. For each word, find the length of the list ** and read in the valid words. ** Get the histogram value for each word, too. */ printf("Reading dictionary..."), fflush(stdout); for(i = 0; i < num_words; i++){ /* Set the hash values for the words */ wordlist[i].hvalue = get_hvalue(wordlist+i, key.hist); /* Open the correct dictionary file */ dfptr = getdict(wordlist[i].word); if(!dfptr){ printf("Could not open dictionary file %s.\n", filename); exit(1); } /* Count the number of words in the dictionary and malloc an array ** that's large enough to hold the whole thing. We won't actually ** use all of it, but this way we won't be caught shorthanded. */ while(!feof(dfptr)){ fgets(temp_word, MAXLENGTH, dfptr); if(!feof(dfptr)) wordlist[i].dictsize++; } rewind(dfptr); /* Read in the valid words now and put them into the previous ** array. */ wordlist[i].dict = (char **)malloc(sizeof(char *) * wordlist[i].dictsize); wordlist[i].dictsize = 0; if(wordlist[i].dict == NULL){ /* Oops! */ printf("Error in allocating memory for the wordlist.\n"); exit(1); } while(!feof(dfptr)){ fgets(temp_word, MAXLENGTH, dfptr); if(!feof(dfptr)){ *strrchr(temp_word, '\n') = (char)NULL; lowerchar(temp_word); if(validate_mapping(temp_word, wordlist[i].word)) if(validate_mapping(wordlist[i].word, temp_word)) wordlist[i].dict[wordlist[i].dictsize++] = strdup(temp_word); } } (void) fclose(dfptr); } putchar('\n'); /* Sort the list according to the hash value for each word */ qsort((void *)wordlist, (size_t)num_words, sizeof(Word), compare); /* Sort it according to the convoluted criteria in the wordsort ** function, but only if we have a threshhold of zero. This sorting ** method works slowly for a threshhold of > 0. */ /* if(threshhold < 0) wordsort(); */ /* Now start solving. */ if(sflag){ printf("Total words: %d\n", num_words); for(i = 0; i < num_words; i++){ printf(" word %2d = %-13s d.length = %5d \tmult. = %d \thvalue = %d\n", i+1, wordlist[i].word, wordlist[i].dictsize, wordlist[i].mult, wordlist[i].hvalue); } } if(pflag){ if(pword_index != 0) for(i = 0; i < wordlist[pword_index-1].dictsize; i++) printf("%s\n", wordlist[pword_index-1].dict[i]); else for(pword_index = 0; pword_index < num_words; pword_index++) for(i = 0; i < wordlist[pword_index].dictsize; i++) printf("%s\n", wordlist[pword_index].dict[i]); exit(0); } printf("threshhold = %d\n", threshhold); printf("Solving"), fflush(stdout); solve_cipher(&key, 0, 0); putchar('\n'); printf("Updating dictionary...\n"), fflush(stdout); /* fptr = fopen("wordlist", "w"); printf("first common word = %s\n", common_words[0]); */ for(i=0; i < num_common_words; i++) { /* printf("\t%d: %s\n", i, common_words[i]); fprintf(fptr, "\t%s\n", common_words[i]); */ move_to_end(common_words[i]); } /* fclose(fptr); */ exit(0); } void print_help(void) { fprintf(stderr, "Usage:\tsolver [-hsD] [-t n] [-q word] [-a word] [-k key]\n\t-[-r ct] [-v n] [-f file]\n"); fprintf(stderr, "\n"); fprintf(stderr, "-h\tprint help information\n"); fprintf(stderr, "-s\tshow statistics of cipher\n"); fprintf(stderr, "-D\tshow recursion depth (for debugging)\n"); fprintf(stderr, "-t n\tbad word threshhold. No more than n bad words will be allowed\n"); fprintf(stderr, "-q word\tCheck to see if word is in the dictionary\n"); fprintf(stderr, "-a word\tAdd word to dictionary if it isn't already there\n"); fprintf(stderr, "-d word\tDelete a word from the dictionary\n"); fprintf(stderr, "-k key\tFind keyword in key\n"); fprintf(stderr, "-r ct\tPrint pattern word matches for ct\n"); fprintf(stderr, "-v n\tverbose flag. print words being matched up to level n\n"); fprintf(stderr, "-f file\tUse the cipher located in file\n"); exit(0); } int get_hvalue(Word *word, int *hist) { int value=0, i; int ulets[128]; char *c; for(i = 0; i < 128; i++) ulets[i] = FALSE; c = word->word; while(*c){ if (isalpha(*c)) { if (ulets[(int)*c] == FALSE) { value += hist[(int)*c]; ulets[(int)*c] = TRUE; } } c++; } /* while(*word++){ if(isalpha(*word)){ if(ulets[*word - 'a'] == FALSE){ value += alph[*word - 'a']hist[*word - 'a']; ulets[*word - 'a'] = TRUE; } } word++; } */ return value; } char * decode_word(Key *key, char *cword, char *dword) { int i=0; while(*cword){ if(isalpha(*cword)) dword[i] = key->ct[(int)*cword]; else dword[i] = *cword; cword++, i++; } dword[i] = (char)NULL; return dword; } void get_next_word(FILE *fptr, char *word){ char c=0; /* Find the next string of letters, not excluding ' and - */ while(!isalpha(c) && !feof(fptr) && c != '\'' && c != '-') c = getc(fptr); if(isalpha(c)) c |= ' '; /* Copy the string of letters into the word */ while(( (isalpha(c)) || c == '\'' || c == '-') && !feof(fptr)){ *word++ = c; c = getc(fptr); if(isalpha(c)) c |= ' '; } *word = (char)NULL; lowerchar(word); } int compare(const void *word1, const void *word2){ if(((Word *)word1)->hvalue < ((Word *)word2)->hvalue) return 1; else if(((Word *)word1)->hvalue > ((Word *)word2)->hvalue) return -1; if(((Word *)word1)->length > ((Word *)word2)->length) return -1; else if(((Word *)word1)->length < ((Word *)word2)->length) return 1; else return 0; /* Commented out because I'm still trying various sorting ** criteria. I don't want to delete this because I might want to ** put it back in later. value1 = (double) word1->hvalue / word1->dictsize; value2 = (double) word2->hvalue / word2->dictsize; if(value1 < value2) return 1; else if(value1 > value2) return -1; else return 0; */ } int validate_mapping(char *word1, char *word2){ char key[26]; int mapping = TRUE, i; for(i = 0; i < 26; i++) key[i] = ' '; if(strlen(word1) == strlen(word2)){ while(*word1 && *word2 && mapping == TRUE){ if(*word1 == *word2 && rflag == FALSE){ if(isalpha(*word1)) mapping = FALSE; } else if(key[*word1 - 'a'] == ' ') key[*word1 - 'a'] = *word2; else if(key[*word1 - 'a'] != *word2) mapping = FALSE; word1++, word2++; } } else mapping = FALSE; return mapping; } int update_key(Key *key, char *ct, char *pt) { int valid = TRUE; char *c=ct, *p=pt; while (*c && *p && valid) { if (isalpha(*c)) { if (key->ct[(int)*c] && key->ct[(int)*c] != *p) valid = FALSE; if (key->pt[(int)*p] && key->pt[(int)*p] != *c) valid = FALSE; if (*c == *p) valid = FALSE; } c++, p++; } if (valid) { while (*ct && *pt) { key->ct[(int)*ct] = *pt; key->pt[(int)*pt] = *ct; ct++, pt++; } } return TRUE; } void locate_keyword(char *word){ register int i, j, pos, wordlen; char *c=word, *d, filename[10], tmp_word[MAXWORDLEN]; FILE *dfptr; int let_used[26], valid; for(i = 0; i < 26; i++) let_used[i] = FALSE; /* First check that the word is made up entirely of either letters ** or digits */ while(*c && *(c+1)){ if( (isalpha(*c) && isalpha(*(c+1))) || (isdigit(*c) && isdigit(*(c+1)))) c++; else{ printf("Invalid word: %s\n", word); exit(0); } } c = word; if(isalpha(*c)){ while(*c) (*(c++) |= ' '); } /* Is this a valid keyword? Check that there are no repeated digits. ** Later we will remove this restriction for the digit-keywords. */ for(c = word,i=1; *c; c++){ if(strchr(c+1, *c) != 0){ printf("Invalid keyword: %s. Character %c repeated.\n", word, *c); i = 0; } if(i==0) exit(1); } /* Is this a numerical key? */ wordlen = strlen(word); if(isdigit(*word)){ /* Only one dictionary file needs to be opened */ dfptr = getdict(word); if(!dfptr){ printf("Could not open dictionary file %s.\n", filename); exit(1); } /* Loop through every word in the dictionary file */ while(!feof(dfptr)){ fgets(tmp_word, MAXLENGTH, dfptr); tmp_word[wordlen] = (char) NULL; /* Check to see if the current word is a valid keyword. ** Print it if it is. */ for(pos=0, valid=TRUE, i = 'a'; valid && (i <= 'z'); i++){ for(j = 0; valid && (j < wordlen); j++){ if(tmp_word[j] == (char) i){ pos++; if((char) pos != word[j]-'0'){ valid = FALSE; } } } } if(valid == TRUE){ printf("%s\n", tmp_word); } } } /* ...or is it a word with repeated letters removed? */ else{ for(i = wordlen; i < MAXWORDLEN; i++){ sprintf(filename, "%s/len%02d", DICT_DIR,i); /* printf("Searching keywords of length %d\n", i); */ dfptr = fopen(filename, "r"); if(!dfptr){ printf("Could not open dictionary file %s.\n", filename); exit(1); } while(!feof(dfptr)){ fgets(tmp_word, MAXLENGTH, dfptr); tmp_word[i] = (char) NULL; for(j = 0; j < 26; j++) let_used[j] = FALSE; for(valid=TRUE,d=tmp_word,c = word; (*c) && (*d) && valid;){ /* Skip this letter if it's already been used. ** If it hasn't been used, then add it to the used list and ** check that it's the same as the next letter in the keyword. */ if((*c) != (*d)){ valid = FALSE; } else{ let_used[(*d)-'a'] = TRUE; if(*c) c++; while(isalpha(*d) && let_used[(*d)-'a']) (d++); } } /* If this was a valid keyword then print it. */ if(valid == TRUE && (*c == *d)){ printf("%s\n", tmp_word); } } } } } void solve_cipher(Key *key, int word_index, int real_words){ register int valid_key=TRUE; register int i, j; char *c=NULL, *p=NULL; Key nkey; /* Can we make enough real words with the remaining words in the ** list? If not, then don't bother trying. Also, if we have ** reached our limit for the number of solutions (MAXKEYS), then ** don't bother going on. */ /* if((threshhold < 0) && ((min_badwords + real_words) < word_index)); printf("%d - %d < %d\n", num_words, real_words, min_badwords); printf("%d / %d\n", word_index, num_words); */ if((threshhold >=0) && ((threshhold + real_words) < word_index)); else if (num_keys >= MAXKEYS); /* Are we at the end of the list now? */ else if(word_index >= num_words){ /* Now that we're at the end of the list, have we made enough real ** words? */ if( (threshhold >=0 && ((num_words - real_words) <= threshhold))) { /* * min_badwords = num_words - real_words; * printf("num_words = %d, real_words = %d\n", num_words, real_words); * printf("word_index = %d\n", word_index); * printf("min_badwords = %d\n", min_badwords); */ /* Has this key been used before? */ valid_key = TRUE; for(i = 0; i < num_keys; i++) for(i=0; i < 128; i++) if (keylist[i].ct[i] != key->ct[i]) valid_key = FALSE; /* if(strcmp(keylist[i], key) == 0) valid_key = FALSE; */ /* If not, then print it. */ if(valid_key && num_keys < MAXKEYS){ if(num_keys < MAXKEYS) for(i=0; i < 128; i++) keylist[num_keys].ct[i] = key->ct[i]; /* * putchar('.'), fflush(stdout); */ printf(".%d", num_words - real_words) , fflush(stdout); if(num_keys == MAXKEYS) printf("MAXKEYS reached. No more solutions will be printed.\n"); /* If the threshhold is zero, then we want to move the word ** to the top of the list so that eventually the most ** common words are analyzed first. */ if(threshhold == 0 || min_badwords == 0){ for(i = 0; i < num_words && num_common_words < MAX_COMMON_WORDS; i++){ decode_word(key, wordlist[i].word, temp_word); common_words[num_common_words++] = strdup(temp_word); } } /* Print the solution to the output file */ ofptr = fopen(OFILE, "a"); fprintf(ofptr, "\n# Valid words: %d/%d", real_words, num_words); /* * Print the K1 keyed alphabet */ fprintf(ofptr, "\n# K1 key = "); for(i='a'; i <= 'z'; i++) if (key->ct[i]) fputc(key->ct[i], ofptr); else fputc(' ', ofptr); /* * Print the alphabet */ fprintf(ofptr, "\n# "); for(i='a'; i <= 'z'; i++) fputc(i, ofptr); /* * Print the K2 keyed alphabet */ fprintf(ofptr, "\n# K2 key = "); for(i='a'; i <= 'z'; i++) if (key->pt[i]) fputc(key->pt[i], ofptr); else fputc(' ', ofptr); /* * Print important information about the cipher */ fprintf(ofptr, "\ntype\taristocrat"); fprintf(ofptr, "\nperiod\t0"); /* * Print the key in the cipher save format */ fprintf(ofptr, "\nkey\tabcdefghijklmnopqrstuvwxyz {"); for(i='a'; i <= 'z'; i++) if (key->ct[i]) fputc(key->ct[i], ofptr); else fputc(' ', ofptr); fputc('}', ofptr); /* * Print the solution */ fprintf(ofptr, "\nplaintext\t"); for(i=0; cipher[i]; i++) { if (cipher[i] == '\n') fputc(' ', ofptr); else if(isalpha(cipher[i]) && key->ct[(int)cipher[i]]) fputc(key->ct[(int)cipher[i]], ofptr); else if(isalpha(cipher[i])) fputc(' ', ofptr); else fputc(cipher[i], ofptr); } fputc('\n', ofptr); /* * Print the cipher */ fprintf(ofptr, "ciphertext\t"); for(i=0; cipher[i]; i++) if (cipher[i] == '\n' || cipher[i] == '\r') fputc(' ', ofptr); else fputc(cipher[i], ofptr); fputc('\n', ofptr); (void) fclose(ofptr); } } } /* Since we're not at the end of the wordlist, try the next word ** in the list */ else{ for(i = wordlist[word_index].dictsize-1; i >= 0; i--){ for(j='a'; j <= 'z'; j++) { nkey.ct[j] = key->ct[j]; nkey.pt[j] = key->pt[j]; } /* * Find out if this word makes a good substitution */ c = wordlist[word_index].word; p = wordlist[word_index].dict[i]; valid_key = TRUE; while (*c && *p && valid_key) { if (isalpha(*c)) { if (nkey.ct[(int)*c] && nkey.ct[(int)*c] != *p) valid_key = FALSE; if (nkey.pt[(int)*p] && nkey.pt[(int)*p] != *c) valid_key = FALSE; if (*c == *p) valid_key = FALSE; } c++, p++; } if (valid_key) { c = wordlist[word_index].word; p = wordlist[word_index].dict[i]; while (*c && *p) { nkey.ct[(int)*c] = *p; nkey.pt[(int)*p] = *c; c++, p++; } } /* valid_key = update_key(&nkey, wordlist[word_index].word, wordlist[word_index].dict[i]); */ if(valid_key){ /* Are we running in verbose mode? */ if(vflag){ if(word_index <= vlevel){ printf("index = %2d, dict index = %5d, %s -> %s\n", word_index, i, wordlist[word_index].word, wordlist[word_index].dict[i]); } } solve_cipher(&nkey, word_index+1, real_words+1); } } for(j='a'; j <= 'z'; j++) { nkey.ct[j] = key->ct[j]; nkey.pt[j] = key->pt[j]; } if(vflag) if(word_index <= vlevel) printf("index = %2d, dict_index = (nil), dict_word = (null)\n", word_index); solve_cipher(&nkey, word_index+1, real_words); } if(Dflag){ printf("Current depth = %d\n", word_index); } }