💾 Archived View for icolotl.com › repos › morgsub › src › main.rs captured on 2024-09-29 at 00:55:03.

View Raw

More Information

⬅️ Previous capture (2024-05-26)

-=-=-=-=-=-=-

/* SPDX-License-Identifier: GPL-3.0-or-later */
/*
 * MORGSUB (main.rs)
 * Morgana's Offline-First Substitution Cypher Solver Program
 * Copyright (C) 2024 Grace Morgana <morgana@icolotl.com>
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

use std::fs;
use std::collections::HashMap;
use std::collections::HashSet;
use rand::seq::IteratorRandom;
use rand::thread_rng;
use std::io;

fn main() {
    
    // Read in the wordlist as a string
    let raw_words = fs::read_to_string("wordlist.txt").expect("Failed to open wordlist.txt");
    // Split the wordlist to words, display count for debugging
    let wordlist: HashSet<&str> = raw_words.split("\n").collect();
    println!("Length: {}", wordlist.len());

    // Prepare to iterate over input
    let stdin = io::stdin();
    let mut intxt = String::new();
    let mut buffer = String::new();

    // Attempt to process chars in each line until EOF
    while stdin.read_line(&mut buffer).expect("Failed to read line") != 0 {
        intxt.push_str(&buffer);
        // Clear the buffer for the next line
        buffer.clear()
    }

    // Convert input to valid characters only (a-z, apostrophe and space)
    let valid: HashSet<char> = "abcdefghijklmnopqrstuvwxyz' ".chars().collect();
    intxt = intxt.to_lowercase().chars().filter(|&c| valid.contains(&c)).collect();

    // Count letter frequencies in the input
    let mut cdist = HashMap::new();
    for cb in "abcdefghijklmnopqrstuvwxyz".chars() {
        cdist.insert(cb, intxt.chars().filter(|&ci| ci == cb).count());
    }
    println!("{:?}", cdist);
    // Use this frequency distribution to create a sorted list of the most common chars
    let mut cchars: Vec<char> = "abcdefghijklmnopqrstuvwxyz".chars().collect();
    cchars.sort_unstable_by(|a, b| cdist.get(b).partial_cmp(&cdist.get(a)).unwrap());
    println!("{:?}", cchars);

    // To store the best translation which will be output at the end
    let mut best = HashMap::new();
    let mut best_score = 0.0;

    // Repeat the translation attempt 16 times
    // This is done instead of one longer iteration in case we get stuck in local maxima.
    for iter in 1..17 {

        println!("\n ===== ITERATION {} ===== ", iter);

        // Create the translation table where e = most frequent letter, t = second, and so on.
        let mut xlate = HashMap::new();
        for (idx, c) in "etaoinshrdlcumwfgypbvkjxqz".chars().enumerate() {
            xlate.insert(cchars[idx], c);
        }

        // Split input into words
        //let inwords: Vec<&str> = intxt.split(" ").collect();
        let mut score_prev: f32 = 0.0;

        // Setup Random
        let mut rng = thread_rng();

        // For each attempt, try to find a step 6000 times
        for ci in 0..6000 {

            // Select two keys (and their values) to swap
            let swap_keys: Vec<char> = xlate.keys().choose_multiple(&mut rng, 2).iter().map(|&k| *k).collect();
            let swap_values: Vec<char> = swap_keys.iter().map(|k| xlate.get(k).unwrap().clone()).collect();
            // Perform the swap
            xlate.insert(swap_keys[0], swap_values[1]);
            xlate.insert(swap_keys[1], swap_values[0]);

            // Apply the transformation map
            let attempt: String = intxt.chars().map(|c|
                match c {
                    ' ' => ' ',
                    '\'' => '\'',
                    ch => *xlate.get(&ch).unwrap(),
                }
            ).collect();
            let inwords: Vec<&str> = attempt.split(" ").collect();
            // Count the number of valid words, and compute the score based on that
            let score_cur: f32 = inwords.iter().filter(|&w| wordlist.contains(w)).count() as f32 / inwords.len() as f32;
            let delta = score_cur - score_prev;
            let accept = delta >= 0.0;

            // If we accept the swap, record the new score.
            if accept {
                if delta > 0.0 {
                    println!("{}: {} -> {} :: {}", ci, score_prev, score_cur, delta);
                }
                score_prev = score_cur;
            // Otherwise, undo the swap
            } else {
                xlate.insert(swap_keys[0], swap_values[0]);
                xlate.insert(swap_keys[1], swap_values[1]);
            }

        }

        // Perform the translation map again for attempt output
        let output: String = intxt.chars().map(|c|
            match c {
                ' ' => ' ',
                '\'' => '\'',
                ch => *xlate.get(&ch).unwrap(),
            }
        ).collect();
        println!("{}", output);

        // If this is the best so far, record it
        if score_prev > best_score {
            best = xlate;
            best_score = score_prev;
        }

    }

    println!("\n ===== FINAL OUTPUT ===== ");

    // Perform the translation map with the best output
    let output: String = intxt.chars().map(|c|
        match c {
            ' ' => ' ',
            '\'' => '\'',
            ch => *best.get(&ch).unwrap(),
        }
    ).collect();
    println!("{}", output);

}