F#生成MD5字典

Fsharp

之前想到的一种逆向思维求解MD5的方法.

后来发现真的有人这么做了, 其中数据量最大的网站标称自己拥有4T的数据量, 可以解8位以下的数字和字母组合.

既然是之前自己有过的想法, 那就拿F#来具体实现一下吧:

#!fsharp
#light
namespace ViTarn
module MD5
open System
open System.Data
open System.Diagnostics
open System.IO
open System.Security.Cryptography
open System.Text
// sqlite.phxsoftware.com
#r "System.Data.SQLite.DLL"
open System.Data.SQLite
// File name of the SQLite database.
let dbFile = "db.sqlite"
// Connection string that points at dbFile, produced by the SQLite
// connection-string builder.
let connString =
    (SQLiteConnectionStringBuilder (DataSource = dbFile)).ConnectionString
// Stopwatch started when the module is initialised; read at the end of the
// script to report the total elapsed time.
let watch = Stopwatch.StartNew ()
// Debugging aid: prints any value with print_any, then ends the line.
let debug x =
    print_any x
    Console.WriteLine ()
// Converts an integer to its base-95 representation, using the 95 characters
// from ' ' (code 32) to '~' (code 126) -- the ones commonly used in
// passwords -- as digits. The result is a char array, most significant digit
// first; 0 maps to [|' '|]. n is expected to be non-negative.
let n2cl n =
    // Peels off the least significant digit of n and prepends it to acc.
    let rec parse n acc =
        let digit = n % 95
        let acc = char (digit + 32) :: acc
        let rest = n / 95
        // Keep going while higher-order digits remain. The earlier test
        // (n - digit > 95) stopped one step too early, so 95..189 lost their
        // leading digit and produced the same output as 0..94.
        if rest > 0 then
            parse rest acc
        else
            List.to_array acc
    parse n []
// Returns the MD5 digest of str as a 32-character lowercase hex string.
// The text is encoded as UTF-8 before hashing so the result does not depend
// on the machine's default code page (Encoding.Default); for pure ASCII
// input the bytes, and therefore the hash, are the same either way.
let md5 (str : string) =
    use md5Hasher = MD5.Create ()
    let digest : byte[] =
        str
        |> Encoding.UTF8.GetBytes
        |> md5Hasher.ComputeHash
    // Two hex digits per byte; a StringBuilder avoids re-copying the partial
    // result on every append.
    let hex = new StringBuilder (digest.Length * 2)
    for b in digest do
        hex.Append (b.ToString ("x2")) |> ignore
    hex.ToString ()
// Create the database file on first run, then make sure the md5 table exists.
if dbFile |> File.Exists |> not then
    dbFile |> SQLiteConnection.CreateFile
use conn = new SQLiteConnection (connString)
// Table columns: p holds the hex digest, s the plain text it was computed from.
// IF NOT EXISTS lets the script be re-run against an existing database file;
// a plain CREATE TABLE fails with "table md5 already exists" on the second run.
let sql = "PRAGMA auto_vacuum = 1; " +
          "PRAGMA encoding = \'UTF-8\'; " +
          "PRAGMA page_size = 4096; " +
          "PRAGMA synchronous = OFF; " +
          "CREATE TABLE IF NOT EXISTS md5 (p VARCHAR(42)  NOT NULL  COLLATE NOCASE, s VARCHAR(9)  NOT NULL  COLLATE NOCASE);"
use cmd = new SQLiteCommand (sql, conn)
try
    conn.Open ()
    cmd.ExecuteNonQuery () |> ignore
finally
    // Close the connection even when the setup statement throws.
    conn.Close ()
// Fills the md5 table: for every i from 0 to 857374 (95 * 95 * 95 - 1) the
// string produced by n2cl i is hashed with md5 and the (hash, text) pair is
// inserted. All inserts run inside one transaction that is committed at the
// end; progress is printed every 10000 rows. The argument is ignored.
let main _ =
    let insertSql = "INSERT INTO md5 " +
                    "VALUES(?, ?); "
    use connection = new SQLiteConnection (connString)
    use insertCmd = new SQLiteCommand (insertSql, connection)
    // Positional parameters: first the hash, then the plain text.
    let hashParam = new SQLiteParameter ()
    let textParam = new SQLiteParameter ()
    insertCmd.Parameters.AddRange [| hashParam; textParam |]
    connection.Open ()
    use transaction = connection.BeginTransaction ()
    insertCmd.Transaction <- transaction
    for i = 0 to 857374 do
        let text = new String (n2cl i)
        let hash = md5 text
        hashParam.Value <- hash
        textParam.Value <- text
        insertCmd.ExecuteNonQuery () |> ignore
        if i % 10000 = 0 then
            debug i
    transaction.Commit ()
    connection.Close ()
// Run the generator, report the elapsed time, then wait for a key press so
// the console window stays open.
main ()
debug watch.Elapsed
Console.ReadKey () |> ignore

为什么是857374? 95 * 95 * 95 - 1 = 857374

模拟的95进制, 是从空格(' ')到波浪线('~')的, 用ASCII表示就是32到126. 所以0 to 857374代表' '到'~~~'

在我的电脑上耗时00:01:08

  • CPU: AMD3000+
  • RAM: 1G

以DEBUG模式运行 最终产生的数据库约36M+