将项目 clone 下来之后，直接使用下面的脚本进行编译即可：

#
# Windows build script for the KenLM tools: installs the compression libraries
# through vcpkg, then configures and builds with CMake + Ninja from a Visual
# Studio developer command prompt.
#
# Build the Release configuration: .\\build_windows.ps1 -Config Release -KillRunning -CleanOutputs
# Build the Debug configuration:   .\\build_windows.ps1 -Config Debug -KillRunning -CleanOutputs
#
param(
    # Build configuration; forwarded to CMake as CMAKE_BUILD_TYPE and used to
    # pick the Boost DLL name pattern.
    [ValidateSet("Release", "Debug")]
    [string]$Config = "Release",
    # Build directory. When left empty the script derives
    # "build_ninja_<config in lower case>".
    [string]$BuildDir = "",
    # Remove the whole build directory (if it exists) before configuring.
    [switch]$Clean,
    # When processes named like the built executables are running, stop them
    # instead of aborting. Only consulted when <BuildDir>\bin already exists.
    [switch]$KillRunning,
    # Ask the script to delete previously built .exe/.pdb/.ilk/.lib/.exp files
    # from <BuildDir>\bin. Only consulted when <BuildDir>\bin already exists.
    [switch]$CleanOutputs
)

# Make cmdlet errors terminating so the script stops at the first failure.
$ErrorActionPreference = "Stop"

# Returns the installation path of the latest Visual Studio / Build Tools
# instance that has the x86/x64 C++ toolset component.
# Throws when vswhere.exe cannot be found or when no matching instance exists.
function Find-VSInstallPath {
    $vswhereRelative = "Microsoft Visual Studio\\Installer\\vswhere.exe"

    # Prefer the real "Program Files (x86)" location reported by the system so
    # the lookup also works when Windows is not installed on C:. The original
    # hard-coded location is kept as a fallback.
    $candidates = @()
    if (${env:ProgramFiles(x86)}) {
        $candidates += "${env:ProgramFiles(x86)}\\$vswhereRelative"
    }
    $candidates += "C:\\Program Files (x86)\\$vswhereRelative"

    $vswhere = $null
    foreach ($candidate in $candidates) {
        if (Test-Path $candidate) {
            $vswhere = $candidate
            break
        }
    }
    if (-not $vswhere) {
        throw "vswhere.exe not found. Please install Visual Studio Build Tools or Visual Studio."
    }

    $path = & $vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath
    if (-not $path) {
        throw "No Visual Studio installation with C++ tools found."
    }
    return $path.Trim()
}

# Returns the vcpkg root folder.
# Lookup order: $env:VCPKG_ROOT (when it exists on disk), then the "current"
# vcpkg app folder under each known Scoop root.
# Throws when none of the locations exists.
function Resolve-VcpkgRoot {
    if ($env:VCPKG_ROOT -and (Test-Path $env:VCPKG_ROOT)) {
        return $env:VCPKG_ROOT
    }

    # The original hard-coded location is probed first so existing setups
    # resolve exactly as before. The extra roots cover user profiles that do
    # not live under C:\Users and Scoop installs relocated through $env:SCOOP.
    $scoopRoots = @("C:\\Users\\$env:USERNAME\\scoop")
    if ($env:USERPROFILE) {
        $scoopRoots += "$env:USERPROFILE\\scoop"
    }
    if ($env:SCOOP) {
        $scoopRoots += $env:SCOOP
    }

    foreach ($scoopRoot in $scoopRoots) {
        $scoopVcpkg = "$scoopRoot\\apps\\vcpkg\\current"
        if (Test-Path $scoopVcpkg) {
            return $scoopVcpkg
        }
    }
    throw "VCPKG_ROOT is not set and vcpkg was not found under scoop. Set VCPKG_ROOT to your vcpkg folder."
}

# Returns the Boost root folder, or an empty string when Boost cannot be
# located (callers treat the empty string as "skip Boost handling").
# Lookup order: $env:BOOST_ROOT (when it exists on disk), then the "current"
# boost app folder under each known Scoop root.
function Resolve-BoostRoot {
    if ($env:BOOST_ROOT -and (Test-Path $env:BOOST_ROOT)) {
        return $env:BOOST_ROOT
    }

    # The original hard-coded location is probed first so existing setups
    # resolve exactly as before. The extra roots cover user profiles that do
    # not live under C:\Users and Scoop installs relocated through $env:SCOOP.
    $scoopRoots = @("C:\\Users\\$env:USERNAME\\scoop")
    if ($env:USERPROFILE) {
        $scoopRoots += "$env:USERPROFILE\\scoop"
    }
    if ($env:SCOOP) {
        $scoopRoots += $env:SCOOP
    }

    foreach ($scoopRoot in $scoopRoots) {
        $scoopBoost = "$scoopRoot\\apps\\boost\\current"
        if (Test-Path $scoopBoost) {
            return $scoopBoost
        }
    }
    return ""
}

# Runs $Command through cmd.exe after initialising the x64 Visual Studio
# developer environment with the batch file given in $VsDevCmd.
# The two steps are chained with "&&", so $Command only runs when the
# environment setup succeeded. A non-zero exit code ends the whole script
# with that same code.
function Invoke-VSCommand {
    param([string]$VsDevCmd, [string]$Command)
    $commandLine = '"' + $VsDevCmd + '" -arch=x64 && ' + $Command
    cmd /c $commandLine
    if ($LASTEXITCODE -eq 0) {
        return
    }
    exit $LASTEXITCODE
}

# Locate the Visual Studio developer command prompt batch file.
$vsPath = Find-VSInstallPath
$vsDevCmd = Join-Path $vsPath "Common7\\Tools\\VsDevCmd.bat"
if (!(Test-Path $vsDevCmd)) {
    throw "VsDevCmd.bat not found at $vsDevCmd"
}

# Locate the vcpkg executable inside the resolved vcpkg root.
$vcpkgRoot = Resolve-VcpkgRoot
$vcpkgExe = Join-Path $vcpkgRoot "vcpkg.exe"
if (!(Test-Path $vcpkgExe)) {
    throw "vcpkg.exe not found at $vcpkgExe"
}

# Derive the default build directory from the configuration name.
if ([string]::IsNullOrEmpty($BuildDir)) {
    $BuildDir = "build_ninja_" + $Config.ToLower()
}

# -Clean wipes an existing build directory before anything else happens.
if ($Clean) {
    if (Test-Path $BuildDir) {
        Remove-Item -Recurse -Force $BuildDir
    }
}

# Print a short summary of the resolved settings.
$summaryLines = @(
    "Using VS: $vsPath",
    "Using vcpkg: $vcpkgRoot",
    "Build dir: $BuildDir",
    "Config: $Config"
)
foreach ($summaryLine in $summaryLines) {
    Write-Host $summaryLine
}

# Pre-build housekeeping. Only relevant when a previous build left a bin
# directory behind: running executables would keep the output files locked.
if (Test-Path (Join-Path $BuildDir "bin")) {
    $exeNames = @("lmplz", "query", "build_binary", "filter", "count_ngrams", "kenlm_benchmark", "fragment", "phrase_table_vocab")

    # NOTE(review): processes are matched by name only, so an unrelated process
    # that happens to be called e.g. "query" or "filter" is matched as well.
    $running = Get-Process -ErrorAction SilentlyContinue | Where-Object { $exeNames -contains $_.ProcessName }
    if ($running) {
        if ($KillRunning) {
            $running | Stop-Process -Force
        }
        else {
            $names = ($running | Select-Object -ExpandProperty ProcessName | Sort-Object -Unique) -join ", "
            throw "Detected running KenLM executables ($names). Close them or rerun with -KillRunning."
        }
    }

    if ($CleanOutputs) {
        $binDir = Join-Path $BuildDir "bin"
        $extensions = @("exe", "pdb", "ilk", "lib", "exp")
        $toDelete = @()
        foreach ($name in $exeNames) {
            foreach ($ext in $extensions) {
                # One Join-Path call per file. Listing several
                # "Join-Path a b," lines inside @( ... ) is parsed as a single
                # Join-Path call with array arguments and fails parameter
                # binding, which made -CleanOutputs abort the script.
                $toDelete += Join-Path $binDir "$name.$ext"
            }
        }
        foreach ($path in $toDelete) {
            if (Test-Path $path) {
                try {
                    Remove-Item -Force $path
                }
                catch {
                    throw "Failed to delete $path. Close any running process using it or rerun with -KillRunning."
                }
            }
        }
    }
}

# Step 1: install the compression libraries through vcpkg (x64-windows triplet).
$vcpkgInstall = "`"$vcpkgExe`" --vcpkg-root `"$vcpkgRoot`" install zlib bzip2 liblzma --triplet x64-windows"
Invoke-VSCommand $vsDevCmd $vcpkgInstall

# Step 2: the vcpkg CMake toolchain file must exist before configuring.
$toolchain = Join-Path $vcpkgRoot "scripts\\buildsystems\\vcpkg.cmake"
if (!(Test-Path $toolchain)) {
    throw "vcpkg toolchain file not found: $toolchain"
}

# Step 3: configure with the Ninja generator, then build.
$cmakeConfigure = "cmake -S . -B `"$BuildDir`" -G Ninja -DCMAKE_BUILD_TYPE=$Config -DCMAKE_TOOLCHAIN_FILE=`"$toolchain`""
$cmakeBuild = "cmake --build `"$BuildDir`""
Invoke-VSCommand $vsDevCmd $cmakeConfigure
Invoke-VSCommand $vsDevCmd $cmakeBuild

# Step 4: the build is expected to have produced a bin directory.
$builtBin = Join-Path $BuildDir "bin"
if (!(Test-Path $builtBin)) {
    throw "Expected output bin directory not found: $BuildDir\\bin"
}

# Copy the Boost runtime DLLs next to the built executables. A missing Boost
# installation or a missing DLL only produces a warning; the build result is
# still reported as complete.
$boostRoot = Resolve-BoostRoot
if ($boostRoot) {
    $boostLib = Join-Path $boostRoot "lib"
    $outputBin = Join-Path $BuildDir "bin"
    # Debug builds use the "-gd" tagged DLLs, Release builds the untagged ones.
    $suffix = if ($Config -eq "Debug") { "*-mt-gd-x64-1_*.dll" } else { "*-mt-x64-1_*.dll" }
    $boostDlls = @("boost_program_options", "boost_thread", "boost_system")
    foreach ($name in $boostDlls) {
        $pattern = "$name-$suffix"
        # Use a dedicated local name here: $matches is PowerShell's automatic
        # variable filled by the -match operator and should not be assigned to.
        $dllFiles = Get-ChildItem -Path $boostLib -Filter $pattern -ErrorAction SilentlyContinue
        if ($dllFiles) {
            Copy-Item -Force -Path $dllFiles.FullName -Destination $outputBin
        }
        else {
            Write-Warning "Boost DLL not found for $name in $boostLib"
        }
    }
}
else {
    Write-Warning "BOOST_ROOT not set and boost not found under scoop; skipping Boost DLL copy."
}

Write-Host "Build complete. Binaries are in $BuildDir\\bin"

使用的话，相关命令行工具的使用示例如下。

下面这条命令用于构建 3-gram 模型，最终将生成一个 model.arpa 文件。

cmd /c ".\\bin\\lmplz.exe -o 3 < C:\\Users\\SonnyCalcr\\EDisk\\CppCodes\\IMECodes\\Metasequoia-n-gram\\data\\output\\handled\\all_cleaned_only_wiki_zh_spaced_v1.txt > model.arpa"

然后再利用 model.arpa 来制作 model.binary。

.\\bin\\build_binary.exe model.arpa model.binary

注意，这里的语料是按行组织的，并且每个字符之间都用空格隔开，譬如：

我 爱 北 京
我 爱 中 国
我 爱 上 海
北 京 是 中 国 首 都

如果需要处理的语料比较大，运行命令时需要保证系统至少有 40GB 的空闲内存。