Skip to content

实战项目:minigrep

本章通过实现一个简化版的 grep 工具来综合运用前面学到的知识。这个项目涵盖:命令行参数解析、文件读取、错误处理、模块组织和测试。

我们将创建 minigrep,一个在文件中搜索字符串的命令行工具:

Terminal window
minigrep <搜索词> <文件路径>

例如:

Terminal window
minigrep to poem.txt
Terminal window
cargo new minigrep
cd minigrep

src/main.rs:

use std::env;
/// Collects the process arguments and prints them in debug form.
fn main() {
    let args = env::args().collect::<Vec<String>>();
    println!("{:?}", args);
}

运行测试:

Terminal window
cargo run -- needle haystack.txt
# 输出: ["target/debug/minigrep", "needle", "haystack.txt"]
use std::env;
/// Picks the search term and the file path out of the argument list and echoes them.
///
/// # Panics
/// Indexing panics when the argument list has fewer than three elements.
fn main() {
    let args = env::args().collect::<Vec<String>>();

    // Element 0 is skipped; the two values of interest sit at indices 1 and 2.
    let (query, file_path) = (&args[1], &args[2]);

    println!("搜索: {}", query);
    println!("文件: {}", file_path);
}

创建测试文件 poem.txt

I'm nobody! Who are you?
Are you nobody, too?
Then there's a pair of us - don't tell!
They'd banish us, you know.
How dreary to be somebody!
How public, like a frog
To tell your name the livelong day
To an admiring bog!

读取文件内容:

use std::env;
use std::fs;
/// Echoes the search term and file path, then loads the file and prints its contents.
///
/// # Panics
/// Panics when fewer than three arguments are present, or when the file
/// cannot be read (the `expect` call).
fn main() {
    let args = env::args().collect::<Vec<String>>();
    let query = args[1].as_str();
    let file_path = args[2].as_str();

    println!("搜索: {}", query);
    println!("文件: {}", file_path);

    // The whole file is loaded into memory as one `String`.
    let contents = fs::read_to_string(file_path).expect("无法读取文件");
    println!("文件内容:\n{}", contents);
}
/// Settings extracted from the command line.
struct Config {
    /// Text to search for.
    query: String,
    /// Path of the file to search in.
    file_path: String,
}

impl Config {
    /// Builds a `Config` from the raw argument slice.
    ///
    /// The element at index 0 is ignored, index 1 becomes `query` and index 2
    /// becomes `file_path`; any further elements are ignored.
    ///
    /// # Errors
    /// Returns `Err("参数不足")` when the slice holds fewer than three elements.
    fn build(args: &[String]) -> Result<Config, &'static str> {
        match args {
            [_, query, file_path, ..] => Ok(Config {
                query: query.clone(),
                file_path: file_path.clone(),
            }),
            _ => Err("参数不足"),
        }
    }
}

将核心逻辑移到 src/lib.rs

use std::error::Error;
use std::fs;
/// Settings that drive one search run.
pub struct Config {
    /// Text to search for.
    pub query: String,
    /// Path of the file to search in.
    pub file_path: String,
    /// When `true`, matching ignores letter case.
    pub ignore_case: bool,
}

impl Config {
    /// Builds a `Config` from the raw argument slice and the environment.
    ///
    /// Index 1 of `args` becomes `query`, index 2 becomes `file_path`.
    /// `ignore_case` is `true` whenever reading the `IGNORE_CASE` environment
    /// variable succeeds, regardless of its value.
    ///
    /// # Errors
    /// Returns `Err("参数不足")` when the slice holds fewer than three elements.
    pub fn build(args: &[String]) -> Result<Config, &'static str> {
        let (query, file_path) = match (args.get(1), args.get(2)) {
            (Some(query), Some(file_path)) => (query.clone(), file_path.clone()),
            _ => return Err("参数不足"),
        };

        Ok(Config {
            query,
            file_path,
            ignore_case: std::env::var("IGNORE_CASE").is_ok(),
        })
    }
}
/// Reads the file named in `config`, searches it and writes every matching
/// line to stdout.
///
/// The search is case-insensitive when `config.ignore_case` is set and
/// case-sensitive otherwise.
///
/// # Errors
/// Returns the underlying I/O error when the file cannot be read or when a
/// line cannot be written to stdout.
pub fn run(config: Config) -> Result<(), Box<dyn Error>> {
    // Brings `write_fmt` into scope for `writeln!`; kept local to this function.
    use std::io::Write;

    let contents = fs::read_to_string(&config.file_path)?;

    let results = if config.ignore_case {
        search_case_insensitive(&config.query, &contents)
    } else {
        search(&config.query, &contents)
    };

    // `println!` panics when stdout is closed (for example when piped into
    // `head`). Writing through a locked handle lets the failure travel back
    // to the caller as an ordinary error.
    let stdout = std::io::stdout();
    let mut out = stdout.lock();
    for line in results {
        writeln!(out, "{}", line)?;
    }
    Ok(())
}
/// Returns every line of `contents` that contains `query`, in file order.
///
/// The comparison is case-sensitive. The returned slices borrow from
/// `contents`.
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    let mut matched = Vec::new();
    for candidate in contents.lines() {
        // Skip lines without the query so only hits reach the push below.
        if !candidate.contains(query) {
            continue;
        }
        matched.push(candidate);
    }
    matched
}
/// Returns every line of `contents` that contains `query`, ignoring letter case.
///
/// Both the query and each line are lowercased before comparison; the
/// returned slices are the original, unmodified lines.
pub fn search_case_insensitive<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    // Lowercase the query once, outside the loop.
    let needle = query.to_lowercase();
    let mut matched = Vec::new();
    for candidate in contents.lines() {
        if !candidate.to_lowercase().contains(&needle) {
            continue;
        }
        matched.push(candidate);
    }
    matched
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The lowercase query must hit "productive" but not the capitalised "Duct tape."
    #[test]
    fn case_sensitive() {
        let contents = "Rust:\nsafe, fast, productive.\nPick three.\nDuct tape.";
        let found = search("duct", contents);
        assert_eq!(vec!["safe, fast, productive."], found);
    }

    /// A mixed-case query must hit both "Rust:" and "Trust me."
    #[test]
    fn case_insensitive() {
        let contents = "Rust:\nsafe, fast, productive.\nPick three.\nTrust me.";
        let found = search_case_insensitive("rUsT", contents);
        assert_eq!(vec!["Rust:", "Trust me."], found);
    }
}
use std::env;
use std::process;
use minigrep::Config;
/// Parses the arguments, runs the search and maps any failure to exit status 1.
///
/// Both kinds of failure are reported on stderr.
fn main() {
    let args = env::args().collect::<Vec<String>>();

    let config = match Config::build(&args) {
        Ok(config) => config,
        Err(err) => {
            eprintln!("参数解析错误: {}", err);
            process::exit(1)
        }
    };

    match minigrep::run(config) {
        Ok(()) => {}
        Err(e) => {
            eprintln!("应用程序错误: {}", e);
            process::exit(1)
        }
    }
}

改进参数解析,使用迭代器而不是索引:

impl Config {
    /// Builds a `Config` by pulling values off an argument iterator.
    ///
    /// The first item is discarded, the second becomes `query` and the third
    /// becomes `file_path`. `ignore_case` is `true` whenever reading the
    /// `IGNORE_CASE` environment variable succeeds.
    ///
    /// # Errors
    /// Returns `Err("缺少搜索词")` when no query is supplied and
    /// `Err("缺少文件路径")` when no file path follows it.
    pub fn build(mut args: impl Iterator<Item = String>) -> Result<Config, &'static str> {
        // Skip the program name.
        args.next();

        let query = args.next().ok_or("缺少搜索词")?;
        let file_path = args.next().ok_or("缺少文件路径")?;

        Ok(Config {
            query,
            file_path,
            ignore_case: std::env::var("IGNORE_CASE").is_ok(),
        })
    }
}

更新 main.rs:

// Entry point (excerpt): passes the `env::args()` iterator directly to
// `Config::build` instead of collecting it into a `Vec<String>` first.
fn main() {
let config = Config::build(env::args()).unwrap_or_else(|err| {
// Report the parse failure on stderr and stop with exit status 1.
eprintln!("参数解析错误: {}", err);
process::exit(1);
});
// ... (the rest of `main` is elided in this excerpt)
}

改进搜索函数,使用迭代器:

/// Returns every line of `contents` that contains `query`, in file order.
///
/// The comparison is case-sensitive. The returned slices borrow from
/// `contents`.
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    let is_match = |line: &&str| line.contains(query);
    contents.lines().filter(is_match).collect()
}
/// Returns every line of `contents` that contains `query`, ignoring letter case.
///
/// Both the query and each line are lowercased before comparison; the
/// returned slices are the original, unmodified lines.
pub fn search_case_insensitive<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    // Lowercase the query once so the predicate only lowercases the line.
    let needle = query.to_lowercase();
    let is_match = |line: &&str| line.to_lowercase().contains(&needle);
    contents.lines().filter(is_match).collect()
}

错误信息应该输出到 stderr,正常结果输出到 stdout:

// Errors are written to stderr
eprintln!("参数解析错误: {}", err);
// Normal results are written to stdout
println!("{}", line);

这样可以将结果重定向到文件,而错误仍显示在终端:

Terminal window
cargo run -- to poem.txt > output.txt
use std::error::Error;
use std::fs;
/// Settings that drive one search run.
pub struct Config {
    /// Text to search for.
    pub query: String,
    /// Path of the file to search in.
    pub file_path: String,
    /// When `true`, matching ignores letter case.
    pub ignore_case: bool,
}

impl Config {
    /// Builds a `Config` by pulling values off an argument iterator.
    ///
    /// The first item is discarded, the second becomes `query` and the third
    /// becomes `file_path`. `ignore_case` is `true` whenever reading the
    /// `IGNORE_CASE` environment variable succeeds.
    ///
    /// # Errors
    /// Returns `Err("缺少搜索词")` when no query is supplied and
    /// `Err("缺少文件路径")` when no file path follows it.
    pub fn build(mut args: impl Iterator<Item = String>) -> Result<Config, &'static str> {
        // The leading item is not used.
        let _skipped = args.next();

        let query = if let Some(arg) = args.next() {
            arg
        } else {
            return Err("缺少搜索词");
        };
        let file_path = if let Some(arg) = args.next() {
            arg
        } else {
            return Err("缺少文件路径");
        };

        let ignore_case = std::env::var("IGNORE_CASE").is_ok();
        Ok(Config {
            query,
            file_path,
            ignore_case,
        })
    }
}
/// Reads the file named in `config`, searches it and writes every matching
/// line to stdout.
///
/// The search is case-insensitive when `config.ignore_case` is set and
/// case-sensitive otherwise.
///
/// # Errors
/// Returns the underlying I/O error when the file cannot be read or when a
/// line cannot be written to stdout.
pub fn run(config: Config) -> Result<(), Box<dyn Error>> {
    // Brings `write_fmt` into scope for `writeln!`; kept local to this function.
    use std::io::Write;

    let contents = fs::read_to_string(&config.file_path)?;

    let results = if config.ignore_case {
        search_case_insensitive(&config.query, &contents)
    } else {
        search(&config.query, &contents)
    };

    // `println!` panics when stdout is closed (for example when piped into
    // `head`). Writing through a locked handle turns that into an error the
    // caller can report, and avoids taking the lock once per line.
    let stdout = std::io::stdout();
    let mut out = stdout.lock();
    for line in results {
        writeln!(out, "{}", line)?;
    }
    Ok(())
}
/// Returns every line of `contents` that contains `query`, in file order.
///
/// The comparison is case-sensitive. The returned slices borrow from
/// `contents`.
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    let mut hits = Vec::new();
    hits.extend(contents.lines().filter(|line| line.contains(query)));
    hits
}
/// Returns every line of `contents` that contains `query`, ignoring letter case.
///
/// Both the query and each line are lowercased before comparison; the
/// returned slices are the original, unmodified lines.
pub fn search_case_insensitive<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    let needle = query.to_lowercase();
    let mut hits = Vec::new();
    hits.extend(
        contents
            .lines()
            .filter(|line| line.to_lowercase().contains(needle.as_str())),
    );
    hits
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Only the line containing lowercase "duct" is expected; "Duct tape." differs in case.
    #[test]
    fn case_sensitive() {
        let contents =
            ["Rust:", "safe, fast, productive.", "Pick three.", "Duct tape."].join("\n");
        let expected = vec!["safe, fast, productive."];
        assert_eq!(expected, search("duct", &contents));
    }

    /// A mixed-case query is expected to match regardless of the line's casing.
    #[test]
    fn case_insensitive() {
        let contents =
            ["Rust:", "safe, fast, productive.", "Pick three.", "Trust me."].join("\n");
        let expected = vec!["Rust:", "Trust me."];
        assert_eq!(expected, search_case_insensitive("rUsT", &contents));
    }
}
use std::env;
use std::process;
use minigrep::Config;
/// Builds the configuration from the process arguments and runs the search.
///
/// Any failure is reported on stderr and ends the process with exit status 1.
fn main() {
    let config = match Config::build(env::args()) {
        Ok(config) => config,
        Err(err) => {
            eprintln!("参数解析错误: {}", err);
            process::exit(1)
        }
    };

    minigrep::run(config).unwrap_or_else(|e| {
        eprintln!("应用程序错误: {}", e);
        process::exit(1);
    });
}
Terminal window
# 运行所有测试
cargo test
# 运行程序
cargo run -- to poem.txt
# 大小写不敏感搜索
IGNORE_CASE=1 cargo run -- to poem.txt
# Windows PowerShell
$env:IGNORE_CASE=1; cargo run -- to poem.txt

修改搜索函数,返回匹配行的行号:

/// Returns each line of `contents` that contains `query`, paired with its
/// 1-based line number.
///
/// The comparison is case-sensitive and results keep file order.
pub fn search_with_line_numbers<'a>(query: &str, contents: &'a str) -> Vec<(usize, &'a str)> {
    // Zipping with `1..` produces line numbers that already start at 1.
    (1..)
        .zip(contents.lines())
        .filter(|(_, line)| line.contains(query))
        .collect()
}

扩展 2:添加 --ignore-case 命令行参数

Section titled “扩展 2:添加 --ignore-case 命令行参数”

使用 clap 库简化参数解析:

Cargo.toml
[dependencies]
clap = { version = "4", features = ["derive"] }
use clap::Parser;
// Command-line arguments for minigrep, declared through clap's derive API.
// NOTE(review): the `///` comments on the fields are left untranslated on
// purpose; clap's derive is understood to use them as help text, so changing
// them would change program output. Confirm against the clap documentation.
#[derive(Parser)]
#[command(name = "minigrep")]
#[command(about = "搜索文件中的文本")]
struct Args {
/// 要搜索的字符串
query: String,
/// 要搜索的文件路径
file_path: String,
/// 大小写不敏感搜索
// `short, long` requests both a short and a long flag for this field.
#[arg(short, long)]
ignore_case: bool,
}

使用 regex 库:

[dependencies]
regex = "1"
use regex::Regex;
/// Returns every line of `contents` that matches the regular expression `pattern`.
///
/// # Errors
/// Returns the `regex::Error` produced by `Regex::new` when `pattern` does
/// not compile.
pub fn search_regex<'a>(pattern: &str, contents: &'a str) -> Result<Vec<&'a str>, regex::Error> {
    // The pattern is compiled once; the compiled regex is reused for every line.
    Regex::new(pattern).map(|re| {
        contents
            .lines()
            .filter(|line| re.is_match(line))
            .collect()
    })
}
use std::path::Path;
use walkdir::WalkDir;
pub fn search_in_directory(query: &str, dir: &Path) -> Vec<(String, Vec<String>)> {
let mut results = Vec::new();
for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) {
if entry.file_type().is_file() {
if let Ok(contents) = fs::read_to_string(entry.path()) {
let matches: Vec<String> = contents
.lines()
.filter(|line| line.contains(query))
.map(String::from)
.collect();
if !matches.is_empty() {
results.push((entry.path().display().to_string(), matches));
}
}
}
}
results
}

通过这个项目,你实践了:

  1. 命令行参数解析 - std::env::args()
  2. 文件 I/O - std::fs::read_to_string()
  3. 错误处理 - Result、? 操作符、unwrap_or_else
  4. 模块组织 - 分离 main.rs 与 lib.rs
  5. 测试 - 单元测试、TDD
  6. 迭代器 - filter、collect
  7. 生命周期 - 返回引用的函数
  8. 环境变量 - std::env::var()
  9. 标准输出与错误输出 - println! vs eprintln!

恭喜你完成了 Rust 入门教程!最后一章是附录,包含常用命令速查和学习资源。