亚马逊雨林的物种普查 - 题解 - Hydro

0

tobyforever LV 5 SU @ 2025-12-2 11:56:07

C++ 标准解答 (NOIP C++14)

/**
 * 题目: 亚马逊雨林的物种普查 (Biodiversity Survey)
 * 作者: Isaac Asimov (AI)
 * 知识点: std::set 的基本操作 (insert, count, size)
 */

#include <iostream>
#include <string>
#include <set>  // 必须包含 set 头文件

using namespace std;

int main() {
    // IO 优化
    ios_base::sync_with_stdio(false);
    cin.tie(NULL);

    int n;
    if (!(cin >> n)) return 0;

    // 定义一个存储字符串的集合
    // 特性：自动去重，自动排序 (虽然本题没用到排序，但这是set的特性)
    set<string> species_db;

    for (int i = 0; i < n; i++) {
        int type;
        cin >> type;

        if (type == 1) {
            // 操作 1: 发现物种
            string name;
            cin >> name;
            // 直接插入，set 会自动处理重复的情况
            species_db.insert(name);
        }
        else if (type == 2) {
            // 操作 2: 查证物种
            string name;
            cin >> name;
            // count() 返回 1 表示存在，0 表示不存在
            if (species_db.count(name)) {
                cout << "Yes" << endl;
            } else {
                cout << "No" << endl;
            }
        }
        else if (type == 3) {
            // 操作 3: 统计丰富度
            cout << species_db.size() << endl;
        }
    }

    return 0;
}

数据生成器 (Generator)

生成器会模拟考察过程，特意生成大量的重复物种，以测试 set 的去重能力。

请保存为 gen_set.cpp 并运行。

/**
 * Project: Biodiversity Survey (Set Data Generator)
 * Author: Isaac Asimov (AI)
 */

#include <iostream>
#include <fstream>
#include <random>
#include <string>
#include <vector>
#include <set>

using namespace std;

// ==========================================
// Part 1: 标准解答逻辑
// ==========================================
class Solution {
public:
    void solve(string in_file, string out_file) {
        ifstream cin(in_file);
        ofstream cout(out_file);
        if (!cin.is_open()) return;

        int n;
        cin >> n;
        set<string> s;

        for (int i = 0; i < n; i++) {
            int type;
            cin >> type;
            if (type == 1) {
                string name; cin >> name;
                s.insert(name);
            } else if (type == 2) {
                string name; cin >> name;
                if (s.count(name)) cout << "Yes" << endl;
                else cout << "No" << endl;
            } else {
                cout << s.size() << endl;
            }
        }
        
        cin.close();
        cout.close();
        cout << "Generated: " << out_file << endl;
    }
};

// ==========================================
// Part 2: 数据生成逻辑
// ==========================================
mt19937 rng(2025);

int rand_int(int min, int max) {
    uniform_int_distribution<int> dist(min, max);
    return dist(rng);
}

// 模拟一些物种库
vector<string> common_species = {
    "Jaguar", "Macaw", "Capybara", "Anaconda", "Piranha",
    "Sloth", "Tapir", "Toucan", "Frog", "Ant",
    "Morpho", "Otter", "Caiman", "Monkey", "Spider"
};

// 生成随机字符串（充当罕见物种）
string rand_str() {
    int len = rand_int(3, 8);
    string s = "";
    for(int i=0; i<len; i++) s += (char)('a' + rand_int(0, 25));
    s[0] = toupper(s[0]); // 首字母大写看起来像学名
    return s;
}

void generate_input(int case_id) {
    string filename = to_string(case_id) + ".in";
    ofstream fout(filename);

    int n;

    // 规模控制
    if (case_id <= 3) n = rand_int(10, 20);
    else if (case_id <= 7) n = rand_int(1000, 5000);
    else n = rand_int(50000, 100000); // 压力测试

    fout << n << endl;

    // 维护一个已生成的列表，用于制造重复
    vector<string> history;

    for (int i = 0; i < n; i++) {
        int type;
        int dice = rand_int(1, 100);

        if (case_id == 5) {
            // Case 5: 疯狂重复测试 (大量重复插入)
            if (dice <= 90) type = 1; 
            else type = 3;
        }
        else {
            if (dice <= 60) type = 1;       // 60% 发现
            else if (dice <= 90) type = 2;  // 30% 查证
            else type = 3;                  // 10% 统计
        }

        fout << type;

        if (type == 1 || type == 2) {
            string name;
            // 决定是生成新物种，还是使用旧物种
            bool use_existing = (!history.empty() && rand_int(1, 100) <= 60); // 60% 概率重复

            if (use_existing && type == 1) {
                // 插入旧物种 -> 测试去重
                name = history[rand_int(0, history.size()-1)];
            }
            else if (use_existing && type == 2) {
                // 查询旧物种 -> 应该返回 Yes
                name = history[rand_int(0, history.size()-1)];
            }
            else {
                // 新物种
                if (rand_int(1, 100) <= 50) 
                    name = common_species[rand_int(0, common_species.size()-1)] + to_string(rand_int(1, 100)); // 加后缀变种
                else 
                    name = rand_str();
                
                if (type == 1) history.push_back(name);
            }
            fout << " " << name;
        }
        fout << endl;
    }

    fout.close();
}

int main() {
    ios_base::sync_with_stdio(false);
    cout << "--- Generating Biodiversity Data ---" << endl;
    
    Solution solver;
    for (int i = 1; i <= 10; i++) {
        generate_input(i);
        string in = to_string(i) + ".in";
        string out = to_string(i) + ".out";
        solver.solve(in, out);
    }
    cout << "--- Done! ---" << endl;
    return 0;
}

阿西莫夫的点评

科学概念的具象化：通过这道题，学生会深刻理解“集合”这个数学概念与现实世界中“种类”的对应关系。
性能优势：在数据量达到 $10^5$ 时，如果用 vector 每次插入前都遍历检查是否存在，时间复杂度是 $O(N^2)$ ，会直接超时。而 set 的 $O(N \log N)$ 则能轻松通过。这展示了数据结构选择的重要性。

ID: 19248
时间: 1000ms
内存: 32MiB
难度: (无)
标签: (无)
递交数: 0
已通过: 0
上传者: tobyforever