2024.05.15-暑期实习-第二题-塔子哥的编译原理实验

TaZi SU @ 2024-8-21 4:38:10

题面描述:

塔子哥在学习编译原理课程时需要将C语言翻译成汇编程序，为此他决定使用正则表达式来完成文本的切词任务。已知一种字符串解析语法，包括N匹配单个数字(0-9)，A匹配单个字母(a-z或A-Z)，以及n(...)表示分组，其中分组包含至少一个N或A，n是一个数字，表示该分组重复的次数(1<=n<=200)。输入包括两行，第一行为解析语法，第二行为目标字符串。要求找到第一个匹配解析语法的子字符串并输出，如果没有匹配到则输出!。

思路：模拟+KMP

本题的模式串可以展开，使用栈来模拟即可，每次一个(放入栈中对应的位置以及前面的数量，每次)弹出栈顶元素，然后将栈顶元素的数量乘以当前的字符串加上栈顶元素的前面的字符串加入到模式串中即可。

需要注意有可能模式串非常长，所以中间需要判断是否已经超过了匹配串的长度，超过则直接返回。

有了模式串和匹配串之后，就可以直接使用KMP算法来进行匹配了。

解析模式字符串

模式字符串的解析需要遵循以下规则：

数字：代表后面括号中的内容需要重复的次数。
字母：单个字母（包括小写和大写）直接加入到模式字符串中。
左括号 (：表示一个新的分组开始，此时我们需要记录当前的重复次数和之前的字符串状态。
右括号 )：表示分组结束，此时我们从栈中弹出最近的分组信息，并将当前构建的字符串根据重复次数进行扩展。

在具体实现中，我们使用一个栈 st 存储分组的重复次数和之前构建的字符串。在遇到右括号时，我们从栈中获取这些信息，并根据重复次数来构建新的模式字符串。如果在构建过程中发现模式字符串的长度超过了目标字符串的长度，我们就可以提前返回 !，表示无法匹配。

KMP 算法

一旦我们完成了模式字符串的展开，就可以使用 KMP 算法进行匹配。KMP 算法的核心思想是通过构建一个 next 数组来优化匹配过程，从而避免重复比较。next 数组的每个位置记录了当前字符不匹配时应该跳转到的位置，避免了无效的字符比较。

在进行匹配时，我们还需要定义一个检查函数 check，来判断目标字符串中的字符是否符合模式字符串中的规定（即 A 匹配字母，N 匹配数字）。如果匹配成功，我们输出匹配的子字符串。如果在整个目标字符串中没有找到匹配，则输出 !。

代码

Python

# 导入正则表达式模块
import re

def expand_pattern(t):
    # 模式串展开
    st = []  # 用于存储数字和之前的字符串
    num = 0  # 记录当前的数字
    res = ""  # 记录当前的字符串

    # 遍历模式串 t
    for c in t:
        if '0' <= c <= '9':  # 如果当前字符是数字
            num = num * 10 + int(c)  # 更新数字
        elif c == '(':  # 如果当前字符是左括号
            st.append((num, res))  # 将数字和之前的字符串入栈
            num = 0  # 重置数字
            res = ""  # 重置字符串
        elif c == ')':  # 如果当前字符是右括号
            n, a = st.pop()  # 获取栈顶的数字和字符串
            # 将当前字符串重复 n 次,并拼接到之前的字符串 a 上
            res = a + res * n
        else:  # 如果当前字符是普通字母
            res += c  # 添加到当前字符串
    return res  # 返回展开后的字符串

def build_next(t):
    # 构建 next 数组
    next = [0]  # next 数组的首位必定为 0
    j = 0
    for i in range(1, len(t)):
        while j > 0 and t[j] != t[i]:
            j = next[j - 1]
        if t[i] == t[j]:
            j += 1
        next.append(j)
    return next

def check(a, b):
    # 自定义检查函数,用于判断当前字符是否满足要求
    if b == 'A' and a.isalpha():
        return True
    if b == 'N' and a.isdigit():
        return True
    return False

def kmp_match(t, s):
    # 使用 KMP 算法进行匹配
    next = build_next(t)
    j = 0
    for i in range(len(s)):
        while j > 0 and not check(s[i], t[j]):
            j = next[j - 1]
        if check(s[i], t[j]):
            j += 1
        if j == len(t):
            return s[i - len(t) + 1:i + 1]  # 返回匹配的子串
    return "!"  # 未找到匹配返回 "!"

def main():
    t = input().strip()  # 读取模式串
    s = input().strip()  # 读取匹配串
    expanded_t = expand_pattern(t)  # 展开模式串
    if len(expanded_t) > len(s):  # 检查展开后的长度
        print("!")
    else:
        result = kmp_match(expanded_t, s)  # 执行 KMP 匹配
        print(result)

if __name__ == "__main__":
    main()

Java

import java.util.*;
import java.util.function.BiFunction; // 导入 BiFunction

public class Main {
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        String t = sc.next(); // 输入模式串
        String s = sc.next(); // 输入匹配串

        // 模式串展开
        Stack<Pair<Integer, String>> st = new Stack<>(); // 用于存储数字和之前的字符串
        int num = 0; // 记录当前的数字
        String res = ""; // 记录当前的字符串

        // 遍历模式串 t
        for (char c : t.toCharArray()) {
            if (Character.isDigit(c)) { // 如果当前字符是数字
                num = num * 10 + (c - '0'); // 更新数字
            } else if (c == '(') { // 如果当前字符是左括号
                st.push(new Pair<>(num, res)); // 将数字和之前的字符串入栈
                num = 0; // 重置数字
                res = ""; // 重置字符串
            } else if (c == ')') { // 如果当前字符是右括号
                int n = st.peek().getKey(); // 获取栈顶的数字
                String a = st.peek().getValue(); // 获取栈顶的字符串
                st.pop(); // 弹出栈顶元素
                if (n * res.length() + a.length() > s.length()) { // 检查长度是否超过匹配串
                    System.out.println("!");
                    return;
                }
                // 将当前字符串重复 n 次,并拼接到之前的字符串 a 上
                StringBuilder sb = new StringBuilder(a);
                for (int i = 0; i < n; i++) sb.append(res);
                res = sb.toString();
            } else { // 如果当前字符是普通字母
                res += c; // 添加到当前字符串
                if (res.length() > s.length()) { // 检查长度是否超过匹配串
                    System.out.println("!");
                    return;
                }
            }
        }

        // KMP 匹配
        t = res; // 将展开后的模式串赋给 t
        List<Integer> next = new ArrayList<>();
        next.add(0); // next 数组的首位必定为 0

        // 构建 next 数组
        for (int i = 1, j = 0; i < t.length(); i++) {
            while (j > 0 && t.charAt(j) != t.charAt(i)) {
                j = next.get(j - 1);
            }
            if (t.charAt(i) == t.charAt(j)) {
                j++;
            }
            next.add(j);
        }

        // 自定义检查函数,用于判断当前字符是否满足要求
        BiFunction<Character, Character, Boolean> check = (a, b) -> {
            if (b == 'A' && Character.isAlphabetic(a)) return true;
            if (b == 'N' && Character.isDigit(a)) return true;
            return false;
        };

        // 使用 KMP 算法进行匹配
        for (int i = 0, j = 0; i < s.length(); ++i) {
            while (j > 0 && !check.apply(s.charAt(i), t.charAt(j))) j = next.get(j - 1);
            if (check.apply(s.charAt(i), t.charAt(j))) j++;
            if (j == t.length()) {
                System.out.println(s.substring(i - t.length() + 1, i + 1));
                return;
            }
        }

        System.out.println("!");
    }

    // Pair 类，Java 没有内置的 Pair 类，可以使用这种简单的实现
    static class Pair<K, V> {
        private final K key;
        private final V value;

        public Pair(K key, V value) {
            this.key = key;
            this.value = value;
        }

        public K getKey() {
            return key;
        }

        public V getValue() {
            return value;
        }
    }
}

C++

#include <bits/stdc++.h>

using namespace std;

int main()
{
    string t, s;
    cin >> t >> s;

    // 模式串展开
    stack<pair<int, string>> st; // 用于存储数字和之前的字符串
    int num = 0; // 记录当前的数字
    string res = ""; // 记录当前的字符串

    // 遍历模式串 t
    for (auto c : t) {
        if (c >= '0' && c <= '9') { // 如果当前字符是数字
            num = num * 10 + (c - '0'); // 更新数字
        }
        else if (c == '(') { // 如果当前字符是左括号
            st.emplace(make_pair(num, res)); // 将数字和之前的字符串入栈
            num = 0; // 重置数字
            res = ""; // 重置字符串
        }
        else if (c == ')') { // 如果当前字符是右括号
            int n = st.top().first; // 获取栈顶的数字
            string a = st.top().second; // 获取栈顶的字符串
            st.pop(); // 弹出栈顶元素
            if (n * res.size() + a.size() > s.size()) { // 检查长度是否超过匹配串
                cout << "!" << endl;
                return 0;
            }
            // 将当前字符串重复 n 次,并拼接到之前的字符串 a 上
            for (int i = 0; i < n; i++) a += res;
            res = a;
        }
        else { // 如果当前字符是普通字母
            res += c; // 添加到当前字符串
            if (res.size() > s.size()) { // 检查长度是否超过匹配串
                cout << "!" << endl;
                return 0;
            }
        }
    }

    // KMP 匹配
    t = res; // 将展开后的模式串赋给 t
    vector<int> next;
    next.push_back(0); // next 数组的首位必定为 0

    // 构建 next 数组
    for (int i = 1, j = 0; i < t.length(); i++)
    {
        while (j > 0 && t[j] != t[i])
        {
            j = next[j - 1];
        }
        if (t[i] == t[j])
        {
            j++;
        }
        next.push_back(j);
    }

    // 自定义检查函数,用于判断当前字符是否满足要求
    auto check = [](char a, char b) -> bool {
        if (b == 'A' && isalpha(a)) return true;
        if (b == 'N' && isdigit(a)) return true;
        return false;
    };

    // 使用 KMP 算法进行匹配
    for (int i = 0, j = 0; i < s.size(); ++i) {
        while (j && !check(s[i], t[j])) j = next[j-1];
        if (check(s[i], t[j])) ++j;
        if (j == t.size()) {
            cout << s.substr(i - t.size() + 1, t.size() ) << endl;
            return 0;
        }
    }

    cout << "!" << endl;
    return 0;
}

ID

Time

1000ms

Memory

256MiB

Difficulty

Tags

# Submissions

305

Accepted

Uploaded By

TaZi

1 solutions

题面描述:

思路：模拟+KMP

解析模式字符串

KMP 算法

代码

Information

Status

Development

Support

1 solutions

题面描述:

思路：模拟+KMP

解析模式字符串

KMP 算法

代码

2024.05.15-暑期实习-第二题-塔子哥的编译原理实验

Information

Status

Development

Support

SIGN IN