最长公共子序列

P1439【模板】最长公共子序列
下面的模板代码只适用于每个数字只出现一次。

1 【模板】最长公共子序列: $O (n l o g n)$

1.1 模板：LIS 的应用最长上升子序列

#include <bits/stdc++.h>
using namespace std;
const int maxn=100005;
int a[maxn],d[maxn],idx[maxn];
int LIS(int n) {//完全用的LIS模板，统一模板更不容易错。
    if(!n)
        return 0;
    d[1] = a[1];
    int len = 1;
    for (int i = 2; i <= n;i++)
    {
        if(a[i]>d[len])
            d[++len] = a[i];
        else
        {
            int pos = lower_bound(d + 1, d + 1 + len, a[i]) - d;
            d[pos] = a[i];
        }
    }
    return len;
}
int main() {
    int n,temp;
    cin>>n;
    for(int i=1;i<=n;i++) cin>>a[i];
    for(int i=1;i<=n;i++) {cin>>temp;idx[temp]=i;}
    for(int i=1;i<=n;i++) {a[i]=idx[a[i]];}
    int ans=LIS(n);
    cout << ans << endl;
}

1.2 手写二分

# 1.2 #include <bits/stdc++.h>
using namespace std;
int n;
int a[100010], b[100010];
int m[100010], f[100010];

int main()
{
    cin >> n;
    for (int i = 1; i <= n; i++)
        cin >> a[i], m[a[i]] = i;
    memset(f, 0x7f, sizeof f);
    for (int i = 1; i <= n; i++)
        cin >> b[i];
    int len = 0;
    f[0] = 0;
    for (int i = 1; i <= n; i++)
    {
        int l = 0, r = len, mid;
        if (m[b[i]] > f[len])
            f[++len] = m[b[i]];
        else
        {
            while (l < r)
            {
                mid = (l + r) / 2;
                if (f[mid] > m[b[i]])
                    r = mid;
                else
                    l = mid + 1;
            }
            f[l] = m[b[i]];
        }
    }
    cout << len << endl;
}

2 最长公共子序列

目前无时间复杂度小于 $O (N^{2})$ 的方法，位压缩已经极限了

2.1 HDU 1159 的示例 (字母的子序列) $O (n^{2})$

有这样的递归关系：

即动态转移方程为：

f[i][j]=max{f[i-1][j], f[i][j-1], (f[i-1][j-1]+1) * [S[i]==T[j]}

2.1.1 用记忆化搜索解决

#include <bits/stdc++.h>
using namespace std;
string a, b;
int dp[505][505];
int LCS(int n1, int n2)
{
    if (dp[n1][n2] != -1)
        return dp[n1][n2];
    if (!n1 || !n2)
        return 0;
    else if (a[n1 - 1] == b[n2 - 1])
        dp[n1][n2] = LCS(n1 - 1, n2 - 1) + 1;
    else
        dp[n1][n2] = max(LCS(n1 - 1, n2), LCS(n1, n2 - 1));
    return dp[n1][n2];
}
int main()
{
    while (cin >> a >> b)
        memset(dp, -1, sizeof dp), cout << LCS(a.size(), b.size()) << '\n';
}

2.1.2 DP 写法

#include <bits/stdc++.h>
using namespace std;
string a, b;
int dp[505][505];

int main()
{
    while (cin >> a >> b)
    {
        memset(dp, 0, sizeof dp);
        int n1 = a.size(), n2 = b.size();
        for (int i = 1; i <= n1;i++)
        {
            for (int j = 1; j <= n2;j++)
            {
                if(a[i-1]==b[j-1])
                    dp[i][j] = dp[i - 1][j - 1] + 1;
                else
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]);
            }
        }
        cout << dp[n1][n2] << '\n';
    }
}

滚动数组可以将空间复杂度优化到 $O (n^{2})$

2.1.3 滚动数组优化

可能有的题会卡空间，滚动数组可以将空间复杂度优化到 $O (n)$

#include <bits/stdc++.h>
using namespace std;
string s1, s2;
int main()
{
    while(cin>>s1>>s2)
    {
        int m = s1.size(), n = s2.size();
        vector<int> dp1(n + 1, 0), dp2(n + 1, 0);
        for (int i = 1; i <= m; i++)
        {
            for (int j = 1; j <= n; j++)
            {
                if (s1[i - 1] == s2[j - 1])
                    dp2[j] = dp1[j - 1] + 1;
                else
                    dp2[j] = max(dp1[j], dp2[j - 1]);
            }
            dp1 = dp2;
        }
        cout<< dp2[n] << '\n';
    }
}

2.2 HDU 2253 的示例：

难
$l e n \leq 3 \times 10^{4}$

不能使用 $O (n^{2})$ 的方法，且靠二维数组也放不下那么大的内存。 $(l e n^{2} = 9 \times 10^{8})$

Seems $O (\frac{n^{2}}{32})$ 这样的话就能过。

似乎是用到了位压缩的方法（不懂）

#include <cstdio>
#include <cstring>
 
#define M 30005
#define SIZE 128
#define WORDMAX 3200
#define BIT 32
 
char s1[M], s2[M];
int nword;
unsigned int str[SIZE][WORDMAX];
unsigned int tmp1[WORDMAX], tmp2[WORDMAX];
 
void pre(int len)
{
	int i;
	memset(str, 0, sizeof(str));
	for (i = 0; i < len; i++)
		str[s1[i]][i / BIT] |= 1 << (i % BIT);
}
 
void cal(unsigned int *a, unsigned int *b, char ch)
{
	int i, bottom = 1, top;
	unsigned int x, y;
	for (i = 0; i < nword; i++) {
		y = a[i];
		x = y | str[ch][i];
		top = (y >> (BIT - 1)) & 1;
		y = (y << 1) | bottom;
		if (x < y) top = 1;
		b[i] = x & ((x - y) ^ x);
		bottom = top;
	}
}
 
int bitcnt(unsigned int *a)
{
	int i, j, res = 0, t;
	unsigned int b[5] = { 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff }, x;
	for (i = 0; i < nword; i++) {
		x = a[i];
		t = 1;
		for (j = 0; j < 5; j++, t <<= 1)
			x = (x & b[j]) + ((x >> t) & b[j]);
		res += x;
	}
	return res;
}
 
void process()
{
	int i, len1, len2;
	unsigned int *a, *b, *t;
	len1 = strlen(s1);
	len2 = strlen(s2);
	nword = (len1 + BIT - 1) / BIT;
	pre(len1);
	memset(tmp1, 0, sizeof(tmp1));
	a = &tmp1[0];
	b = &tmp2[0];
	for (i = 0; i < len2; i++) {
		cal(a, b, s2[i]);
		t = a; a = b; b = t;
	}
	printf("%d\n", bitcnt(a));
}
 
int main()
{
	while (scanf("%s%s", s1, s2) != EOF)
		process();
}

3 最长公共子串

与最长公共子序列思路相同

3.1 模板

while (cin >> s1 >> s2)
{
    memset(dp, 0, sizeof(dp));
    int n1 = s1.size(), n2 = s2.size(), ma = 0;
    for (int i = 1; i <= n1; ++i)
        for (int j = 1; j <= n2; ++j)
            if (s1[i - 1] == s2[j - 1])
                dp[i][j] = dp[i - 1][j - 1] + 1;
            else
                dp[i][j] = 0;//一旦没不符合就置为0
    for (int i = 1; i <= n1; ++i)
        for (int j = 1; j <= n2; ++j)
            ma = max(ma, dp[i][j]);
    cout << ma << endl;
}

3.2 示例: Problem - B - Codeforces

求 A，B 所有字串的最长公共子序列中 $M a x (4 \times L C S (C, D) - | C | - | D |)$

官方题解：

Let $D P [i] [j]$ be the maximum similarity score if we end the first substring with $A_{i}$ and the second substring with $B_{j}$ . We will also allow the corresponding most similar string to be empty so that $D P [i] [j]$ is always at least $0$ .

如果第一个子串以 $A_{i}$ 结尾，第二个子串以 $B_{j}$ 结尾，则 $D P [i] [j]$ 为最大相似度得分。我们还将允许相应的最相似字符串为空，这样 $D P [i] [j]$ 总是至少是 $0$ .

It turns out that the fact we need to search for substrings of our words is not a big problem, because we can think of extending the previous ones. In fact, we have just two possibilities:

$A_{i}$ and $B_{j}$ are the same letters. In this case, we say that $D P [i] [j] = m i n (D P [i] [j], D P [i - 1] [j - 1] + 2)$ as the new letter will increase the LCS by $1$ , but both of the strings increase by one in length, so the total gain is $4 - 1 - 1 = 2.$

In every case, we can refer to $D P [i] [j - 1]$ or $D P [i] [j - 1]$ to extend one of the previous substrings, but not the LCS, so: $D P [i] [j] = m a x (D P [i - 1] [j], D P [i] [j - 1]) - 1.$

事实证明，我们需要搜索单词的子串并不是一个大问题，因为我们可以考虑扩展前面的搜索。事实上，我们只有两种可能：

$A_{i}$ 和 $B_{j}$ 是相同的字母。在这种情况下，我们可以说 $D P [i] [j] = m i n (D P [i] [j], D P [i - 1] [j - 1] + 2)$ 作为新字母将使 $L C S$ 增加 $1$ ，但是两个字符串的长度都增加了一个，因此总增益为 $4 - 1 - 1 = 2$ 。
在每种情况下，我们都可以引用 $D P [i] [j - 1]$ 或 $D P [i] [j - 1]$ 来扩展之前的一个子串，但不能扩展 $L C S$ ，所以： $D P [i] [j] = m a x (D P [i - 1] [j], D P [i] [j - 1]) - 1.$

An inquisitive reader may wonder why it doesn't hurt to always apply case $2$ in calculations, so clearing the doubts, it's important to informally notice that we never get a greater $L C S$ this way so wrong calculations only lead to the worse score, and that our code will always find a sequence of transitions which finds the true $L C S$ as well.
Implementing the formulas gives a really short $O (n \cdot m)$ solution.

好奇的读者可能会问，为什么在计算中总是应用情况 $2$ 并没有什么坏处，为了消除这个疑问，我们有必要非正式地指出，这样我们永远不会得到更大的 $L C S$ ，所以错误的计算只会导致更糟糕的结果，而且我们的代码总是会找到一个过渡序列，这个序列也会找到真正的 $L C S$ 。
执行这些公式可以得到一个非常简短的 $O (n \cdot m)$ 解。

#include<bits/stdc++.h>
using namespace std;
int n1,n2;
string a,b;int ma;
int dp[5010][5010];
int main()
{
    cin >> n1 >> n2;
    cin >> a >> b;
    for (int i = 1; i <= n1; ++i)
        for (int j = 1; j <= n2; ++j)
            if (a[i - 1] == b[j - 1])
                dp[i][j] = dp[i - 1][j - 1] + 2;
            else
                dp[i][j] = max(max(dp[i - 1][j] - 1, dp[i][j - 1] - 1), 0);
    
    for (int i = 1; i <= n1; ++i)
        for (int j = 1; j <= n2; ++j)
            ma = max(ma, dp[i][j]);
            cout<<ma<<'\n';
}

4 P2516 最长公共子序列 - 洛谷

求 $L C S$ 长度和个数

5 51Nod-1006

求 $L C S$ 具体是什么

6 Hdu1503

求根据 $L C S$ 拼接后的字符串

1 【模板】最长公共子序列: O(nlogn)

1.1 模板：LIS 的应用 最长上升子序列