最长公共子序列

P1439【模板】最长公共子序列
下面的模板代码只适用于每个数字只出现一次。

1 【模板】最长公共子序列: O(nlogn)

1.1 模板:LIS 的应用 最长上升子序列

#include <bits/stdc++.h>
using namespace std;
const int maxn=100005;
int a[maxn],d[maxn],idx[maxn];
int LIS(int n) {//完全用的LIS模板,统一模板更不容易错。
    if(!n)
        return 0;
    d[1] = a[1];
    int len = 1;
    for (int i = 2; i <= n;i++)
    {
        if(a[i]>d[len])
            d[++len] = a[i];
        else
        {
            int pos = lower_bound(d + 1, d + 1 + len, a[i]) - d;
            d[pos] = a[i];
        }
    }
    return len;
}
int main() {
    int n,temp;
    cin>>n;
    for(int i=1;i<=n;i++) cin>>a[i];
    for(int i=1;i<=n;i++) {cin>>temp;idx[temp]=i;}
    for(int i=1;i<=n;i++) {a[i]=idx[a[i]];}
    int ans=LIS(n);
    cout << ans << endl;
} 

1.2 手写二分

# 1.2 #include <bits/stdc++.h>
using namespace std;
int n;
int a[100010], b[100010];
int m[100010], f[100010];

int main()
{
    cin >> n;
    for (int i = 1; i <= n; i++)
        cin >> a[i], m[a[i]] = i;
    memset(f, 0x7f, sizeof f);
    for (int i = 1; i <= n; i++)
        cin >> b[i];
    int len = 0;
    f[0] = 0;
    for (int i = 1; i <= n; i++)
    {
        int l = 0, r = len, mid;
        if (m[b[i]] > f[len])
            f[++len] = m[b[i]];
        else
        {
            while (l < r)
            {
                mid = (l + r) / 2;
                if (f[mid] > m[b[i]])
                    r = mid;
                else
                    l = mid + 1;
            }
            f[l] = m[b[i]];
        }
    }
    cout << len << endl;
}

2 最长公共子序列

目前无时间复杂度小于 O(N2) 的方法,位压缩已经极限了

有这样的递归关系:


动态转移方程为:

f[i][j]=max{f[i-1][j], f[i][j-1], (f[i-1][j-1]+1) * [S[i]==T[j]}

2.1.1 用记忆化搜索解决

#include <bits/stdc++.h>
using namespace std;
string a, b;
int dp[505][505];
int LCS(int n1, int n2)
{
    if (dp[n1][n2] != -1)
        return dp[n1][n2];
    if (!n1 || !n2)
        return 0;
    else if (a[n1 - 1] == b[n2 - 1])
        dp[n1][n2] = LCS(n1 - 1, n2 - 1) + 1;
    else
        dp[n1][n2] = max(LCS(n1 - 1, n2), LCS(n1, n2 - 1));
    return dp[n1][n2];
}
int main()
{
    while (cin >> a >> b)
        memset(dp, -1, sizeof dp), cout << LCS(a.size(), b.size()) << '\n';
}

2.1.2 DP 写法

#include <bits/stdc++.h>
using namespace std;
string a, b;
int dp[505][505];

int main()
{
    while (cin >> a >> b)
    {
        memset(dp, 0, sizeof dp);
        int n1 = a.size(), n2 = b.size();
        for (int i = 1; i <= n1;i++)
        {
            for (int j = 1; j <= n2;j++)
            {
                if(a[i-1]==b[j-1])
                    dp[i][j] = dp[i - 1][j - 1] + 1;
                else
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]);
            }
        }
        cout << dp[n1][n2] << '\n';
    }
}

滚动数组可以将空间复杂度优化到 O(n2)

2.1.3 滚动数组优化

可能有的题会卡空间,滚动数组可以将空间复杂度优化到 O(n)

#include <bits/stdc++.h>
using namespace std;
string s1, s2;
int main()
{
    while(cin>>s1>>s2)
    {
        int m = s1.size(), n = s2.size();
        vector<int> dp1(n + 1, 0), dp2(n + 1, 0);
        for (int i = 1; i <= m; i++)
        {
            for (int j = 1; j <= n; j++)
            {
                if (s1[i - 1] == s2[j - 1])
                    dp2[j] = dp1[j - 1] + 1;
                else
                    dp2[j] = max(dp1[j], dp2[j - 1]);
            }
            dp1 = dp2;
        }
        cout<< dp2[n] << '\n';
    }
}


len3×104

不能使用 O(n2) 的方法,且靠二维数组也放不下那么大的内存。(len2=9×108)

Seems O(n232) 这样的话就能过。

似乎是用到了位压缩的方法(不懂)

#include <cstdio>
#include <cstring>
 
#define M 30005
#define SIZE 128
#define WORDMAX 3200
#define BIT 32
 
char s1[M], s2[M];
int nword;
unsigned int str[SIZE][WORDMAX];
unsigned int tmp1[WORDMAX], tmp2[WORDMAX];
 
void pre(int len)
{
	int i;
	memset(str, 0, sizeof(str));
	for (i = 0; i < len; i++)
		str[s1[i]][i / BIT] |= 1 << (i % BIT);
}
 
void cal(unsigned int *a, unsigned int *b, char ch)
{
	int i, bottom = 1, top;
	unsigned int x, y;
	for (i = 0; i < nword; i++) {
		y = a[i];
		x = y | str[ch][i];
		top = (y >> (BIT - 1)) & 1;
		y = (y << 1) | bottom;
		if (x < y) top = 1;
		b[i] = x & ((x - y) ^ x);
		bottom = top;
	}
}
 
int bitcnt(unsigned int *a)
{
	int i, j, res = 0, t;
	unsigned int b[5] = { 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff }, x;
	for (i = 0; i < nword; i++) {
		x = a[i];
		t = 1;
		for (j = 0; j < 5; j++, t <<= 1)
			x = (x & b[j]) + ((x >> t) & b[j]);
		res += x;
	}
	return res;
}
 
void process()
{
	int i, len1, len2;
	unsigned int *a, *b, *t;
	len1 = strlen(s1);
	len2 = strlen(s2);
	nword = (len1 + BIT - 1) / BIT;
	pre(len1);
	memset(tmp1, 0, sizeof(tmp1));
	a = &tmp1[0];
	b = &tmp2[0];
	for (i = 0; i < len2; i++) {
		cal(a, b, s2[i]);
		t = a; a = b; b = t;
	}
	printf("%d\n", bitcnt(a));
}
 
int main()
{
	while (scanf("%s%s", s1, s2) != EOF)
		process();
}

3 最长公共子串

与最长公共子序列思路相同

3.1 模板

while (cin >> s1 >> s2)
{
    memset(dp, 0, sizeof(dp));
    int n1 = s1.size(), n2 = s2.size(), ma = 0;
    for (int i = 1; i <= n1; ++i)
        for (int j = 1; j <= n2; ++j)
            if (s1[i - 1] == s2[j - 1])
                dp[i][j] = dp[i - 1][j - 1] + 1;
            else
                dp[i][j] = 0;//一旦没不符合就置为0
    for (int i = 1; i <= n1; ++i)
        for (int j = 1; j <= n2; ++j)
            ma = max(ma, dp[i][j]);
    cout << ma << endl;
}

求 A,B 所有字串的最长公共子序列中 Max( 4×LCS(C,D) |C||D|)

官方题解:

Let DP[i][j] be the maximum similarity score if we end the first substring with Ai and the second substring with Bj. We will also allow the corresponding most similar string to be empty so that DP[i][j] is always at least 0.

如果第一个子串以 Ai 结尾,第二个子串以 Bj 结尾,则 DP[i][j] 为最大相似度得分。我们还将允许相应的最相似字符串为空,这样 DP[i][j] 总是至少是 0 .

It turns out that the fact we need to search for substrings of our words is not a big problem, because we can think of extending the previous ones. In fact, we have just two possibilities:

  1. Ai and Bj are the same letters. In this case, we say that DP[i][j]=min(DP[i][j],DP[i1][j1]+2) as the new letter will increase the LCS by 1, but both of the strings increase by one in length, so the total gain is 411=2.
  2. In every case, we can refer to DP[i][j1] or DP[i][j1] to extend one of the previous substrings, but not the LCS, so: DP[i][j]=max(DP[i1][j],DP[i][j1])1.

事实证明,我们需要搜索单词的子串并不是一个大问题,因为我们可以考虑扩展前面的搜索。事实上,我们只有两种可能:

  1. AiBj 是相同的字母。在这种情况下,我们可以说 DP[i][j]=min(DP[i][j],DP[i1][j1]+2)作为新字母将使 LCS 增加 1,但是两个字符串的长度都增加了一个,因此总增益为 411=2

  2. 在每种情况下,我们都可以引用 DP[i][j1]DP[i][j1] 来扩展之前的一个子串,但不能扩展 LCS,所以:DP[i][j]=max(DP[i1][j],DP[i][j1])1.

An inquisitive reader may wonder why it doesn't hurt to always apply case 2 in calculations, so clearing the doubts, it's important to informally notice that we never get a greater LCS this way so wrong calculations only lead to the worse score, and that our code will always find a sequence of transitions which finds the true LCS as well.
Implementing the formulas gives a really short O(nm) solution.

好奇的读者可能会问,为什么在计算中总是应用情况 2 并没有什么坏处,为了消除这个疑问,我们有必要非正式地指出,这样我们永远不会得到更大的 LCS,所以错误的计算只会导致更糟糕的结果,而且我们的代码总是会找到一个过渡序列,这个序列也会找到真正的 LCS
执行这些公式可以得到一个非常简短的 O(nm) 解。

#include<bits/stdc++.h>
using namespace std;
int n1,n2;
string a,b;int ma;
int dp[5010][5010];
int main()
{
    cin >> n1 >> n2;
    cin >> a >> b;
    for (int i = 1; i <= n1; ++i)
        for (int j = 1; j <= n2; ++j)
            if (a[i - 1] == b[j - 1])
                dp[i][j] = dp[i - 1][j - 1] + 2;
            else
                dp[i][j] = max(max(dp[i - 1][j] - 1, dp[i][j - 1] - 1), 0);
    
    for (int i = 1; i <= n1; ++i)
        for (int j = 1; j <= n2; ++j)
            ma = max(ma, dp[i][j]);
            cout<<ma<<'\n';
}

LCS 长度和个数

LCS 具体是什么

求根据 LCS 拼接后的字符串