[CF528D]:Fuzzy Search – MXG's blog

[CF528D]:Fuzzy Search

[CF528D]:Fuzzy Search

Description

给定两个字符串,以及常数$k$,询问模板串在母串中模糊出现了几次..所谓模糊出现指的是存在$i-k \leq j\leq i+k$使得$S_i = T_j$


Hint

字符集大小$\leq 4$ $1 \leq |S|,|T| \leq 200000$


Solution

对于每一个字符分开考虑

假设现在考虑的是字符$c$
设$f_i$表示在母串中以$i$开始可以匹配到多少个字符
构造向量$a, b$如果$S_i$可以匹配到字符$c$那么$a_i=1$
$b_i=[t_i==c]$

则$f_i=\sum_{i=0}^{m-1}a_{i+j}b_i$
翻转$b$向量,这就是个卷积的形式可以用FFT优化

如果最后$f_i=m$表示以$i$开始的长度为$m$字符串可以匹配到模式串
为答案贡献1


Code

#include<cstdio>
#include<cmath>
#include<cstring>
#include<algorithm>

using namespace std;

const int N = 800029;
const double pi = acos(-1.0);
struct complex {
    double x, y;
    complex(double tx = 0, double ty = 0) {
        x = tx; y = ty;
    }
    complex operator + (const complex &o) const {
        return complex(x + o.x, y + o.y);
    }
    complex operator - (const complex &o) const {
        return complex(x - o.x, y - o.y);
    }
    complex operator * (const complex &o) const {
        return complex(x * o.x - y * o.y, x * o.y + y * o.x);
    }
}a[N], b[N]; 
int r[N], ans[N], f[N][4], cnt[4], id[255];
int limit = 1, l, n, m, qwq, up, down;
char s[N], t[N];

void FFT(complex *A, int type) {
    for(int i = 0; i < limit; ++i)
        if(i < r[i])
            swap(A[i], A[r[i]]);
    for(int cur = 1; cur < limit; cur <<= 1) {
        complex Wn(cos(pi / cur), type * sin(pi / cur));
        int len = cur << 1;
        for(int j = 0; j < limit; j += len) {
            complex w(1, 0);
            for(int k = 0; k < cur; ++k, w = w * Wn) {
                complex x = A[j + k], y = w * A[j + k + cur];
                A[j + k] = x + y; A[j + k + cur] = x - y;
            }
        }
    }
}

void process(char c) {
    int cur = id[ (int) c];
    memset((&a), 0, sizeof(a));
    memset((&b), 0, sizeof(b));
    for(int i = 0; i < n; ++i)
        a[i].x = f[i][cur];
    for(int i = 0; i < m; ++i)
        b[m - i - 1].x = t[i] == c;
    FFT(a, 1); FFT(b, 1);
    for(int i = 0; i < limit; ++i)
        a[i] = a[i] * b[i];
    FFT(a, -1);
    for(int i = 0; i < n; ++i)
        ans[i] += (int) (a[i + m - 1].x / limit + 0.5);
}

int main() {
    scanf("%d%d%d", &n, &m, &qwq);
    scanf("%s%s", s, t);
    id['A'] = 0; id['G'] = 1; id['C'] = 2; id['T'] = 3;
    cnt[id[(int) s[0]]]++;
    for(int i = 0; i < n; ++i) {
        while(down < i - qwq) cnt[id[(int)s[down++]]]--;
        while(up < i + qwq && up < n - 1) cnt[id[(int)s[++up]]]++;
        for(int j = 0; j < 4; ++j)
            if(cnt[j]) f[i][j] = 1;
    }
    while(limit <= n + m) {
        limit <<= 1; l++;
    } 
    for(int i = 0; i < limit; ++i)
        r[i] = (r[i >> 1] >> 1) | ((i & 1) << (l - 1));
    process('A'); process('G');
    process('C'); process('T');
    int sum = 0;
    for(int i = 0; i < n; ++i)
        if(ans[i] == m) sum++;
    printf("%d\n", sum);
    return 0;
}


说点什么

avatar
  Subscribe  
提醒