LeetCode 187. 【Java】187. Repeated DNA Sequences
原题链接
中等
作者:
tt2767
,
2020-02-20 16:58:15
,
所有人可见
,
阅读 672
/*
1. 计算前缀HASH,用逆前缀和的方式算出每10个字符的HASH,输出重复的hash对应的子段
2. testcase: null , "", "qweqwe", "qwe", "(a*11)b", "abcdaweasfcsdgikdtkhgbfsvfd", "(a*12)", random
*/
class Solution {
long BASE = 131, MOD = Long.MAX_VALUE;
int LEN = 10;
List<Long> power = new ArrayList<>();
public List<String> findRepeatedDnaSequences(String s) {
if (s == null || s.length() <= 10) return new ArrayList<>();
return find(s);
}
public List<String> find(String s) {
Set<Long> set = new HashSet<>();
Set<Long> unique = new HashSet<>();
List<String> res = new ArrayList<>();
List<Long> hash = getPrefixHash(s);
for (int i = 0 ; i + LEN - 1 < s.length() ; i++){
int l = i + 1, r = i + LEN;
long h = getSubHash(hash, l, r);
if (!set.contains(h) ){
set.add(h);
} else if (!unique.contains(h)){
res.add(s.substring(i, i + LEN));
unique.add(h);
}
}
return res;
}
public List<Long> getPrefixHash(String s){
List<Long> hash = new ArrayList<>();
hash.add(0L);
power.add(1L);
for (int i = 0 ; i < s.length(); i++){
hash.add(hash.get(i) * BASE % MOD + s.charAt(i) - 'a' + 1);
power.add(power.get(i) * BASE % MOD);
}
return hash;
}
public long getSubHash(List<Long> hash, int l, int r){
return hash.get(r) - hash.get(l-1) * power.get(r-l+1) % MOD;
}
}