#include <bits/stdc++.h>
using namespace std;
#define VN first /* 用VN来代替first,与语义相适应 */
#define VT second
#define ERROR -1
typedef pair<string, int> psi;
typedef pair<string, string> pss;
class NFA
{
public:
ofstream fout;
bool isDef;
int snum, fsnum;
string line, VT, VN;
string Start;
vector<bool> FinalSet;
set<string> VtSet;
set<string> KeySet;
map<string, int> Map;
vector<vector<pss> > List;
map<string, int> Type;
map<string, int> SType;
NFA();
void input(string regular);
void print(string out);
void solve_line(string line);
void solve_pro(string production);
string get_start(string line);
string get_vn(string& line);
string get_vt(string& line);
void get_Type(string line);
void extend(string VN);
void nfa_2_dfa();
void e_closure(string status, vector<string>& V);
void vt_move(string vt, vector<string> V_closure, vector<string>& T);
};
class LA : public NFA
{
public:
ofstream fout;
string line;
string word;
string T;
int index;
int id;
int deviation;
int dec_error;
LA();
void analysis(NFA& nfa, string source, string out);
void read_next_VT(char ch, NFA& nfa, string out);
void output(int error, int type, NFA& nfa, string out);
bool isAlphabet(char ch);
bool isDigit(char ch);
bool isOps(string s);
string pre;
};
class match
{
private:
const char ch;
public:
explicit match(char c) : ch(c) {}
bool operator() (const std::pair<string, string>& element) const {
return element.second[0] == ch;
}
};
int main()
{
string regular = "regular expression.txt",
out = "lexer_product.txt",
source = "source_code.txt";
NFA nfa;
nfa.input(regular);
nfa.print(out);
nfa.nfa_2_dfa();
nfa.print(out);
LA la;
la.T = nfa.Start;
la.analysis(nfa, source, out);
return 0;
}
bool LA::isAlphabet(char ch) {
if (ch >= 'a' && ch <= 'z') return true;
if (ch >= 'A' && ch <= 'Z') return true;
return false;
}
bool LA::isDigit(char ch) {
if (ch >= '0' && ch <= '9') return true;
return false;
}
bool LA::isOps(string s) {
if (s == "=") return true;
if (s == "<") return true;
if (s == ">") return true;
if (s == ">=") return true;
if (s == "<=") return true;
if (s == "==") return true;
if (s == "<<") return true;
if (s == ">>") return true;
return false;
}
void LA::analysis(NFA& nfa, string source, string out)
{
cin.clear();
freopen(source.c_str(), "r", stdin);
int n;
bool flag = true;
while (getline(cin, line))
{
cout << line << endl;
n = line.length();
if (n == 0) break;
id++;
for (int i = 0; i < n; i++)
{
if (line[i] == '}') {
word = "}";
output(0, 3, nfa, out);
T = nfa.Start;
index = 0;
word = "";
continue;
}
if (line[i] == ' '|| line[i] == '\t') {
if (nfa.KeySet.count(word)) {
output(0, 5, nfa, out);
T = nfa.Start;
index = 0;
word = "";
continue;
}
else if (nfa.SType[T] == nfa.Type["Identifier"]) {
output(0, nfa.SType[T], nfa, out);
if (word != "" && !isOps(word)) {
pre = "ncmp";
}
T = nfa.Start;
index = 0;
word = "";
continue;
}
else {
continue;
}
}
if (word == "") {
if (line[i] == '\n') continue;
}
if (word == "/" && line[i] == '/') {
flag = false;
break;
}
else {
if (!isOps(word)) {
if (line[i] == '+' || line[i] == '-') {
if ((i + 1) < n && line[i + 1] == '=') {
if (line[i] == '+')
word = "+=";
else word = "-=";
i++;
output(0, 2, nfa, out);
T = nfa.Start;
index = 0;
word = "";
continue;
}
else if (pre != "cmp") {
word = line[i];
output(0, 2, nfa, out);
T = nfa.Start;
index = 0;
word = "";
continue;
}
}
}
read_next_VT(line[i], nfa, out);
if (dec_error == 1) {
dec_error = 0;
while (line[i] == '.' || (line[i] >= '0' && line[i] <= '9')) {
word += line[i++];
}
output(ERROR, 4, nfa, out);
T = nfa.Start;
index = 0;
word = "";
i--;
}
if (deviation == 1) {
deviation = 0;
while (isAlphabet(line[i])) {
word += line[i++];
}
output(ERROR, 4, nfa, out);
T = nfa.Start;
index = 0;
word = "";
i--;
}
}
}
if (flag) {
if (nfa.KeySet.count(word))
output(0, 5, nfa, out);
else
output(0, nfa.SType[T], nfa, out);
}
T = nfa.Start;
index = 0;
word = "";
}
fclose(stdin);
}
void LA::read_next_VT(char ch, NFA& nfa, string out)
{
auto it = std::find_if(nfa.List[index].begin(), nfa.List[index].end(), match(ch));
if (ch == '\t') {
output(-5, 5, nfa, out);
T = nfa.Start;
index = 0;
word = "";
return;
}
if (it == nfa.List[index].end())
{
if (ch == ' ' && word=="") {
output(-3, 5, nfa, out);
T = nfa.Start;
index = 0;
word = "";
return;
}
if (nfa.KeySet.count(word) && nfa.FinalSet[index]) {
output(0, 5, nfa, out);
T = nfa.Start;
index = 0;
word = "";
read_next_VT(ch, nfa, out);
return;
}
if (word == "+" || word == "-") {
if (!isDigit(ch) && ch != '=') {
output(0, 2, nfa, out);
T = nfa.Start;
index = 0;
word = "";
read_next_VT(ch, nfa, out);
return;
}
}
if (isOps(word)) {
pre = "cmp";
}
if (word != "" && !isOps(word)){
pre = "ncmp";
}
if (nfa.FinalSet[index])
{
if (nfa.SType[T] == 4 && isAlphabet(ch) && ch != 'i') {
deviation = 1;
return;
}
if (nfa.KeySet.count(word))
output(0, 5, nfa, out);
else {
if (nfa.SType[T] == 4 && ch == '.') {
dec_error = 1;
return;
}
else {
output(0, nfa.SType[T], nfa, out);
}
}
T = nfa.Start;
index = 0;
word = "";
read_next_VT(ch, nfa, out);
}
else
{
output(ERROR, nfa.SType[T], nfa, out);
}
}
else {
T = it->first;
index = nfa.Map[T];
word += ch;
}
}
void LA::output(int error, int type,NFA& nfa, string out)
{
fout.open(out.c_str(), ios::app);
if (error == ERROR)
{
index = nfa.Map[T];
auto it = std::find_if(nfa.Type.begin(), nfa.Type.end(), [type](const std::map<std::string, int>::value_type item)->bool {return item.second == type; });
cout << "ERROR(" << type << "): While tackling " << it->first << ", \'" << word << "\' is not a " << it->first << "." << endl;
}
else
{
auto it = std::find_if(nfa.Type.begin(), nfa.Type.end(), [type](const std::map<std::string, int>::value_type item)->bool {return item.second == type; });
int len = word.length();
fout << "(" << id << "," << it->first << "," << word << ")" << endl;
cout << "(" << id << "," << it->first << "," << word << ")" << endl;
}
fout.close();
}
LA::LA()
{
index = 0;
id = 0;
word = "";
T = "";
}
void NFA::nfa_2_dfa()
{
map<string, int> NewMap;
vector<vector<pss> > NewList;
map<string, int> NewSType;
vector<vector<string> > NewStates;
int n, id, t = 0;
string start = "T", NewStart;
vector<string> NewStartSet;
e_closure(Start, NewStartSet);
NewStates.push_back(NewStartSet);
NewStart = start + to_string(t);
NewMap[start + to_string(t)] = t++;
for (int map_id = 0; map_id < t; map_id++)
{
n = NewStates[map_id].size();
vector<pss> arcList;
unordered_map<string, int> Viewed;
for (int i = 0; i < n; i++)
{
int index = Map[NewStates[map_id][i]], m = List[index].size();
for (int j = 0; j < m; j++)
{
string vt = List[index][j].second;
if (vt == "$") continue;
if (Viewed.count(vt) != 0) continue;
else
{
vector<string> T;
vt_move(vt, NewStates[map_id], T);
Viewed[vt] = 1;
set<string>tmp(T.begin(), T.end());
T.assign(tmp.begin(), tmp.end());
int pos = find(NewStates.begin(), NewStates.end(), T) - NewStates.begin();
if (pos == t)
{
NewStates.push_back(T);
NewMap[start + to_string(t)] = t++;
for (int k = 0; k < T.size(); k++)
{
if (SType[T[k]] != 0) {
NewSType[start + to_string(t - 1)] = SType[T[k]];
}
}
}
arcList.push_back({ start + to_string(pos), vt });
}
}
}
NewList.push_back(arcList);
}
Map.swap(NewMap);
List.swap(NewList);
SType.swap(NewSType);
snum = t;
Start = start + "0";
FinalSet.resize(snum, false);
for (int i = 0; i < t; i++)
{
for (int j = 0; j < NewStates[i].size(); j++)
{
if (NewStates[i][j].find("final") == -1) continue;
else
{
FinalSet[i] = true;
break;
}
}
}
isDef = true;
}
void NFA::e_closure(string status, vector<string>& T)
{
int id, n;
string tmp;
stack<string> Stack;
Stack.push(status);
T.push_back(status);
while (!Stack.empty())
{
tmp = Stack.top();
Stack.pop();
id = Map[tmp];
n = List[id].size();
for (int i = 0; i < n; i++)
{
if (List[id][i].second == "$")
{
if (find(T.begin(), T.end(), List[id][i].first) == T.end())
{
Stack.push(List[id][i].first);
T.push_back(List[id][i].first);
}
}
}
}
}
void NFA::vt_move(string vt, vector<string> V, vector<string>& T)
{
int id, n;
for (int i = 0; i < V.size(); i++)
{
id = Map[V[i]];
n = List[id].size();
for (int j = 0; j < n; j++)
{
if (List[id][j].second == vt)
{
e_closure(List[id][j].first, T);
break;
}
}
}
}
void NFA::solve_line(string line)
{
if (line[0] == '/' && line[1] == '/') return;
if (line.find('|') == -1)
solve_pro(line);
else {
string subfront, subrear;
int pos = line.find('>'), j = pos + 1;
subfront = line.substr(0, pos + 1);
while (line.find('|', pos + 1) != -1)
{
pos = line.find('|', pos + 1);
subrear = line.substr(j, pos - j);
j = pos + 1;
solve_pro(subfront + subrear);
}
subrear = subrear = line.substr(j);
solve_pro(subfront + subrear);
}
}
void NFA::solve_pro(string production)
{
VN = get_vn(production);
VT = get_vt(production);
VtSet.insert(VT);
if (Map.count(VN) == 0)
extend(VN);
if (Map.count(production) == 0)
extend(production);
int id = Map[VN];
List[id].push_back(pss(production, VT));
if (VN == Start) return;
if (SType.count(VN) == 0 && Type.count(VN))
SType[VN] = Type[VN];
if (SType.count(production) == 0)
SType[production] = SType[VN];
}
void NFA::extend(string VN)
{
vector<pss> t;
List.push_back(t);
Map[VN] = snum++;
}
string NFA::get_start(string production)
{
int i = 0, j = 0;
while (j < production.length() && production[j] != ' ' && production[j] != '-') j++;
string sub = production.substr(i, j);
return sub;
}
string NFA::get_vn(string& production)
{
int i = 0, j = 0;
while (j < production.length() && production[j] != ' ' && production[j] != '-') j++;
string sub = production.substr(i, j);
while (j < production.length() && (production[j] == ' ' || production[j] == '-' || production[j] == '>'))
{
j++;
if (production[j - 1] == '>') break;
}
while (j < production.length() && production[j] == ' ') j++;
production.erase(i, j);
return sub;
}
string NFA::get_vt(string& production)
{
int i = 0, j = 0;
string sub;
if (isalpha(production[j]) == 1)
sub = "$";
else if (production[j] == '\'') {
j++;
while (production[j] != '\'') j++;
j++;
sub = production.substr(i + 1, j - 2);
if (j > 3)
{
KeySet.insert(sub);
}
production.erase(i, j);
}
else
{
j++;
sub = production.substr(i, j);
production.erase(i, j);
}
if (production == "")
{
production = "final" + to_string(fsnum);
fsnum++;
}
return sub;
}
void NFA::get_Type(string line)
{
int i = 0, id = 1;
while (i < line.length() && line[i] != '>') i++;
i++;
while (line.length() && line[i] == ' ') i++;
while (i < line.length())
{
int j = i;
while (line[i] != '|' && i < line.length()) i++;
string tmp = line.substr(j, i - j);
Type[tmp] = id++;
i++;
}
}
void NFA::print(string out)
{
int k = 0;
if (isDef)
fout.open(out, ios::app);
else
fout.open(out);
if (isDef) fout << "DFA\n";
else fout << "NFA\n";
for (int i = 0; i < 150; i++) fout << '-';
fout << "\n";
fout << "VN ( nonterminal character ) \n";
for (const auto p : Map){
fout << std::left << setw(12) << p.first << ' ';
k++;
if (k % 11 == 0) {
fout << "\n";
k -= 11;
}
}
fout << "\n";
for (int i = 0; i < 150; i++) fout << '-';
fout << "\nVT ( terminal character ) \n";
k = 0;
for (const auto p : VtSet) {
fout << std::left << setw(16) << p << ' ';
k++;
if (k % 9 == 0) {
fout << "\n";
k -= 9;
}
}
fout << "\n";
for (int i = 0; i < 150; i++) fout << '-';
k = 0;
fout << "\nKey ( key character ) \n";
for (const auto p : KeySet) {
fout << std::left << setw(16) << p << ' ';
k++;
if (k % 9 == 0) {
fout << "\n";
k -= 9;
}
}
fout << "\n";
for (int i = 0; i < 150; i++) fout << '-';
fout << "\nStart ( initial state ) \n" << Start;
fout << "\n";
for (int i = 0; i < 150; i++) fout << '-';
fout << "\nFinal ( final state ) \n";
k = 0;
for (int i = 0; i < snum; i++)
if (FinalSet[i] == true)
{
auto it = std::find_if(Map.begin(), Map.end(), [i](const std::map<std::string, int>::value_type item)->bool {return item.second == i; });
fout << std::left << setw(12) << it->first << ' ';
k++;
if (k % 11 == 0) {
fout << "\n";
k -= 11;
}
}
fout << "\n";
for (int i = 0; i < 150; i++) fout << '-';
fout << "\nType \n";
k = 0;
for (const auto p : Type) {
fout << std::left << setw(16) << p.first << ' ' << p.second << '\t';
k++;
if (k % 6 == 0) {
fout << "\n";
k -= 6;
}
}
fout << "\n";
for (int i = 0; i < 150; i++) fout << '-';
fout << "\nSType \n";
k = 0;
for (const auto p : SType) {
fout << std::left << setw(16) << p.first << ' ' << p.second << '\t';
k++;
if (k % 6 == 0) {
fout << "\n";
k -= 6;
}
}
fout << "\n";
fout << "\nListArc \n";
for (int i = 0; i < 150; i++) fout << '-';
fout << "\n";
int n, i;
for (const auto p : Map)
{
if (p.first == "") continue;
fout << p.first << ": " << "\n";
i = p.second;
n = List[i].size();
if (n == 0) fout << "NULL";
k = 0;
for (int j = 0; j < n; j++)
{
fout << "( " << List[i][j].second << " , " << List[i][j].first << " ); ";
k++;
if (k % 6 == 0) {
fout << "\n";
k -= 6;
}
}
fout << "\n";
for (int i = 0; i < 150; i++) fout << '-';
fout << "\n";
}
fout.close();
}
void NFA::input(string regular)
{
freopen(regular.c_str(), "r", stdin);
getline(cin, line);
Start = get_start(line);
get_Type(line);
do {
solve_line(line);
line = "";
getline(cin, line);
} while (line != "");
FinalSet.resize(snum, false);
for (int i = 0; i < fsnum; i++)
FinalSet[Map["final" + to_string(i)]] = true;
fclose(stdin);
}
NFA::NFA()
{
snum = 0;
fsnum = 0;
isDef = false;
}