From 1d8073d4968baecbcaa06e3a45734fbe833b2ff0 Mon Sep 17 00:00:00 2001 From: Noah Heuser <nheuser@uni-koblenz.de> Date: Tue, 18 Jun 2024 23:42:49 +0200 Subject: [PATCH] Runtime optimizations in Type2Parser --- .../src/co_reachable/CoReachability.java | 8 +- ChatGPTParsing/src/main/Main.java | 100 ++++++-- ChatGPTParsing/src/type2/Type2Parser.java | 223 +++++++----------- .../src/type2/VariableSetTable.java | 58 +++++ .../src/type3/Type3GrammarGenerator.java | 4 +- ChatGPTParsing/src/writer/VariableSet.java | 26 -- 6 files changed, 239 insertions(+), 180 deletions(-) create mode 100644 ChatGPTParsing/src/type2/VariableSetTable.java delete mode 100644 ChatGPTParsing/src/writer/VariableSet.java diff --git a/ChatGPTParsing/src/co_reachable/CoReachability.java b/ChatGPTParsing/src/co_reachable/CoReachability.java index f92e69d..7a67054 100644 --- a/ChatGPTParsing/src/co_reachable/CoReachability.java +++ b/ChatGPTParsing/src/co_reachable/CoReachability.java @@ -14,9 +14,9 @@ public class CoReachability { int m = Integer.MAX_VALUE; while (n != m) { m = n; - for (Entry<String, List<List<String>>> v : rules.entrySet()) { - if (!coReachable.contains(v.getKey())) { - for (List<String> conclusio : v.getValue()) { + for (Entry<String, List<List<String>>> entry : rules.entrySet()) { + if (!coReachable.contains(entry.getKey())) { + for (List<String> conclusio : entry.getValue()) { boolean notCoReachable = false; for (String c : conclusio) { if (rules.keySet().contains(c) && !coReachable.contains(c)) { @@ -25,7 +25,7 @@ public class CoReachability { } } if (!notCoReachable) { - coReachable.add(v.getKey()); + coReachable.add(entry.getKey()); break; } } diff --git a/ChatGPTParsing/src/main/Main.java b/ChatGPTParsing/src/main/Main.java index 136366b..e1a3914 100644 --- a/ChatGPTParsing/src/main/Main.java +++ b/ChatGPTParsing/src/main/Main.java @@ -1,27 +1,97 @@ package main; -import writer.Writer; +import java.util.List; +import java.util.Map; + +import grammar.Grammar; +import grammar.Type2Grammar; +import type2.Type2Parser; public class Main { // ε public static void main(String[] args) { - Writer w = new Writer(); - String[] vars = { "S", "A", "B", "C", "D" }; +// Writer w = new Writer(); + String[] vars = { "S'", "S", "A", "B", "C", "D" }; int alphaSize = 3; - int maxConclusios = 3; - int maxConclusioSize = 4; - String start = "S"; - int mode = 3; - w.runType2Grammar(vars, alphaSize, maxConclusios, maxConclusioSize, start, mode, 3); - // Grammar g = Grammar.toGrammar( -// "G = (V, T, R, S)\nV = {S, A, B, C, D}\nT = {a, b, c}\nR = {S -> A,\n A -> BcD | aC | ac,\n B -> c | DbC,\n C -> cc,\n D -> SS}"); -// Type2Parser t2p = new Type2Parser(); -// Map<String, List<List<String>>> gNormal = t2p.createCNF(g.getRules(), g.getStart()); -// System.out.println(gNormal); -// System.out.println("aa*ac".equals("aa*ac".toLowerCase())); -// System.out.println("Ü".toLowerCase()); +// int maxConclusios = 3; +// int maxConclusioSize = 4; +// String start = "S"; +// int mode = 3; +// w.runType2Grammar(vars, alphaSize, maxConclusios, maxConclusioSize, start, mode, 3); + Grammar g = Grammar.toGrammar( + "G = (V, T, R, S)\nV = {S, A, B, C, D}\nT = {a, b, c}\nR = {S -> S | AC | CASD,\n A -> cD | c | DDCa,\n B -> abB | Sbc | a,\n C -> Ba,\n D -> bAbc | Saca | Ac}"); + Type2Parser t2p = new Type2Parser(); + Map<String, List<List<String>>> gNormal = t2p.createCNF(g.getRules(), g.getStart(), 3); + boolean b = t2p.isPartOfLanguage(gNormal, "cbcbcaa"); + System.out.println(b); + System.out.println(gNormal); + Type2Grammar g0 = new Type2Grammar(vars, alphaSize, "S'", gNormal); + System.out.println(g0); +// Map<String, List<List<String>>> rules = new LinkedHashMap<>(); +// List<List<String>> s = new LinkedList<>(); +// List<String> s0 = new LinkedList<>(); +// s0.add("A"); +// s0.add("B"); +// List<String> s1 = new LinkedList<>(); +// s1.add("C"); +// s1.add("D"); +// List<String> s2 = new LinkedList<>(); +// s2.add("E"); +// s2.add("D"); +// s.add(s0); +// s.add(s1); +// s.add(s2); +// rules.put("S", s); +// List<List<String>> a = new LinkedList<>(); +// List<String> a0 = new LinkedList<>(); +// a0.add("b"); +// a.add(a0); +// rules.put("A", a); +// List<List<String>> b = new LinkedList<>(); +// List<String> b0 = new LinkedList<>(); +// b0.add("C"); +// b0.add("S"); +// List<String> b1 = new LinkedList<>(); +// b1.add("A"); +// b1.add("E"); +// List<String> b2 = new LinkedList<>(); +// b2.add("a"); +// b.add(b0); +// b.add(b1); +// b.add(b2); +// rules.put("B", b); +// List<List<String>> c = new LinkedList<>(); +// List<String> c0 = new LinkedList<>(); +// c0.add("a"); +// c.add(c0); +// rules.put("C", c); +// List<List<String>> d = new LinkedList<>(); +// List<String> d0 = new LinkedList<>(); +// d0.add("A"); +// d0.add("S"); +// List<String> d1 = new LinkedList<>(); +// d1.add("C"); +// d1.add("F"); +// List<String> d2 = new LinkedList<>(); +// d2.add("b"); +// d.add(d0); +// d.add(d1); +// d.add(d2); +// rules.put("D", d); +// List<List<String>> e = new LinkedList<>(); +// List<String> e0 = new LinkedList<>(); +// e0.add("B"); +// e0.add("B"); +// e.add(e0); +// rules.put("E", e); +// List<List<String>> f = new LinkedList<>(); +// List<String> f0 = new LinkedList<>(); +// f0.add("D"); +// f0.add("D"); +// f.add(f0); +// rules.put("F", f); } } diff --git a/ChatGPTParsing/src/type2/Type2Parser.java b/ChatGPTParsing/src/type2/Type2Parser.java index 6f1abe6..0a7a4ce 100644 --- a/ChatGPTParsing/src/type2/Type2Parser.java +++ b/ChatGPTParsing/src/type2/Type2Parser.java @@ -1,6 +1,5 @@ package type2; -import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; @@ -13,7 +12,6 @@ import java.util.Set; import auxiliary.ListOfListContains; import co_reachable.CoReachability; import reachable.Reachability; -import writer.VariableSet; public class Type2Parser { @@ -94,10 +92,10 @@ public class Type2Parser { return new LinkedHashMap<>(); } Map<String, List<List<String>>> newRules = new LinkedHashMap<>(); - for (Entry<String, List<List<String>>> rule : rules.entrySet()) { - if (coReachable.contains(rule.getKey())) { + for (Entry<String, List<List<String>>> entry : rules.entrySet()) { + if (coReachable.contains(entry.getKey())) { List<List<String>> newConclusios = new LinkedList<>(); - for (List<String> conclusio : rule.getValue()) { + for (List<String> conclusio : entry.getValue()) { boolean containsNonCoReachable = false; for (String c : conclusio) { if (rules.containsKey(c) && !coReachable.contains(c)) { @@ -109,7 +107,7 @@ public class Type2Parser { newConclusios.add(conclusio); } } - newRules.put(rule.getKey(), newConclusios); + newRules.put(entry.getKey(), newConclusios); } } return newRules; @@ -119,10 +117,10 @@ public class Type2Parser { String start) { Set<String> reachable = Reachability.reachableSet(rules, start); Map<String, List<List<String>>> newRules = new LinkedHashMap<>(); - for (Entry<String, List<List<String>>> rule : rules.entrySet()) { - if (reachable.contains(rule.getKey())) { + for (Entry<String, List<List<String>>> entry : rules.entrySet()) { + if (reachable.contains(entry.getKey())) { List<List<String>> newConclusios = new LinkedList<>(); - for (List<String> conclusio : rule.getValue()) { + for (List<String> conclusio : entry.getValue()) { boolean containsNonReachable = false; for (String c : conclusio) { if (rules.containsKey(c) && !reachable.contains(c)) { @@ -134,7 +132,7 @@ public class Type2Parser { newConclusios.add(conclusio); } } - newRules.put(rule.getKey(), newConclusios); + newRules.put(entry.getKey(), newConclusios); } } return newRules; @@ -146,89 +144,46 @@ public class Type2Parser { for (Entry<String, List<List<String>>> rule : rules.entrySet()) { List<List<String>> newConclusios = new LinkedList<>(); for (List<String> oldConclusio : rule.getValue()) { - List<List<String>> newCons = removeNullable(oldConclusio, nullable); - for (List<String> newCon : newCons) { - if ((!newCon.isEmpty() && !ListOfListContains.contains(newConclusios, newCon))) { - newConclusios.add(newCon); + if (oldConclusio.size() == 2) { + List<List<String>> nullableRemoved = removeNullable(oldConclusio, nullable); + for (List<String> newCon : nullableRemoved) { + if (!ListOfListContains.contains(newConclusios, newCon)) { + newConclusios.add(newCon); + } } + } else if (!oldConclusio.isEmpty()) { + newConclusios.add(oldConclusio); } - } newRules.put(rule.getKey(), newConclusios); } return newRules; } - private List<List<String>> removeNullable(List<String> r, Set<String> nullable) { - List<String> lNull = new LinkedList<>(); - List<Integer> posNull = new LinkedList<>(); - Map<Integer, String> posNonNull = new HashMap<>(); - for (int i = 0; i < r.size(); i++) { - if (nullable.contains(r.get(i))) { - lNull.add(r.get(i)); - posNull.add(i); - } else { - posNonNull.put(i, r.get(i)); - } - } - List<List<Integer>> ps = powerset(lNull.size() - 1); - List<Map<Integer, String>> newCons = new LinkedList<>(); - for (List<Integer> s : ps) { - Map<Integer, String> newCon = new HashMap<>(); - for (int i : s) { - newCon.put(posNull.get(i), lNull.get(i)); - } - newCons.add(newCon); - } - List<List<String>> newConsAdded = new LinkedList<>(); - for (Map<Integer, String> newCon : newCons) { - List<String> newConAdded = new LinkedList<>(); - for (int i = 0; i < r.size(); i++) { - String s0 = newCon.get(i); - String s1 = posNonNull.get(i); - if (s0 != null) { - newConAdded.add(s0); - } else if (s1 != null) { - newConAdded.add(s1); - } - } - newConsAdded.add(newConAdded); + private List<List<String>> removeNullable(List<String> conclusio, Set<String> nullable) { + List<List<String>> nullableRemoved = new LinkedList<>(); + nullableRemoved.add(conclusio); + if (nullable.contains(conclusio.get(0))) { + List<String> second = new LinkedList<>(); + second.add(conclusio.get(1)); + nullableRemoved.add(second); } - return newConsAdded; - } - - private List<List<Integer>> powerset(int n) { - List<List<Integer>> ps = new ArrayList<>(); - if (n < 0) { - ps.add(new ArrayList<>()); - } else { - for (List<Integer> s : powerset(n - 1)) { - ps.add(s); - List<Integer> s1 = new ArrayList<>(s); - s1.add(n); - ps.add(s1); - } + if (nullable.contains(conclusio.get(1))) { + List<String> first = new LinkedList<>(); + first.add(conclusio.get(0)); + nullableRemoved.add(first); } - return ps; + return nullableRemoved; } private Map<String, List<List<String>>> normalize(Map<String, List<List<String>>> rules) { Map<String, List<List<String>>> newRules = new LinkedHashMap<>(); Map<String, String> vars = new HashMap<>(); - for (Entry<String, List<List<String>>> rule : rules.entrySet()) { + for (Entry<String, List<List<String>>> rule : rules.entrySet()) {// |rules| List<List<String>> newConclusios = new LinkedList<>(); for (List<String> conclusio : rule.getValue()) { if (conclusio.size() > 1) { - List<String> newConclusio = new LinkedList<>(); - for (int i = 0; i < conclusio.size(); i++) { - String s = conclusio.get(i); - if (!rules.containsKey(s)) { - vars.computeIfAbsent(s, k -> "$" + k); - newConclusio.add(vars.get(s)); - } else { - newConclusio.add(s); - } - } + List<String> newConclusio = normalizeConclusio(rules, conclusio, vars, newRules); newConclusios.add(newConclusio); } else { newConclusios.add(conclusio); @@ -236,25 +191,39 @@ public class Type2Parser { } newRules.put(rule.getKey(), newConclusios); } - for (Entry<String, String> entry : vars.entrySet()) { - List<List<String>> ll = new LinkedList<>(); - List<String> l = new LinkedList<>(); - l.add(entry.getKey()); - ll.add(l); - newRules.put(entry.getValue(), ll); - } return newRules; } + private List<String> normalizeConclusio(Map<String, List<List<String>>> rules, List<String> oldConclusio, + Map<String, String> vars, Map<String, List<List<String>>> newRules) { + List<String> newConclusio = new LinkedList<>(); + for (String s : oldConclusio) { + if (!rules.containsKey(s)) { + if (!vars.containsKey(s)) { + vars.put(s, "$" + s); + List<List<String>> ll = new LinkedList<>(); + List<String> l = new LinkedList<>(); + l.add(s); + ll.add(l); + newRules.put(vars.get(s), ll); + } + newConclusio.add(vars.get(s)); + } else { + newConclusio.add(s); + } + } + return newConclusio; + } + private Map<String, List<List<String>>> removeChainRules(Map<String, List<List<String>>> rules) { Map<String, List<List<String>>> newRules = new LinkedHashMap<>(); Map<String, Set<String>> chainProdMap = findChainProductions(rules); - Map<String, List<List<String>>> chainRulesDeleted = deleteChainRules(rules); for (String premise : rules.keySet()) { List<List<String>> newConclusios = new LinkedList<>(); for (String s : chainProdMap.get(premise)) { - for (List<String> newConclusio : chainRulesDeleted.get(s)) { - if (!ListOfListContains.contains(newConclusios, newConclusio)) { + for (List<String> newConclusio : rules.get(s)) { + if ((newConclusio.size() != 1 || !rules.containsKey(newConclusio.get(0))) + && !ListOfListContains.contains(newConclusios, newConclusio)) { newConclusios.add(newConclusio); } } @@ -265,45 +234,39 @@ public class Type2Parser { return newRules; } - private Map<String, List<List<String>>> deleteChainRules(Map<String, List<List<String>>> rules) { - Map<String, List<List<String>>> newRules = new LinkedHashMap<>(); - for (Entry<String, List<List<String>>> rule : rules.entrySet()) { - List<List<String>> newConclusios = new LinkedList<>(); - for (List<String> conclusio : rule.getValue()) { - if (conclusio.size() == 1 && rules.containsKey(conclusio.get(0))) { - continue; - } - newConclusios.add(conclusio); - } - newRules.put(rule.getKey(), newConclusios); - } - return newRules; - } - private Map<String, Set<String>> findChainProductions(Map<String, List<List<String>>> rules) { - Map<String, Set<String>> chainProdMap = new LinkedHashMap<>(); - for (Entry<String, List<List<String>>> rule : rules.entrySet()) { - Set<String> chainProds = new HashSet<>(); - chainProds.add(rule.getKey()); - findChainProductions(rules, rule.getKey(), chainProds); - chainProdMap.put(rule.getKey(), chainProds); + Map<String, Set<String>> chainProdMap = new HashMap<>(); + for (Entry<String, List<List<String>>> entry : rules.entrySet()) { + if (!chainProdMap.containsKey(entry.getKey())) { + Set<String> chainProds = new HashSet<>(); + chainProds.add(entry.getKey()); + chainProdMap.put(entry.getKey(), chainProds); + findChainProductions(rules, entry.getKey(), chainProdMap); + } } return chainProdMap; } - private void findChainProductions(Map<String, List<List<String>>> rules, String key, Set<String> chainProds) { + private void findChainProductions(Map<String, List<List<String>>> rules, String key, + Map<String, Set<String>> chainProdMap) { for (List<String> conclusio : rules.get(key)) { - if (conclusio.size() == 1 && rules.containsKey(conclusio.get(0)) - && !chainProds.contains(conclusio.get(0))) { - chainProds.add(conclusio.get(0)); - findChainProductions(rules, conclusio.get(0), chainProds); + if (conclusio.size() == 1 && rules.containsKey(conclusio.get(0))) { + if (!chainProdMap.containsKey(conclusio.get(0))) { + Set<String> chainProds = new HashSet<>(); + chainProds.add(conclusio.get(0)); + chainProdMap.put(conclusio.get(0), chainProds); + findChainProductions(rules, conclusio.get(0), chainProdMap); + chainProdMap.get(key).addAll(chainProdMap.get(conclusio.get(0))); + } else { + chainProdMap.get(key).addAll(chainProdMap.get(conclusio.get(0))); + } + } } } private Map<String, List<List<String>>> shortenConclusios(Map<String, List<List<String>>> rules) { Map<String, List<List<String>>> newRules = new LinkedHashMap<>(); - Map<String, List<List<String>>> addNewRules = new LinkedHashMap<>(); int i = 0; for (Entry<String, List<List<String>>> rule : rules.entrySet()) { List<List<String>> newConclusios = new LinkedList<>(); @@ -320,60 +283,54 @@ public class Type2Parser { cj.add(conclusio.get(j)); cj.add("@" + (i + 1)); csj.add(cj); - addNewRules.put("@" + i++, csj); + newRules.put("@" + i++, csj); } List<List<String>> csn = new LinkedList<>(); List<String> cn = new LinkedList<>(); cn.add(conclusio.get(k)); cn.add(conclusio.get(k + 1)); csn.add(cn); - addNewRules.put("@" + i++, csn); + newRules.put("@" + i++, csn); } else { newConclusios.add(conclusio); } } newRules.put(rule.getKey(), newConclusios); } - newRules.putAll(addNewRules); return newRules; } private boolean cyk(Map<String, List<List<String>>> rules, String word) { int n = word.length(); - VariableSet[][] table = new VariableSet[n][n]; - for (int i = 0; i < n; i++) { - Set<String> initial = producesTerminals(rules, word.charAt(i)); - table[i][i] = new VariableSet(); - table[i][i].addAll(initial); + VariableSetTable table = new VariableSetTable(n); + for (int i = 0; i < n; i++) {// |word| + producesTerminals(rules, word.charAt(i), table, i);// |rules| } - for (int j = 1; j < n; j++) { - for (int i = j - 1; i >= 0; i--) { - table[i][j] = new VariableSet(); - for (int k = i; k < j; k++) { - producesVariable(rules, j, i, k, table); + for (int j = 1; j < n; j++) {// |word| - 1 + for (int i = j - 1; i >= 0; i--) {// j + for (int k = i; k < j; k++) {// j - i + producesVariable(rules, j, i, k, table);// |rules| } } } - return table[0][n - 1].contains("S'"); + return table.contains(0, n - 1, "S'"); } - private Set<String> producesTerminals(Map<String, List<List<String>>> rules, char c) { - Set<String> s = new HashSet<>(); + private void producesTerminals(Map<String, List<List<String>>> rules, char c, VariableSetTable table, int i) { for (Entry<String, List<List<String>>> entry : rules.entrySet()) { for (List<String> con : entry.getValue()) { if (con.size() == 1 && con.get(0).charAt(0) == c) { - s.add(entry.getKey()); + table.add(i, i, entry.getKey()); } } } - return s; } - private void producesVariable(Map<String, List<List<String>>> rules, int j, int i, int k, VariableSet[][] table) { + private void producesVariable(Map<String, List<List<String>>> rules, int j, int i, int k, VariableSetTable table) { for (Entry<String, List<List<String>>> entry : rules.entrySet()) { for (List<String> con : entry.getValue()) { - if (con.size() == 2 && table[i][k].contains(con.get(0)) && table[k + 1][j].contains(con.get(1))) { - table[i][j].add(entry.getKey()); + if (con.size() == 2 && table.contains(i, k, con.get(0)) && table.contains(k + 1, j, con.get(1))) { + table.add(i, j, entry.getKey()); } } } diff --git a/ChatGPTParsing/src/type2/VariableSetTable.java b/ChatGPTParsing/src/type2/VariableSetTable.java new file mode 100644 index 0000000..8c8473a --- /dev/null +++ b/ChatGPTParsing/src/type2/VariableSetTable.java @@ -0,0 +1,58 @@ +package type2; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +public class VariableSetTable { + + VariableSet[][] table; + + public VariableSetTable(int n) { + table = new VariableSet[n][n]; + } + + public void add(int i, int j, String v) { + if (table[i][j] == null) { + table[i][j] = new VariableSet(); + } + table[i][j].add(v); + } + + public boolean contains(int i, int j, String v) { + if (table[i][j] == null) { + return false; + } + return table[i][j].contains(v); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < table.length; i++) { + sb.append(Arrays.toString(table[i])); + if (i < table.length - 1) { + sb.append("\n"); + } + } + return sb.toString(); + } + + private class VariableSet { + Set<String> variables = new HashSet<>(); + + private void add(String v) { + variables.add(v); + } + + private boolean contains(String v) { + return variables.contains(v); + } + + @Override + public String toString() { + return variables.toString().replace('[', '{').replace(']', '}'); + } + } + +} diff --git a/ChatGPTParsing/src/type3/Type3GrammarGenerator.java b/ChatGPTParsing/src/type3/Type3GrammarGenerator.java index 708cf89..1aeec9a 100644 --- a/ChatGPTParsing/src/type3/Type3GrammarGenerator.java +++ b/ChatGPTParsing/src/type3/Type3GrammarGenerator.java @@ -22,8 +22,8 @@ public class Type3GrammarGenerator { conclusios.add(new LinkedList<>()); } int numOfConclusios = random.nextInt(maxConclusios) + 1; - while (conclusios.size() < numOfConclusios || ListOfListContains.contains(conclusios, new LinkedList<>()) - && conclusios.size() < numOfConclusios + 1) { + while (conclusios.size() < numOfConclusios || (ListOfListContains.contains(conclusios, new LinkedList<>()) + && conclusios.size() < numOfConclusios + 1)) { List<String> conclusio = genConclusio(alphaSize, vars); if (!ListOfListContains.contains(conclusios, conclusio)) { conclusios.add(conclusio); diff --git a/ChatGPTParsing/src/writer/VariableSet.java b/ChatGPTParsing/src/writer/VariableSet.java deleted file mode 100644 index d272184..0000000 --- a/ChatGPTParsing/src/writer/VariableSet.java +++ /dev/null @@ -1,26 +0,0 @@ -package writer; - -import java.util.HashSet; -import java.util.Set; - -public class VariableSet { - - Set<String> variables = new HashSet<>();; - - public void add(String v) { - variables.add(v); - } - - public void addAll(Set<String> vs) { - variables.addAll(vs); - } - - public boolean contains(String v) { - return variables.contains(v); - } - - @Override - public String toString() { - return variables.toString(); - } -} -- GitLab