From 345fad1f214489ccf60c4975f41ec12e6f580cba Mon Sep 17 00:00:00 2001 From: Noah Heuser <nheuser@uni-koblenz.de> Date: Mon, 24 Jun 2024 17:34:54 +0200 Subject: [PATCH] switched from alphaSize to actual list of symbols, checks for duplicate grammars --- ChatGPTParsing/src/grammar/Grammar.java | 54 +++++++----- ChatGPTParsing/src/grammar/Type2Grammar.java | 17 ++-- ChatGPTParsing/src/grammar/Type3Grammar.java | 16 ++-- ChatGPTParsing/src/main/Main.java | 87 +++++++++++++++---- .../src/type2/Type2GrammarGenerator.java | 67 ++++++-------- ChatGPTParsing/src/type2/Type2Parser.java | 6 +- .../src/type3/Type3GrammarGenerator.java | 58 ++++++------- .../src/word_generator/WordGenerator.java | 21 +++-- ChatGPTParsing/src/writer/Writer.java | 40 ++++++--- 9 files changed, 212 insertions(+), 154 deletions(-) diff --git a/ChatGPTParsing/src/grammar/Grammar.java b/ChatGPTParsing/src/grammar/Grammar.java index 4b49fdf..1677c27 100644 --- a/ChatGPTParsing/src/grammar/Grammar.java +++ b/ChatGPTParsing/src/grammar/Grammar.java @@ -1,5 +1,6 @@ package grammar; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; @@ -9,31 +10,31 @@ import java.util.Map.Entry; public abstract class Grammar { - private String[] vars; - private int alphaSize; + private List<String> vars; + private List<String> alphabet; private Map<String, List<List<String>>> rules; private String start; - protected Grammar(String[] vars, int alphaSize, String start) { + protected Grammar(List<String> vars, List<String> alphabet, String start) { this.setVars(vars); - this.setAlphaSize(alphaSize); + this.setAlphabet(alphabet); this.setStart(start); } - public String[] getVars() { + public List<String> getVars() { return vars; } - public void setVars(String[] vars) { + public void setVars(List<String> vars) { this.vars = vars; } - public int getAlphaSize() { - return alphaSize; + public List<String> getAlphabet() { + return alphabet; } - public void setAlphaSize(int alphaSize) { - this.alphaSize = alphaSize; + public void setAlphabet(List<String> alphabet) { + this.alphabet = alphabet; } public Map<String, List<List<String>>> getRules() { @@ -59,7 +60,8 @@ public abstract class Grammar { vars[0] = vars[0].substring(5); int lastIndex = vars.length - 1; vars[lastIndex] = vars[lastIndex].substring(0, 1); - int alphaSize = gArray[2].split(",").length; + String[] alphabet = gArray[2].split(","); + alphabet[0] = alphabet[0].substring(5); String start = gArray[0].charAt(14) + ""; if (gArray[0].charAt(15) == '\'') { start += "'"; @@ -95,24 +97,28 @@ public abstract class Grammar { } rules.put(rule[0].trim(), conclusiosList); } - return new Type2Grammar(vars, alphaSize, start, rules); + List<String> varList = new LinkedList<>(); + Collections.addAll(varList, vars); + List<String> alphabetList = new LinkedList<>(); + Collections.addAll(alphabetList, alphabet); + return new Type2Grammar(varList, alphabetList, start, rules); } public String toStringPython() { StringBuilder sb = new StringBuilder(); sb.append("G = (V, T, R, " + this.getStart() + ")\\n"); sb.append("V = {"); - for (int i = 0; i < this.getVars().length; i++) { - sb.append(this.getVars()[i]); - if (i < this.getVars().length - 1) { + for (int i = 0; i < this.getVars().size(); i++) { + sb.append(this.getVars().get(i)); + if (i < this.getVars().size() - 1) { sb.append(", "); } } sb.append("}\\n"); sb.append("T = {"); - for (int i = 97; i < 97 + this.getAlphaSize(); i++) { - sb.append((char) i); - if (i < 96 + this.getAlphaSize()) { + for (int i = 0; i < this.getAlphabet().size(); i++) { + sb.append(this.getAlphabet().get(i)); + if (i < this.getAlphabet().size() - 1) { sb.append(", "); } } @@ -150,17 +156,17 @@ public abstract class Grammar { StringBuilder sb = new StringBuilder(); sb.append("G = (V, T, R, " + this.getStart() + ")\n"); sb.append("V = {"); - for (int i = 0; i < this.getVars().length; i++) { - sb.append(this.getVars()[i]); - if (i < this.getVars().length - 1) { + for (int i = 0; i < this.getVars().size(); i++) { + sb.append(this.getVars().get(i)); + if (i < this.getVars().size() - 1) { sb.append(", "); } } sb.append("}\n"); sb.append("T = {"); - for (int i = 97; i < 97 + this.getAlphaSize(); i++) { - sb.append((char) i); - if (i < 96 + this.getAlphaSize()) { + for (int i = 0; i < this.getAlphabet().size(); i++) { + sb.append(this.getAlphabet().get(i)); + if (i < this.getAlphabet().size() - 1) { sb.append(", "); } } diff --git a/ChatGPTParsing/src/grammar/Type2Grammar.java b/ChatGPTParsing/src/grammar/Type2Grammar.java index b43d43f..00bde88 100644 --- a/ChatGPTParsing/src/grammar/Type2Grammar.java +++ b/ChatGPTParsing/src/grammar/Type2Grammar.java @@ -9,28 +9,29 @@ public class Type2Grammar extends Grammar { Type2GrammarGenerator g = new Type2GrammarGenerator(); - public Type2Grammar(String[] vars, int alphaSize, String start, int maxConclusios, int maxConclusioSize, int mode) { - super(vars, alphaSize, start); + public Type2Grammar(List<String> vars, List<String> alphabet, String start, int maxConclusios, int maxConclusioSize, + int mode) { + super(vars, alphabet, start); switch (mode) { case 0: - this.setRules(g.genType2GrammarRuleSet0(vars, alphaSize, maxConclusios, maxConclusioSize)); + this.setRules(g.genType2GrammarRuleSet0(vars, alphabet, maxConclusios, maxConclusioSize)); break; case 1: - this.setRules(g.genType2GrammarRuleSet1(vars, alphaSize, maxConclusios, maxConclusioSize, start)); + this.setRules(g.genType2GrammarRuleSet1(vars, alphabet, maxConclusios, maxConclusioSize, start)); break; case 2: - this.setRules(g.genType2GrammarRuleSet2(vars, alphaSize, maxConclusios, maxConclusioSize)); + this.setRules(g.genType2GrammarRuleSet2(vars, alphabet, maxConclusios, maxConclusioSize)); break; case 3: - this.setRules(g.genType2GrammarRuleSet3(vars, alphaSize, maxConclusios, maxConclusioSize, start)); + this.setRules(g.genType2GrammarRuleSet3(vars, alphabet, maxConclusios, maxConclusioSize, start)); break; default: break; } } - public Type2Grammar(String[] vars, int alphaSize, String start, Map<String, List<List<String>>> rules) { - super(vars, alphaSize, start); + public Type2Grammar(List<String> vars, List<String> alphabet, String start, Map<String, List<List<String>>> rules) { + super(vars, alphabet, start); super.setRules(rules); } diff --git a/ChatGPTParsing/src/grammar/Type3Grammar.java b/ChatGPTParsing/src/grammar/Type3Grammar.java index 6b11342..bd9d711 100644 --- a/ChatGPTParsing/src/grammar/Type3Grammar.java +++ b/ChatGPTParsing/src/grammar/Type3Grammar.java @@ -9,28 +9,28 @@ public class Type3Grammar extends Grammar { Type3GrammarGenerator g = new Type3GrammarGenerator(); - public Type3Grammar(String[] vars, int alphaSize, String start, int maxConclusios, int mode) { - super(vars, alphaSize, start); + public Type3Grammar(List<String> vars, List<String> alphabet, String start, int maxConclusios, int mode) { + super(vars, alphabet, start); switch (mode) { case 0: - this.setRules(g.genType3GrammarRuleSet0(vars, alphaSize, maxConclusios, start)); + this.setRules(g.genType3GrammarRuleSet0(vars, alphabet, maxConclusios, start)); break; case 1: - this.setRules(g.genType3GrammarRuleSet1(vars, alphaSize, maxConclusios, start)); + this.setRules(g.genType3GrammarRuleSet1(vars, alphabet, maxConclusios, start)); break; case 2: - this.setRules(g.genType3GrammarRuleSet2(vars, alphaSize, maxConclusios, start)); + this.setRules(g.genType3GrammarRuleSet2(vars, alphabet, maxConclusios, start)); break; case 3: - this.setRules(g.genType3GrammarRuleSet3(vars, alphaSize, maxConclusios, start)); + this.setRules(g.genType3GrammarRuleSet3(vars, alphabet, maxConclusios, start)); break; default: break; } } - public Type3Grammar(String[] vars, int alphaSize, String start, Map<String, List<List<String>>> rules) { - super(vars, alphaSize, start); + public Type3Grammar(List<String> vars, List<String> alphabet, String start, Map<String, List<List<String>>> rules) { + super(vars, alphabet, start); super.setRules(rules); } diff --git a/ChatGPTParsing/src/main/Main.java b/ChatGPTParsing/src/main/Main.java index e1a3914..5172d86 100644 --- a/ChatGPTParsing/src/main/Main.java +++ b/ChatGPTParsing/src/main/Main.java @@ -1,34 +1,87 @@ package main; import java.util.List; -import java.util.Map; import grammar.Grammar; -import grammar.Type2Grammar; -import type2.Type2Parser; +import writer.Writer; public class Main { // ε public static void main(String[] args) { -// Writer w = new Writer(); - String[] vars = { "S'", "S", "A", "B", "C", "D" }; - int alphaSize = 3; -// int maxConclusios = 3; + Writer w = new Writer(); + List<String> vars = List.of("S", "A", "B"); + List<String> alphabet = List.of("a", "b", "c"); + int maxConclusios = 3; // int maxConclusioSize = 4; -// String start = "S"; -// int mode = 3; -// w.runType2Grammar(vars, alphaSize, maxConclusios, maxConclusioSize, start, mode, 3); + String start = "S"; + int mode = 3; + w.runType3Grammar(vars, alphabet, maxConclusios, start, mode, 3); Grammar g = Grammar.toGrammar( "G = (V, T, R, S)\nV = {S, A, B, C, D}\nT = {a, b, c}\nR = {S -> S | AC | CASD,\n A -> cD | c | DDCa,\n B -> abB | Sbc | a,\n C -> Ba,\n D -> bAbc | Saca | Ac}"); - Type2Parser t2p = new Type2Parser(); - Map<String, List<List<String>>> gNormal = t2p.createCNF(g.getRules(), g.getStart(), 3); - boolean b = t2p.isPartOfLanguage(gNormal, "cbcbcaa"); - System.out.println(b); - System.out.println(gNormal); - Type2Grammar g0 = new Type2Grammar(vars, alphaSize, "S'", gNormal); - System.out.println(g0); + System.out.println(g.getRules()); +// Type2Parser t2p = new Type2Parser(); +// Map<String, List<List<String>>> gNormal = t2p.createCNF(g.getRules(), g.getStart(), 3); +// boolean b = t2p.isPartOfLanguage(gNormal, "cbcbcaa"); +// System.out.println(b); +// System.out.println(gNormal); +// Type2Grammar g0 = new Type2Grammar(vars, alphaSize, "S'", gNormal); +// System.out.println(g0); +// Map<String, List<List<String>>> rules = new LinkedHashMap<>(); +// List<List<String>> s = new LinkedList<>(); +// List<String> s0 = new LinkedList<>(); +// s0.add("T"); +// s0.add("a"); +// List<String> s1 = new LinkedList<>(); +// s1.add("T"); +// List<String> s2 = new LinkedList<>(); +// s2.add("A"); +// s.add(s0); +// s.add(s1); +// s.add(s2); +// rules.put("S", s); +// List<List<String>> t = new LinkedList<>(); +// List<String> t0 = new LinkedList<>(); +// t0.add("b"); +// t0.add("T"); +// t0.add("b"); +// List<String> t1 = new LinkedList<>(); +// t1.add("a"); +// List<String> t2 = new LinkedList<>(); +// t2.add("R"); +// List<String> t3 = new LinkedList<>(); +// t3.add("U"); +// t.add(t0); +// t.add(t1); +// t.add(t2); +// t.add(t3); +// rules.put("T", t); +// List<List<String>> r = new LinkedList<>(); +// List<String> r0 = new LinkedList<>(); +// r0.add("a"); +// r0.add("S"); +// r0.add("b"); +// List<String> r1 = new LinkedList<>(); +// r1.add("b"); +// r.add(r0); +// r.add(r1); +// rules.put("R", r); +// List<List<String>> u = new LinkedList<>(); +// List<String> u0 = new LinkedList<>(); +// u0.add("a"); +// u0.add("b"); +// List<String> u1 = new LinkedList<>(); +// u1.add("S"); +// u1.add("T"); +// u.add(u0); +// u.add(u1); +// rules.put("U", u); +// List<List<String>> a = new LinkedList<>(); +// rules.put("A", a); +// System.out.println(rules); +// Map<String, List<List<String>>> rules0 = t2p.removeChainRules(rules); +// System.out.println(rules0); // Map<String, List<List<String>>> rules = new LinkedHashMap<>(); // List<List<String>> s = new LinkedList<>(); // List<String> s0 = new LinkedList<>(); diff --git a/ChatGPTParsing/src/type2/Type2GrammarGenerator.java b/ChatGPTParsing/src/type2/Type2GrammarGenerator.java index 86c40ae..ec1cf03 100644 --- a/ChatGPTParsing/src/type2/Type2GrammarGenerator.java +++ b/ChatGPTParsing/src/type2/Type2GrammarGenerator.java @@ -1,12 +1,10 @@ package type2; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Random; -import java.util.Set; import auxiliary.ListOfListContains; import co_reachable.CoReachability; @@ -16,11 +14,12 @@ public class Type2GrammarGenerator { Random random = new Random(); - private List<List<String>> genConclusios(int alphaSize, String[] vars, int maxConclusios, int maxConclusioSize) { + private List<List<String>> genConclusios(List<String> alphabet, List<String> vars, int maxConclusios, + int maxConclusioSize) { List<List<String>> conclusios = new LinkedList<>(); int numOfConclusios = random.nextInt(maxConclusios) + 1; while (conclusios.size() < numOfConclusios) { - List<String> conclusio = genConclusio(alphaSize, vars, maxConclusioSize); + List<String> conclusio = genConclusio(alphabet, vars, maxConclusioSize); if (!ListOfListContains.contains(conclusios, conclusio)) { conclusios.add(conclusio); } @@ -28,79 +27,67 @@ public class Type2GrammarGenerator { return conclusios; } - private List<String> genConclusio(int alphaSize, String[] vars, int maxConclusioSize) { - return (Math.random() <= 0.1) ? new LinkedList<>() : genRandomString(alphaSize, vars, maxConclusioSize); + private List<String> genConclusio(List<String> alphabet, List<String> vars, int maxConclusioSize) { + return (Math.random() <= 0.1) ? new LinkedList<>() : genRandomString(alphabet, vars, maxConclusioSize); } - private List<String> genRandomString(int alphaSize, String[] vars, int maxConclusioSize) { + private List<String> genRandomString(List<String> alphabet, List<String> vars, int maxConclusioSize) { List<String> s = new LinkedList<>(); int conclusioSize = random.nextInt(maxConclusioSize) + 1; while (s.size() < conclusioSize) { - int i = random.nextInt(vars.length + alphaSize); - if (i < vars.length) { - if (i == 0) { - i = 83; - } else if (i > 0 && i <= 18) { - i += 64; - } else { - i += 65; - } + int i = random.nextInt(vars.size() + alphabet.size()); + if (i < vars.size()) { + s.add(vars.get(i)); } else { - i += 97 - vars.length; + s.add(alphabet.get(i - vars.size())); } - char c = (char) i; - s.add(c + ""); } return s; } - public Map<String, List<List<String>>> genType2GrammarRuleSet0(String[] vars, int alphaSize, int maxConclusios, - int maxConclusioSize) { + public Map<String, List<List<String>>> genType2GrammarRuleSet0(List<String> vars, List<String> alphabet, + int maxConclusios, int maxConclusioSize) { Map<String, List<List<String>>> rules = new LinkedHashMap<>(); for (String v : vars) { - List<List<String>> conclusios = genConclusios(alphaSize, vars, maxConclusios, maxConclusioSize); + List<List<String>> conclusios = genConclusios(alphabet, vars, maxConclusios, maxConclusioSize); rules.put(v, conclusios); } return rules; } - public Map<String, List<List<String>>> genType2GrammarRuleSet1(String[] vars, int alphaSize, int maxConclusios, - int maxConclusioSize, String s) { - Map<String, List<List<String>>> rules = genType2GrammarRuleSet0(vars, alphaSize, maxConclusios, + public Map<String, List<List<String>>> genType2GrammarRuleSet1(List<String> vars, List<String> alphabet, + int maxConclusios, int maxConclusioSize, String s) { + Map<String, List<List<String>>> rules = genType2GrammarRuleSet0(vars, alphabet, maxConclusios, maxConclusioSize); while (CoReachability.emptyLanguage(rules, s)) { - rules = genType2GrammarRuleSet0(vars, alphaSize, maxConclusios, maxConclusioSize); + rules = genType2GrammarRuleSet0(vars, alphabet, maxConclusios, maxConclusioSize); } return rules; } - public Map<String, List<List<String>>> genType2GrammarRuleSet2(String[] vars, int alphaSize, int maxConclusios, - int maxConclusioSize) { - Map<String, List<List<String>>> rules = genType2GrammarRuleSet0(vars, alphaSize, maxConclusios, + public Map<String, List<List<String>>> genType2GrammarRuleSet2(List<String> vars, List<String> alphabet, + int maxConclusios, int maxConclusioSize) { + Map<String, List<List<String>>> rules = genType2GrammarRuleSet0(vars, alphabet, maxConclusios, maxConclusioSize); while (!CoReachability.coReachableSet(rules).containsAll(rules.keySet())) { - rules = genType2GrammarRuleSet0(vars, alphaSize, maxConclusios, maxConclusioSize); + rules = genType2GrammarRuleSet0(vars, alphabet, maxConclusios, maxConclusioSize); } return rules; } - public Map<String, List<List<String>>> genType2GrammarRuleSet3(String[] vars, int alphaSize, int maxConclusios, - int maxConclusioSize, String s) { - Map<String, List<List<String>>> rules = genType2GrammarRuleSet0(vars, alphaSize, maxConclusios, + public Map<String, List<List<String>>> genType2GrammarRuleSet3(List<String> vars, List<String> alphabet, + int maxConclusios, int maxConclusioSize, String s) { + Map<String, List<List<String>>> rules = genType2GrammarRuleSet0(vars, alphabet, maxConclusios, maxConclusioSize); - Set<String> alphaSet = new HashSet<>(); - for (int i = 0; i < alphaSize; i++) { - alphaSet.add(((char) (i + 97)) + ""); - } boolean allCoReachable = CoReachability.coReachableSet(rules).containsAll(rules.keySet()); boolean allReachable = Reachability.reachableSet(rules, s).containsAll(rules.keySet()) - && Reachability.reachableSet(rules, s).containsAll(alphaSet); + && Reachability.reachableSet(rules, s).containsAll(alphabet); while (!allCoReachable || !allReachable) { - rules = genType2GrammarRuleSet0(vars, alphaSize, maxConclusios, maxConclusioSize); + rules = genType2GrammarRuleSet0(vars, alphabet, maxConclusios, maxConclusioSize); allCoReachable = CoReachability.coReachableSet(rules).containsAll(rules.keySet()); allReachable = Reachability.reachableSet(rules, s).containsAll(rules.keySet()) - && Reachability.reachableSet(rules, s).containsAll(alphaSet); + && Reachability.reachableSet(rules, s).containsAll(alphabet); } return rules; } diff --git a/ChatGPTParsing/src/type2/Type2Parser.java b/ChatGPTParsing/src/type2/Type2Parser.java index 0a7a4ce..9cdacf3 100644 --- a/ChatGPTParsing/src/type2/Type2Parser.java +++ b/ChatGPTParsing/src/type2/Type2Parser.java @@ -49,7 +49,7 @@ public class Type2Parser { return removeUselessSymbols(rules5, start); } - private Set<String> nullableSet(Map<String, List<List<String>>> rules) { + public Set<String> nullableSet(Map<String, List<List<String>>> rules) { Set<String> nullable = new HashSet<>(); int n = nullable.size(); int m = Integer.MAX_VALUE; @@ -138,7 +138,7 @@ public class Type2Parser { return newRules; } - private Map<String, List<List<String>>> removeEpsilonRules(Map<String, List<List<String>>> rules, + public Map<String, List<List<String>>> removeEpsilonRules(Map<String, List<List<String>>> rules, Set<String> nullable) { Map<String, List<List<String>>> newRules = new LinkedHashMap<>(); for (Entry<String, List<List<String>>> rule : rules.entrySet()) { @@ -215,7 +215,7 @@ public class Type2Parser { return newConclusio; } - private Map<String, List<List<String>>> removeChainRules(Map<String, List<List<String>>> rules) { + public Map<String, List<List<String>>> removeChainRules(Map<String, List<List<String>>> rules) { Map<String, List<List<String>>> newRules = new LinkedHashMap<>(); Map<String, Set<String>> chainProdMap = findChainProductions(rules); for (String premise : rules.keySet()) { diff --git a/ChatGPTParsing/src/type3/Type3GrammarGenerator.java b/ChatGPTParsing/src/type3/Type3GrammarGenerator.java index 1aeec9a..169d93a 100644 --- a/ChatGPTParsing/src/type3/Type3GrammarGenerator.java +++ b/ChatGPTParsing/src/type3/Type3GrammarGenerator.java @@ -1,12 +1,10 @@ package type3; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Random; -import java.util.Set; import auxiliary.ListOfListContains; import co_reachable.CoReachability; @@ -16,7 +14,8 @@ public class Type3GrammarGenerator { Random random = new Random(); - private List<List<String>> genConclusios(int alphaSize, String[] vars, int maxConclusios, boolean start) { + private List<List<String>> genConclusios(List<String> alphabet, List<String> vars, int maxConclusios, + boolean start) { List<List<String>> conclusios = new LinkedList<>(); if (start && Math.random() <= 0.2) { conclusios.add(new LinkedList<>()); @@ -24,7 +23,7 @@ public class Type3GrammarGenerator { int numOfConclusios = random.nextInt(maxConclusios) + 1; while (conclusios.size() < numOfConclusios || (ListOfListContains.contains(conclusios, new LinkedList<>()) && conclusios.size() < numOfConclusios + 1)) { - List<String> conclusio = genConclusio(alphaSize, vars); + List<String> conclusio = genConclusio(alphabet, vars); if (!ListOfListContains.contains(conclusios, conclusio)) { conclusios.add(conclusio); } @@ -32,61 +31,56 @@ public class Type3GrammarGenerator { return conclusios; } - private List<String> genConclusio(int alphaSize, String[] vars) { + private List<String> genConclusio(List<String> alphabet, List<String> vars) { List<String> c = new LinkedList<>(); - String s = ((char) (random.nextInt(alphaSize) + 97)) + ""; - c.add(s); + c.add(alphabet.get(random.nextInt(alphabet.size()))); if (Math.random() < 0.7) { - c.add(vars[random.nextInt(vars.length)]); + c.add(vars.get(random.nextInt(vars.size()))); } return c; } - public Map<String, List<List<String>>> genType3GrammarRuleSet0(String[] vars, int alphaSize, int maxConclusios, - String s) { + public Map<String, List<List<String>>> genType3GrammarRuleSet0(List<String> vars, List<String> alphabet, + int maxConclusios, String s) { Map<String, List<List<String>>> rules = new LinkedHashMap<>(); - List<List<String>> conclusios0 = genConclusios(alphaSize, vars, maxConclusios, true); + List<List<String>> conclusios0 = genConclusios(alphabet, vars, maxConclusios, true); rules.put(s, conclusios0); - for (int i = 1; i < vars.length; i++) { - List<List<String>> conclusios1 = genConclusios(alphaSize, vars, maxConclusios, false); - rules.put(vars[i], conclusios1); + for (int i = 1; i < vars.size(); i++) { + List<List<String>> conclusios1 = genConclusios(alphabet, vars, maxConclusios, false); + rules.put(vars.get(i), conclusios1); } return rules; } - public Map<String, List<List<String>>> genType3GrammarRuleSet1(String[] vars, int alphaSize, int maxConclusios, - String s) { - Map<String, List<List<String>>> rules = genType3GrammarRuleSet0(vars, alphaSize, maxConclusios, s); + public Map<String, List<List<String>>> genType3GrammarRuleSet1(List<String> vars, List<String> alphabet, + int maxConclusios, String s) { + Map<String, List<List<String>>> rules = genType3GrammarRuleSet0(vars, alphabet, maxConclusios, s); while (CoReachability.emptyLanguage(rules, s)) { - rules = genType3GrammarRuleSet0(vars, alphaSize, maxConclusios, s); + rules = genType3GrammarRuleSet0(vars, alphabet, maxConclusios, s); } return rules; } - public Map<String, List<List<String>>> genType3GrammarRuleSet2(String[] vars, int alphaSize, int maxConclusios, - String s) { - Map<String, List<List<String>>> rules = genType3GrammarRuleSet0(vars, alphaSize, maxConclusios, s); + public Map<String, List<List<String>>> genType3GrammarRuleSet2(List<String> vars, List<String> alphabet, + int maxConclusios, String s) { + Map<String, List<List<String>>> rules = genType3GrammarRuleSet0(vars, alphabet, maxConclusios, s); while (!CoReachability.coReachableSet(rules).containsAll(rules.keySet())) { - rules = genType3GrammarRuleSet0(vars, alphaSize, maxConclusios, s); + rules = genType3GrammarRuleSet0(vars, alphabet, maxConclusios, s); } return rules; } - public Map<String, List<List<String>>> genType3GrammarRuleSet3(String[] vars, int alphaSize, int maxConclusios, - String s) { - Map<String, List<List<String>>> rules = genType3GrammarRuleSet0(vars, alphaSize, maxConclusios, s); - Set<String> alphaSet = new HashSet<>(); - for (int i = 0; i < alphaSize; i++) { - alphaSet.add(((char) (i + 97)) + ""); - } + public Map<String, List<List<String>>> genType3GrammarRuleSet3(List<String> vars, List<String> alphabet, + int maxConclusios, String s) { + Map<String, List<List<String>>> rules = genType3GrammarRuleSet0(vars, alphabet, maxConclusios, s); boolean allCoReachable = CoReachability.coReachableSet(rules).containsAll(rules.keySet()); boolean allReachable = Reachability.reachableSet(rules, s).containsAll(rules.keySet()) - && Reachability.reachableSet(rules, s).containsAll(alphaSet); + && Reachability.reachableSet(rules, s).containsAll(alphabet); while (!allCoReachable || !allReachable) { - rules = genType3GrammarRuleSet0(vars, alphaSize, maxConclusios, s); + rules = genType3GrammarRuleSet0(vars, alphabet, maxConclusios, s); allCoReachable = CoReachability.coReachableSet(rules).containsAll(rules.keySet()); allReachable = Reachability.reachableSet(rules, s).containsAll(rules.keySet()) - && Reachability.reachableSet(rules, s).containsAll(alphaSet); + && Reachability.reachableSet(rules, s).containsAll(alphabet); } return rules; } diff --git a/ChatGPTParsing/src/word_generator/WordGenerator.java b/ChatGPTParsing/src/word_generator/WordGenerator.java index daa8e17..fcd5180 100644 --- a/ChatGPTParsing/src/word_generator/WordGenerator.java +++ b/ChatGPTParsing/src/word_generator/WordGenerator.java @@ -12,12 +12,17 @@ public class WordGenerator { Random random = new Random(); public String wordGenerator(int maxLength) { - return wordGeneratorOverAlphabet(maxLength, 26); + return random.ints(random.nextInt(maxLength + 1), 97, 123) + .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append).toString(); } - public String wordGeneratorOverAlphabet(int maxLength, int alphaSize) { - return random.ints(random.nextInt(maxLength + 1), 97, 97 + alphaSize) - .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append).toString(); + public String wordGeneratorOverAlphabet(int maxLength, List<String> alphabet) { + StringBuilder sb = new StringBuilder(); + int length = random.nextInt(maxLength + 1); + while (sb.length() < length) { + sb.append(alphabet.get(random.nextInt(alphabet.size()))); + } + return sb.toString(); } public String wordGeneratorOverType2Grammar(int maxLength, Map<String, List<List<String>>> cnfGrammar) { @@ -52,11 +57,11 @@ public class WordGenerator { } } - public String wordGeneratorOverType3Grammar(int maxLength, FiniteStateMachine fsm, int alphaSize) { + public String wordGeneratorOverType3Grammar(int maxLength, FiniteStateMachine fsm, List<String> alphabet) { String word = ""; boolean canStop = false; while (!canStop) { - word = wordGeneratorOverType3GrammarTry(random.nextInt(maxLength - 1) + 1, fsm, alphaSize); + word = wordGeneratorOverType3GrammarTry(random.nextInt(maxLength - 1) + 1, fsm, alphabet); if (fsm.canStop()) { canStop = true; } @@ -65,10 +70,10 @@ public class WordGenerator { return word; } - public String wordGeneratorOverType3GrammarTry(int maxLength, FiniteStateMachine fsm, int alphaSize) { + public String wordGeneratorOverType3GrammarTry(int maxLength, FiniteStateMachine fsm, List<String> alphabet) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < maxLength; i++) { - String s = (char) (random.nextInt(alphaSize) + 97) + ""; + String s = alphabet.get(random.nextInt(alphabet.size())); sb.append(s); fsm.switchStates(s); } diff --git a/ChatGPTParsing/src/writer/Writer.java b/ChatGPTParsing/src/writer/Writer.java index e0ce063..9529ba2 100644 --- a/ChatGPTParsing/src/writer/Writer.java +++ b/ChatGPTParsing/src/writer/Writer.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import auxiliary.ListOfRulesContains; import grammar.Grammar; import grammar.Type2Grammar; import grammar.Type3Grammar; @@ -94,19 +95,24 @@ public class Writer { } } - public void runType2Grammar(String[] vars, int alphaSize, int maxConclusios, int maxConclusioSize, String start, - int mode, int n) { + public void runType2Grammar(List<String> vars, List<String> alphabet, int maxConclusios, int maxConclusioSize, + String start, int mode, int n) { List<String> grammarWordsPairs = new LinkedList<>(); List<Grammar> grammars = new LinkedList<>(); + List<Map<String, List<List<String>>>> ruleSets = new LinkedList<>(); List<Map<String, List<List<String>>>> normalizedRuleSets = new LinkedList<>(); List<String> words = new LinkedList<>(); Set<String> containsWords = new HashSet<>(); - for (int i = 0; i < n; i++) { - Type2Grammar g = new Type2Grammar(vars, alphaSize, start, maxConclusios, maxConclusioSize, mode); + while (grammars.size() < n) { + Type2Grammar g = new Type2Grammar(vars, alphabet, start, maxConclusios, maxConclusioSize, mode); + if (ListOfRulesContains.contains(ruleSets, g.getRules())) { + continue; + } + ruleSets.add(g.getRules()); Map<String, List<List<String>>> normalizedRuleSet = t2p.createCNF(g.getRules(), g.getStart(), mode); grammars.add(g); normalizedRuleSets.add(normalizedRuleSet); - List<String> wordsPerGrammar = wordGeneratorType2(normalizedRuleSet, alphaSize, containsWords); + List<String> wordsPerGrammar = wordGeneratorType2(normalizedRuleSet, alphabet, containsWords); grammarWordsPairs.add(createGrammarWordsPair(g.toStringPython(), wordsPerGrammar)); words.addAll(wordsPerGrammar); } @@ -114,7 +120,7 @@ public class Writer { writePairsToFile(grammarWordsPairs); } - private List<String> wordGeneratorType2(Map<String, List<List<String>>> normalizedRuleSet, int alphaSize, + private List<String> wordGeneratorType2(Map<String, List<List<String>>> normalizedRuleSet, List<String> alphabet, Set<String> containsWords) { List<String> words = new LinkedList<>(); int i = 0; @@ -135,7 +141,7 @@ public class Writer { i++; } } else if (percent <= 0.5) { - word = wg.wordGeneratorOverAlphabet(MAX_LENGTH, alphaSize); + word = wg.wordGeneratorOverAlphabet(MAX_LENGTH, alphabet); if (!containsWords.contains(word)) { words.add(word); containsWords.add(word); @@ -155,7 +161,7 @@ public class Writer { return words; } - private List<String> wordGeneratorType3(FiniteStateMachine fsm, int alphaSize, Set<String> containsWords) { + private List<String> wordGeneratorType3(FiniteStateMachine fsm, List<String> alphabet, Set<String> containsWords) { List<String> words = new LinkedList<>(); int i = 0; long startTime = System.currentTimeMillis(); @@ -175,14 +181,14 @@ public class Writer { i++; } } else if (percent <= 0.5) { - word = wg.wordGeneratorOverAlphabet(MAX_LENGTH, alphaSize); + word = wg.wordGeneratorOverAlphabet(MAX_LENGTH, alphabet); if (!containsWords.contains(word)) { words.add(word); containsWords.add(word); i++; } } else { - word = wg.wordGeneratorOverType3Grammar(MAX_LENGTH, fsm, alphaSize); + word = wg.wordGeneratorOverType3Grammar(MAX_LENGTH, fsm, alphabet); if (!containsWords.contains(word)) { words.add(word); containsWords.add(word); @@ -195,18 +201,24 @@ public class Writer { return words; } - public void runType3Grammar(String[] vars, int alphaSize, int maxConclusios, String start, int mode, int n) { + public void runType3Grammar(List<String> vars, List<String> alphabet, int maxConclusios, String start, int mode, + int n) { List<String> grammarWordsPairs = new LinkedList<>(); List<Grammar> grammars = new LinkedList<>(); + List<Map<String, List<List<String>>>> ruleSets = new LinkedList<>(); List<FiniteStateMachine> fsms = new LinkedList<>(); List<String> words = new LinkedList<>(); Set<String> containsWords = new HashSet<>(); - for (int i = 0; i < n; i++) { - Type3Grammar g = new Type3Grammar(vars, alphaSize, start, maxConclusios, mode); + while (grammars.size() < n) { + Type3Grammar g = new Type3Grammar(vars, alphabet, start, maxConclusios, mode); + if (ListOfRulesContains.contains(ruleSets, g.getRules())) { + continue; + } + ruleSets.add(g.getRules()); FiniteStateMachine fsm = t3p.grammarToFSM(g.getRules(), g.getStart()); grammars.add(g); fsms.add(fsm); - List<String> wordsPerGrammar = wordGeneratorType3(fsm, alphaSize, containsWords); + List<String> wordsPerGrammar = wordGeneratorType3(fsm, alphabet, containsWords); grammarWordsPairs.add(createGrammarWordsPair(g.toStringPython(), wordsPerGrammar)); words.addAll(wordsPerGrammar); } -- GitLab