#!/usr/bin/env python
"""Grammar-file lexer and parser built on a small table-driven state machine.

Reconstructed as one module from the initial commit
168e50ce879602f96fd9277a8c6e005d6375064a ("init", author Tom,
Mon Jun 11 20:36:10 2018 +0200), which added these files:

* ``.gitignore``           -- ignores ``__pycache__`` and ``*.pyc``
* ``lab/testGrammar.grm``  -- sample grammar (reproduced below)
* ``src/token.py``         -- ``Token``
* ``src/stateMachine.py``  -- ``StateMachine``
* ``src/grammarLexer.py``  -- ``T_TOKEN`` and ``GrammarLexer``
* ``src/grammarParser.py`` -- ``GrammarParser``
* ``src/testLexer.py``     -- interactive lexer demo, now ``lexerDemo()``
* ``src/testParser.py``    -- interactive parser demo, now ``parserDemo()``

Grammar syntax accepted by the lexer (one rule per line, tokens separated by
single spaces or written back to back):

* ``$NAME``  -- non-terminal; NAME is made of ASCII letters and digits
* ``/text``  -- terminal; text is made of ASCII letters and digits
* ``->``     -- arrow separating the left side from the right side

Sample grammar (``lab/testGrammar.grm``)::

    $FOO -> /foo
    $FOO -> /foo$FOO
    $FOO -> $BAR
    $BAR -> /bar
    $BAR -> $BAR
"""

import string
import sys
from collections import deque
from enum import Enum


# --------------------------------------------------------------------------
# src/token.py
# --------------------------------------------------------------------------

class Token:
    """A lexical token: a token class paired with the text it was built from."""

    def __init__(self, tClass, lexeme):
        self.__tClass = tClass
        self.__lexeme = lexeme

    def getClass(self):
        """Return the token class this token was created with."""
        return self.__tClass

    def getLexeme(self):
        """Return the lexeme text this token was created with."""
        return self.__lexeme

    def __repr__(self):
        # Makes debug dumps of token containers readable.
        return "Token({}, {!r})".format(self.__tClass, self.__lexeme)


# --------------------------------------------------------------------------
# src/stateMachine.py
# --------------------------------------------------------------------------

class StateMachine:
    """Deterministic state machine driven by a (state, input) lookup table."""

    def __init__(self):
        # All state is per instance; class-level containers would be shared
        # by every machine ever created.
        self.__transitions = {}
        self.__currentState = None
        self.__errorState = None
        self.__startState = None

    def addTransition(self, state, inputChar, followState):
        """Register that reading ``inputChar`` in ``state`` leads to ``followState``."""
        self.__transitions[(state, inputChar)] = followState

    def getCurrentState(self):
        """Return the state the machine is currently in."""
        return self.__currentState

    def processChar(self, inputChar):
        """Advance by one input and return the new current state.

        When no transition is registered for the current state and
        ``inputChar``, the machine moves to the configured error state.
        """
        nextState = self.__transitions.get((self.__currentState, inputChar))
        if nextState is None:
            nextState = self.__errorState

        self.__currentState = nextState
        return self.__currentState

    def setErrorState(self, errorState):
        """Set the state entered when an input has no registered transition."""
        self.__errorState = errorState

    def setStartState(self, startState):
        """Set the start state and place the machine in it."""
        self.__startState = startState
        # The original wrote to a misspelled attribute here, which left the
        # current state at None until reset() was called.
        self.__currentState = startState

    def reset(self):
        """Return the machine to its start state."""
        self.__currentState = self.__startState


# --------------------------------------------------------------------------
# src/grammarLexer.py
# --------------------------------------------------------------------------

class T_TOKEN(Enum):
    """Token classes produced by ``GrammarLexer``."""

    NON_TERMINAL = 1
    ARROW = 2
    TERMINAL = 3
    ERROR = 4


# Characters allowed after the "$" or "/" sigil of a symbol name.
_IDENT_CHARS = string.ascii_letters + string.digits


class GrammarLexer:
    """Split one grammar rule line into ``Token`` objects."""

    __STATE = Enum("STATE", "START NON_TERMINAL ARROW TERMINAL ERROR")

    def __init__(self):
        states = self.__STATE

        # Which token class is emitted when the machine leaves a given state.
        self.__T_TOKEN_STATE_MAP = {
            states.NON_TERMINAL: T_TOKEN.NON_TERMINAL,
            states.TERMINAL: T_TOKEN.TERMINAL,
            states.ARROW: T_TOKEN.ARROW,
            states.ERROR: T_TOKEN.ERROR,
        }

        # Tokens of the most recently processed line, oldest first.
        self.__line = deque()

        machine = StateMachine()
        machine.setStartState(states.START)
        machine.setErrorState(states.ERROR)

        # A token starts with its sigil; spaces between tokens are skipped.
        machine.addTransition(states.START, "$", states.NON_TERMINAL)
        machine.addTransition(states.START, "/", states.TERMINAL)
        machine.addTransition(states.START, "-", states.ARROW)
        machine.addTransition(states.START, " ", states.START)

        # Symbol names consist of ASCII letters and digits.
        for char in _IDENT_CHARS:
            machine.addTransition(states.NON_TERMINAL, char, states.NON_TERMINAL)
            machine.addTransition(states.TERMINAL, char, states.TERMINAL)

        # A terminal may directly follow a non-terminal and vice versa.
        machine.addTransition(states.NON_TERMINAL, "/", states.TERMINAL)
        machine.addTransition(states.NON_TERMINAL, " ", states.START)
        machine.addTransition(states.TERMINAL, "$", states.NON_TERMINAL)
        machine.addTransition(states.TERMINAL, " ", states.START)

        # NOTE: the table accepts "-" followed by any number of ">" as an arrow.
        machine.addTransition(states.ARROW, ">", states.ARROW)
        machine.addTransition(states.ARROW, " ", states.START)

        self.__stateMachine = machine

    def __emit(self, state, lexeme):
        """Append the token for ``state`` to the queue, dropping its sigil."""
        tToken = self.__T_TOKEN_STATE_MAP.get(state)

        if tToken == T_TOKEN.TERMINAL or tToken == T_TOKEN.NON_TERMINAL:
            lexeme = lexeme[1:]  # strip the leading "$" or "/"

        self.__line.append(Token(tToken, lexeme))

    def processLine(self, line):
        """Tokenize ``line``, replacing the tokens of any previous line.

        Once a character without a transition is met, the machine stays in
        its error state and the rest of the line produces no tokens. Tokens
        recognised before that point are kept.
        """
        states = self.__STATE
        machine = self.__stateMachine

        machine.reset()
        prevState = machine.getCurrentState()
        lexemeBuffer = ""

        # Clear in place: callers may still hold the deque from getParsedLine().
        self.__line.clear()

        for char in line:
            currState = machine.processChar(char)

            # Leaving a token state means the buffered lexeme is complete.
            if currState != prevState and prevState != states.START:
                self.__emit(prevState, lexemeBuffer)
                lexemeBuffer = ""

            if char != " ":
                lexemeBuffer += char

            prevState = currState

        # Flush a token that runs up to the end of the input. Without this
        # the last token of a line lacking a trailing space/newline is lost.
        # Error residue is not flushed, matching the in-loop behaviour.
        if prevState != states.START and prevState != states.ERROR:
            self.__emit(prevState, lexemeBuffer)

    def nextToken(self):
        """Remove and return the oldest token; raises IndexError when empty."""
        return self.__line.popleft()

    def hasTokensLeft(self):
        """Return True while tokens of the current line remain."""
        return bool(self.__line)

    def getParsedLine(self):
        """Return the internal token deque of the current line (not a copy)."""
        return self.__line


# --------------------------------------------------------------------------
# src/grammarParser.py
# --------------------------------------------------------------------------

class GrammarParser:
    """Collect grammar rules, grouped by their left-side non-terminal."""

    def __init__(self):
        self.__lxr = GrammarLexer()
        # Maps non-terminal name -> list of alternatives (each a list of Token).
        self.__grammar = {}

    def parse(self, grammarFile):
        """Read rules from ``grammarFile`` and add them to the grammar.

        ``grammarFile`` is iterated line by line, so an open text file or any
        other iterable of strings works. Lines that do not have the shape
        ``non-terminal, arrow, at least one more token`` are skipped.
        """
        for line in grammarFile:
            self.__lxr.processLine(line)
            tokenLine = self.__lxr.getParsedLine()

            if not self.__lineIsValid(tokenLine):
                continue

            leftSide = tokenLine.popleft().getLexeme()
            tokenLine.popleft()  # discard the arrow

            rightSide = list(tokenLine)
            tokenLine.clear()

            self.__grammar.setdefault(leftSide, []).append(rightSide)

        # Debug dump kept from the original implementation.
        print(self.__grammar)

    def __lineIsValid(self, tokenLine):
        """Return True when ``tokenLine`` starts NON_TERMINAL, ARROW and has a body."""
        if len(tokenLine) < 3:
            return False

        if tokenLine[0].getClass() != T_TOKEN.NON_TERMINAL:
            return False

        return tokenLine[1].getClass() == T_TOKEN.ARROW

    def printGrammar(self):
        """Print one line per non-terminal with its alternatives."""
        for left, right in self.__grammar.items():
            self.__printRow(left, right)

    def __printRow(self, left, right):
        """Print ``left -> alt | alt | ...``; tokens render as ``CLASS(lexeme)``."""
        alternatives = (
            "".join(
                str(token.getClass()) + "(" + token.getLexeme() + ")"
                for token in alternative
            )
            for alternative in right
        )
        print(left + " -> " + " | ".join(alternatives))


# --------------------------------------------------------------------------
# src/testLexer.py / src/testParser.py
# --------------------------------------------------------------------------

def lexerDemo():
    """Prompt for one grammar rule and print the tokens found in it."""
    lxr = GrammarLexer()

    inp = input("please enter a grammar rule: ")
    lxr.processLine(inp + " ")

    while lxr.hasTokensLeft():
        token = lxr.nextToken()
        print(token.getClass(), ":", token.getLexeme())


def parserDemo():
    """Prompt for a grammar file path, parse the file and print the grammar."""
    prs = GrammarParser()

    inp = input("grammar file: ")

    # The context manager closes the file even if parsing raises.
    with open(inp, "r") as grammarFile:
        prs.parse(grammarFile)

    prs.printGrammar()


if __name__ == "__main__":
    # "parser" selects the parser demo; anything else runs the lexer demo.
    if len(sys.argv) > 1 and sys.argv[1] == "parser":
        parserDemo()
    else:
        lexerDemo()