Commit 3857d470 authored by Esteban Prince, Liam (UG - Computer Science)

Update lexer.py

parent 2f729636
--- a/lexer.py
+++ b/lexer.py
 import json
-def lex(raw):
-    tokens = {}
-    splitted = raw.split()
-    types = ['ingredients',
-             'units',
-             'utensils',
-             'prepositions',
-             'articles',
-             'conjunctions',
-             'adjectives']
-    with open('database/ingredients.json', 'r') as f:
-        ingredients = json.load(f)
-    with open('database/units.json', 'r') as f:
-        units = json.load(f)
-    while len(splitted):
-        currentToken = splitted[0]
-        if currentToken in ingredients:
-            tokenType = "Ingredient"
-        elif currentToken in units:
-            tokenType = "Unit"
-        elif currentToken.isdigit():
-            tokenType = "Digit"
-        else:
-            tokenType = "Unknown"
-        tokens[currentToken] = tokenType
-        splitted = splitted[1:]
-    print(tokens)
-
-lex("Peel 300 g of potatoes and then cut into strips")
+# Lexer
+# Converts the input stream into a series of tokens separated by whitespace
+# Returns a JSON map classifying each token
+def lex(tokenStream):
+    tokens = {}
+    types = ['Ingredient',
+             'Unit',
+             'Utensil',
+             'Preposition',
+             'Article',
+             'Conjunction',
+             'Adjective',
+             'Number',
+             'Amount']
+    # open each json file as a string
+    # need to read each file as an array not a string
+    jsonFiles = {tokenType: open('database/{}.json'.format(tokenType), 'r') for tokenType in types}
+    with open('database/Number.json', 'r') as f:
+        jsonFile = f.read()
+    for currentToken in tokenStream.split():
+        if currentToken in jsonFiles.items():
+            tokens[currentToken] = jsonFile
+        else:
+            tokens[currentToken] = "Unknown"
+    return tokens
+
+print(lex("Slice 3 g of Potato and then cut into strips"))
\ No newline at end of file
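
Note that the updated lexer still leaves its database lookups unfinished, as the TODO comment in the diff acknowledges: jsonFiles maps each token type to an open file object rather than a word list, and the test "currentToken in jsonFiles.items()" compares the token against (type, file) pairs, so it can never match. The following is a minimal sketch of how the classification could work, assuming each database/<Type>.json file holds a JSON array of words (the file names are taken from the diff; their contents are an assumption):

import json

# Token categories, matching the database file names used in the commit
types = ['Ingredient', 'Unit', 'Utensil', 'Preposition',
         'Article', 'Conjunction', 'Adjective', 'Number', 'Amount']

def lex(tokenStream):
    # Load each word list once as a Python list (assumes each
    # database/<Type>.json contains a JSON array such as ["g", "kg", "ml"])
    wordLists = {}
    for tokenType in types:
        with open('database/{}.json'.format(tokenType), 'r') as f:
            wordLists[tokenType] = json.load(f)

    tokens = {}
    for currentToken in tokenStream.split():
        for tokenType, words in wordLists.items():
            if currentToken in words:
                tokens[currentToken] = tokenType
                break
        else:
            # Bare digits can be classified without a lookup table
            tokens[currentToken] = 'Number' if currentToken.isdigit() else 'Unknown'
    return tokens

print(lex("Slice 3 g of Potato and then cut into strips"))

Loading the arrays with json.load keeps each lookup a plain membership test per category, and the with blocks close the files instead of leaving handles open as the dictionary comprehension in the commit does.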