import json

# Load the recipe once, at import time; the functions below are meant to
# operate on this module-level ``recipes`` object.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying
# on the platform default.
with open('recipes/vegancupcakes.json', 'r', encoding='utf-8') as f:
    recipes = json.load(f)


def separateUnits():
    """Add a space before and after every unit if there isn't one already.

    For example, converts "10g" to "10 g".

    Not implemented yet: the function currently only walks
    ``recipes['Ingredients']`` and changes nothing.
    """
    # Parse the JSON for each Unit in Ingredients.
    for ingredient in recipes['Ingredients']:
        # TODO: a unit is any value of key Unit or Alternative in units.json.
        # If the unit is already surrounded by spaces, leave it alone;
        # otherwise add spaces where necessary.
        pass


def replaceA():
    """Replace the article "a" with the quantifier 1.

    Not implemented yet: calling this function currently does nothing.
    """
    # TODO: substitute 1 for "a".


def disambiguateUnits():
    """Figure out from the context what a unit shorthand refers to.

    For example, "bake for 30 minutes at 180 c" refers to Celsius, not cups.
    Conflicts should be rare and obvious, so context-specific knowledge can
    be hard-coded into the processor.

    Not implemented yet: calling this function currently does nothing.
    """
    # TODO: implement the context-based disambiguation described above.


def addMissingUnits():
    """Guess units where they are not specified.

    For example, "bake for 30 minutes at 180". The success rate might be low,
    but these occurrences are rare and manual intervention is acceptable.
    Use heuristics such as "bake" referring to temperatures within the baking
    range (say 160 to 250). Defaults to manual intervention unless confidence
    is very high.

    Not implemented yet: calling this function currently does nothing.
    """
    # TODO: implement the heuristics described above.


def separateTokens():
    """Replace certain characters with a space to ease tokenization.

    Characters to be replaced by " ":
        "-"

    Not implemented yet: calling this function currently does nothing.
    """
    # TODO: replace the characters listed above with " ".